Example #1
0
                   'Mpath'   : Mpath_hdfs,
                   'wPath'   : wPath_hdfs,
                   'Npath'   : Npath_hdfs,
                   'nodes'   : nodes,
                   'savestub': gb,
                   'tableStub' : '{}_{}'.format(gb, mattype_m)}

        # Inject the current operator into both parameter sets, then render
        # the shared command template once per storage backend.
        cmd_params_disk['opType'] = op
        cmd_params_hdfs['opType'] = op
        args_disk = cmd_args.format(**cmd_params_disk)
        args_hdfs = cmd_args.format(**cmd_params_hdfs)

        # Dispatch to each system under test: Spark-based systems (SystemML,
        # MLlib) receive the HDFS argument string, the rest read local disk.
        if 'SYSTEMML' in systems:
          utils.run_spark(program = 'SystemMLMatrixOps',
                          sbt_dir = './systemml',
                          cmd_args = args_hdfs)
        if 'MLLIB' in systems:
          utils.run_spark(program = 'SparkMatrixOps',
                          sbt_dir = './mllib',
                          cmd_args = args_hdfs)
        if 'MADLIB' in systems:
          utils.run_python(program = 'madlib_matrix_ops.py',
                           cmd_args = args_disk)
        if 'R' in systems:
          utils.run_pbdR(program = 'R_matrix_ops.R',
                         cmd_args = args_disk)

        if 'SCIDB' in systems:
          utils.run_python(program = 'scidb_matrix_ops.py',
                           cmd_args = args_disk)
Example #2
0
# Link external resources into ../external so the imports and data paths
# below resolve; requires BENCHMARK_PROJECT_ROOT to be set.
if project_root is None:
    # Typo fixed ('Pease' -> 'Please'). RuntimeError is py2/py3 compatible:
    # it subclasses StandardError on py2, and StandardError no longer exists
    # on py3.
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise RuntimeError(msg)

externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)

sys.path.append('../external/lib/python')
import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

utils.run_python(program='get_data.py')
# Assign through os.environ (not os.putenv): putenv bypasses the os.environ
# mapping, so later reads of os.environ would not see the value. Child
# processes inherit it either way.
os.environ['SAVE_STUB'] = '_1'
utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='/scratch/day_1.gz true')

# stop logging
end_make_logging()
Example #3
0
# Column selection depends on density: dense inputs use the pre-scaled
# feature column, sparse inputs the raw one.
feature_names = 'dense_features_scaled' if typ == 'dense' else 'features'

for op in ops:
    # Render the shared argument template once per storage backend; the
    # disk variant passes a placeholder featureNames value.
    args_disk = cmd_arg_params.format(opType=op,
                                      inputPath=path_disk,
                                      nodes=nodes,
                                      featureNames='ignored',
                                      stub=stub)
    args_hdfs = cmd_arg_params.format(opType=op,
                                      inputPath=path_hdfs,
                                      nodes=nodes,
                                      featureNames=feature_names,
                                      stub=stub)
    # Remove any leftover /tmp/systemml state between runs.
    os.system('rm -rf /tmp/systemml')
    if 'TF' in systems:
        utils.run_python(program='tf_algs.py', cmd_args=args_disk)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        cmd_args=args_hdfs)
    if 'SYSTEMML' in systems:
        # SystemML runs twice: once with a 64G driver, then with a 32G
        # driver and execSpark=true appended to the arguments.
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='64G',
                        cmd_args=args_hdfs)
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='32G',
                        cmd_args=args_hdfs + ' execSpark=true')
    if 'MADLIB' in systems:
        if typ == 'dense':
Example #4
0
# start logging
start_make_logging()

# Unpack CLI arguments (parsed earlier in this script into `args`).
test_type = args.test_type
nodes = args.nodes
sparsity = args.sparsity
systems = args.systems
op_types = args.operators
sparse_gb = args.sparse_gb

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

# Both test drivers take the identical positional argument string, so build
# it once; the quotes keep space-delimited lists as single CLI arguments.
test_args = '{} "{}" "{}" "{}" {}'.format(
    nodes, sparsity, systems, op_types, sparse_gb)
if test_type == 'scale_nodes':
    utils.run_python(program='node_scaling_tests.py', cmd_args=test_args)
elif test_type == 'scale_mat':
    utils.run_python(program='msize_scaling_tests.py', cmd_args=test_args)
else:
    # RuntimeError is py2/py3 compatible: it subclasses StandardError on
    # py2, and StandardError no longer exists on py3.
    raise RuntimeError('TEST_TYPE must be one of: "scale_nodes", "scale_mat"')

remove_dir('scratch_space')

# stop logging
end_make_logging()
Example #5
0
# create symlinks to external resources
project_root = os.getenv('BENCHMARK_PROJECT_ROOT')
if project_root is None:
    # Typo fixed ('Pease' -> 'Please'). RuntimeError is py2/py3 compatible:
    # it subclasses StandardError on py2, and StandardError no longer exists
    # on py3.
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise RuntimeError(msg)

externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)

sys.path.append('../external/lib/python')
import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

# Preclean the raw data, then postprocess and load the result tables.
utils.run_spark(program='SparkPreclean', sbt_dir='./spark', cmd_args='_1 true')
utils.run_python(program='postprocess.py', cmd_args='--sparse False --stub _1')
utils.run_python(program='build_tables.py', cmd_args='_1')

# stop logging
end_make_logging()
Example #6
0
    type=str,
    default=systems,
    help='Space delimited list of systems to compare. May be any of "{}"'.
    format(systems))

args = parser.parse_args()

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

# Select the test driver by test_type. The quoted placeholders keep
# space-delimited list arguments intact as single CLI arguments.
# NOTE(review): an unrecognized test_type falls through silently here
# (no error is raised), unlike sibling scripts that raise in an else.
if args.test_type == 'criteo':
    utils.run_python(program='run_criteo_tests.py',
                     cmd_args='{} {} "{}" "{}"'.format(args.stub, args.nodes,
                                                       args.systems,
                                                       args.algorithms))
if args.test_type == 'scale':
    utils.run_python(program='run_scale_tests.py',
                     cmd_args='{} {} "{}" "{}" "{}" {}'.format(
                         args.stub, args.nodes, args.algorithms, args.systems,
                         args.sparsity, args.sparse_gb))

remove_dir('scratch_space')

# stop logging
end_make_logging()
Example #7
0
            ytable_name = 'adclick_y_array{}'
        else:
            ytable_name = 'adclick_y_mat{}'
        # Values interpolated into each system's argument template.
        argv = {'stub': stub,
                'nodes': nodes,
                'op': alg,
                'gb': gb}
        cmd_args_R = args_R.format(**argv)
        cmd_args_madlib = args_madlib.format(**argv)
        cmd_args_hdfs = args_hdfs.format(**argv)

        # Dispatch to each system selected for this run.
        if 'R' in systems:
            utils.run_pbdR(program='ml_algs.R',
                           cmd_args=cmd_args_R)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMLAlgorithms',
                            sbt_dir='./systemml',
                            driver_memory='32G',
                            cmd_args=cmd_args_hdfs)
        if 'MLLIB' in systems:
            utils.run_spark(program='SparkMLAlgorithms',
                            sbt_dir='./mllib',
                            driver_memory='32G',
                            cmd_args=cmd_args_hdfs)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_algs.py', 
                             cmd_args=cmd_args_madlib)
        if 'SCIDB' in systems:
            # NOTE(review): SciDB reuses the MADlib argument string.
            utils.run_python(program='scidb_algs.py', 
                             cmd_args=cmd_args_madlib)
Example #8
0
# start logging
start_make_logging()

# Unpack CLI arguments (parsed earlier in this script into `args`).
nodes = args.nodes
matsize = args.matsize
systems = args.systems
test_type = args.test_type
op_types = args.operators

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

# Both test drivers take the identical positional argument string, so build
# it once; the quotes keep space-delimited lists as single CLI arguments.
test_args = '{} "{}" "{}" "{}"'.format(nodes, matsize, systems, op_types)
if test_type == 'scale_nodes':
    utils.run_python(program='node_scaling_tests.py', cmd_args=test_args)
elif test_type == 'scale_mat':
    utils.run_python(program='msize_scaling_tests.py', cmd_args=test_args)
else:
    # RuntimeError is py2/py3 compatible: it subclasses StandardError on
    # py2, and StandardError no longer exists on py3.
    raise RuntimeError('TEST_TYPE must be one of: "scale_nodes", "scale_mat"')

remove_dir('scratch_space')

# stop logging
end_make_logging()
Example #9
0
# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

# Positional template: opType, mattype, nrows.
cmd_args_template = 'opType={} mattype={} nrows="{}"'
mattype = 'tall'
if test_type == 'matrix-ops':
    for op in op_types:
        # Build the argument string shared by every system for this op.
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10'

        if 'NUMPY' in systems:
            utils.run_python(program='np_matrix_ops.py', cmd_args=cmd_args)
        if 'TF' in systems:
            utils.run_python(program='tf_matrix_ops.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='R_matrix_ops.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMatrixOps',
                            sbt_dir='./systemml',
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='MLLibMatrixOps',
                            sbt_dir='./mllib',
                            cmd_args=cmd_args)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_matrix_ops.py', cmd_args=cmd_args)
        if 'SCIDB' in systems:
Example #10
0
             'nodes={nodes} opType={op}')

for alg in algorithms:
    # logit reads the y-vector table; every other algorithm the matrix form.
    ytable_name = 'adclick_y_array{}' if alg == 'logit' else 'adclick_y_mat{}'
    argv = {
        'stub': stub,
        'nodes': nodes,
        'op': alg,
        'ytable': ytable_name.format(stub)
    }
    # Render each system's argument template with the shared values.
    cmd_args_R = args_R.format(**argv)
    cmd_args_madlib = args_madlib.format(**argv)
    cmd_args_hdfs = args_hdfs.format(**argv)

    # Run whichever systems were selected for this benchmark pass.
    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MADLIB' in systems:
        utils.run_python(program='madlib_bigmat_algs.py',
                         cmd_args=cmd_args_madlib)
Example #11
0
# Compilation steps disabled here; kept for reference:
# utils.run_sbt('./systemml', makelog=makelog)
# utils.run_sbt('./mllib', makelog=makelog)

mattype = 'tall'
nproc = [1, 2, 4, 8, 16, 24]
# Positional template: opType, mattype, nrow; ncol is fixed at 100.
cmd_args_template = 'opType={} mattype={} nrow="{}" ncol=100'
for op in op_types:
    for num_proc in nproc:
        # Restrict execution to a subset of processors for this run.
        utils.set_nproc(num_proc)
        args = (op, mattype, nrows)
        cmd_args = (cmd_args_template.format(*args)
                    + ' fixedAxis=100 step=10 nproc={}'.format(num_proc))

        if 'NUMPY' in systems:
            utils.run_python(program='np_algs.py', cmd_args=cmd_args)
        if 'TF' in systems and op != 'logit':
            # TF runs are skipped for the 'logit' operator.
            utils.run_python(program='tf_algorithms.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='ml_algs.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMLAlgorithms',
                            sbt_dir='./systemml',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='SparkMLAlgorithms',
                            sbt_dir='./mllib',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)
        # Lift the processor restriction again after each run.
        utils.set_nproc(999)