        'Mpath': Mpath_hdfs,
        'wPath': wPath_hdfs,
        'Npath': Npath_hdfs,
        'nodes': nodes,
        'savestub': gb,
        'tableStub': '{}_{}'.format(gb, mattype_m)}
    cmd_params_disk['opType'] = op
    cmd_params_hdfs['opType'] = op
    args_disk = cmd_args.format(**cmd_params_disk)
    args_hdfs = cmd_args.format(**cmd_params_hdfs)

    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMatrixOps',
                        sbt_dir='./systemml',
                        cmd_args=args_hdfs)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMatrixOps',
                        sbt_dir='./mllib',
                        cmd_args=args_hdfs)
    if 'MADLIB' in systems:
        utils.run_python(program='madlib_matrix_ops.py', cmd_args=args_disk)
    if 'R' in systems:
        utils.run_pbdR(program='R_matrix_ops.R', cmd_args=args_disk)
    if 'SCIDB' in systems:
        utils.run_python(program='scidb_matrix_ops.py', cmd_args=args_disk)
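# For context, a minimal sketch of what a wrapper like run_spark could look
# like. This is an assumption for illustration only: the real helper lives in
# make_utils, and the jar layout, flag set, and function name below are
# hypothetical, not the actual implementation.
import glob
import subprocess

def run_spark_sketch(program, sbt_dir, cmd_args='', driver_memory='8G'):
    # locate the assembly jar built by `sbt assembly` under the project dir
    jar = glob.glob('{}/target/scala-*/*assembly*.jar'.format(sbt_dir))[0]
    cmd = ['spark-submit',
           '--class', program,              # e.g. 'SystemMLMatrixOps'
           '--driver-memory', driver_memory,
           jar] + cmd_args.split()
    subprocess.check_call(cmd)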
if project_root is None:
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

externals = {'lib': '/lib',
             'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)
sys.path.append('../external/lib/python')

import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

utils.run_python(program='get_data.py')
os.putenv('SAVE_STUB', '_1')
utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='/scratch/day_1.gz true')

# stop logging
end_make_logging()
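# For reference, with BENCHMARK_PROJECT_ROOT=/home/ubuntu/benchmark (a
# hypothetical path), the symlink loop above creates:
#   ../external/lib       -> /home/ubuntu/benchmark/lib
#   ../external/disk_data -> /home/ubuntu/benchmark/tests/SimpleMatrixOps (Disk Data)/output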
feature_names = 'dense_features_scaled' if typ == 'dense' else 'features'
for op in ops:
    args_disk = cmd_arg_params.format(opType=op,
                                      inputPath=path_disk,
                                      nodes=nodes,
                                      featureNames='ignored',
                                      stub=stub)
    args_hdfs = cmd_arg_params.format(opType=op,
                                      inputPath=path_hdfs,
                                      nodes=nodes,
                                      featureNames=feature_names,
                                      stub=stub)
    # clear any stale SystemML scratch data left over from a previous run
    os.system('rm -rf /tmp/systemml')

    if 'TF' in systems:
        utils.run_python(program='tf_algs.py', cmd_args=args_disk)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        cmd_args=args_hdfs)
    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='64G',
                        cmd_args=args_hdfs)
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='32G',
                        cmd_args=args_hdfs + ' execSpark=true')
    if 'MADLIB' in systems:
        if typ == 'dense':
# start logging
start_make_logging()

test_type = args.test_type
nodes = args.nodes
sparsity = args.sparsity
systems = args.systems
op_types = args.operators
sparse_gb = args.sparse_gb

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

if test_type == 'scale_nodes':
    utils.run_python(program='node_scaling_tests.py',
                     cmd_args='{} "{}" "{}" "{}" {}'.format(
                         nodes, sparsity, systems, op_types, sparse_gb))
elif test_type == 'scale_mat':
    utils.run_python(program='msize_scaling_tests.py',
                     cmd_args='{} "{}" "{}" "{}" {}'.format(
                         nodes, sparsity, systems, op_types, sparse_gb))
else:
    raise StandardError('TEST_TYPE must be one of: "scale_nodes", "scale_mat"')

remove_dir('scratch_space')

# stop logging
end_make_logging()
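# Illustration of the quoting convention above (hypothetical values): with
# nodes=8, sparsity='0.01 0.1', systems='SYSTEMML MLLIB', op_types='TRANS',
# and sparse_gb=16, node_scaling_tests.py is invoked with the argument string
#   8 "0.01 0.1" "SYSTEMML MLLIB" "TRANS" 16
# so each space-delimited list survives the shell as a single argv entry.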
# create symlinks to external resources
project_root = os.getenv('BENCHMARK_PROJECT_ROOT')
if project_root is None:
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

externals = {'lib': '/lib',
             'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)
sys.path.append('../external/lib/python')

import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='_1 true')
utils.run_python(program='postprocess.py',
                 cmd_args='--sparse False --stub _1')
utils.run_python(program='build_tables.py', cmd_args='_1')

# stop logging
end_make_logging()
                    type=str, default=systems,
                    help='Space delimited list of systems to compare. '
                         'May be any of "{}"'.format(systems))
args = parser.parse_args()

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

if args.test_type == 'criteo':
    utils.run_python(program='run_criteo_tests.py',
                     cmd_args='{} {} "{}" "{}"'.format(
                         args.stub, args.nodes, args.systems, args.algorithms))
if args.test_type == 'scale':
    utils.run_python(program='run_scale_tests.py',
                     cmd_args='{} {} "{}" "{}" "{}" {}'.format(
                         args.stub, args.nodes, args.algorithms,
                         args.systems, args.sparsity, args.sparse_gb))

remove_dir('scratch_space')

# stop logging
end_make_logging()
        ytable_name = 'adclick_y_array{}'
    else:
        ytable_name = 'adclick_y_mat{}'

    argv = {'stub': stub, 'nodes': nodes, 'op': alg, 'gb': gb}
    cmd_args_R = args_R.format(**argv)
    cmd_args_madlib = args_madlib.format(**argv)
    cmd_args_hdfs = args_hdfs.format(**argv)

    if 'R' in systems:
        utils.run_pbdR(program='ml_algs.R', cmd_args=cmd_args_R)
    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MADLIB' in systems:
        utils.run_python(program='madlib_algs.py', cmd_args=cmd_args_madlib)
    if 'SCIDB' in systems:
        utils.run_python(program='scidb_algs.py', cmd_args=cmd_args_madlib)
# start logging
start_make_logging()

nodes = args.nodes
matsize = args.matsize
systems = args.systems
test_type = args.test_type
op_types = args.operators

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

if test_type == 'scale_nodes':
    utils.run_python(program='node_scaling_tests.py',
                     cmd_args='{} "{}" "{}" "{}"'.format(
                         nodes, matsize, systems, op_types))
elif test_type == 'scale_mat':
    utils.run_python(program='msize_scaling_tests.py',
                     cmd_args='{} "{}" "{}" "{}"'.format(
                         nodes, matsize, systems, op_types))
else:
    raise StandardError('TEST_TYPE must be one of: "scale_nodes", "scale_mat"')

remove_dir('scratch_space')

# stop logging
end_make_logging()
# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

cmd_args_template = 'opType={} mattype={} nrows="{}"'
mattype = 'tall'
if test_type == 'matrix-ops':
    for op in op_types:
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10'
        if 'NUMPY' in systems:
            utils.run_python(program='np_matrix_ops.py', cmd_args=cmd_args)
        if 'TF' in systems:
            utils.run_python(program='tf_matrix_ops.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='R_matrix_ops.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMatrixOps',
                            sbt_dir='./systemml',
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='MLLibMatrixOps',
                            sbt_dir='./mllib',
                            cmd_args=cmd_args)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_matrix_ops.py', cmd_args=cmd_args)
        if 'SCIDB' in systems:
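# For illustration (hypothetical values): with op='TRANS' and
# nrows='2500000 5000000', the loop above passes each system the string
#   opType=TRANS mattype=tall nrows="2500000 5000000" fixedAxis=100 step=10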
            'nodes={nodes} opType={op}')

for alg in algorithms:
    if alg == 'logit':
        ytable_name = 'adclick_y_array{}'
    else:
        ytable_name = 'adclick_y_mat{}'

    argv = {'stub': stub,
            'nodes': nodes,
            'op': alg,
            'ytable': ytable_name.format(stub)}
    cmd_args_R = args_R.format(**argv)
    cmd_args_madlib = args_madlib.format(**argv)
    cmd_args_hdfs = args_hdfs.format(**argv)

    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        driver_memory='32G',
                        cmd_args=cmd_args_hdfs)
    if 'MADLIB' in systems:
        utils.run_python(program='madlib_bigmat_algs.py',
                         cmd_args=cmd_args_madlib)
# utils.run_sbt('./systemml', makelog=makelog)
# utils.run_sbt('./mllib', makelog=makelog)

cmd_args_template = 'opType={} mattype={} nrow="{}" ncol=100'
mattype = 'tall'
nproc = [1, 2, 4, 8, 16, 24]
for op in op_types:
    for num_proc in nproc:
        # This will force the process to execute
        # only on a subset of processors
        utils.set_nproc(num_proc)
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10 nproc={}'.format(num_proc)
        if 'NUMPY' in systems:
            utils.run_python(program='np_algs.py', cmd_args=cmd_args)
        if ('TF' in systems) and (op != 'logit'):
            utils.run_python(program='tf_algorithms.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='ml_algs.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMLAlgorithms',
                            sbt_dir='./systemml',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='SparkMLAlgorithms',
                            sbt_dir='./mllib',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)

# restore the full processor set
utils.set_nproc(999)
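# A minimal sketch of one way set_nproc could work; this is an assumption for
# illustration, not the make_utils implementation. It shrinks the CPU affinity
# mask of the current process so every child launched afterwards inherits the
# restricted processor set (Linux-only; relies on the taskset utility):
import multiprocessing
import os
import subprocess

def set_nproc_sketch(n):
    # clamp so that set_nproc_sketch(999) restores the full mask
    n = min(n, multiprocessing.cpu_count())
    subprocess.check_call(
        ['taskset', '-cp', '0-{}'.format(n - 1), str(os.getpid())])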