# Build the HDFS-side parameter set for this operator; the disk-side set
# (cmd_params_disk) is assumed to already exist in the enclosing scope.
cmd_params_hdfs = {
    'mattype': mattype_m,
    'Mpath': Mpath_hdfs,
    'wPath': wPath_hdfs,
    'Npath': Npath_hdfs,
    'nodes': nodes,
    'savestub': gb,
    'tableStub': '{}_{}'.format(gb, mattype_m),
}

# Stamp the current operator onto both parameter sets, then render the
# final command-line argument strings from the shared template.
for param_set in (cmd_params_disk, cmd_params_hdfs):
    param_set['opType'] = op
args_disk = cmd_args.format(**cmd_params_disk)
args_hdfs = cmd_args.format(**cmd_params_hdfs)

# Dispatch the benchmark to each requested system. The Spark-based
# systems read from HDFS; the others read from local disk.
if 'SYSTEMML' in systems:
    utils.run_spark(program='SystemMLMatrixOps',
                    sbt_dir='./systemml',
                    cmd_args=args_hdfs)
if 'MLLIB' in systems:
    utils.run_spark(program='SparkMatrixOps',
                    sbt_dir='./mllib',
                    cmd_args=args_hdfs)
if 'MADLIB' in systems:
    utils.run_python(program='madlib_matrix_ops.py', cmd_args=args_disk)
if 'R' in systems:
    utils.run_pbdR(program='R_matrix_ops.R', cmd_args=args_disk)
if 'SCIDB' in systems:
    utils.run_python(program='scidb_matrix_ops.py', cmd_args=args_disk)
# Fail fast when the benchmark project root is not configured.
if project_root is None:
    # Typo fix in the user-facing message: "Pease" -> "Please".
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

# Symlink names -> paths (relative to the project root) of the external
# resources this test depends on.
externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name, rel_path in externals.items():
    os.symlink(project_root + rel_path, '../external/' + name)
sys.path.append('../external/lib/python')

import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

# Fetch the raw data, then preclean it with Spark. SAVE_STUB tags the
# outputs of this run so later stages can find them.
utils.run_python(program='get_data.py')
os.putenv('SAVE_STUB', '_1')
utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='/scratch/day_1.gz true')

# stop logging
end_make_logging()
# Assemble the disk- and HDFS-side parameter sets for this operator.
common = {
    'mattype': mattype_m,
    'nodes': nodes,
    'savestub': gb,
    'tableStub': '{}_{}'.format(gb, mattype_m),
    'opType': op,
}
cmd_params_disk = dict(common,
                       Mpath=Mpath_disk,
                       wPath=wPath_disk,
                       Npath=Npath_disk)
cmd_params_hdfs = dict(common,
                       Mpath=Mpath_hdfs,
                       wPath=wPath_hdfs,
                       Npath=Npath_hdfs)

# Render the final command-line argument strings from the shared template.
args_disk = cmd_args.format(**cmd_params_disk)
args_hdfs = cmd_args.format(**cmd_params_hdfs)

# Dispatch to each requested system: MLlib reads from HDFS, MADlib and
# pbdR read from local disk.
if 'MLLIB' in systems:
    utils.run_spark(program='SparkDecompositions',
                    sbt_dir='./mllib',
                    cmd_args=args_hdfs)
if 'MADLIB' in systems:
    utils.run_python(program='madlib_matrix_ops.py', cmd_args=args_disk)
if 'R' in systems:
    utils.run_pbdR(program='R_matrix_ops.R', cmd_args=args_disk)
# Render per-system CLI argument strings from the shared template.
# NOTE(review): the disk variant passes featureNames='ignored' — presumably
# only the HDFS/Spark consumers need real feature names; confirm upstream.
args_disk = cmd_arg_params.format(opType=op, inputPath=path_disk,
                                  nodes=nodes, featureNames='ignored',
                                  stub=stub)
args_hdfs = cmd_arg_params.format(opType=op, inputPath=path_hdfs,
                                  nodes=nodes, featureNames=feature_names,
                                  stub=stub)
# Clear SystemML's local scratch directory before the runs.
os.system('rm -rf /tmp/systemml')
if 'TF' in systems:
    utils.run_python(program='tf_algs.py', cmd_args=args_disk)
if 'MLLIB' in systems:
    utils.run_spark(program='SparkMLAlgorithms',
                    sbt_dir='./mllib',
                    cmd_args=args_hdfs)
if 'SYSTEMML' in systems:
    # SystemML is run twice: default config with a 64G driver, then with
    # a 32G driver and forced Spark execution (execSpark=true).
    utils.run_spark(program='SystemMLMLAlgorithms',
                    sbt_dir='./systemml',
                    driver_memory='64G',
                    cmd_args=args_hdfs)
    utils.run_spark(program='SystemMLMLAlgorithms',
                    sbt_dir='./systemml',
                    driver_memory='32G',
                    cmd_args=args_hdfs + ' execSpark=true')
if 'MADLIB' in systems:
    # Dense and sparse data use different MADlib driver scripts.
    if typ == 'dense':
        utils.run_python(program='madlib_algs.py', cmd_args=args_disk)
    else:
        # NOTE(review): this chunk is truncated here — the remaining
        # arguments of this call continue past the visible source.
        utils.run_python(program='madlib_sparse_algs.py',
# create symlinks to external resources
project_root = os.getenv('BENCHMARK_PROJECT_ROOT')
if project_root is None:
    # Typo fix in the user-facing message: "Pease" -> "Please".
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

# Symlink names -> paths (relative to the project root) of the external
# resources this test depends on.
externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name, rel_path in externals.items():
    os.symlink(project_root + rel_path, '../external/' + name)
sys.path.append('../external/lib/python')

import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

# Preclean the "_1" slice, post-process it, and load it into tables.
utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='_1 true')
utils.run_python(program='postprocess.py',
                 cmd_args='--sparse False --stub _1')
utils.run_python(program='build_tables.py', cmd_args='_1')

# stop logging
end_make_logging()
args_madlib = ('mattype=adclick ' 'xTableName=adclick_clean_indepvars_long ' 'yTableName=adclick_clean_y ' 'nodes={nodes} opType={op}') args_hdfs = ('mattype=adclick ' 'Xpath=/scratch/pass.csv ' 'Ypath=/scratch/pass.csv ' 'passPath=/scratch/pass.csv ' 'dataPath=/scratch/adclick_clean{stub}_sparse.parquet ' 'nodes={nodes} opType={op}') for alg in algorithms: argv = {'stub': stub, 'nodes': nodes, 'op': alg} cmd_args_hdfs = args_hdfs.format(**argv) if 'SYSTEMML' in systems: utils.run_spark(program='SystemMLMLAlgorithms', sbt_dir='./systemml', driver_memory='80G', cmd_args=cmd_args_hdfs) if 'MLLIB' in systems: utils.run_spark(program='SparkMLAlgorithms', sbt_dir='./mllib', driver_memory='20G', cmd_args=cmd_args_hdfs) if 'MADLIB' in systems: print 'MADLib Tests Not Implemented for Sparse Criteo' # utils.run_python(program='madlib_bigmat_algs.py', # cmd_args=cmd_args_madlib)
msg = 'Pease set environment variable "BENCHMARK_PROJECT_ROOT"' raise StandardError(msg) externals = {'lib': '/lib'} for name in externals: os.symlink(project_root + externals[name], '../external/' + name) sys.path.append('../external/lib/python') import make_utils as utils import global_params as params import gen_data as data # start logging start_make_logging() # compile makelog = '../../output/make.log' utils.run_sbt('./systemml', makelog=makelog) utils.run_sbt('./mllib', makelog=makelog) #utils.run_pbdR(program='R_pipelines.R') utils.run_spark(program='SparkPipelines', sbt_dir='./mllib', cmd_args='') #utils.run_spark(program='SystemMLPipelines', # sbt_dir='./systemml', # cmd_args='') remove_dir('scratch_space') # stop logging end_make_logging()
# Tall-matrix benchmark driver: for every operator, dispatch to each
# requested system, then (optionally) run a CPU-scaling experiment.
mattype = 'tall'
if test_type == 'matrix-ops':
    for op in op_types:
        # Template consumes (opType, mattype, nrows) positionally.
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10'
        if 'NUMPY' in systems:
            utils.run_python(program='np_matrix_ops.py',
                             cmd_args=cmd_args)
        if 'TF' in systems:
            utils.run_python(program='tf_matrix_ops.py',
                             cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='R_matrix_ops.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMatrixOps',
                            sbt_dir='./systemml',
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='MLLibMatrixOps',
                            sbt_dir='./mllib',
                            cmd_args=cmd_args)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_matrix_ops.py',
                             cmd_args=cmd_args)
        if 'SCIDB' in systems:
            utils.run_python(program='scidb_matrix_ops.py',
                             cmd_args=cmd_args)
if test_type == 'cpu':
    # ops = ['TSM','ADD']
    ops = ['TSM']
    nproc = [1, 2, 4, 8, 16]
    # NOTE(review): the chunk ends at this loop header — the loop body
    # continues past the visible source.
    for num_proc in nproc:
# CPU-scaling experiment: repeat the benchmark while restricting the
# process to a growing subset of processors.
# NOTE(review): nesting below is reconstructed from a collapsed source —
# the placement of the statements after set_nproc(999) relative to the
# loop is assumed; confirm against the original file.
for num_proc in nproc:
    # This will force the process to execute only on a subset of processors
    utils.set_nproc(num_proc)
    args = (op, mattype, nrows)
    cmd_args = cmd_args_template.format(*args)
    cmd_args += ' fixedAxis=100 step=10 nproc={}'.format(num_proc)
    if 'NUMPY' in systems:
        utils.run_python(program='np_algs.py', cmd_args=cmd_args)
    # TF is skipped for the 'logit' op — presumably unsupported there;
    # TODO confirm.
    if ('TF' in systems) and (op != 'logit'):
        utils.run_python(program='tf_algorithms.py', cmd_args=cmd_args)
    if 'R' in systems:
        utils.run_R(program='ml_algs.R', cmd_args=cmd_args)
    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_cores=str(num_proc),
                        cmd_args=cmd_args)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        driver_cores=str(num_proc),
                        cmd_args=cmd_args)
    # Lift the processor restriction before the systems that manage
    # their own parallelism run.
    utils.set_nproc(999)
    if 'MADLIB' in systems:
        utils.run_python(program='madlib_algs.py', cmd_args=cmd_args)
    if 'SCIDB' in systems:
        utils.run_python(program='scidb_algs.py', cmd_args=cmd_args)
remove_dir('scratch_space')

# stop logging