def main(kwargs):
    """Time one MADlib operation on an existing table and append a CSV row.

    The operation is handed to ``utils.timeOp`` as a string of Python
    source (``call``) together with ``env``, a dict holding the names that
    string refers to. Whatever ``utils.timeOp`` returns is stored in the
    ``time1`` .. ``time5`` columns and appended to
    ``../output/madlib_adclick_<opType><nodes>.txt``.

    Parameters
    ----------
    kwargs : dict
        ``opType``     -- 'logit', 'gnmf', 'reg' or 'robust'.
        ``nodes``      -- recorded in the output row; ``int(nodes)`` is also
                          used in the output file name.
        ``xTableName`` -- feature table; its shape is read through
                          ``cxn.get_shape``.
        ``yTableName`` -- response table (referenced by 'logit' and 'reg').
        ``savestub``   -- accepted for backward compatibility, not used.

    Raises
    ------
    ValueError
        If ``opType`` is not one of the supported operations. This is
        checked before a database connection is opened.
    """
    opType = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')
    y_table_name = kwargs.get('yTableName')

    print('Evaluating: {}'.format(opType))
    if opType not in ('logit', 'gnmf', 'reg', 'robust'):
        # Previously an unknown opType left `call` unbound and only failed
        # after the connection had been opened.
        raise ValueError('Invalid Operation: {}'.format(opType))

    cxn = SQLCxn(username='******', db='ubuntu', timeout=10000)

    colnames = [
        'nodes', 'rows', 'cols', 'time1', 'time2', 'time3', 'time4', 'time5'
    ]
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    shape = cxn.get_shape(x_table_name)
    env = {
        'x_table_name': x_table_name,
        'y_table_name': y_table_name,
        'do_logit': do_logit,
        'do_reg': do_reg,
        'do_gnmf': do_gnmf,
        'do_robust': do_robust,
        'shape': shape,
        'cxn': cxn
    }

    cleanup = None
    if opType == 'logit':
        call = 'do_logit(x_table_name, y_table_name, shape, cxn)'
    elif opType == 'gnmf':
        call = 'do_gnmf(x_table_name, shape, 10, cxn)'
    elif opType == 'reg':
        call = 'do_reg(x_table_name, y_table_name, cxn)'
        # NOTE(review): this cleanup snippet relies on Python 2's eager
        # map(); on Python 3 the map object would never be consumed.
        cleanup = ("map(lambda x: cxn.execute("
                   "'DROP TABLE {}'.format(x)), ['XTX','XTY','XTX_INV','B'])")
    else:  # 'robust'
        # R2 is rebuilt as a single-column uniform random matrix with one
        # row per row of the feature table.
        cxn.execute('DROP TABLE IF EXISTS R2 CASCADE')
        cxn.execute(
            "SELECT MADLIB.matrix_random({},1,NULL,'uniform','R2',NULL)".
            format(shape[0]))
        cxn.execute('ALTER TABLE R2 RENAME COLUMN ROW TO ROW_NUM')
        call = 'do_robust(x_table_name, cxn)'

    rows = shape[0]
    cols = shape[1]
    path = '../output/madlib_adclick_{}{}.txt'.format(opType, int(nodes))

    # .ix was removed from pandas; use label-based .loc for named columns
    # and positional .iloc for the trailing timing columns.
    runTimes.loc[:, ['nodes', 'rows', 'cols']] = (nodes, rows, cols)
    res = utils.timeOp(call, env, cleanup)
    print(res)
    runTimes.iloc[:, 3:] = res

    # Only the first write to a fresh file gets a header line.
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def doMatrixOp(kwargs):
    """Time a NumPy matrix operation over a list of sizes, one CSV row each.

    For every size in ``nrows`` the operands are allocated with
    ``utils.allocMatrix`` and the operation (a string of Python source) is
    passed to ``utils.timeOp`` together with the namespace dict ``env``.
    Whatever ``utils.timeOp`` returns is stored in ``time1`` .. ``time5``
    and the row is appended to a file under ``../output``.

    Parameters
    ----------
    kwargs : dict
        ``opType``    -- 'TRANS', 'NORM', 'GMM', 'MVM', 'TSM', 'ADD' or 'SM'.
        ``mattype``   -- used only in the output file name when ``nproc``
                         is None.
        ``fixedAxis`` -- size of the axis that does not vary (int-convertible).
        ``nrows``     -- whitespace-separated list of sizes for the varying
                         axis. Repeated or surrounding whitespace is
                         tolerated; an empty list results in no output.
        ``nproc``     -- optional; when given it is written to the 'rows'
                         column instead of the size, and the output goes to
                         ``np_cpu_<opType>_scale.txt``.

    Raises
    ------
    NotImplementedError
        If ``opType`` is not a supported operation.
    """
    opType = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    fixedAxis = int(kwargs.get('fixedAxis'))
    # split() rather than split(' '): the latter yields '' for repeated
    # blanks, which made int() raise.
    nrow_scale = [int(tok) for tok in kwargs['nrows'].split()]
    nproc = kwargs.get('nproc')

    # Source strings handed to utils.timeOp, keyed by operation name.
    calls = {
        'TRANS': 'M.T',
        'NORM': 'alg.norm(M)',
        'GMM': 'M.dot(N)',
        'MVM': 'M.dot(w)',
        'TSM': 'M.T.dot(M)',
        'ADD': 'M+X',
        'SM': 'np.multiply(10,M)',
    }
    if opType not in calls:
        raise NotImplementedError('Invalid Operation')
    call = calls[opType]

    colnames = ['rows', 'time1', 'time2', 'time3', 'time4', 'time5']
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    if nproc is None:
        fname = 'np_{}_{}.txt'.format(mattype, opType)
    else:
        fname = 'np_cpu_{}_scale.txt'.format(opType)
    path = os.path.join('..', 'output', fname)

    for nr in nrow_scale:
        # For GMM the varying size is passed as the second dimension of M
        # (and the first of N); for everything else it is the first.
        if opType == 'GMM':
            nrow, ncol = fixedAxis, nr
        else:
            nrow, ncol = nr, fixedAxis

        env = {'alg': alg, 'np': np}
        RNG = np.random
        env['M'] = utils.allocMatrix(nrow, ncol, RNG)
        if opType == 'GMM':
            env['N'] = utils.allocMatrix(ncol, nrow, RNG)
        elif opType == 'MVM':
            env['w'] = utils.allocMatrix(ncol, 1, RNG)
        elif opType == 'ADD':
            env['X'] = utils.allocMatrix(nrow, ncol, RNG)

        # .ix was removed from pandas; .loc for the label, .iloc for the
        # positional slice over the timing columns.
        runTimes.loc[:, 'rows'] = nr if nproc is None else nproc
        runTimes.iloc[:, 1:] = utils.timeOp(call, env)

        # Only the first write to a fresh file gets a header line.
        writeHeader = not os.path.exists(path)
        runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def main(kwargs):
    """Time one operation on random data and append a row to a CSV file.

    A random float32 matrix ``X`` of shape (nrow, ncol) and, except for
    'gnmf', a 0/1 int64 column ``y`` are generated. The operation is
    passed to ``utils.timeOp`` as a string of Python source together with
    the namespace dict ``env``. Whatever ``utils.timeOp`` returns is stored
    in ``time1`` .. ``time5`` and appended to ``../output/tf_<opType>.txt``.

    Parameters
    ----------
    kwargs : dict
        ``opType`` -- 'logit', 'reg', 'gnmf' or 'robust' (required).
        ``nrow``, ``ncol``, ``nproc`` -- required, int-convertible.
        ``mattype`` -- accepted for backward compatibility, not used.

    Raises
    ------
    KeyError
        If ``opType``, ``nrow``, ``ncol`` or ``nproc`` is missing.
    ValueError
        If ``opType`` is not a supported operation. ValueError is a
        subclass of Python 2's StandardError, which this function used to
        raise, so existing handlers keep working.
    """
    opType = kwargs['opType']
    nrow = int(kwargs['nrow'])
    ncol = int(kwargs['ncol'])
    nproc = int(kwargs['nproc'])

    # Source strings handed to utils.timeOp, keyed by operation name.
    calls = {
        'logit': 'logit_reg(X,y)',
        'reg': 'reg(X,y)',
        'gnmf': 'gnmf(X, 10)',
        'robust': 'robust_se(X, eps)',
    }
    # Validate before allocating the (possibly large) input matrix.
    if opType not in calls:
        raise ValueError('Invalid Operation')
    call = calls[opType]

    path = '../output/tf_{}.txt'.format(opType)
    colnames = ['nproc', 'time1', 'time2', 'time3', 'time4', 'time5']
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    env = {
        'np': np,
        'tf': tf,
        'logit_reg': logit_reg,
        'reg': reg,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    X = np.random.rand(nrow, ncol).astype(np.float32)
    if opType != 'gnmf':
        # Each entry is 1 where a uniform draw is >= 0.80, else 0.
        y = (np.random.rand(nrow, 1) >= 0.80).astype(np.int64)
    else:
        y = None

    if opType == 'robust':
        # Untimed setup: fit once to derive the `eps` input.
        # NOTE(review): eps is the squared fitted values, not the squared
        # residuals (y - y_hat)**2 -- presumably fine for timing; confirm.
        b = reg(X, y)
        y_hat = X.dot(b)
        env['eps'] = np.power(y_hat, 2).ravel()

    env['X'] = X
    env['y'] = y

    # .ix was removed from pandas; .loc for the label, .iloc for the
    # positional slice over the timing columns.
    runTimes.loc[:, 'nproc'] = nproc
    runTimes.iloc[:, 1:] = utils.timeOp(call, env)

    # Only the first write to a fresh file gets a header line.
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def main(kwargs):
    """Time one SciDB operation and append a row to a per-node-count CSV.

    The dimensions are read from ``xTableName`` via ``get_dims``; the
    operands are then allocated with ``alloc_matrix`` under names derived
    from those dimensions (``X<nrow><ncol>``, ``y<nrow><ncol>`` ...). The
    operation is passed to ``utils.timeOp`` as a string of Python source
    together with the namespace dict ``env``. Whatever ``utils.timeOp``
    returns is stored in ``time1`` .. ``time5`` and appended to
    ``../output/scidb_<opType><nodes>.txt``.

    Parameters
    ----------
    kwargs : dict
        ``opType``     -- 'reg', 'logit', 'gnmf' or 'robust'.
        ``nodes``      -- recorded in the output row and file name.
        ``xTableName`` -- array whose dimensions are looked up.

    Raises
    ------
    ValueError
        If ``opType`` is not a supported operation. This is checked before
        any SciDB call is made.
    """
    op_type = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')

    if op_type not in ('reg', 'logit', 'gnmf', 'robust'):
        # Previously an unknown op left `call` unbound and only failed
        # after the arrays had been allocated.
        raise ValueError('Invalid Operation: {}'.format(op_type))

    nrow, ncol = get_dims(x_table_name)

    path = '../output/scidb_{}{}.txt'.format(op_type, nodes)
    colnames = ['nodes', 'rows', 'cols',
                'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    cxn = scidbpy.connect()
    cxn.iquery("load_library('dense_linear_algebra')")

    env = {
        'cxn': cxn,
        'reg': reg,
        'logit': logit,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol), cxn, overwrite=False)
    alloc_matrix(nrow, 0, 'y{}{}'.format(nrow, ncol),
                 cxn, overwrite=False, binary=True)

    if op_type == 'reg':
        call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
    elif op_type == 'logit':
        alloc_matrix(nrow, 0, 'y{}{}b'.format(nrow, ncol),
                     cxn, overwrite=False, binary=True)
        call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
    elif op_type == 'gnmf':
        call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
    else:  # 'robust'
        alloc_matrix(nrow, 0, 'r2{}{}'.format(nrow, ncol),
                     cxn, overwrite=True, val_name='residuals')
        call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

    run_times.loc[:, ['nodes', 'rows', 'cols']] = (nodes, nrow, ncol)
    # The timing columns are selected by position, so this must be .iloc:
    # .loc rejects an integer slice against string column labels.
    run_times.iloc[:, 3:] = utils.timeOp(call, env)

    # Only the first write to a fresh file gets a header line.
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')
def main(kwargs):
    """Start SciDB via ``init_scidb``, time one operation, append a CSV row.

    ``init_scidb(nproc, debug=True)`` is called to start the instance and
    ``terminate_scidb`` is registered with ``atexit``. Operands are
    allocated with ``alloc_matrix``, and the operation is passed to
    ``utils.timeOp`` as a string of Python source together with the
    namespace dict ``env``. Whatever ``utils.timeOp`` returns is stored in
    ``time1`` .. ``time5`` and appended to ``../output/scidb_<opType>.txt``.

    The process handle and the two streams returned by ``init_scidb`` are
    released even when the benchmark raises.

    Parameters
    ----------
    kwargs : dict
        ``opType`` -- 'reg', 'logit', 'gnmf' or 'robust' (required).
        ``nrow``, ``ncol``, ``nproc`` -- required, int-convertible.
        ``mattype`` -- accepted for backward compatibility, not used.

    Raises
    ------
    KeyError
        If ``opType``, ``nrow``, ``ncol`` or ``nproc`` is missing.
    ValueError
        If ``opType`` is not a supported operation. This is checked before
        SciDB is started.
    """
    op_type = kwargs['opType']
    nrow = int(kwargs['nrow'])
    ncol = int(kwargs['ncol'])
    nproc = int(kwargs['nproc'])

    if op_type not in ('reg', 'logit', 'gnmf', 'robust'):
        # Previously an unknown op left `call` unbound and only failed
        # after SciDB had been started and arrays allocated.
        raise ValueError('Invalid Operation: {}'.format(op_type))

    path = '../output/scidb_{}.txt'.format(op_type)
    colnames = ['nproc', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    atexit.register(terminate_scidb)
    P, stdout, stderr = init_scidb(nproc, debug=True)
    try:
        cxn = scidbpy.connect()
        cxn.iquery("load_library('dense_linear_algebra')")
        print(cxn.iquery("list('instances')", fetch=True))

        env = {
            'cxn': cxn,
            'reg': reg,
            'logit': logit,
            'gnmf': gnmf,
            'robust_se': robust_se
        }

        alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol),
                     cxn, overwrite=False)
        alloc_matrix(nrow, 0, 'y{}{}'.format(nrow, ncol),
                     cxn, overwrite=False, binary=True)

        if op_type == 'reg':
            call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
        elif op_type == 'logit':
            alloc_matrix(nrow, 0, 'y{}{}b'.format(nrow, ncol),
                         cxn, overwrite=False, binary=True)
            call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
        elif op_type == 'gnmf':
            call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
        else:  # 'robust'
            alloc_matrix(nrow, 0, 'r2{}{}'.format(nrow, ncol),
                         cxn, overwrite=True, val_name='residuals')
            call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

        run_times.loc[:, 'nproc'] = nproc
        # The timing columns are selected by position, so this must be
        # .iloc: .loc rejects an integer slice against string labels.
        run_times.iloc[:, 1:] = utils.timeOp(call, env)

        # Only the first write to a fresh file gets a header line.
        write_header = not os.path.exists(path)
        run_times.to_csv(path, index=False, header=write_header, mode='a')
    finally:
        # Release the process handle and its streams on every exit path.
        P.terminate()
        stdout.close()
        stderr.close()
def main(kwargs):
    """Start a database via ``start_gpdb``, time one MADlib op, append a CSV row.

    The feature table ``X<rows><cols>`` and (except for 'gnmf') the
    response table ``y_<rows>`` are created if ``cxn.table_exists`` reports
    them missing. The operation is passed to ``utils.timeOp`` as a string
    of Python source together with the namespace dict ``env``. Whatever
    ``utils.timeOp`` returns is stored in ``time1`` .. ``time5`` and
    appended to ``../output/madlib_tall_<opType>.txt``.

    Parameters
    ----------
    kwargs : dict
        ``opType`` -- 'logit', 'gnmf', 'reg' or 'robust'.
        ``nrow``, ``ncol`` -- int-convertible table dimensions.
        ``nproc``  -- key into ``GPDB_PORT_MAP``; also passed to
                      ``start_gpdb`` / ``stop_gpdb`` and recorded in the
                      output row.
        ``mattype`` -- accepted for backward compatibility, not used.

    Raises
    ------
    ValueError
        If ``opType`` is not a supported operation. This is checked before
        the database is started.
    """
    opType = kwargs.get('opType')
    rows = int(kwargs.get('nrow'))
    cols = int(kwargs.get('ncol'))
    nproc = kwargs.get('nproc')

    print('Evaluating: {}'.format(opType))
    if opType not in ('logit', 'gnmf', 'reg', 'robust'):
        # Previously an unknown opType left `call` unbound and only failed
        # after the database had been started.
        raise ValueError('Invalid Operation: {}'.format(opType))

    port = GPDB_PORT_MAP[nproc]
    cxn = start_gpdb(port, nproc)
    # Register the shutdown hook before running any statement so that a
    # failing statement cannot leave the database running.
    atexit.register(stop_gpdb, nproc, cxn)
    cxn.execute('DROP TABLE IF EXISTS M16_tall')

    shape = (rows, cols)
    x_table_name = 'X{}{}'.format(rows, cols)
    y_table_name = 'y_{}'.format(rows)

    if not cxn.table_exists(x_table_name):
        cxn.randomMatrix(rows, cols, x_table_name)

    # The check used to probe 'y<rows>' (no underscore) although the table
    # is created and used as 'y_<rows>', so it never matched and the CREATE
    # was re-attempted on every run.
    if opType != 'gnmf' and not cxn.table_exists(y_table_name):
        stmt = """
            CREATE TABLE y_{0} AS (
                SELECT ix as row_num, ARRAY[(RANDOM() > 0.80)::INTEGER] AS val
                  FROM GENERATE_SERIES(1,{0}) ix
            ) DISTRIBUTED BY (row_num)
        """.format(rows)
        try:
            cxn.execute(stmt)
        except Exception as err:
            # Still best effort, as before, but no longer silent and no
            # longer swallowing KeyboardInterrupt / SystemExit.
            print('Could not create {}: {}'.format(y_table_name, err))

    colnames = ['nproc', 'time1', 'time2', 'time3', 'time4', 'time5']
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    env = {
        'cxn': cxn,
        'shape': shape,
        'do_logit': do_logit,
        'do_reg': do_reg,
        'do_gnmf': do_gnmf,
        'do_robust': do_robust,
        'x_table_name': x_table_name,
        'y_table_name': y_table_name
    }

    cleanup = None
    if opType == 'logit':
        call = 'do_logit(x_table_name, y_table_name, shape, cxn)'
    elif opType == 'gnmf':
        call = 'do_gnmf(x_table_name, shape, 10, cxn)'
    elif opType == 'reg':
        call = 'do_reg(x_table_name, y_table_name, cxn)'
        # NOTE(review): this cleanup snippet relies on Python 2's eager
        # map(); on Python 3 the map object would never be consumed.
        cleanup = ("map(lambda x: cxn.execute("
                   "'DROP TABLE {}'.format(x)), ['XTX','XTY','XTX_INV','B'])")
    else:  # 'robust'
        # R2 is (re)built through cxn.randomMatrix as a rows x 1 table
        # instead of being derived from a fitted regression.
        cxn.execute('DROP TABLE IF EXISTS R2')
        cxn.randomMatrix(rows, 1, 'R2')
        call = 'do_robust(x_table_name, cxn)'

    path = '../output/madlib_tall_{}.txt'.format(opType)

    # .ix was removed from pandas; .loc for the label, .iloc for the
    # positional slice over the timing columns.
    runTimes.loc[:, 'nproc'] = nproc
    res = utils.timeOp(call, env, cleanup)
    runTimes.iloc[:, 1:] = res

    # Only the first write to a fresh file gets a header line.
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def main(kwargs):
    """Time one MADlib operation on an existing table and append a CSV row.

    The operation is handed to ``utils.timeOp`` as a string of Python
    source (``call``) together with ``env``, a dict holding the names that
    string refers to. Whatever ``utils.timeOp`` returns is stored in the
    ``time1`` .. ``time5`` columns and appended to
    ``../output/madlib_adclick_<opType><nodes>.txt``.

    For 'robust' a regression is run first (untimed) and table R2 is built
    in SQL from the squared difference between the response and Y_HAT.

    Parameters
    ----------
    kwargs : dict
        ``opType``     -- 'logit', 'gnmf', 'reg' or 'robust'. 'pca' is
                          recognised but only prints 'Not Implemented'.
        ``nodes``      -- recorded in the output row; ``int(nodes)`` is also
                          used in the output file name.
        ``xTableName`` -- feature table; its shape is read through
                          ``cxn.get_shape``.
        ``yTableName`` -- response table.
        ``savestub``   -- accepted for backward compatibility, not used.

    Raises
    ------
    ValueError
        If ``opType`` is not one of the names listed above. This is checked
        before a database connection is opened.
    """
    opType = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')
    y_table_name = kwargs.get('yTableName')

    print('Evaluating: {}'.format(opType))
    if opType not in ('logit', 'gnmf', 'reg', 'robust', 'pca'):
        # Previously an unknown opType left `call` unbound and only failed
        # after the connection had been opened.
        raise ValueError('Invalid Operation: {}'.format(opType))

    cxn = SQLCxn(username='******', db='ubuntu', timeout=2000)

    colnames = [
        'nodes', 'rows', 'cols', 'time1', 'time2', 'time3', 'time4', 'time5'
    ]
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    shape = cxn.get_shape(x_table_name)
    env = {
        'x_table_name': x_table_name,
        'y_table_name': y_table_name,
        'do_logit': do_logit,
        'do_reg': do_reg,
        'shape': shape,
        'cxn': cxn
    }

    cleanup = None
    if opType == 'logit':
        call = 'do_logit(x_table_name, y_table_name, shape, cxn)'
    elif opType == 'gnmf':
        # The call string names do_gnmf, so it has to be present in env.
        # Added here rather than in the dict literal so the other
        # operations do not depend on the name being defined.
        env['do_gnmf'] = do_gnmf
        call = 'do_gnmf(x_table_name, shape, 10, cxn)'
    elif opType == 'reg':
        call = 'do_reg(x_table_name, y_table_name, cxn)'
        # NOTE(review): this cleanup snippet relies on Python 2's eager
        # map(); on Python 3 the map object would never be consumed.
        cleanup = ("map(lambda x: cxn.execute("
                   "'DROP TABLE {}'.format(x)), ['XTX','XTY','XTX_INV','B'])")
    elif opType == 'robust':
        # Untimed setup: fit the regression, then materialise R2.
        do_reg(x_table_name, y_table_name, cxn)
        # R2 is dropped first, like Y_HAT, so a repeated run does not fail
        # on CREATE TABLE.
        preproc = """
            DROP TABLE IF EXISTS Y_HAT;
            DROP TABLE IF EXISTS R2;
            SELECT madlib.matrix_mult('{X}',NULL,'B',NULL,'Y_HAT');
            CREATE TABLE R2 AS (
                SELECT {y}.row_num, ARRAY[POW({y}.val[1]-y_hat.val[1],2)] val
                  FROM {y}
                 INNER JOIN y_hat ON {y}.row_num = y_hat.row_num
            ) DISTRIBUTED BY (row_num)
        """.format(X=x_table_name, y=y_table_name)
        cxn.execute(preproc)
        # The call string names do_robust, so it has to be present in env.
        env['do_robust'] = do_robust
        call = 'do_robust(x_table_name, cxn)'
    else:  # 'pca'
        print('Not Implemented')
        return

    rows = shape[0]
    cols = shape[1]
    path = '../output/madlib_adclick_{}{}.txt'.format(opType, int(nodes))

    # .ix was removed from pandas; use label-based .loc for named columns
    # and positional .iloc for the trailing timing columns.
    runTimes.loc[:, ['nodes', 'rows', 'cols']] = (nodes, rows, cols)
    res = utils.timeOp(call, env, cleanup)
    print(res)
    runTimes.iloc[:, 3:] = res

    # Only the first write to a fresh file gets a header line.
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def main(kwargs):
    """Time one MADlib operation on an existing table and append a CSV row.

    The operation is handed to ``utils.timeOp`` as a string of Python
    source (``call``) together with ``env``, a dict holding the names that
    string refers to. Whatever ``utils.timeOp`` returns is stored in the
    ``time1`` .. ``time5`` columns and appended to
    ``../output/madlib_tall_<opType><nodes>.txt``.

    Parameters
    ----------
    kwargs : dict
        ``opType``     -- 'logit', 'gnmf', 'reg' or 'robust'.
        ``nodes``      -- recorded in the output row; ``int(nodes)`` is also
                          used in the output file name.
        ``xTableName`` -- feature table; its shape is read through
                          ``cxn.get_shape``.
        ``yTableName`` -- response table (referenced by 'logit' and 'reg').
        ``savestub``   -- accepted for backward compatibility, not used.

    Raises
    ------
    ValueError
        If ``opType`` is not one of the supported operations. This is
        checked before a database connection is opened.
    """
    opType = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')
    y_table_name = kwargs.get('yTableName')

    print('Evaluating: {}'.format(opType))
    if opType not in ('logit', 'gnmf', 'reg', 'robust'):
        # Previously an unknown opType left `call` unbound and only failed
        # after the connection had been opened.
        raise ValueError('Invalid Operation: {}'.format(opType))

    cxn = SQLCxn(username='******', db='ubuntu', timeout=10000)

    colnames = ['nodes', 'rows', 'cols',
                'time1', 'time2', 'time3', 'time4', 'time5']
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    shape = cxn.get_shape(x_table_name)
    env = {'x_table_name': x_table_name,
           'y_table_name': y_table_name,
           'do_logit': do_logit,
           'do_gnmf': do_gnmf,
           'do_reg': do_reg,
           'do_robust': do_robust,
           'shape': shape,
           'cxn': cxn}

    cleanup = None
    if opType == 'logit':
        call = 'do_logit(x_table_name, y_table_name, shape, cxn)'
    elif opType == 'gnmf':
        call = 'do_gnmf(x_table_name, shape, 10, cxn)'
    elif opType == 'reg':
        call = 'do_reg(x_table_name, y_table_name, cxn)'
        # NOTE(review): this cleanup snippet relies on Python 2's eager
        # map(); on Python 3 the map object would never be consumed.
        cleanup = ("map(lambda x: cxn.execute("
                   "'DROP TABLE {}'.format(x)), ['XTX','XTY','XTX_INV','B'])")
    else:  # 'robust'
        # We can just generate a vector of residuals on the fly
        # rather than computing them explicitly.
        cxn.execute('DROP TABLE IF EXISTS R2')
        cxn.randomMatrix(shape[0], 1, 'R2')
        call = 'do_robust(x_table_name, cxn)'

    rows = shape[0]
    cols = shape[1]
    path = '../output/madlib_tall_{}{}.txt'.format(opType, int(nodes))

    # .ix was removed from pandas; use label-based .loc for named columns
    # and positional .iloc for the trailing timing columns.
    runTimes.loc[:, ['nodes', 'rows', 'cols']] = (nodes, rows, cols)
    res = utils.timeOp(call, env, cleanup)
    runTimes.iloc[:, 3:] = res

    # Only the first write to a fresh file gets a header line.
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')