def build_input(arguments):
    print "Creating input vectors (mode=%s)" % arguments.input_mode
    if arguments.input_mode.lower() == "deterministic":
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
            .afl.build(
                "<value: double>[id=0:{},{},{}, time=0:{},{},{}]".format(
                    arguments.patients - 1,
                    arguments.chunk_patients,
                    arguments.overlap_patients,
                    arguments.vector_size - 1,
                    arguments.chunk_vectors,
                    arguments.overlap_vectors,
                ),
                "(double(id+1) / (time+1)) / %f - 1" % (arguments.patients / 2),
            )
            .eval()
            .name
        )
    else:
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
            .random(
                (arguments.patients, arguments.vector_size),
                chunk_size=(arguments.chunk_patients, arguments.chunk_vectors),
                dim_names=["id", "time"],
            )
            .attribute_rename("f0", "value")
            .eval()
            .name
        )
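# Usage sketch (not part of the original script; all values illustrative).
# build_input only reads attributes off the object it is given, so an
# argparse.Namespace is enough to exercise it outside the full driver.
import argparse

example_args = argparse.Namespace(
    scidb_url='http://localhost:8080', input_mode='deterministic',
    patients=100, vector_size=64,
    chunk_patients=10, chunk_vectors=64,
    overlap_patients=0, overlap_vectors=0)
build_input(example_args)
print example_args.input_array  # name of the materialized SciDB array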
def test_reap_called_on_context_manager():
    with connect() as sdb2:
        X = sdb2.random((1, 1))
        name = X.name
        assert X.name in sdb.list_arrays()
    assert name not in sdb.list_arrays()
@pytest.fixture
def scidb_con():
    # FS Init
    if not os.path.exists(fs_base):
        os.makedirs(fs_base)

    con = scidbpy.connect(scidb_url,
                          scidb_auth=('root', 'Paradigm4'),
                          verify=False)
    yield con

    # SciDB Cleanup
    for query in ("drop_user('bar')", "drop_namespace('foo')"):
        try:
            con.iquery(query)
        except requests.exceptions.HTTPError:
            pass

    # FS Cleanup
    try:
        shutil.rmtree(fs_base)
    except PermissionError:
        pass

    # S3 Cleanup
    result = s3_con.list_objects_v2(Bucket=s3_bucket,
                                    Prefix=base_prefix + '/')
    if 'Contents' in result.keys():
        objects = [{'Key': e['Key']} for e in result['Contents']]
        s3_con.delete_objects(Bucket=s3_bucket, Delete={'Objects': objects})
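# A hedged example of a test consuming the fixture above; 'foo' mirrors the
# namespace dropped in the fixture's cleanup, the rest is illustrative and
# assumes the SciDB namespaces library is loaded.
def test_create_namespace(scidb_con):
    scidb_con.iquery("create_namespace('foo')")
    namespaces = scidb_con.iquery("list('namespaces')", fetch=True)
    assert 'foo' in namespaces['name'].values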
def do_matrix_op(kwargs):
    op_type = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    tableStub = kwargs.get('tableStub')
    savestub = kwargs.get('savestub')
    nodes = kwargs.get('nodes')
    outdir = kwargs.get('outdir')
    sr = kwargs.get('sr')

    sr_val = np.float64('0.{}'.format(sr))
    nrow, ncol = 125000000, 100
    path = '../output/scidb_{}_{}{}.txt'.format(mattype, op_type, nodes)

    cxn = scidbpy.connect()
    print cxn.iquery("list('instances')", fetch=True)

    colnames = ['nodes', 'sr', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    M_name = 'M{}'.format(sr)
    if M_name not in dir(cxn.arrays):
        alloc_matrix(nrow, ncol, M_name, cxn, density=sr_val)
    if op_type == 'GMM':
        # Note: the original checked the literal string 'M{}W', so the
        # allocation always ran; the format call was clearly intended.
        if 'M{}W'.format(sr) not in dir(cxn.arrays):
            alloc_matrix(ncol, nrow, 'M{}W'.format(sr), cxn, density=sr_val)
        if 'N{}'.format(sr) not in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, 'N{}'.format(sr), cxn, density=sr_val)
        N_name = 'N{}'.format(sr)
        M_name = 'M{}W'.format(sr)
    if op_type == 'ADD':
        if 'N{}'.format(sr) not in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, 'N{}'.format(sr), cxn, density=sr_val)
        N_name = 'N{}'.format(sr)
    if op_type == 'MVM':
        v_name = 'v{}'.format(ncol)
        if v_name not in dir(cxn.arrays):
            alloc_vector(ncol, v_name, cxn)
    cxn.iquery("load_library('linear_algebra')")

    if op_type == 'TRANS':
        call = 'consume(transpose({}))'.format(M_name)
    elif op_type == 'NORM':
        call = 'aggregate(apply({}, val2, pow(val,2.0)), sum(val2))'.format(
            M_name)
    elif op_type == 'GMM':
        call = 'spgemm({},{})'.format(M_name, N_name)
    elif op_type == 'MVM':
        call = 'spgemm({},{})'.format(M_name, v_name)
    elif op_type == 'TSM':
        call = 'spgemm(transpose({}),{})'.format(M_name, M_name)
    elif op_type == 'ADD':
        call = 'consume(apply(join({0},{1}), sum, {0}.val+{1}.val))'.format(
            M_name, N_name)
    else:
        raise StandardError('Invalid operator type')

    run_times.ix[:, :2] = (nodes, sr)
    run_times.ix[:, 2:] = time_stmt(call, cxn)
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')
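# time_stmt is defined elsewhere in this repo; below is a hypothetical
# minimal version, assuming it runs the AFL expression five times and
# returns the wall-clock seconds of each run (matching the time1..time5
# columns above).
import time

def time_stmt(call, cxn, n_runs=5):
    times = []
    for _ in range(n_runs):
        start = time.time()
        cxn.iquery(call)  # execute the AFL expression, discard output
        times.append(time.time() - start)
    return times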
def main():
    sdb = connect('http://localhost:8000')
    V = sdb.wrap_array("Vg")
    W = sdb.wrap_array("Wg")
    H = sdb.wrap_array("Hg")
    eps = 10e-8
    max_iteration = 5
    i = 0
    print "starts to run!"
    start = time.time()
    while i < max_iteration:
        begin = time.time()
        # Multiplicative update rules for non-negative matrix factorization
        H = H * (sdb.dot(W.transpose(), V) /
                 (sdb.dot(sdb.dot(W.transpose(), W), H) + eps))
        W = W * (sdb.dot(V, H.transpose()) /
                 (sdb.dot(W, sdb.dot(H, H.transpose())) + eps))
        i = i + 1
        end = time.time()
        diff = end - begin
        print "iteration: %d, used time:%f secs\n" % (i, diff)
    finish = time.time()
    duration = finish - start
    print "all the %d iterations used time %f" % (max_iteration, duration)
    remove(sdb)
def remove_test_array(host):
    """
    Remove the test array from SciDB.

    :param host: url to shim.
    """
    with sdb.connect(host) as con:
        con.remove('scidbbackup_test_array')
def save_opaque(array_name, host, path):
    """
    Save scidb array as opaque binary array.

    :param array_name: name of array to save
    :param host: url to shim.
    :param path: file path for the opaque dump.
    """
    with sdb.connect(host) as con:
        con.query("save({array_name}, '{path}', -2, 'OPAQUE')",
                  array_name=array_name, path=path)
def load_opaque(array_name, schema, path, host):
    """
    Load scidb array from opaque binary array.

    :param array_name: name of array to load
    :param schema: schema of the array to create
    :param path: file path of the opaque dump
    :param host: url to shim.
    """
    array_entry = select_backup_record(array_name)
    with sdb.connect(host) as con:
        con.query("create array {array_name} {schema}",
                  array_name=array_name, schema=schema)
        con.query("load({array_name}, '{path}', -2, 'OPAQUE')",
                  array_name=array_name, path=path)
def _load(name, schema, url, hostname, port, worker):
    interface = scidbpy.connect(url)
    local_name = '{}_{}'.format(name, worker['id'])
    try:
        interface.query('create temp array {}{}'.format(local_name, schema))
    except Exception:
        pass
    return interface.query("load({}, '{}@{}', {}, 'text')"
                           .format(local_name, hostname, port, worker['id']))
def get_array_record(host):
    """
    Get list of scidb arrays.

    :param host: url to shim.
    """
    with sdb.connect(host) as con:
        array_dict = con.list_arrays()
    return array_dict
def build_input(arguments):
    print 'Creating input vectors (mode=%s)' % arguments.input_mode
    if arguments.input_mode.lower() == 'deterministic':
        arguments.input_array = (
            scidbpy.connect(arguments.url,
                            username=arguments.username,
                            password=arguments.password)
            .afl.build(
                '<value: double>[id=0:{},{},{}, time=0:{},{},{}]'.format(
                    arguments.patients - 1,
                    arguments.chunk_patients,
                    arguments.overlap_patients,
                    arguments.vector_size - 1,
                    arguments.chunk_vectors,
                    arguments.overlap_vectors),
                '(double(id+1) / (time+1)) / %f - 1' % (arguments.patients/2))
            .eval()
            .name)
    else:
        arguments.input_array = (
            scidbpy.connect(arguments.url,
                            username=arguments.username,
                            password=arguments.password)
            .random((arguments.patients, arguments.vector_size),
                    chunk_size=(arguments.chunk_patients,
                                arguments.chunk_vectors),
                    dim_names=['id', 'time'])
            .attribute_rename('f0', 'value')
            .eval()
            .name)
def test_interface_reap():
    sdb = connect()
    A = sdb.random((1, 1))
    B = sdb.random((1, 1))
    aname = A.name
    bname = B.name
    sdb.reap()
    assert aname not in sdb.list_arrays()
    assert bname not in sdb.list_arrays()
def import_(cls, source, intermediate, *args, **kwargs):
    interface = scidbpy.connect(kwargs['url'])
    workers = utility._get_workers(interface)
    pool = multiprocessing.Pool(processes=len(workers))

    name = source.name.replace(':', '_')
    schema = SciDBSchema(source.schema).local
    try:
        result = interface.query("create_array({}, {})".format(name, schema))
    except Exception:
        pass

    pool.map(partial(_load, name, schema, kwargs['url'],
                     kwargs['hostname'], kwargs['port']),
             workers)
    pool.close()
    pool.join()
def setup(mb):
    cnt = mb * 1024 * 1024 / 8
    db = scidbpy.connect()
    db.iquery(
        'store(build(<x:int64 not null>[i=0:{}], random()), bm)'.format(
            cnt - 1))
    ar = db.iquery('scan(bm)', fetch=True, atts_only=True, as_dataframe=False)
    print("""\
Data size:      {:6.2f} MB
In-memory size: {:6.2f} MB
Number of runs: {:3d}""".format(cnt * 8 / 1024. / 1024,
                                ar.nbytes / 1024. / 1024,
                                runs))
    return db
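# A minimal sketch (not in the original) of how the 'bm' array built by
# setup() might be timed; 'runs' is assumed to be the module-level constant
# reported in the banner above.
import timeit

def bench(db):
    t = timeit.timeit(
        lambda: db.iquery('scan(bm)', fetch=True,
                          atts_only=True, as_dataframe=False),
        number=runs)
    print('scan(bm): {:6.3f} s/run'.format(t / runs))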
def create_test_array(host):
    """
    Create test array in SciDB.

    :param host: url to shim.
    """
    with sdb.connect(host) as con:
        con.query("""
            store(
                redimension(
                    join(
                        build(<x:double>[k=0:8,1,0], k),
                        join(
                            build(<i:int64>[k=0:8,1,0], k%3),
                            build(<j:int64>[k=0:8,1,0], k/3))),
                    <x:double>[i=0:8,1,0, j=0:8,1,0]),
                scidbbackup_test_array)""")
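# A sketch of a full backup round trip with the helpers above; the path is
# a placeholder, the schema mirrors create_test_array, and it assumes
# select_backup_record can resolve a record for the array.
host = 'http://localhost:8080'
path = '/tmp/scidbbackup_test_array.opaque'
schema = '<x:double>[i=0:8,1,0, j=0:8,1,0]'

create_test_array(host)
save_opaque('scidbbackup_test_array', host, path)
remove_test_array(host)
load_opaque('scidbbackup_test_array', schema, path, host)
assert 'scidbbackup_test_array' in get_array_record(host)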
def import_(cls, source, intermediate, *args, **kwargs):
    interface = scidbpy.connect(kwargs["url"])
    name = source.name.replace(':', '_')
    types = '({})'.format(",".join(source.schema.types[1:]))
    try:
        result = interface.query("create_array({}, {})".format(
            name, SciDBSchema(source.schema).local))
    except Exception:
        pass
    return interface.query("load({}, '{}@{}', -1, '{}')"
                           .format(name, kwargs["hostname"],
                                   kwargs["port"], types))
def main(kwargs):
    op_type = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')

    nrow, ncol = get_dims(x_table_name)
    path = '../output/scidb_{}{}.txt'.format(op_type, nodes)

    colnames = ['nodes', 'rows', 'cols',
                'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    cxn = scidbpy.connect()
    cxn.iquery("load_library('dense_linear_algebra')")
    env = {
        'cxn': cxn,
        'reg': reg,
        'logit': logit,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol), cxn, overwrite=False)
    alloc_matrix(nrow, 0, 'y{}{}'.format(nrow, ncol), cxn,
                 overwrite=False, binary=True)
    if op_type == 'reg':
        call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
    elif op_type == 'logit':
        alloc_matrix(nrow, 0, 'y{}{}b'.format(nrow, ncol), cxn,
                     overwrite=False, binary=True)
        call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
    elif op_type == 'gnmf':
        call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
    elif op_type == 'robust':
        alloc_matrix(nrow, 0, 'r2{}{}'.format(nrow, ncol), cxn,
                     overwrite=True, val_name='residuals')
        call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

    run_times.loc[:, ['nodes', 'rows', 'cols']] = (nodes, nrow, ncol)
    # iloc, not loc: the timing columns are selected by position
    run_times.iloc[:, 3:] = utils.timeOp(call, env)
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')
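# alloc_matrix/alloc_vector come from a sibling module not shown here; a
# hypothetical dense version using only standard AFL, to show the shape of
# what is assumed (name and chunk size are illustrative):
def alloc_matrix_sketch(nrow, ncol, name, cxn, overwrite=True):
    if overwrite and name in dir(cxn.arrays):
        cxn.iquery('remove({})'.format(name))
    cxn.iquery(
        'store(build(<val:double>[row=0:{},1000,0, col=0:{},1000,0], '
        'random()), {})'.format(nrow - 1, ncol - 1, name))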
def main():
    sdb = connect('http://localhost:8000')
    data = sdb.wrap_array("mnist8m_200")
    labels = sdb.wrap_array("label")
    V = sdb.wrap_array("V")
    W = sdb.wrap_array("W")
    V.approxdc()

    r = 8100000
    fra = 0.01
    learningRate = 0.1
    batchSize = int(fra * r)
    iterations = 15

    start = datetime.now()
    duration = 0
    print "nn starts to run!"
    for i in range(iterations):
        starti = datetime.now()
        # Pick a random mini-batch of rows
        index = random.randint(1, r - batchSize)
        indexEnd = index + batchSize
        input = data[index:indexEnd, :]
        label = labels[index:indexEnd, :]

        # Forward pass: two sigmoid layers
        wIn = sdb.dot(input, W)
        wTemp = sdb.exp(-wIn)
        wOut = 1 / (1 + wTemp)
        vIn = sdb.dot(wOut, V)
        vTemp = sdb.exp(-vIn)
        vOut = 1 / (1 + vTemp)

        # Backward pass: sigmoid'(x) = exp(-x) / (1 + exp(-x))**2
        vDelta = vTemp / (1 + vTemp)**2 * (vOut - label)
        wDelta = wTemp / (1 + wTemp)**2 * sdb.dot(vDelta, V.transpose())
        V = V - learningRate * sdb.dot(wOut.transpose(), vDelta) / batchSize
        W = W - learningRate * sdb.dot(input.transpose(), wDelta) / batchSize

        endi = datetime.now()
        diff = endi - starti
        t = diff.microseconds / 1000000.0 + diff.seconds
        duration += t
        print "iteration:", i, " used time: ", t, "secs"

    end = datetime.now()
    print "duration:", duration
    diff = end - start
    print "all time:", diff.seconds
    remove(sdb)
def main():
    time_start = time.time()
    sdb = connect()
    print("Time passed: %.3fs" % (time.time() - time_start))
    data = load_data_from_aws()
    print "Datatype:", data.dtype

    if True:  # Reduce data to work with for coding
        sh = data.shape
        data = data[int(sh[0] * .25):int(sh[0] * .75),
                    int(sh[1] * .25):int(sh[1] * .75),
                    int(sh[2] * .25):int(sh[2] * .75)]

    print("Time passed: %.3fs" % (time.time() - time_start))
    gtab = dpg.gradient_table('./bvals', './bvecs', b0_threshold=10)
    print("Time passed: %.3fs" % (time.time() - time_start))
    data_sdb = sdb.from_array(data)

    # Creating mask
    mean_b0 = data_sdb.compress(sdb.from_array(gtab.b0s_mask), axis=3)
    mean_b0 = mean_b0.mean(-1)
    _, mask = median_otsu(mean_b0.toarray(), 4, 2, False,
                          vol_idx=np.where(gtab.b0s_mask), dilate=1)
    print("Time passed: %.3fs" % (time.time() - time_start))
def parse_arguments(arguments):
    parser = argparse.ArgumentParser(description='Execute Myria-only test')

    parser.add_argument('patients', type=int,
                        help='Number of patients to evaluate')
    parser.add_argument('vector_size', type=int,
                        help='Size of input vectors for each patient')
    parser.add_argument('--url', type=str, default='http://localhost:8080',
                        help='SciDB Shim URL')
    parser.add_argument('--bins', type=int, default=10,
                        help='Number of histogram bins')
    parser.add_argument('--test-id', dest='test_id', type=int, default=1,
                        help='Index of test patient for k-NN computation')
    parser.add_argument('--input-mode', dest='input_mode', type=str,
                        default='random',
                        choices=['deterministic', 'random'],
                        help='Mode of automatically generated input')
    parser.add_argument('--scidb-bin', dest='scidb_bin', type=str,
                        default='/opt/scidb/14.12/bin/scidb.py',
                        help='Path of scidb.py')
    parser.add_argument('--scidb-name', dest='scidb_name', type=str,
                        default='mydb', help='Name of SciDB database')
    parser.add_argument('--chunk-patients', dest='chunk_patients', type=int,
                        default=1, help='Chunk size for patient array')
    parser.add_argument('--chunk-vectors', dest='chunk_vectors', type=int,
                        default=None, help='Chunk size for input vectors')
    parser.add_argument('--chunk-bins', dest='chunk_bins', type=int,
                        default=2**32, help='Chunk size for histogram bins')
    parser.add_argument('--overlap-vectors', dest='overlap_vectors', type=int,
                        default=0, help='Array overlap for input vectors')
    parser.add_argument('--overlap-patients', dest='overlap_patients',
                        type=int, default=0,
                        help='Array overlap for patient array')
    parser.add_argument('--overlap-bins', dest='overlap_bins', type=int,
                        default=0, help='Array overlap for histogram bins')
    parser.add_argument('--username', type=str, default=None,
                        help='Username used to authenticate with SciDB Shim')
    parser.add_argument('--password', type=str, default=None,
                        help='Password used to authenticate with SciDB Shim')
    parser.add_argument('--restart', dest='restart', action='store_true',
                        help='Restart SciDB before testing')
    parser.add_argument('--no-restart', dest='restart', action='store_false',
                        help='Do not restart SciDB before testing')
    parser.set_defaults(restart=True)

    arguments = parser.parse_args(arguments)
    arguments.chunk_vectors = arguments.chunk_vectors or arguments.vector_size
    arguments.iterations = int(math.log(arguments.vector_size, 2))
    arguments.sdb = scidbpy.connect(arguments.url,
                                    username=arguments.username,
                                    password=arguments.password)

    print 'Arguments: %s' % vars(arguments)
    return arguments
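# Example invocation (values illustrative): the positional patients and
# vector_size arguments are required, everything else falls back to the
# defaults above. Note that parsing also opens the SciDB connection.
# arguments = parse_arguments(['1000', '64', '--input-mode', 'random'])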
def do_matrix_op(kwargs):
    op_type = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    fixed_axis = int(kwargs.get('fixedAxis'))
    nrow_scale = [int(x) for x in kwargs['nrows'].split(' ')]
    nproc = kwargs.get('nproc', None)

    if nproc is None:
        path = os.path.join('..', 'output',
                            'scidb_{}_{}.txt'.format(mattype, op_type))
    else:
        path = os.path.join('..', 'output',
                            'scidb_cpu_{}_{}.txt'.format(mattype, op_type))

    num_procs = nproc if nproc is not None else 24
    atexit.register(terminate_scidb)
    P, stdout, stderr = init_scidb(num_procs, debug=True, stub=op_type)
    cxn = scidbpy.connect()
    print cxn.iquery("list('instances')", fetch=True)

    colnames = ['rows', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    for nr in nrow_scale:
        nrow = fixed_axis if op_type == 'GMM' else nr
        ncol = nr if op_type == 'GMM' else fixed_axis

        M_name = 'M{}{}'.format(nrow, ncol)
        if M_name not in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, M_name, cxn)
        if op_type == 'GMM':
            if 'N{}{}'.format(ncol, nrow) not in dir(cxn.arrays):
                alloc_matrix(ncol, nrow, 'N{}{}'.format(ncol, nrow), cxn)
            N_name = 'N{}{}'.format(ncol, nrow)
            zv_name = 'ZEROS{}{}'.format(nrow, nrow)
            zeros(nrow, nrow, zv_name, cxn)
        if op_type == 'TSM':
            zv_name = 'ZEROS{}{}'.format(ncol, ncol)
            zeros(ncol, ncol, zv_name, cxn)
        if op_type == 'ADD':
            if 'N{}{}'.format(nrow, ncol) not in dir(cxn.arrays):
                alloc_matrix(nrow, ncol, 'N{}{}'.format(nrow, ncol), cxn)
            N_name = 'N{}{}'.format(nrow, ncol)
        if op_type == 'MVM':
            v_name = 'v{}'.format(fixed_axis)
            if v_name not in dir(cxn.arrays):
                alloc_vector(fixed_axis, v_name, cxn)
            zv_name = 'ZEROS{}'.format(nrow)
            zeros(nrow, 0, zv_name, cxn)
        cxn.iquery("load_library('dense_linear_algebra')")

        if op_type == 'TRANS':
            call = 'consume(transpose({}))'.format(M_name)
        elif op_type == 'NORM':
            call = ('aggregate(apply({}, val2, pow(val,2.0)), '
                    'sum(val2))').format(M_name)
        elif op_type == 'GMM':
            call = 'gemm({},{},{})'.format(M_name, N_name, zv_name)
        elif op_type == 'MVM':
            call = 'gemm({},{},{})'.format(M_name, v_name, zv_name)
        elif op_type == 'TSM':
            call = 'gemm({},{},{}, transa:true)'.format(
                M_name, M_name, zv_name)
        elif op_type == 'ADD':
            call = ('consume(apply(join({0},{1}), sum, '
                    '{0}.val+{1}.val))').format(M_name, N_name)
        else:
            raise StandardError('Invalid operator type')

        run_times.loc[:, 'rows'] = nr if nproc is None else nproc
        run_times.ix[:, 1:] = time_stmt(call, cxn)
        write_header = not os.path.exists(path)
        run_times.to_csv(path, index=False, header=write_header, mode='a')

    P.terminate()
    stdout.close()
    stderr.close()
def main(kwargs):
    mattype = kwargs['mattype']
    op_type = kwargs['opType']
    nrow = int(kwargs['nrow'])
    ncol = int(kwargs['ncol'])
    nproc = int(kwargs['nproc'])
    path = '../output/scidb_{}.txt'.format(op_type)

    colnames = ['nproc', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    atexit.register(terminate_scidb)
    P, stdout, stderr = init_scidb(nproc, debug=True)
    cxn = scidbpy.connect()
    cxn.iquery("load_library('dense_linear_algebra')")
    print cxn.iquery("list('instances')", fetch=True)

    env = {
        'cxn': cxn,
        'reg': reg,
        'logit': logit,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol), cxn, overwrite=False)
    alloc_matrix(nrow, 0, 'y{}{}'.format(nrow, ncol), cxn,
                 overwrite=False, binary=True)
    if op_type == 'reg':
        call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
    elif op_type == 'logit':
        alloc_matrix(nrow, 0, 'y{}{}b'.format(nrow, ncol), cxn,
                     overwrite=False, binary=True)
        call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
    elif op_type == 'gnmf':
        call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
    elif op_type == 'robust':
        alloc_matrix(nrow, 0, 'r2{}{}'.format(nrow, ncol), cxn,
                     overwrite=True, val_name='residuals')
        call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

    run_times.loc[:, 'nproc'] = nproc
    # iloc, not loc: the timing columns are selected by position
    run_times.iloc[:, 1:] = utils.timeOp(call, env)
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')

    P.terminate()
    stdout.close()
    stderr.close()
def connect(self):
    self.connection = connect(self.address)
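# A minimal sketch of the class this method appears to belong to; only the
# 'address' attribute is implied by the original code, everything else here
# (class name, constructor) is assumed for illustration.
class ScidbClient(object):
    def __init__(self, address):
        self.address = address
        self.connection = None

    def connect(self):
        self.connection = connect(self.address)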
import numpy as np
from scidbpy import connect
sdb = connect('http://localhost:48080')  # connect to the database

from scidbpy.parse import _fmt

singles = {}
dtypes = ('int8 int16 int32 int64 uint8 uint16 uint32 uint64 '
          'float double datetime datetimetz bool string char').split()
vals = [-128, -2**15+2, -2**31+2, -2**63+2,
        2**8-1, 2**16-1, 2**32-1, 2**63-1,
        1.23, 1e100,
        "'01/01/2001 12:23'", "'01/01/2001 12:23:01 +05:00'",
        'true', "'test'", "'a'"]


# Create
def make(dtype, val):
    return sdb.afl.build('<x:%s>[i=0:1,10,0]' % dtype, val).eval()


# Retrieve
def run(make):
    for dtype, val in zip(dtypes, vals):
        a = make(dtype, val)
        print a
        print 'Plaintext:', repr(sdb._scan_array(a.name))
        print 'Binary   :', repr(sdb._scan_array(a.name, fmt=_fmt(a)))
        print 'NumPy    :', a.toarray()
        print '-----------'
        # Delete
        a.reap()


run(make)
from __future__ import print_function
import numpy as np
np.random.seed(42)

from scidbpy import connect
sdb = connect()

X = np.random.random((5, 4))
Xsdb = sdb.from_array(X)

from scipy.sparse import coo_matrix
X = np.random.random((10, 10))
X[X < 0.9] = 0  # make array sparse
Xcoo = coo_matrix(X)
Xsdb = sdb.from_sparse(Xcoo)

# Create a 10x10 array of double-precision zeros:
A = sdb.zeros((10, 10))

# Create a 10x10 array of 64-bit signed integer ones:
A = sdb.ones((10, 10), dtype='int64')

# Create a 10x10 array of numbers between -1 and 2 (inclusive)
# sampled from a uniform random distribution.
A = sdb.random((10, 10), lower=-1, upper=2)

# Create a 10x10 array of uniform random integers between 0 and 10
# (inclusive of 0, non-inclusive of 10)
def db():
    return scidbpy.connect()
def getConnection(self):
    return connect(self.url, self.user, self.password)
> python 2-pack-func.py
[(0, 0, 0, (255, 1.))]

Setup:

> pip install scidb-py
"""

import dill
import numpy
import scidbpy
import scidbstrm
import sys

db = scidbpy.connect()


def get_first(df):
    return df.head(1)


# Serialize (pack) and upload the function to SciDB
ar_fun = db.input(upload_data=scidbstrm.pack_func(get_first),
                  upload_schema=scidbpy.Schema.fromstring(
                      '<x:binary not null>[i]')).store()

que = db.stream(
    'build(<x:double>[i=1:5], i)',
    """'python{major} -uc "
data_path = '/home/dongfang/download/lsst_data/'
visits = ["0288935", "0288976"]
visit = visits[0]
ccd_id = '1'  # integer between 1 and 60

from astropy.io import fits
hdulist = fits.open(data_path + visit + '/instcal' + visit +
                    '.' + ccd_id + '.fits')
print hdulist.info()

import numpy as np
a = np.array(hdulist[1].data)
print "a.shape =", a.shape
print "Numpy: a.mean =", a.mean()

import scidbpy as sp
sdb = sp.connect('http://localhost:8080')
data_sdb = sdb.from_array(a.astype(np.float32))
res = data_sdb.mean()
print "SciDB: mean =", res[0]
print "Done!"
coordinator = 'localhost'
exponents = (int(argv[1]), int(argv[1]) + 1) if len(argv) > 1 else (4, 5)
sizes = [1000000 * 2**x for x in xrange(*exponents)]
# Other strategies tried here: SocketCSV, Binary, FIFO
strategies = [SocketBinary]

for size in sizes:
    for strategy in strategies:
        print 'Size: ' + str(size)
        print 'Strategy: ' + str(strategy)

        array = scidbpy.connect(
            'http://{}:8080'.format(coordinator)).random(
                size,
                chunk_size=min(size / 8, 2**16),
                persistent=False)
        print array.name

        query = ''

        def test():
            global query
            query = convert(array, strategy, coordinator)

        print 'Begin SciDB->Myria'
        print timeit.timeit('test()', setup='from __main__ import test',
                            number=1)
file_list = ["{0}_scidb.csv".format(x) for x in id_list]

phildb_du_results = []
scidb_du_results = []
phildb_du_results.append('0\thrs_phildb\n')
scidb_du_results.append(subprocess.check_output(['du', '-s', 'scidbdata']))

write_phildb(file_list, 'phildb_initial_writes.txt', first_run=True)

# Start PhilDB server now that a phildb instance exists
subprocess.Popen(['phildb-server', '--port=8989', 'hrs_phildb'])

write_scidb(file_list, 'scidb_initial_writes.txt', first_run=True)

# Connect Python APIs
sdb = scidbpy.connect('http://localhost:8080', 'scidb', 'paradigm4')
pdb = PhilDB('hrs_phildb')
pdb_client = PhilDBClient('http://localhost:8989')

# Read with the three clients, interleaving PhilDB with SciDB to try to
# minimise the effects of caching
read_phildb(id_list, pdb, 'phildb_initial_reads.txt')
read_scidb(id_list, sdb, 'scidb_initial_reads.txt')
read_phildb(id_list, pdb_client, 'phildb_client_initial_reads.txt')

# Do updates
for i in range(1, 5):
    # Generate file list for this update
    file_list = ["{0}_update{1}.csv".format(x, i) for x in id_list]
    mod_datetime = datetime.now()
def setup(mb, runs):
    cnt = mb * 1024 * 1024 / 8 / 4
    db = scidbpy.connect()
    print("""
Setup
===
Number of runs:    {:7d}
Target size:       {:10.2f} MB
Buffer size:       {:10.2f} MB
Chunk size:        {:7d}
Number of records: {:7d}""".format(runs,
                                   mb,
                                   buffer_size / 1024. / 1024,
                                   chunk_size,
                                   cnt))

    # Fixed-size schema (int64 only)
    ar_name = ar_names[0]
    db.build('<z:int64 not null>[i=1:{}:0:{}]'.format(cnt, chunk_size),
             'random()').apply('y', 'int64(random())',
                               'x', 'int64(random())',
                               'w', 'int64(random())').store(ar_name)
    ar_schemas[0] = db.arrays[ar_name].schema()
    scidb_bytes_fix = db.summarize(ar_name).project('bytes')['bytes'][0]
    chunks = db.summarize(ar_name,
                          'by_attribute:true').project('chunks').fetch(
                              atts_only=True, as_dataframe=False)[0][0][1]
    mem_bytes_fix = 0
    # mem_bytes_fix = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name,
                "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_fix = os.path.getsize('/dev/shm/' + ar_name)
    print("""Number of chunks:  {:7d}

Fix Size Schema (int64 only)
---
SciDB size:     {:7.2f} MB
In-memory size: {:7.2f} MB
File size:      {:7.2f} MB""".format(chunks,
                                     scidb_bytes_fix / 1024. / 1024,
                                     mem_bytes_fix / 1024. / 1024,
                                     file_bytes_fix / 1024. / 1024))

    # Variable-size schema (int64 and string)
    ar_name = ar_names[1]
    db.build('<z:int64 not null>[i=1:{}:0:{}]'.format(cnt, chunk_size),
             'random()').apply('y', 'int64(random())',
                               'x', 'int64(random())',
                               'w', 'int64(random())',
                               'v', "''").store(ar_name)
    ar_schemas[1] = db.arrays[ar_name].schema()
    scidb_bytes_var = db.summarize(ar_name).project('bytes')['bytes'][0]
    mem_bytes_var = 0
    # mem_bytes_var = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name,
                "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_var = os.path.getsize('/dev/shm/' + ar_name)
    print("""
Variable Size Schema (int64 and string)
---
SciDB size:     {:7.2f} MB
In-memory size: {:7.2f} MB
File size:      {:7.2f} MB""".format(scidb_bytes_var / 1024. / 1024,
                                     mem_bytes_var / 1024. / 1024,
                                     file_bytes_var / 1024. / 1024))

    # Multi-dimensional schema (3-dimensional, double)
    ar_name = ar_names[2]
    db.create_array(
        ar_name,
        '<x:double>[i=1:100:0:1; j=1:1000:0:1000; k=1:1000:0:1000]')
    db.build(ar_name, 'random()').store(ar_name)
    ar_schemas[2] = db.arrays[ar_name].schema()
    scidb_bytes_var = db.summarize(ar_name).project('bytes')['bytes'][0]
    mem_bytes_var = 0
    # mem_bytes_var = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name,
                "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_var = os.path.getsize('/dev/shm/' + ar_name)
    print("""
Multi-Dimensional Schema (3-dimensional, double)
---
SciDB size:     {:7.2f} MB
In-memory size: {:7.2f} MB
File size:      {:7.2f} MB""".format(scidb_bytes_var / 1024. / 1024,
                                     mem_bytes_var / 1024. / 1024,
                                     file_bytes_var / 1024. / 1024))

    return db
import numpy as np
np.random.seed(42)

# connect to the database
from scidbpy import connect
sdb = connect()

# Two small arrays, each with 1000 rows, and with 5 and 3 columns
x = np.random.random((1000, 5))
y = np.column_stack((x[:, 0] * 2,
                     x[:, 1] + x[:, 0] / 2.,
                     x[:, 4]))

# Transfer to the database. All future computation happens there.
X = sdb.from_array(x)
Y = sdb.from_array(y)

# Subtract the column means from X using broadcasting:
XC = X - X.mean(0)

# Similarly subtract the column means from Y:
YC = Y - Y.mean(0)

COV = sdb.dot(XC.T, YC) / (X.shape[0] - 1)

# Column vector with column standard deviations of X matrix:
xsd = X.std(0).reshape((5, 1))

# Row vector with column standard deviations of Y matrix:
ysd = Y.std(0).reshape((1, 3))

# Their outer product:
outersd = sdb.dot(xsd, ysd)

COR = COV / outersd
print(COR.toarray())
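# Optional cross-check (not in the original): compute the same 5x3 block of
# cross-correlations locally with NumPy. The values agree only up to the
# ddof convention (COV above divides by n-1 while std(0) is the population
# standard deviation), so compare with a loose tolerance.
local_cor = np.corrcoef(x, y, rowvar=False)[:5, 5:]
print(np.allclose(local_cor, COR.toarray(), rtol=1e-2))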
        print(ondaName + " does not contain Signal II")
        return


def fillReadedWaves(sdb):
    file = open("Jupyter/readedWaves.txt", "r")
    arrays = dir(sdb.arrays)
    filelines = file.readlines()
    arrays.extend([w.replace('\n', '') for w in filelines])
    return arrays


# url of the waveforms of physionet
target_url = "https://physionet.org/physiobank/database/mimic3wdb/matched/RECORDS-waveforms"
# it's a file-like object and works just like a file
data = urllib.request.urlopen(target_url)
sdb = connect('http://localhost:8080')  # the url of the scidb service
sdbarrays = fillReadedWaves(sdb)

for line in data:  # files are iterable
    gc.collect()
    file = open("Jupyter/readedWaves3.txt", "a")
    carpeta, subCarpeta, onda = str(line).replace('b\'', '').replace(
        '\'', '').replace('\\n', '').split("/")
    carpeta = carpeta + "/" + subCarpeta
    ondaName = onda.replace("-", "_")
    print(ondaName)
    if ondaName not in sdbarrays:
        file.write(ondaName + "\n")
        downloadWFDB(onda, carpeta, sdb, ondaName)
    file.close()
import sys
import os
import numpy
import random

from scidbpy import connect  # needed for connect() below

OUTPUT_FILE = 'output.jpg'


# Helper function to get file paths of each image
def absoluteFilePaths(directory):
    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))


# Connect to localhost SciDB instance through shim layer
sdb = connect('http://localhost:8080/')

if len(sys.argv) < 2:
    print "Error: Enter input directory..."
    sys.exit(1)
if len(sys.argv) == 3:
    OUTPUT_FILE = sys.argv[2]

# Enumerate input directory (materialize the generator so it can be
# both counted and iterated):
image_files = list(absoluteFilePaths(sys.argv[1]))
num_files = len(image_files)
print str(num_files) + " files in directory"

# create random weights
if "image_volume" in sdb.list_arrays():
    sdb.query("remove(image_volume)")
def main():
    db = connect('http://localhost:8080')
    np.savez('PheWAS.npz',
             GPR151=get_data('5-145895394', db),
             PDE3B=get_data('11-14865399', db))
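# Reading the bundle back (np.savez stores each keyword argument as a named
# member of the .npz archive):
def load_phewas():
    with np.load('PheWAS.npz') as npz:
        return npz['GPR151'], npz['PDE3B']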