Beispiel #1
0
def build_input(arguments):
    # Create the benchmark input array in SciDB and record its name on
    # `arguments.input_array`.  Two modes, chosen by arguments.input_mode:
    #   - "deterministic": each value is computed from its (id, time)
    #     coordinates by an AFL build() expression, so runs are reproducible;
    #   - anything else: uniform random values.
    # NOTE(review): Python 2 (print statement; `arguments.patients / 2` is
    # integer division here).
    print "Creating input vectors (mode=%s)" % arguments.input_mode
    if arguments.input_mode.lower() == "deterministic":
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
            .afl.build(
                # Schema: patients on the `id` dimension, vector samples on
                # `time`, with the configured chunk sizes and overlaps.
                "<value: double>[id=0:{},{},{}, time=0:{},{},{}]".format(
                    arguments.patients - 1,
                    arguments.chunk_patients,
                    arguments.overlap_patients,
                    arguments.vector_size - 1,
                    arguments.chunk_vectors,
                    arguments.overlap_vectors,
                ),
                "(double(id+1) / (time+1)) / %f - 1" % (arguments.patients / 2),
            )
            .eval()
            .name
        )
    else:
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
            .random(
                (arguments.patients, arguments.vector_size),
                chunk_size=(arguments.chunk_patients, arguments.chunk_vectors),
                dim_names=["id", "time"],
            )
            .attribute_rename("f0", "value")
            .eval()
            .name
        )
Beispiel #2
0
def test_reap_called_on_context_manager():
    # An array created inside a connect() context must be visible to the
    # global `sdb` connection while the context is open, and reaped
    # (removed from the catalog) once the context exits.
    with connect() as scratch:
        created = scratch.random((1, 1))
        array_name = created.name
        assert created.name in sdb.list_arrays()

    assert array_name not in sdb.list_arrays()
Beispiel #3
0
def test_reap_called_on_context_manager():
    # Leaving the connect() context manager should reap arrays created
    # through it: present in the catalog inside the block, gone after.
    with connect() as temp_conn:
        arr = temp_conn.random((1, 1))
        remembered = arr.name
        assert arr.name in sdb.list_arrays()

    assert remembered not in sdb.list_arrays()
Beispiel #4
0
def scidb_con():
    # Generator used as a test fixture: yields an authenticated SciDB
    # connection, then tears down the SciDB objects, local files, and S3
    # objects the tests created.  Relies on module-level globals: fs_base,
    # scidb_url, s3_con, s3_bucket, base_prefix.
    # FS Init
    if not os.path.exists(fs_base):
        os.makedirs(fs_base)

    con = scidbpy.connect(scidb_url,
                          scidb_auth=('root', 'Paradigm4'),
                          verify=False)
    yield con

    # SciDB Cleanup
    for query in ("drop_user('bar')", "drop_namespace('foo')"):
        try:
            con.iquery(query)
        except requests.exceptions.HTTPError:
            # The user/namespace may not exist; cleanup is best-effort.
            pass

    # FS Cleanup
    try:
        shutil.rmtree(fs_base)
    except PermissionError:
        pass

    # S3 Cleanup
    result = s3_con.list_objects_v2(Bucket=s3_bucket, Prefix=base_prefix + '/')
    if 'Contents' in result.keys():
        objects = [{'Key': e['Key']} for e in result['Contents']]
        s3_con.delete_objects(Bucket=s3_bucket, Delete={'Objects': objects})
Beispiel #5
0
def do_matrix_op(kwargs):
    op_type = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    tableStub = kwargs.get('tableStub')
    savestub = kwargs.get('savestub')
    nodes = kwargs.get('nodes')
    outdir = kwargs.get('outdir')
    sr = kwargs.get('sr')
    sr_val = np.float64('0.{}'.format(sr))

    nrow, ncol = 125000000, 100
    path = '../output/scidb_{}_{}{}.txt'.format(mattype, op_type, nodes)

    cxn = scidbpy.connect()
    print cxn.iquery("list('instances')", fetch=True)
    colnames = ['nodes', 'sr', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    M_name = 'M{}'.format(sr)
    if not M_name in dir(cxn.arrays):
        alloc_matrix(nrow, ncol, M_name, cxn, density=sr_val)
    if op_type == 'GMM':
        if not 'M{}W' in dir(cxn.arrays):
            alloc_matrix(ncol, nrow, 'M{}W'.format(sr), cxn, density=sr_val)
        if not 'N{}'.format(sr) in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, 'N{}'.format(sr), cxn, density=sr_val)
        N_name = 'N{}'.format(sr)
        M_name = 'M{}W'.format(sr)
    if op_type == 'ADD':
        if not 'N{}'.format(sr) in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, 'N{}'.format(sr), cxn, density=sr_val)
        N_name = 'N{}'.format(sr)
    if op_type == 'MVM':
        v_name = 'v{}'.format(ncol)
        if not v_name in dir(cxn.arrays):
            alloc_vector(ncol, v_name, cxn)

    cxn.iquery("load_library('linear_algebra')")
    if op_type == 'TRANS':
        call = 'consume(transpose({}))'.format(M_name)
    elif op_type == 'NORM':
        call = 'aggregate(apply({}, val2, pow(val,2.0)), sum(val2))'.format(
            M_name)
    elif op_type == 'GMM':
        call = 'spgemm({},{})'.format(M_name, N_name)
    elif op_type == 'MVM':
        call = 'spgemm({},{})'.format(M_name, v_name)
    elif op_type == 'TSM':
        call = 'spgemm(transpose({}),{})'.format(M_name, M_name)
    elif op_type == 'ADD':
        call = 'consume(apply(join({0},{1}), sum, {0}.val+{1}.val))'.format(
            M_name, N_name)
    else:
        raise StandardError('Invalid operator type')

    run_times.ix[:, :2] = (nodes, sr)
    run_times.ix[:, 2:] = time_stmt(call, cxn)
    write_header = False if (os.path.exists(path)) else True
    run_times.to_csv(path, index=False, header=write_header, mode='a')
Beispiel #6
0
def main():
    # Run a fixed number of multiplicative-update iterations on the
    # pre-existing SciDB arrays Vg (data) and Wg/Hg (factors), timing each
    # iteration.  Presumably NMF (Lee & Seung updates) -- confirm intent.
    # Python 2 (print statements).
    sdb = connect('http://localhost:8000')

    V = sdb.wrap_array("Vg")
    W = sdb.wrap_array("Wg")
    H = sdb.wrap_array("Hg")

    eps = 10e-8  # small constant added to denominators below
    max_iteration = 5
    i = 0

    print "starts to run!"
    start = time.time()

    while i < max_iteration:
        begin = time.time()
        H = H * (sdb.dot(W.transpose(), V) / (sdb.dot(sdb.dot(W.transpose(), W), H) + eps))
        W = W * (sdb.dot(V, H.transpose()) / (sdb.dot(W, sdb.dot(H, H.transpose())) + eps))
        i = i + 1
        end = time.time()
        diff = end - begin
        print "iteration: %d, used time:%f secs\n" %(i, diff)

    finish = time.time()
    duration = finish - start
    print "all the %d iterations used time %f" %(max_iteration, duration)
    remove(sdb)
Beispiel #7
0
def main():
    # Reformatted duplicate of the multiplicative-update benchmark: update
    # factors Wg/Hg against data Vg on SciDB for five timed iterations.
    # Python 2 (print statements).
    sdb = connect('http://localhost:8000')

    V = sdb.wrap_array("Vg")
    W = sdb.wrap_array("Wg")
    H = sdb.wrap_array("Hg")

    eps = 10e-8  # small constant added to denominators below
    max_iteration = 5
    i = 0

    print "starts to run!"
    start = time.time()

    while i < max_iteration:
        begin = time.time()
        H = H * (sdb.dot(W.transpose(), V) /
                 (sdb.dot(sdb.dot(W.transpose(), W), H) + eps))
        W = W * (sdb.dot(V, H.transpose()) /
                 (sdb.dot(W, sdb.dot(H, H.transpose())) + eps))
        i = i + 1
        end = time.time()
        diff = end - begin
        print "iteration: %d, used time:%f secs\n" % (i, diff)

    finish = time.time()
    duration = finish - start
    print "all the %d iterations used time %f" % (max_iteration, duration)
    remove(sdb)
Beispiel #8
0
def remove_test_array(host):
    """
    Remove the test array from SciDB.

    :param host: url to shim.
    """
    connection = sdb.connect(host)
    with connection as con:
        con.remove('scidbbackup_test_array')
Beispiel #9
0
def save_opaque(array_name, host, path):
    """
    Save scidb array as opaque binary array.

    :param array_name: name of array to save
    :param host: url to shim.
    :param path: server-side file path the array is written to.
    """
    with sdb.connect(host) as con:
        con.query("save({array_name}, '{path}', -2, 'OPAQUE')", array_name=array_name, path=path)
Beispiel #10
0
def load_opaque(array_name, schema, path, host):
    """
    Load scidb array from opaque binary array.

    :param array_name: name of array to create and load into
    :param schema: SciDB schema string used to create the array
    :param path: server-side file path to load from
    :param host: url to shim.
    """
    # NOTE(review): array_entry is never used afterwards -- presumably the
    # lookup validates that a backup record exists; confirm before removing.
    array_entry = select_backup_record(array_name)
    with sdb.connect(host) as con:
        con.query("create array {array_name} {schema}", array_name=array_name, schema=schema)
        con.query("load({array_name}, '{path}', -2, 'OPAQUE')", array_name=array_name, path=path)
def _load(name, schema, url, hostname, port, worker):
  # Per-worker helper: create (if necessary) a temp array named
  # <name>_<worker id> and load this worker's slice of the data from the
  # network endpoint hostname@port in text format.
  interface = scidbpy.connect(url)
  local_name = '{}_{}'.format(name, worker['id'])

  try:
    interface.query('create temp array {}{}'.format(local_name, schema))
  except Exception:
    # The array may already exist from a previous run; reuse it.
    pass
  return interface.query("load({}, '{}@{}', {}, 'text')"
                           .format(local_name, hostname, port, worker['id']))
Beispiel #12
0
def get_array_record(host):
    """
    Get list of scidb arrays.

    :param host: url to shim.
    """
    connection = sdb.connect(host)
    with connection as con:
        listing = con.list_arrays()
    return listing
Beispiel #13
0
def build_input(arguments):
    # Build the benchmark input array in SciDB and record its name on
    # arguments.input_array.  'deterministic' mode derives each value from
    # its (id, time) coordinates; any other mode fills with uniform noise.
    # Python 2 (print statement; integer division in patients/2).
    print 'Creating input vectors (mode=%s)' % arguments.input_mode
    if arguments.input_mode.lower() == 'deterministic':
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
                   .afl.build('<value: double>[id=0:{},{},{}, time=0:{},{},{}]'.format(
                                  arguments.patients-1, arguments.chunk_patients, arguments.overlap_patients,
                                  arguments.vector_size-1, arguments.chunk_vectors, arguments.overlap_vectors),
                              '(double(id+1) / (time+1)) / %f - 1' % (arguments.patients/2))
                   .eval()
                   .name)
    else:
        arguments.input_array = (
            scidbpy.connect(arguments.scidb_url)
                   .random((arguments.patients, arguments.vector_size),
                           chunk_size=(arguments.chunk_patients, arguments.chunk_vectors),
                           dim_names=['id', 'time'])
                   .attribute_rename('f0', 'value')
                   .eval()
                   .name)
def build_input(arguments):
    # Variant of build_input that authenticates against SciDB Shim with
    # arguments.username / arguments.password and reads the URL from
    # arguments.url; otherwise identical to the unauthenticated version.
    print 'Creating input vectors (mode=%s)' % arguments.input_mode
    if arguments.input_mode.lower() == 'deterministic':
        arguments.input_array = (
            scidbpy.connect(arguments.url, username=arguments.username, password=arguments.password)
                   .afl.build('<value: double>[id=0:{},{},{}, time=0:{},{},{}]'.format(
                                  arguments.patients-1, arguments.chunk_patients, arguments.overlap_patients,
                                  arguments.vector_size-1, arguments.chunk_vectors, arguments.overlap_vectors),
                              '(double(id+1) / (time+1)) / %f - 1' % (arguments.patients/2))
                   .eval()
                   .name)
    else:
        arguments.input_array = (
            scidbpy.connect(arguments.url, username=arguments.username, password=arguments.password)
                   .random((arguments.patients, arguments.vector_size),
                           chunk_size=(arguments.chunk_patients, arguments.chunk_vectors),
                           dim_names=['id', 'time'])
                   .attribute_rename('f0', 'value')
                   .eval()
                   .name)
Beispiel #15
0
def test_interface_reap():
    # After an explicit reap(), neither freshly created array may remain
    # in the catalog.
    session = connect()
    first = session.random((1, 1))
    second = session.random((1, 1))

    name_a = first.name
    name_b = second.name

    session.reap()

    assert name_a not in session.list_arrays()
    assert name_b not in session.list_arrays()
Beispiel #16
0
def test_interface_reap():
    # reap() must drop every array created through this connection.
    conn = connect()
    arrays = [conn.random((1, 1)) for _ in range(2)]
    names = [arr.name for arr in arrays]

    conn.reap()

    for reaped in names:
        assert reaped not in conn.list_arrays()
  def import_(cls, source, intermediate, *args, **kwargs):
    # Parallel import: create the target array, then let every SciDB worker
    # load its own partition concurrently through the module-level _load
    # helper.  kwargs must provide: url, hostname, port.
    interface = scidbpy.connect(kwargs['url'])
    workers = utility._get_workers(interface)
    pool = multiprocessing.Pool(processes=len(workers))
    # Presumably ':' is not allowed in SciDB array names -- confirm.
    name = source.name.replace(':', '_')
    schema = SciDBSchema(source.schema).local

    try:
      result = interface.query("create_array({}, {})".format(name, schema))
    except Exception:
      # The array may already exist; proceed and reuse it.
      pass

    pool.map(partial(_load, name, schema, kwargs['url'], 
                            kwargs['hostname'], kwargs['port']), workers)
    pool.close()
    pool.join()
Beispiel #18
0
def setup(mb):
    """Create and fetch a benchmark array `bm` of `mb` MB of int64 values.

    Prints the data size, the fetched in-memory size, and the module-level
    `runs` count, then returns the open scidbpy connection.
    """
    # BUG FIX: use floor division so `cnt` stays an integer under Python 3;
    # with true division the AFL schema below would render as e.g.
    # "[i=0:131071.0]" and the query would fail.  (Unchanged behavior on
    # Python 2, where int/int already truncates.)
    cnt = mb * 1024 * 1024 // 8

    db = scidbpy.connect()

    db.iquery(
        'store(build(<x:int64 not null>[i=0:{}], random()), bm)'.format(cnt -
                                                                        1))
    ar = db.iquery('scan(bm)', fetch=True, atts_only=True, as_dataframe=False)
    print("""\
Data size:      {:6.2f} MB
In-memory size: {:6.2f} MB
Number of runs: {:3d}""".format(cnt * 8 / 1024. / 1024,
                                ar.nbytes / 1024. / 1024, runs))

    return db
Beispiel #19
0
def create_test_array(host):
    """
    Create test array in SciDB.

    :param host: url to shim.
    """
    # Builds nine cells x=k for k in 0..8, pairs each with coordinates
    # (i, j) = (k % 3, k / 3), and redimensions the result into a 9x9
    # double array stored as scidbbackup_test_array.
    with sdb.connect(host) as con:
        con.query("""
            store(
                redimension(
                    join(
                        build(<x:double>[k=0:8,1,0], k),
                    join(
                        build(<i:int64>[k=0:8,1,0], k%3),
                        build(<j:int64>[k=0:8,1,0], k/3))),
                    <x:double>[i=0:8,1,0, j=0:8,1,0]),
                scidbbackup_test_array)""")
  def import_(cls, source, intermediate, *args, **kwargs):
    # Single-endpoint import: create the target array, then load everything
    # from hostname@port in one query (instance id -1 -- see the SciDB
    # load() documentation for its exact meaning).
    # kwargs must provide: url, hostname, port.
    interface = scidbpy.connect(kwargs["url"])
    name = source.name.replace(':', '_')

    # Binary load format built from the source attribute types; the first
    # schema entry is skipped -- presumably the dimension, confirm.
    types = '({})'.format(",".join(source.schema.types[1:]))

    try:
      result = interface.query("create_array({}, {})".format(name, SciDBSchema(source.schema).local))
    except Exception:
      # The array may already exist; reuse it.
      pass

    return interface.query("load({}, '{}@{}', -1, '{}')"
                          .format(name, kwargs["hostname"], kwargs["port"], types))
Beispiel #21
0
def setup(mb):
    """Create the `bm` benchmark array holding `mb` MB of int64 values.

    Prints data/in-memory sizes plus the module-level `runs` count and
    returns the open scidbpy connection.
    """
    # BUG FIX: floor division keeps `cnt` an integer on Python 3; true
    # division would inject a float (e.g. "0:131071.0") into the AFL
    # schema string and break the query.  Identical result on Python 2.
    cnt = mb * 1024 * 1024 // 8

    db = scidbpy.connect()

    db.iquery(
        'store(build(<x:int64 not null>[i=0:{}], random()), bm)'.format(
            cnt - 1))
    ar = db.iquery('scan(bm)', fetch=True, atts_only=True, as_dataframe=False)
    print("""\
Data size:      {:6.2f} MB
In-memory size: {:6.2f} MB
Number of runs: {:3d}""".format(
      cnt * 8 / 1024. / 1024,
      ar.nbytes / 1024. / 1024,
      runs))

    return db
Beispiel #22
0
def main(kwargs):
    """Benchmark one algorithm (reg/logit/gnmf/robust) in SciDB.

    Appends node count, data shape, and five run times to
    ../output/scidb_<opType><nodes>.txt.
    """
    op_type = kwargs.get('opType')
    nodes = kwargs.get('nodes')
    x_table_name = kwargs.get('xTableName')

    nrow, ncol = get_dims(x_table_name)

    path = '../output/scidb_{}{}.txt'.format(op_type, nodes)
    colnames = ['nodes','rows','cols','time1','time2','time3','time4','time5']
    run_times = pd.DataFrame(np.zeros((1,len(colnames))))
    run_times.columns = colnames

    cxn = scidbpy.connect()
    cxn.iquery("load_library('dense_linear_algebra')")

    # Evaluation environment handed to utils.timeOp below.
    env = {
        'cxn': cxn,
        'reg': reg,
        'logit': logit,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol), cxn, overwrite=False)
    alloc_matrix(nrow, 0, 'y{}{}'.format(nrow, ncol), cxn,
        overwrite=False, binary=True)
    if op_type == 'reg':
        call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
    elif op_type == 'logit':
        alloc_matrix(nrow, 0, 'y{}{}b'.format(nrow, ncol),
            cxn, overwrite=False, binary=True)
        call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
    elif op_type == 'gnmf':
        call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
    elif op_type == 'robust':
        alloc_matrix(nrow, 0, 'r2{}{}'.format(nrow, ncol),
            cxn, overwrite=True, val_name='residuals')
        call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

    run_times.loc[:,['nodes','rows','cols']] = (nodes, nrow, ncol)
    # BUG FIX: the time columns must be selected positionally; .loc treats
    # `3:` as a label slice and raises on this string column index.
    run_times.iloc[:,3:] = utils.timeOp(call, env)
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')
Beispiel #23
0
def main():
    # Mini-batch SGD training of a one-hidden-layer sigmoid network on
    # SciDB-resident MNIST-8M data (arrays mnist8m_200, label, V, W),
    # timing every iteration.  Python 2 (print statements).
    sdb = connect('http://localhost:8000')
    data = sdb.wrap_array("mnist8m_200")
    labels = sdb.wrap_array("label")
    V = sdb.wrap_array("V")
    W = sdb.wrap_array("W")
    V.approxdc()
    r = 8100000  # total rows available for sampling
    fra = 0.01  # mini-batch fraction of the data
    learningRate = 0.1
    batchSize = int(fra * r)
    iterations = 15

    start = datetime.now()
    duration = 0
    print "nn starts to run!"
    for i in range(iterations):
        starti = datetime.now()
        # Draw a random contiguous mini-batch of rows.
        index = random.randint(1, r - batchSize)
        indexEnd = index + batchSize
        input = data[index:indexEnd, :]
        label = labels[index:indexEnd, :]
        # Forward pass: two sigmoid layers (1 / (1 + exp(-z))).
        wIn = sdb.dot(input, W)
        wTemp = sdb.exp(-wIn)
        wOut = 1 / (1 + wTemp)
        vIn = sdb.dot(wOut, V)
        vTemp = sdb.exp(-vIn)
        vOut = 1 / (1 + vTemp)
        # Backward pass: sigmoid derivative times the error signal.
        vDelta = vTemp / (1 + vTemp)**2 * (vOut - label)
        wDelta = wTemp / (1 + wTemp)**2 * sdb.dot(vDelta, V.transpose())
        V = V - learningRate * sdb.dot(wOut.transpose(), vDelta) / batchSize
        W = W - learningRate * sdb.dot(input.transpose(), wDelta) / batchSize
        endi = datetime.now()
        diff = endi - starti
        t = diff.microseconds / 1000000.0 + diff.seconds
        duration += t
        print "iteration:", i, " used time: ", t, "secs"
    end = datetime.now()
    print "duration:", duration
    diff = end - start
    print "all time:", diff.seconds
    remove(sdb)
Beispiel #24
0
def main():
    # Reformatted duplicate of the mini-batch SGD neural-network benchmark
    # on SciDB-resident MNIST-8M data.  Python 2 (print statements).
    sdb = connect('http://localhost:8000')
    data = sdb.wrap_array("mnist8m_200")
    labels = sdb.wrap_array("label")
    V = sdb.wrap_array("V")
    W = sdb.wrap_array("W")
    V.approxdc()
    r = 8100000
    fra = 0.01
    learningRate = 0.1
    batchSize = int(fra * r)
    iterations = 15

    start = datetime.now()
    duration = 0
    print "nn starts to run!"
    for i in range(iterations):
        starti = datetime.now()
        # Random contiguous mini-batch of rows.
        index = random.randint(1, r - batchSize)
        indexEnd = index + batchSize
        input = data[index: indexEnd, :]
        label = labels[index: indexEnd, :]
        wIn = sdb.dot(input, W)
        wTemp = sdb.exp(-wIn)
        wOut = 1 / (1 + wTemp)
        vIn = sdb.dot(wOut, V)
        vTemp = sdb.exp(-vIn)
        vOut = 1 / (1 + vTemp)
        vDelta = vTemp / (1 + vTemp) ** 2 * (vOut - label)
        wDelta = wTemp / (1 + wTemp) ** 2 * sdb.dot(vDelta, V.transpose())
        V = V - learningRate * sdb.dot(wOut.transpose(), vDelta) / batchSize
        W = W - learningRate * sdb.dot(input.transpose(), wDelta) / batchSize
        endi = datetime.now()
        diff = endi - starti
        t = diff.microseconds / 1000000.0 + diff.seconds
        duration += t
        print "iteration:", i, " used time: ", t, "secs"
    end = datetime.now()
    print "duration:", duration
    diff = end - start
    print "all time:", diff.seconds
    remove(sdb)
Beispiel #25
0
def main():
    # Diffusion-imaging pipeline: load data from AWS, crop it, build a
    # gradient table, upload to SciDB, then compute a brain mask.
    # Mixes Python 2 print statements with print() calls.
    time_start = time.time()
    sdb = connect()

    print("Time passed: %.3fs" % (time.time() - time_start))
    data = load_data_from_aws()

    print "Datatype:", data.dtype

    if True:
        # Reduce data to work with for coding: keep the central half of
        # each of the first three axes.
        sh = data.shape
        data = data[int(sh[0] * .25):int(sh[0] * .75),
                    int(sh[1] * .25):int(sh[1] * .75),
                    int(sh[2] * .25):int(sh[2] * .75)]

    print("Time passed: %.3fs" % (time.time() - time_start))

    gtab = dpg.gradient_table('./bvals', './bvecs', b0_threshold=10)

    print("Time passed: %.3fs" % (time.time() - time_start))

    data_sdb = sdb.from_array(data)

    # Creating mask
    # NOTE(review): `raise ()` always fails (raising a non-exception) and
    # aborts the function right here, so the masking code below never
    # runs -- looks like a leftover debugging stop; confirm before removing.
    raise ()
    mean_b0 = data_sdb.compress(sdb.from_array(gtab.b0s_mask), axis=3)
    mean_b0 = mean_b0.mean(-1)
    _, mask = median_otsu(mean_b0.toarray(),
                          4,
                          2,
                          False,
                          vol_idx=np.where(gtab.b0s_mask),
                          dilate=1)

    print("Time passed: %.3fs" % (time.time() - time_start))
def parse_arguments(arguments):
    parser = argparse.ArgumentParser(description='Execute Myria-only test')
    parser.add_argument('patients', type=int, help='Number of patients to evaluate')
    parser.add_argument('vector_size', type=int, help='Size of input vectors for each patient')

    parser.add_argument('--url', type=str, default='http://localhost:8080', help='SciDB Shim URL')
    parser.add_argument('--bins', type=int, default=10, help='Number of histogram bins')

    parser.add_argument('--test-id', dest='test_id', type=int, default=1, help='Index of test patient for k-NN computation')
    parser.add_argument('--input-mode', dest='input_mode', type=str, default='random', choices=['determinstic', 'random'], help='Mode of automatically generated input')

    parser.add_argument('--scidb-bin', dest='scidb_bin', type=str, default='/opt/scidb/14.12/bin/scidb.py', help='Path of scidb.py')
    parser.add_argument('--scidb-name', dest='scidb_name', type=str, default='mydb', help='Name of SciDB database')

    parser.add_argument('--chunk-patients', dest='chunk_patients', type=int, default=1, help='Chunk size for patient array')
    parser.add_argument('--chunk-vectors', dest='chunk_vectors', type=int, default=None, help='Chunk size for input vectors')
    parser.add_argument('--chunk-bins', dest='chunk_bins', type=int, default=2**32, help='Chunk size for histogram bins')

    parser.add_argument('--overlap-vectors', dest='overlap_vectors', type=int, default=0, help='Array overlap for input vectors')
    parser.add_argument('--overlap-patients', dest='overlap_patients', type=int, default=0, help='Array overlap for patient array')
    parser.add_argument('--overlap-bins', dest='overlap_bins', type=int, default=0, help='Array overlap for histogram bins')

    parser.add_argument('--username', type=str, default=None, help='Username used to authenticate with SciDB Shim')
    parser.add_argument('--password', type=str, default=None, help='Password used to authenticate with SciDB Shim')

    parser.add_argument('--restart', dest='restart', action='store_true', help='Restart SciDB before testing')
    parser.add_argument('--no-restart', dest='restart', action='store_false', help='Do not restart SciDB before testing')
    parser.set_defaults(restart=True)

    arguments = parser.parse_args(arguments)
    arguments.chunk_vectors = arguments.chunk_vectors or arguments.vector_size
    arguments.iterations = int(math.log(arguments.vector_size, 2))
    arguments.sdb = scidbpy.connect(arguments.url, username=arguments.username, password=arguments.password)

    print 'Arguments: %s' % vars(arguments)
    return arguments
Beispiel #27
0
def do_matrix_op(kwargs):
    # Scaling benchmark: for each size in kwargs['nrows'], start SciDB,
    # allocate the dense operands, time one operator (TRANS/NORM/GMM/MVM/
    # TSM/ADD), and append results to a per-operator output file.
    # Python 2 (print statement, StandardError).
    op_type = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    fixed_axis = int(kwargs.get('fixedAxis'))
    nrow_scale = map(lambda x: int(x), kwargs['nrows'].split(' '))
    nproc = kwargs.get('nproc', None)

    # Separate output files for the size-scaling and CPU-scaling variants.
    if nproc is None:
        path = os.path.join('..', 'output',
                            'scidb_{}_{}.txt'.format(mattype, op_type))
    else:
        path = os.path.join('..', 'output',
                            'scidb_cpu_{}_{}.txt'.format(mattype, op_type))

    num_procs = nproc if nproc is not None else 24
    atexit.register(terminate_scidb)
    P, stdout, stderr = init_scidb(num_procs, debug=True, stub=op_type)

    cxn = scidbpy.connect()
    print cxn.iquery("list('instances')", fetch=True)
    colnames = ['rows', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    for nr in nrow_scale:
        # For GMM the varying axis is the column count; otherwise the rows.
        nrow = fixed_axis if op_type == 'GMM' else nr
        ncol = nr if op_type == 'GMM' else fixed_axis

        # Allocate operands only if not already present on the server.
        M_name = 'M{}{}'.format(nrow, ncol)
        if not M_name in dir(cxn.arrays):
            alloc_matrix(nrow, ncol, M_name, cxn)
        if op_type == 'GMM':
            if not 'N{}{}'.format(ncol, nrow) in dir(cxn.arrays):
                alloc_matrix(ncol, nrow, 'N{}{}'.format(ncol, nrow), cxn)
            N_name = 'N{}{}'.format(ncol, nrow)
            zv_name = 'ZEROS{}{}'.format(nrow, nrow)
            zeros(nrow, nrow, zv_name, cxn)
        if op_type == 'TSM':
            zv_name = 'ZEROS{}{}'.format(ncol, ncol)
            zeros(ncol, ncol, zv_name, cxn)
        if op_type == 'ADD':
            if not 'N{}{}'.format(nrow, ncol) in dir(cxn.arrays):
                alloc_matrix(nrow, ncol, 'N{}{}'.format(nrow, ncol), cxn)
            N_name = 'N{}{}'.format(nrow, ncol)
        if op_type == 'MVM':
            v_name = 'v{}'.format(fixed_axis)
            if not v_name in dir(cxn.arrays):
                alloc_vector(fixed_axis, v_name, cxn)
            zv_name = 'ZEROS{}'.format(nrow)
            zeros(nrow, 0, zv_name, cxn)

        cxn.iquery("load_library('dense_linear_algebra')")
        if op_type == 'TRANS':
            call = 'consume(transpose({}))'.format(M_name)
        elif op_type == 'NORM':
            call = 'aggregate(apply({}, val2, pow(val,2.0)), sum(val2))'.format(
                M_name)
        elif op_type == 'GMM':
            call = 'gemm({},{},{})'.format(M_name, N_name, zv_name)
        elif op_type == 'MVM':
            call = 'gemm({},{},{})'.format(M_name, v_name, zv_name)
        elif op_type == 'TSM':
            call = 'gemm({},{},{}, transa:true)'.format(
                M_name, M_name, zv_name)
        elif op_type == 'ADD':
            call = 'consume(apply(join({0},{1}), sum, {0}.val+{1}.val))'.format(
                M_name, N_name)
        else:
            raise StandardError('Invalid operator type')

        run_times.loc[:, 'rows'] = nr if nproc is None else nproc
        # NOTE(review): DataFrame.ix is deprecated and removed in modern
        # pandas; this code relies on an old pandas version.
        run_times.ix[:, 1:] = time_stmt(call, cxn)
        write_header = False if (os.path.exists(path)) else True
        run_times.to_csv(path, index=False, header=write_header, mode='a')
        # NOTE(review): the SciDB process and its log handles are shut down
        # on the FIRST loop iteration, yet the loop keeps issuing queries --
        # these three lines likely belong after the loop; confirm.
        P.terminate()
        stdout.close()
        stderr.close()
Beispiel #28
0
def main(kwargs):
    mattype = kwargs['mattype']
    op_type = kwargs['opType']
    nrow = int(kwargs['nrow'])
    ncol = int(kwargs['ncol'])
    nproc = int(kwargs['nproc'])

    path = '../output/scidb_{}.txt'.format(op_type)
    colnames = ['nproc', 'time1', 'time2', 'time3', 'time4', 'time5']
    run_times = pd.DataFrame(np.zeros((1, len(colnames))))
    run_times.columns = colnames

    atexit.register(terminate_scidb)
    P, stdout, stderr = init_scidb(nproc, debug=True)
    cxn = scidbpy.connect()
    cxn.iquery("load_library('dense_linear_algebra')")
    print cxn.iquery("list('instances')", fetch=True)

    env = {
        'cxn': cxn,
        'reg': reg,
        'logit': logit,
        'gnmf': gnmf,
        'robust_se': robust_se
    }

    alloc_matrix(nrow, ncol, 'X{}{}'.format(nrow, ncol), cxn, overwrite=False)
    alloc_matrix(nrow,
                 0,
                 'y{}{}'.format(nrow, ncol),
                 cxn,
                 overwrite=False,
                 binary=True)
    if op_type == 'reg':
        call = "reg('X{0}{1}', 'y{0}{1}', cxn)".format(nrow, ncol)
    elif op_type == 'logit':
        alloc_matrix(nrow,
                     0,
                     'y{}{}b'.format(nrow, ncol),
                     cxn,
                     overwrite=False,
                     binary=True)
        call = "logit('X{0}{1}', 'y{0}{1}b', cxn)".format(nrow, ncol)
    elif op_type == 'gnmf':
        call = "gnmf('X{0}{1}', 10, cxn)".format(nrow, ncol)
    elif op_type == 'robust':
        alloc_matrix(nrow,
                     0,
                     'r2{}{}'.format(nrow, ncol),
                     cxn,
                     overwrite=True,
                     val_name='residuals')
        call = "robust_se('X{0}{1}', 'r2{0}{1}', cxn)".format(nrow, ncol)

    run_times.loc[:, 'nproc'] = nproc
    run_times.loc[:, 1:] = utils.timeOp(call, env)
    write_header = not os.path.exists(path)
    run_times.to_csv(path, index=False, header=write_header, mode='a')

    P.terminate()
    stdout.close()
    stderr.close()
Beispiel #29
0
 def connect(self):
     """Open a SciDB connection to the configured address and cache it."""
     target = "{address}".format(address=self.address)
     self.connection = connect(target)
Beispiel #30
0
# Round-trip smoke test: build a one-cell SciDB array for every supported
# dtype with a boundary value, then fetch it back in plaintext, binary, and
# NumPy form.  Python 2 (print statements).
import numpy as np
from scidbpy import connect
sdb = connect('http://localhost:48080') # connect to the database
from scidbpy.parse import _fmt

singles = {}

# One representative extreme/typical value per dtype, aligned by position.
dtypes = 'int8 int16 int32 int64 uint8 uint16 uint32 uint64 float double datetime datetimetz bool string char'.split()
vals = [-128, -2**15+2, -2**31+2, -2**63+2, 2**8-1, 2**16-1, 2**32-1, 2**63-1, 1.23, 1e100, "'01/01/2001 12:23'", "'01/01/2001 12:23:01 +05:00'", 'true', "'01/01/2001 12:23:01 +05:00'".replace("'01/01/2001 12:23:01 +05:00'", "'test'"), "'a'"]

#Create: build a single-attribute array holding `val` as `dtype`.
def make(dtype, val):
    return sdb.afl.build('<x:%s>[i=0:1,10,0]' % dtype, val).eval()

#Retrieve: print every representation of each dtype/value pair.
def run(make):
    for dtype, val in zip(dtypes, vals):
        a = make(dtype, val)
        print a
        print 'Plaintext:', repr(sdb._scan_array(a.name))
        print 'Binary :', repr(sdb._scan_array(a.name, fmt=_fmt(a)))
        print 'NumPy :', a.toarray()
        print '-----------'

        # Delete the temporary array.
        a.reap()


run(make)
Beispiel #31
0
from __future__ import print_function

import numpy as np

np.random.seed(42)

from scidbpy import connect

sdb = connect()

X = np.arndom.random((5, 4))
Xsdb = sdb.from_array(X)

from scipy.sparse import coo_matrix

X = np.random.random((10, 10))
X[X < 0.9] = 0  # make array sparse
Xcoo = coo_matrix(X)
Xsdb = sdb.from_sparse(Xcoo)

# Create a 10x10 array of double-precision zeros:
A = sdb.zeros((10, 10))
# Create a 10x10 array of 64-bit signed integer ones:
A = sdb.ones((10, 10), dtype='int64')

# Create a 10x10 array of numbers between -1 and 2 (inclusive)
#    sampled from a uniform random distribution.
A = sdb.random((10, 10), lower=-1, upper=2)

# Create a 10x10 array of uniform random integers between 0 and 10
#  (inclusive of 0, non-inclusive of 10)
Beispiel #32
0
def db():
    """Open and return a new SciDB connection."""
    connection = scidbpy.connect()
    return connection
Beispiel #33
0
	def getConnection(self):
		# BUG FIX: `self` was missing from the signature, so every call
		# raised NameError on self.url -- instance methods must accept self.
		"""Return a SciDB connection built from this object's credentials."""
		return connect(self.url, self.user, self.password)
Beispiel #34
0
> python 2-pack-func.py
[(0, 0, 0, (255, 1.))]

Setup:

> pip install scidb-py
"""

import dill
import numpy
import scidbpy
import scidbstrm
import sys


# Module-wide SciDB connection used by the streaming example below.
db = scidbpy.connect()


def get_first(df):
    """Return a new frame containing only the first row of *df*."""
    return df.iloc[:1]


# Serialize (pack) the Python function and upload it into a one-cell
# binary-typed SciDB array so that server-side stream() workers can
# unpack and apply it.
ar_fun = db.input(upload_data=scidbstrm.pack_func(get_first),
                  upload_schema=scidbpy.Schema.fromstring(
                      '<x:binary not null>[i]')).store()


que = db.stream(
    'build(<x:double>[i=1:5], i)',
    """'python{major} -uc "
Beispiel #35
0
# Tour of array-creation paths in SciDB-Py: upload a dense NumPy array,
# upload a SciPy sparse matrix, and build zeros/ones/random arrays
# server-side.
from __future__ import print_function

import numpy as np

np.random.seed(42)

from scidbpy import connect

sdb = connect()

# Upload a dense NumPy array into SciDB.
X = np.random.random((5, 4))
Xsdb = sdb.from_array(X)


from scipy.sparse import coo_matrix

X = np.random.random((10, 10))
X[X < 0.9] = 0  # make array sparse
Xcoo = coo_matrix(X)
Xsdb = sdb.from_sparse(Xcoo)

# Create a 10x10 array of double-precision zeros:
A = sdb.zeros((10, 10))
# Create a 10x10 array of 64-bit signed integer ones:
A = sdb.ones((10, 10), dtype="int64")

# Create a 10x10 array of numbers between -1 and 2 (inclusive)
#    sampled from a uniform random distribution.
A = sdb.random((10, 10), lower=-1, upper=2)

# Create a 10x10 array of uniform random integers between 0 and 10
Beispiel #36
0
# Load one LSST exposure CCD with astropy and verify that SciDB computes
# the same mean as NumPy.  Python 2 (print statements).
data_path = '/home/dongfang/download/lsst_data/'
visits = ["0288935", "0288976"]

visit = visits[0]
ccd_id = '1'  # integer between 1 and 60

from astropy.io import fits
hdulist = fits.open(data_path + visit + '/instcal' + visit + '.' + ccd_id +
                    '.fits')
print hdulist.info()

import numpy as np
a = np.array(hdulist[1].data)
print "a.shape =", a.shape

print "Numpy: a.mean =", a.mean()

import scidbpy as sp
sdb = sp.connect('http://localhost:8080')

# Upload as float32 -- presumably to halve the transfer size; confirm.
data_sdb = sdb.from_array(a.astype(np.float32))

res = data_sdb.mean()
print "SciDB: mean =", res[0]

print "Done!"
Beispiel #37
0
    # NOTE(review): fragment of a benchmark driver; the enclosing function
    # header (and imports of scidbpy/timeit plus the SocketBinary/convert
    # helpers) are outside this view. Python 2 syntax (print, xrange).
    coordinator = 'localhost'
    # Exponents for the data sizes: taken from argv when given, else (4, 5).
    exponents = (int(argv[1]), int(argv[1]) + 1) if len(argv) > 1 else (4, 5)
    sizes = [1000000 * 2**x for x in xrange(*exponents)]
    strategies = [
        SocketBinary
    ]  #SocketCSV] #SocketBinary, SocketCSV] #Binary #Binary #SocketCSV #FIFO #SocketBinary

    for size in sizes:
        for strategy in strategies:
            print 'Size: ' + str(size)
            print 'Strategy: ' + str(strategy)

            # Build a random one-dimensional SciDB array of `size` elements
            # (chunk size capped at 2**16).
            array = scidbpy.connect(
                'http://{}:8080'.format(coordinator)).random(size,
                                                             chunk_size=min(
                                                                 size / 8,
                                                                 2**16),
                                                             persistent=False)
            print array.name

            query = ''

            def test():
                # Runs one SciDB->Myria conversion; result kept in `query`.
                global query
                query = convert(array, strategy, coordinator)

            print 'Begin SciDB->Myria'
            print timeit.timeit('test()',
                                setup='from __main__ import test',
                                number=1)
# Benchmark fragment comparing PhilDB and SciDB write/read performance.
# NOTE(review): id_list, write_phildb/write_scidb, read_phildb/read_scidb
# and the PhilDB classes are defined outside this view.
file_list = ["{0}_scidb.csv".format(x) for x in id_list]

phildb_du_results = []
scidb_du_results = []

phildb_du_results.append('0\\thrs_phildb\\n')
scidb_du_results.append(subprocess.check_output(['du', '-s', 'scidbdata']))

write_phildb(file_list, 'phildb_initial_writes.txt', first_run = True)
# Start PhilDB server now that a phildb instance exists
subprocess.Popen(['phildb-server', '--port=8989', 'hrs_phildb'])

write_scidb(file_list, 'scidb_initial_writes.txt', first_run = True)

# Connect Python APIs
sdb = scidbpy.connect('http://localhost:8080', 'scidb', 'paradigm4')
pdb = PhilDB('hrs_phildb')
pdb_client = PhilDBClient('http://localhost:8989')

# Read via the three clients, interleaving PhilDB with SciDB to try to
# minimise the effects of caching
read_phildb(id_list, pdb, 'phildb_initial_reads.txt')
read_scidb(id_list, sdb, 'scidb_initial_reads.txt')
read_phildb(id_list, pdb_client, 'phildb_client_initial_reads.txt')

# Do updates
for i in range(1,5):
    # Generate file list for first update
    file_list = ["{0}_update{1}.csv".format(x, i) for x in id_list]

    mod_datetime = datetime.now()
def setup(mb, runs):
    """Create the three benchmark arrays in SciDB and print size statistics.

    Parameters
    ----------
    mb : number
        Target data size in megabytes; converted to a record count below.
    runs : int
        Number of benchmark runs (only echoed in the report header).

    Returns
    -------
    The scidbpy connection used to build the arrays.

    Relies on module-level globals defined elsewhere in this file:
    ar_names, ar_schemas, buffer_size, chunk_size.
    """
    # Bug fix: under Python 3 true division this expression produced a
    # float, which yields an invalid SciDB schema ('i=1:32768.0:...') and
    # breaks the '{:7d}' format below. int() restores integer semantics.
    cnt = int(mb * 1024 * 1024 / 8 / 4)
    db = scidbpy.connect()

    print("""
Setup
===
Number of runs:    {:7d}
Target size:       {:10.2f} MB
Buffer size:       {:10.2f} MB
Chunk size:        {:7d}
Number of records: {:7d}""".format(runs, mb, buffer_size / 1024. / 1024,
                                   chunk_size, cnt))

    # Fixed-size schema: four int64 attributes filled with random values.
    ar_name = ar_names[0]
    db.build('<z:int64 not null>[i=1:{}:0:{}]'.format(cnt, chunk_size),
             'random()').apply('y', 'int64(random())', 'x', 'int64(random())',
                               'w', 'int64(random())').store(ar_name)
    ar_schemas[0] = db.arrays[ar_name].schema()
    scidb_bytes_fix = db.summarize(ar_name).project('bytes')['bytes'][0]
    chunks = db.summarize(ar_name,
                          'by_attribute:true').project('chunks').fetch(
                              atts_only=True, as_dataframe=False)[0][0][1]
    mem_bytes_fix = 0
    # mem_bytes_fix = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name, "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_fix = os.path.getsize('/dev/shm/' + ar_name)

    print("""Number of chunks:  {:7d}

Fix Size Schema (int64 only)
---
SciDB size:        {:7.2f} MB
In-memory size:    {:7.2f} MB
File size:         {:7.2f} MB""".format(chunks, scidb_bytes_fix / 1024. / 1024,
                                        mem_bytes_fix / 1024. / 1024,
                                        file_bytes_fix / 1024. / 1024))

    # Variable-size schema: same as above plus an empty-string attribute.
    ar_name = ar_names[1]
    db.build('<z:int64 not null>[i=1:{}:0:{}]'.format(cnt, chunk_size),
             'random()').apply('y', 'int64(random())', 'x', 'int64(random())',
                               'w', 'int64(random())', 'v',
                               "''").store(ar_name)
    ar_schemas[1] = db.arrays[ar_name].schema()
    scidb_bytes_var = db.summarize(ar_name).project('bytes')['bytes'][0]
    mem_bytes_var = 0
    # mem_bytes_var = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name, "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_var = os.path.getsize('/dev/shm/' + ar_name)

    print("""
Variable Size Schema (int64 and string)
---
SciDB size:        {:7.2f} MB
In-memory size:    {:7.2f} MB
File size:         {:7.2f} MB""".format(scidb_bytes_var / 1024. / 1024,
                                        mem_bytes_var / 1024. / 1024,
                                        file_bytes_var / 1024. / 1024))

    # Multi-dimensional schema: 3-D array of doubles.
    ar_name = ar_names[2]
    db.create_array(
        ar_name, '<x:double>[i=1:100:0:1; j=1:1000:0:1000; k=1:1000:0:1000]')
    db.build(ar_name, 'random()').store(ar_name)
    ar_schemas[2] = db.arrays[ar_name].schema()
    scidb_bytes_var = db.summarize(ar_name).project('bytes')['bytes'][0]
    mem_bytes_var = 0
    # mem_bytes_var = db.scan(ar_name).fetch(atts_only=True,
    #                                        as_dataframe=False).nbytes
    db.aio_save(ar_name, "'/dev/shm/{}'".format(ar_name),
                "'format=arrow'").fetch()
    file_bytes_var = os.path.getsize('/dev/shm/' + ar_name)

    print("""
Multi-Dimensional Schema (3-dimensional, double)
---
SciDB size:        {:7.2f} MB
In-memory size:    {:7.2f} MB
File size:         {:7.2f} MB""".format(scidb_bytes_var / 1024. / 1024,
                                        mem_bytes_var / 1024. / 1024,
                                        file_bytes_var / 1024. / 1024))

    return db
Beispiel #40
0
# Demo: compute the column-wise correlation matrix between two data sets
# entirely inside SciDB. Requires a running SciDB instance.
import numpy as np
np.random.seed(42)

#  connect to the database
from scidbpy import connect
sdb = connect()

# Two small arrays, each with 1000 rows, and with 5 and 3 columns
x = np.random.random((1000, 5))
y = np.column_stack((x[:, 0] * 2, x[:, 1] + x[:, 0] / 2., x[:, 4]))

# Transfer to the database. All future computation happens there.
X = sdb.from_array(x)
Y = sdb.from_array(y)

# Subtract the column means from X using broadcasting:
XC = X - X.mean(0)
# Similarly subtract the column means from Y:
YC = Y - Y.mean(0)

# Cross-covariance matrix (5x3) between the columns of X and of Y.
COV = sdb.dot(XC.T, YC) / (X.shape[0] - 1)

# Column vector with column standard deviations of X matrix:
xsd = X.std(0).reshape((5, 1))
# Row vector with column standard deviations of Y matrix:
ysd = Y.std(0).reshape((1, 3))
# Their outer product:
outersd = sdb.dot(xsd, ysd)
# Normalise the covariances by the standard deviations -> correlations.
COR = COV / outersd
print(COR.toarray())
def parse_arguments(arguments):
    """Parse command-line arguments and attach a SciDB connection.

    Returns the parsed namespace with derived fields added:
    chunk_vectors (defaults to vector_size), iterations (log2 of
    vector_size) and sdb (an authenticated scidbpy connection).
    """
    parser = argparse.ArgumentParser(description='Execute Myria-only test')
    parser.add_argument('patients',
                        type=int,
                        help='Number of patients to evaluate')
    parser.add_argument('vector_size',
                        type=int,
                        help='Size of input vectors for each patient')

    parser.add_argument('--url',
                        type=str,
                        default='http://localhost:8080',
                        help='SciDB Shim URL')
    parser.add_argument('--bins',
                        type=int,
                        default=10,
                        help='Number of histogram bins')

    parser.add_argument('--test-id',
                        dest='test_id',
                        type=int,
                        default=1,
                        help='Index of test patient for k-NN computation')
    # Bug fix: choices previously contained the typo 'determinstic', so the
    # deterministic branch in build_input() (which compares input_mode
    # against the correctly spelled "deterministic") was unreachable.
    parser.add_argument('--input-mode',
                        dest='input_mode',
                        type=str,
                        default='random',
                        choices=['deterministic', 'random'],
                        help='Mode of automatically generated input')

    parser.add_argument('--scidb-bin',
                        dest='scidb_bin',
                        type=str,
                        default='/opt/scidb/14.12/bin/scidb.py',
                        help='Path of scidb.py')
    parser.add_argument('--scidb-name',
                        dest='scidb_name',
                        type=str,
                        default='mydb',
                        help='Name of SciDB database')

    parser.add_argument('--chunk-patients',
                        dest='chunk_patients',
                        type=int,
                        default=1,
                        help='Chunk size for patient array')
    parser.add_argument('--chunk-vectors',
                        dest='chunk_vectors',
                        type=int,
                        default=None,
                        help='Chunk size for input vectors')
    parser.add_argument('--chunk-bins',
                        dest='chunk_bins',
                        type=int,
                        default=2**32,
                        help='Chunk size for histogram bins')

    parser.add_argument('--overlap-vectors',
                        dest='overlap_vectors',
                        type=int,
                        default=0,
                        help='Array overlap for input vectors')
    parser.add_argument('--overlap-patients',
                        dest='overlap_patients',
                        type=int,
                        default=0,
                        help='Array overlap for patient array')
    parser.add_argument('--overlap-bins',
                        dest='overlap_bins',
                        type=int,
                        default=0,
                        help='Array overlap for histogram bins')

    parser.add_argument('--username',
                        type=str,
                        default=None,
                        help='Username used to authenticate with SciDB Shim')
    parser.add_argument('--password',
                        type=str,
                        default=None,
                        help='Password used to authenticate with SciDB Shim')

    parser.add_argument('--restart',
                        dest='restart',
                        action='store_true',
                        help='Restart SciDB before testing')
    parser.add_argument('--no-restart',
                        dest='restart',
                        action='store_false',
                        help='Do not restart SciDB before testing')
    parser.set_defaults(restart=True)

    arguments = parser.parse_args(arguments)
    arguments.chunk_vectors = arguments.chunk_vectors or arguments.vector_size
    arguments.iterations = int(math.log(arguments.vector_size, 2))
    arguments.sdb = scidbpy.connect(arguments.url,
                                    username=arguments.username,
                                    password=arguments.password)

    # Parenthesized so this line is valid under both Python 2 and Python 3.
    print('Arguments: %s' % vars(arguments))
    return arguments
Beispiel #42
0
        print(ondaName + " no contains Signal II")
    return


def fillReadedWaves(sdb):
    """Return SciDB array names plus wave names already recorded on disk.

    Combines dir(sdb.arrays) with the (newline-stripped) wave names listed
    in Jupyter/readedWaves.txt, so already-downloaded waves can be skipped.

    Fixes: the file handle was never closed (leak) and the variable name
    shadowed the Python 2 builtin `file`; a `with` block handles both.
    """
    arrays = dir(sdb.arrays)
    with open("Jupyter/readedWaves.txt", "r") as handle:
        arrays.extend([w.replace('\n', '') for w in handle.readlines()])
    return arrays


# URL listing all matched MIMIC-III waveform records on PhysioNet.
target_url = "https://physionet.org/physiobank/database/mimic3wdb/matched/RECORDS-waveforms"  # url of the waveforms of physionet
data = urllib.request.urlopen(
    target_url)  # it's a file like object and works just like a file

sdb = connect('http://localhost:8080')  #the url of the scidb service
sdbarrays = fillReadedWaves(sdb)
for line in data:  # files are iterable
    gc.collect()
    # NOTE(review): the log file is reopened on every iteration but only
    # closed inside the `if` branch below -- handles leak for waves that
    # were already seen. Consider a single `with` block around the loop.
    file = open("Jupyter/readedWaves3.txt", "a")
    # Each record line looks like "<dir>/<subdir>/<wave>"; strip the bytes
    # repr artifacts (b'...' and trailing \n) before splitting.
    carpeta, subCarpeta, onda = str(line).replace('b\'', '').replace(
        '\'', '').replace('\\n', '').split("/")
    carpeta = carpeta + "/" + subCarpeta
    ondaName = onda.replace("-", "_")
    print(ondaName)
    if ondaName not in sdbarrays:
        file.write(ondaName + "\n")
        downloadWFDB(onda, carpeta, sdb, ondaName)
        file.close()
Beispiel #43
0
import sys
import os
import numpy
import random

# Default output path for the result image (overridable via argv[2] below).
OUTPUT_FILE = 'output.jpg'

#Helper function to get file paths of each image
def absoluteFilePaths(directory):
    """Yield the absolute path of every file under *directory*, recursively."""
    for root, _, names in os.walk(directory):
        for name in names:
            yield os.path.abspath(os.path.join(root, name))

#Connect to localhost SciDB instance through shim layer
sdb = connect('http://localhost:8080/')

# NOTE(review): this prints an error but does not exit, so sys.argv[1]
# below will raise IndexError when no input directory is given -- confirm
# whether a sys.exit(1) was intended here.
if len(sys.argv)<2:
		print "Error: Enter input directory..."
if len(sys.argv)==3:
	OUTPUT_FILE=sys.argv[2]

#Enumerate input directory:
image_files = absoluteFilePaths(sys.argv[1])
# NOTE(review): list() exhausts the generator; image_files yields nothing
# after this count -- re-create it before iterating again.
num_files = len(list(image_files))
print str(num_files) + " files in directory"

#create random weights
if "image_volume" in sdb.list_arrays():
     sdb.query("remove(image_volume)")
def main():
    """Fetch two PheWAS series from SciDB and save them into PheWAS.npz."""
    database = connect('http://localhost:8080')
    gpr151 = get_data('5-145895394', database)
    pde3b = get_data('11-14865399', database)
    np.savez('PheWAS.npz', GPR151=gpr151, PDE3B=pde3b)