def get_mp(data, sublen_samp, gpus=None):
    """Run a pyscamp self-join on ``data`` and log how long it took.

    Args:
        data: Time series handed unchanged to ``pyscamp.selfjoin``.
        sublen_samp: Second positional argument of ``pyscamp.selfjoin``
            (the subsequence/window length, in samples).
        gpus: Optional sequence of GPU device ids to use. When omitted the
            historical default ``[0, 1, 2]`` is used, so existing callers
            see no change.

    Returns:
        The ``(mp, mpind)`` pair produced by ``pyscamp.selfjoin`` with
        ``pearson=True`` and ``precision="double"``.
    """
    if gpus is None:
        gpus = [0, 1, 2]

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system wall clock is adjusted mid-run.
    proc_start = time.perf_counter()
    mp, mpind = pyscamp.selfjoin(data, sublen_samp, gpus=list(gpus), pearson=True, precision="double")
    proc_end = time.perf_counter()
    Logger.info("Finished processing component in %f seconds" % (proc_end - proc_start))
    return mp, mpind
# Example no. 2
0
def run_pyscamp(inputs, a, b, window, max_matches, thresh, ptype, rrows,
                rcols):
    """Dispatch to the pyscamp join that corresponds to ``ptype``.

    The self-join variant is called on ``inputs[a]`` when ``a == b``;
    otherwise the AB-join variant is called on ``inputs[a]`` and
    ``inputs[b]``.

    Keyword arguments forwarded to pyscamp:
        * ``pearson=True`` is always passed.
        * ``threshold``, ``mheight`` and ``mwidth`` are passed only when
          ``thresh``, ``rrows`` and ``rcols`` respectively are truthy.
        * ``gpus=[]`` is passed when ``'--no_gpu'`` is found in the
          module-level ``extra_opts``.

    Supported ``ptype`` values are ``"1NN_INDEX"``, ``"SUM_THRESH"`` and
    ``"ALL_NEIGHBORS"``. For ``"ALL_NEIGHBORS"`` a falsy ``max_matches``
    is replaced by 5.

    Returns:
        A 4-tuple ``(columns, columns_index, rows, rows_index)``. Only
        ``"1NN_INDEX"`` fills ``columns_index``; the two row entries are
        always ``None``. Every non-``None`` entry has had ``.squeeze()``
        applied.

    Raises:
        ValueError: If ``ptype`` is not one of the supported profile types.
    """

    def _squeezed(result):
        # Leave missing outputs as None; squeeze everything else.
        return None if result is None else result.squeeze()

    join_kwargs = {'pearson': True}
    optional_kwargs = (
        ('threshold', thresh),
        ('mheight', rrows),
        ('mwidth', rcols),
    )
    for keyword, value in optional_kwargs:
        if value:
            join_kwargs[keyword] = value
    if '--no_gpu' in extra_opts:
        join_kwargs['gpus'] = []

    neighbors = max_matches if max_matches else 5

    columns = None
    columns_index = None

    if ptype == "1NN_INDEX":
        # Distance/index pair for the single nearest neighbor.
        pair = (mp.selfjoin(inputs[a], window, **join_kwargs)
                if a == b else
                mp.abjoin(inputs[a], inputs[b], window, **join_kwargs))
        columns, columns_index = pair
    elif ptype == "SUM_THRESH":
        columns = (mp.selfjoin_sum(inputs[a], window, **join_kwargs)
                   if a == b else
                   mp.abjoin_sum(inputs[a], inputs[b], window, **join_kwargs))
    elif ptype == "ALL_NEIGHBORS":
        columns = (mp.selfjoin_knn(inputs[a], window, neighbors,
                                   **join_kwargs)
                   if a == b else
                   mp.abjoin_knn(inputs[a], inputs[b], window, neighbors,
                                 **join_kwargs))
    else:
        raise ValueError(
            'pyscamp does not support profile type {}'.format(ptype))

    # Row-wise outputs are never produced by any branch above, so they are
    # reported as None to keep the 4-tuple return shape.
    return _squeezed(columns), _squeezed(columns_index), None, None
# Example no. 3
0
    return np.allclose(valid, check, equal_nan=True)


# Script section: compare pyscamp's 1NN-index joins against reference values
# derived from distance_matrix()/reduce_1nn_index().

# Set to True as soon as any comparison below does not match.
failed = False
# NOTE(review): these two empty lists are overwritten a few lines below
# before being read.
arr = []
arr2 = []
# NOTE(review): num/num2 are not used in the visible part of the script;
# they may be consumed further down.
num = 0
num2 = 0

# Two independent random 1-D inputs of 8000 samples each.
arr = np.random.random(size=(8000, ))
arr2 = np.random.random(size=(8000, ))

# Reference data for the self-join (second series is None) and the AB-join,
# both with window length 1024.
# NOTE(review): presumably full pairwise distance matrices -- confirm
# against distance_matrix().
dm_self = distance_matrix(arr, None, 1024)
dm_ab = distance_matrix(arr, arr2, 1024)

# --- Self join ---
dist, index = mp.selfjoin(arr, 1024, pearson=True)
# Reshape both outputs to column vectors of shape (n, 1) before comparing.
dist = dist.reshape((len(dist), 1))
index = index.reshape((len(index), 1))
# Expected distance/index pair derived from the reference data.
vdist, vindex = reduce_1nn_index(dm_self)

if compare_vectors(vdist, dist) and compare_index(vindex, index):
    print("1NN INDEX Self join pass")
else:
    failed = True
    print("1NN INDEX Self join fail")

# --- AB join --- (same steps as above, using arr vs. arr2)
dist, index = mp.abjoin(arr, arr2, 1024, pearson=True)
dist = dist.reshape((len(dist), 1))
index = index.reshape((len(index), 1))
vdist, vindex = reduce_1nn_index(dm_ab)