Example #1
0
def delete_mixtures(params, nb_cpu, nb_gpu, use_gpu):
    """Remove templates that are well explained by the sum of two others.

    For every template ``k`` handled by this MPI rank, all pairs ``(i, j)``
    of templates whose best electrode lies among the electrodes returned by
    ``edges`` for ``k``'s best electrode are tested.  A 2x2 linear system
    built from the stored overlaps yields amplitudes ``(a1, a2)``; ``k`` is
    flagged as a mixture when

    * both amplitudes fall inside ``limits[i]`` / ``limits[j]``,
    * the correlation between ``t_k`` and ``a1 * t_i + a2 * t_j`` exceeds
      the ``clustering/cc_mixtures`` threshold, and
    * ``t_i`` and ``t_j`` are correlated with each other below that
      threshold.

    Flagged templates are gathered over all ranks and, on rank 0, removed
    through ``slice_templates`` / ``slice_clusters``.  The temporary
    ``.overlap-mixtures.hdf5`` file is deleted by rank 0 at the end.

    The function relies on the module-level MPI communicator ``comm``.

    Parameters
    ----------
    params
        Configuration object; read through ``get`` / ``getint`` /
        ``getfloat`` and the ``nb_channels`` attribute.
    nb_cpu, nb_gpu, use_gpu
        Forwarded unchanged to ``get_overlaps`` and ``load_data_memshared``.

    Returns
    -------
    list
        ``[nb_temp, nb_removed]``: the number of templates considered and
        the number of templates flagged for removal.
    """
    N_total = params.nb_channels
    N_t = params.getint('detection', 'N_t')
    cc_merge = params.getfloat('clustering', 'cc_mixtures')
    mixtures = []

    filename = params.get('data', 'file_out_suff') + '.overlap-mixtures.hdf5'
    norm_templates = load_data(params, 'norm-templates')
    best_elec = load_data(params, 'electrodes')
    limits = load_data(params, 'limits')
    nodes, edges = get_nodes_and_edges(params)
    # Map a channel id onto its rank within ``nodes``.
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)

    # Only called for its side effect of (re)building the '-mixtures'
    # overlap file; the returned handle is closed right away and the
    # overlaps are re-read below.
    overlap = get_overlaps(params,
                           extension='-mixtures',
                           erase=True,
                           normalize=False,
                           maxoverlap=False,
                           verbose=False,
                           half=True,
                           use_gpu=use_gpu,
                           nb_cpu=nb_cpu,
                           nb_gpu=nb_gpu)
    overlap.close()

    SHARED_MEMORY = get_shared_memory_flag(params)

    if SHARED_MEMORY:
        c_overs = load_data_memshared(params,
                                      'overlaps',
                                      extension='-mixtures',
                                      use_gpu=use_gpu,
                                      nb_cpu=nb_cpu,
                                      nb_gpu=nb_gpu)
        templates = load_data_memshared(params, 'templates', normalize=False)
    else:
        c_overs = load_data(params, 'overlaps', extension='-mixtures')
        templates = load_data(params, 'templates')

    _, N_tm = templates.shape
    nb_temp = int(N_tm // 2)

    overlap_0 = numpy.zeros(nb_temp, dtype=numpy.float32)
    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.int32)

    # Iterate over *every* template so that the self-overlap of the last one
    # is filled in too (it used to be left at zero).
    for i in xrange(nb_temp):
        data = c_overs[i].toarray()
        if i + 1 < nb_temp:
            # Lag index of the maximal overlap between i and each later
            # template; the matrix is filled symmetrically.
            distances[i, i + 1:] = numpy.argmax(data[i + 1:, :], 1)
            distances[i + 1:, i] = distances[i, i + 1:]
        # NOTE(review): column N_t is taken as the self-overlap reference;
        # confirm it is the intended (zero-lag) column of the overlap array.
        overlap_0[i] = data[i, N_t]

    # Templates by decreasing norm, already split round-robin across ranks.
    sorted_temp = numpy.argsort(
        norm_templates[:nb_temp])[::-1][comm.rank::comm.size]
    M = numpy.zeros((2, 2), dtype=numpy.float32)
    V = numpy.zeros((2, 1), dtype=numpy.float32)

    # ``sorted_temp`` is already this rank's share: walk all of it.  Striding
    # a second time by rank/size would skip most templates when size > 1.
    to_explore = xrange(len(sorted_temp))
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(to_explore)

    for idx in to_explore:

        k = sorted_temp[idx]
        electrodes = numpy.take(inv_nodes, edges[nodes[best_elec[k]]])
        overlap_k = c_overs[k]
        is_in_area = numpy.in1d(best_elec, electrodes)
        all_idx = numpy.arange(len(best_elec))[is_in_area]
        been_found = False
        t_k = None  # dense waveform of k, materialised lazily

        # Enumerate unordered pairs by *position* in ``all_idx``; slicing
        # with the template id itself would drop valid pairs.
        for pos, i in enumerate(all_idx):
            if been_found:
                break
            t_i = None  # dense waveform of i, materialised lazily
            overlap_i = c_overs[i]
            M[0, 0] = overlap_0[i]
            V[0, 0] = overlap_k[i, distances[k, i]]
            for j in all_idx[pos + 1:]:
                M[1, 1] = overlap_0[j]
                # NOTE(review): this lag difference can be negative;
                # confirm the indexing is meant to wrap around.
                M[1, 0] = overlap_i[j, distances[k, i] - distances[k, j]]
                M[0, 1] = M[1, 0]
                V[1, 0] = overlap_k[j, distances[k, j]]
                try:
                    [a1, a2] = numpy.dot(scipy.linalg.inv(M), V)
                except (scipy.linalg.LinAlgError, ValueError):
                    # Singular or non-finite system: no valid amplitudes.
                    [a1, a2] = [0, 0]
                a1_lim = limits[i]
                a2_lim = limits[j]
                is_a1 = (a1_lim[0] <= a1) and (a1 <= a1_lim[1])
                is_a2 = (a2_lim[0] <= a2) and (a2 <= a2_lim[1])
                if not (is_a1 and is_a2):
                    continue
                if t_k is None:
                    t_k = templates[:, k].toarray().ravel()
                if t_i is None:
                    t_i = templates[:, i].toarray().ravel()
                t_j = templates[:, j].toarray().ravel()
                new_template = (a1 * t_i + a2 * t_j)
                similarity = numpy.corrcoef(t_k, new_template)[0, 1]
                local_overlap = numpy.corrcoef(t_i, t_j)[0, 1]
                if similarity > cc_merge and local_overlap < cc_merge:
                    # Each k is visited once per rank, so no duplicate check
                    # is needed (and numpy.unique is applied below anyway).
                    mixtures.append(k)
                    been_found = True
                    break
    sys.stderr.flush()
    to_remove = numpy.unique(numpy.array(mixtures, dtype=numpy.int32))
    to_remove = all_gather_array(to_remove, comm, 0, dtype='int32')

    if len(to_remove) > 0 and comm.rank == 0:
        result = load_data(params, 'clusters')
        slice_templates(params, to_remove)
        slice_clusters(params, result, to_remove=to_remove)

    comm.Barrier()

    del c_overs

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_remove)]
Example #2
0
def delete_mixtures(comm, params, nb_cpu, nb_gpu, use_gpu):
    """Remove templates that are well explained by the sum of two others.

    For every template ``k`` handled by this MPI rank, all pairs ``(i, j)``
    of templates whose best electrode lies among the electrodes returned by
    ``edges`` for ``k``'s best electrode are tested.  A 2x2 linear system
    built from the stored overlaps yields amplitudes ``(a1, a2)``; ``k`` is
    flagged as a mixture when both amplitudes fall inside ``limits[i]`` /
    ``limits[j]`` and the correlation between template ``k`` and
    ``a1 * t_i + a2 * t_j`` exceeds the ``clustering/cc_merge`` threshold.

    Flagged templates are gathered over all ranks and removed through
    ``slice_templates`` / ``slice_clusters``.  The temporary
    ``.overlap-mixtures.hdf5`` file is deleted by rank 0 at the end.

    Parameters
    ----------
    comm
        MPI communicator; ``rank`` and ``size`` are used to split the work.
    params
        Configuration object; read through ``get`` / ``getint`` /
        ``getfloat``.
    nb_cpu, nb_gpu, use_gpu
        Forwarded unchanged to ``get_overlaps``.

    Returns
    -------
    list
        ``[nb_temp, nb_removed]``: the number of templates considered and
        the number of templates flagged for removal.
    """
    N_t = params.getint('data', 'N_t')
    cc_merge = params.getfloat('clustering', 'cc_merge')
    mixtures = []

    overlap = get_overlaps(comm, params, extension='-mixtures', erase=True,
                           normalize=False, maxoverlap=False, verbose=False,
                           half=True, use_gpu=use_gpu, nb_cpu=nb_cpu,
                           nb_gpu=nb_gpu)
    filename = params.get('data', 'file_out_suff') + '.overlap-mixtures.hdf5'

    norm_templates = load_data(params, 'norm-templates')
    # Loaded once, here; the earlier duplicate loads were redundant I/O.
    templates = load_data(params, 'templates')
    result = load_data(params, 'clusters')
    best_elec = load_data(params, 'electrodes')
    limits = load_data(params, 'limits')
    N_total = params.getint('data', 'N_total')
    nodes, edges = get_nodes_and_edges(params)
    # Map a channel id onto its rank within ``nodes``.
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)

    _, N_tm = templates.shape
    nb_temp = N_tm // 2

    # Lag indices are used to index sparse-matrix columns below, so they must
    # be stored with an integer dtype.
    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.int32)

    over_x = overlap.get('over_x')[:]
    over_y = overlap.get('over_y')[:]
    over_data = overlap.get('over_data')[:]
    over_shape = overlap.get('over_shape')[:]
    overlap.close()

    # Row ``a * nb_temp + b`` holds the overlap of templates a and b as a
    # function of lag (this is how the matrix is sliced throughout).
    overlap = scipy.sparse.csr_matrix((over_data, (over_x, over_y)),
                                      shape=over_shape)

    for i in xrange(nb_temp - 1):
        rows = overlap[i * nb_temp + i + 1:(i + 1) * nb_temp].toarray()
        # Lag index of the maximal overlap between i and each later
        # template; the matrix is filled symmetrically.
        distances[i, i + 1:] = numpy.argmax(rows, 1)
        distances[i + 1:, i] = distances[i, i + 1:]

    # NOTE(review): column N_t is taken as the reference lag for the
    # self-overlaps; confirm it is the intended (zero-lag) column.
    overlap_0 = overlap[:, N_t].toarray().reshape(nb_temp, nb_temp)

    # Templates by decreasing norm, split round-robin across ranks.
    sorted_temp = numpy.argsort(
        norm_templates[:nb_temp])[::-1][comm.rank::comm.size]
    M = numpy.zeros((2, 2), dtype=numpy.float32)
    V = numpy.zeros((2, 1), dtype=numpy.float32)

    if comm.rank == 0:
        pbar = get_progressbar(size=len(sorted_temp)).start()

    for count, k in enumerate(sorted_temp):

        electrodes = numpy.take(inv_nodes, edges[nodes[best_elec[k]]])
        overlap_k = overlap[k * nb_temp:(k + 1) * nb_temp].tolil()
        is_in_area = numpy.in1d(best_elec, electrodes)
        all_idx = numpy.arange(len(best_elec))[is_in_area]
        been_found = False

        # Enumerate unordered pairs by *position* in ``all_idx``; slicing
        # with the template id itself would drop valid pairs.
        for pos, i in enumerate(all_idx):
            if been_found:
                break
            overlap_i = overlap[i * nb_temp:(i + 1) * nb_temp].tolil()
            M[0, 0] = overlap_0[i, i]
            V[0, 0] = overlap_k[i, distances[k, i]]
            for j in all_idx[pos + 1:]:
                M[1, 1] = overlap_0[j, j]
                # NOTE(review): this lag difference can be negative;
                # confirm the indexing is meant to wrap around.
                M[1, 0] = overlap_i[j, distances[k, i] - distances[k, j]]
                M[0, 1] = M[1, 0]
                V[1, 0] = overlap_k[j, distances[k, j]]
                try:
                    [a1, a2] = numpy.dot(scipy.linalg.inv(M), V)
                except (scipy.linalg.LinAlgError, ValueError):
                    # Singular or non-finite system: no valid amplitudes.
                    [a1, a2] = [0, 0]
                a1_lim = limits[i]
                a2_lim = limits[j]
                is_a1 = (a1_lim[0] <= a1) and (a1 <= a1_lim[1])
                is_a2 = (a2_lim[0] <= a2) and (a2 <= a2_lim[1])
                if not (is_a1 and is_a2):
                    continue
                new_template = (a1 * templates[:, i].toarray()
                                + a2 * templates[:, j].toarray()).ravel()
                similarity = numpy.corrcoef(
                    templates[:, k].toarray().ravel(), new_template)[0, 1]
                if similarity > cc_merge:
                    # Each k is visited once per rank, so no duplicate check
                    # is needed (and numpy.unique is applied below anyway).
                    mixtures.append(k)
                    been_found = True
                    break

        if comm.rank == 0:
            pbar.update(count)

    if comm.rank == 0:
        pbar.finish()

    to_remove = numpy.unique(numpy.array(mixtures, dtype=numpy.int32))
    to_remove = all_gather_array(to_remove, comm, 0, dtype='int32')

    if len(to_remove) > 0:
        slice_templates(comm, params, to_remove)
        slice_clusters(comm, params, result, to_remove=to_remove)

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_remove)]