def process_image_nn_based_on_radius(img, img_class):
    """Convert an image into a ``torch_geometric`` pixel-neighbourhood graph.

    Every pixel becomes one node whose feature vector holds its channel
    values. Two pixels are linked when their Euclidean distance on the pixel
    grid is at most ``dmax``. ``dmax`` is not defined inside this function
    and has to be provided by the enclosing module.

    Parameters
    ----------
    img : array-like, (H, W, C) or (H, W)
        Image data; a 2-D image is treated as having a single channel.
    img_class
        Label stored unchanged as ``y`` on the returned graph.

    Returns
    -------
    torch_geometric.data.Data
        ``x`` has shape (H*W, C). ``edge_index`` has shape (2, E) and lists
        every neighbouring pair once, as (i, j) with i < j.
    """
    img = np.asarray(img)
    if img.ndim == 2:
        # grayscale input: add a channel axis of length one
        img = img[:, :, np.newaxis]
    img_height, img_width, ch = img.shape

    # indexing='ij' numbers the nodes row-major (k = r * W + c), which is the
    # same order the feature matrix gets from the reshape below. The default
    # 'xy' indexing would pair features with the wrong grid positions on
    # non-square images.
    row, col = np.meshgrid(np.arange(img_height), np.arange(img_width),
                           indexing='ij')
    coord = np.stack((row, col), axis=2).reshape(-1, 2)

    # dmax: 8 neighbours; 1: 4 neighbours (with Euclidean distance)
    kdT = KDTree(coord)
    # reshape keeps the (E, 2) layout even when no pair is found
    pairs = np.array(list(kdT.query_pairs(dmax)), dtype=np.int64).reshape(-1, 2)
    edge_index = torch.as_tensor(np.ascontiguousarray(pairs.T),
                                 dtype=torch.long)

    # node features: one row per pixel
    x = torch.Tensor(img.reshape(img_height * img_width, ch))

    return torch_geometric.data.Data(x=x, edge_index=edge_index, y=img_class)
def clusters(points, radius):
    """
    Group points into clusters linked by neighbours within ``radius``.

    Parameters
    ---------
    points : (n, d) float
        Points of dimension d
    radius : float
        Max distance between points in a cluster

    Returns
    ----------
    groups : (m,) sequence of int
        Indices of points in a cluster

    """
    from . import graph

    # request an ndarray explicitly: some versions return the pairs as a
    # set of tuples by default
    neighbour_pairs = KDTree(points).query_pairs(r=radius,
                                                 output_type='ndarray')
    # each connected component of the neighbour graph is one cluster
    return graph.connected_components(neighbour_pairs)
Example #3
0
def find_pairs(cutoff, X, Y=None):
    """
    Find index pairs with euclidean distance below C{cutoff}: pairs within
    C{X} when C{Y} is C{None}, otherwise pairs between C{X} and C{Y}.

    Each KDTree is built with a leaf size equal to its number of points.

    @type cutoff: float
    @type X: (m,n) numpy.array
    @type Y: (k,n) numpy.array
    @return: set of index tuples within C{X}, or a generator of
             (index in X, index in Y) tuples when C{Y} is given
    @rtype: iterable
    """
    try:
        from scipy.spatial import cKDTree as KDTree
        # touching both methods makes a cKDTree that lacks them fall
        # through to the plain KDTree below
        KDTree.query_pairs
        KDTree.query_ball_tree
    except (ImportError, AttributeError):
        from scipy.spatial import KDTree

    x_tree = KDTree(X, len(X))
    if Y is not None:
        y_tree = KDTree(Y, len(Y))
        # neighbours[i] lists the indices in Y close to point i of X
        neighbours = x_tree.query_ball_tree(y_tree, cutoff)
        return ((i, j) for i, hits in enumerate(neighbours) for j in hits)

    return x_tree.query_pairs(cutoff)
Example #4
0
def find_pairs(cutoff, X, Y=None):
    """
    Find index pairs with euclidean distance below C{cutoff}: pairs within
    C{X} when C{Y} is C{None}, otherwise pairs between C{X} and C{Y}.

    Each KDTree is built with a leaf size equal to its number of points.

    @type cutoff: float
    @type X: (m,n) numpy.array
    @type Y: (k,n) numpy.array
    @return: set of index tuples within C{X}, or a generator of
             (index in X, index in Y) tuples when C{Y} is given
    @rtype: iterable
    """
    try:
        from scipy.spatial import cKDTree as KDTree
        # touching both methods makes a cKDTree that lacks them fall
        # through to the plain KDTree below
        KDTree.query_pairs
        KDTree.query_ball_tree
    except (ImportError, AttributeError):
        from scipy.spatial import KDTree

    x_tree = KDTree(X, len(X))
    if Y is not None:
        y_tree = KDTree(Y, len(Y))
        # neighbours[i] lists the indices in Y close to point i of X
        neighbours = x_tree.query_ball_tree(y_tree, cutoff)
        return ((i, j) for i, hits in enumerate(neighbours) for j in hits)

    return x_tree.query_pairs(cutoff)
def get_clusters(vectors, metric, cutoff):
    """Cluster ``vectors`` by linking every pair closer than ``cutoff``.

    Pairs are found with a KD-tree query using ``p=inf`` and handed to
    ``disjoint_sets_cluster``. Its result is iterated as
    (point index, cluster id) tuples and regrouped by cluster id.

    Parameters
    ----------
    vectors : sequence of points
        Points to cluster; must support ``len`` and integer indexing.
    metric
        Unused; kept so existing callers keep working.
    cutoff : float
        Maximum distance between two linked points.

    Returns
    -------
    dict values view
        One list of vectors per cluster id.
    """
    print("Making KD tree. len(vectors) ==", len(vectors))
    KDT = KDTree(vectors)
    print("KD tree done!")
    pairs = KDT.query_pairs(r=cutoff, p=inf)
    print("pairs done!")
    print("Making DJSet")
    print("DJSet done! Making clusters...")
    print("Making list")
    pairs = list(pairs)
    print("calling DSC. len(pairs) ==", len(pairs))
    clusters = disjoint_sets_cluster(pairs)
    print("Clustered, left numba")
    # invert the (point index, cluster id) results into cluster id -> members
    cl = defaultdict(list)
    for pt, idp in clusters:
        cl[idp].append(vectors[pt])
    print("All done")
    return cl.values()
    '''
Example #6
0
def test_crystal_gel():
    """Regression check on experimental data from a crystallizing gel."""
    positions = np.loadtxt('examples/AR-Res06A_scan2_t890.xyz', skiprows=1)
    maxbondlength = 12.5
    # spatial indexing, then every pair of particles within bonding distance
    bonds = KDTree(positions, 12).query_pairs(maxbondlength,
                                              output_type='ndarray')
    # particles further than one bond length from the data bounds
    # NOTE(review): the upper bound uses the global positions.max(), while
    # the lower bound uses the per-axis positions.min(0) -- confirm intent
    far_from_lower = positions - positions.min(0) > maxbondlength
    far_from_upper = positions.max() - positions > maxbondlength
    inside = np.all(far_from_lower & far_from_upper, -1)
    # number of neighbours per particle; poorly connected ones are excluded
    neighbour_count = np.bincount(bonds.ravel(), minlength=len(positions))
    inside &= neighbour_count >= 4
    # tensorial boo
    q6m = boo.bonds2qlm(positions, bonds, l=6)
    q4m = boo.bonds2qlm(positions, bonds, l=4)
    # coarse-graining must flag the same particles for both orders
    _, inside_q6 = boo.coarsegrain_qlm(q6m, bonds, inside)
    _, inside_q4 = boo.coarsegrain_qlm(q4m, bonds, inside)
    assert np.all(inside_q6 == inside_q4)
    # crystals
    crystalline = boo.x_particles(q6m, bonds)
    assert crystalline.sum() == 14188
    # surface particles: pass the looser nb_thr=2 test but are not crystalline
    surface = boo.x_particles(q6m, bonds, nb_thr=2) & ~crystalline
    assert surface.sum() == 9288
Example #7
0
def _fast_construct_edges(G, radius, p):
    """Add an edge to `G` for every pair of nodes within `radius`.

    Node coordinates are read from the 'pos' node attribute; close pairs
    are found with a KDTree query using Minkowski norm `p`.

    Requires scipy to be installed.
    """
    positions = nx.get_node_attributes(G, 'pos')
    # split into parallel sequences: the tree cannot be fed a generator
    labels, points = zip(*positions.items())
    index_pairs = KDTree(points).query_pairs(radius, p)
    # translate tree indices back into node labels
    G.add_edges_from((labels[i], labels[j]) for i, j in index_pairs)
Example #8
0
def _fast_construct_edges(G, radius, p):
    """Add an edge to `G` for every pair of nodes within `radius`.

    Node coordinates are read from the 'pos' node attribute; close pairs
    are found with a KDTree query using Minkowski norm `p`.

    Requires scipy to be installed.
    """
    positions = nx.get_node_attributes(G, 'pos')
    # split into parallel sequences: the tree cannot be fed a generator
    labels, points = zip(*positions.items())
    index_pairs = KDTree(points).query_pairs(radius, p)
    # translate tree indices back into node labels
    G.add_edges_from((labels[i], labels[j]) for i, j in index_pairs)
Example #9
0
def _fast_edges(G, radius, p):
    """Returns a generator of node pairs within `radius` of each other.

    Positions come from the 'pos' node attribute; distances use the
    Minkowski metric `p` through a scipy KDTree.

    Requires scipy to be installed.
    """
    positions = nx.get_node_attributes(G, 'pos')
    # split into parallel sequences: the tree cannot be fed a generator
    labels, points = zip(*positions.items())
    index_pairs = KDTree(points).query_pairs(radius, p)
    # translate tree indices back into node labels, lazily
    return ((labels[i], labels[j]) for i, j in index_pairs)
Example #10
0
def _fast_edges(G, radius, p):
    """Returns a generator of node pairs within `radius` of each other.

    Positions come from the 'pos' node attribute; distances use the
    Minkowski metric `p` through a scipy KDTree.

    Requires scipy to be installed.
    """
    positions = nx.get_node_attributes(G, 'pos')
    # split into parallel sequences: the tree cannot be fed a generator
    labels, points = zip(*positions.items())
    index_pairs = KDTree(points).query_pairs(radius, p)
    # translate tree indices back into node labels, lazily
    return ((labels[i], labels[j]) for i, j in index_pairs)
Example #11
0
def clusters(points, radius):
    """
    Group points into clusters linked by neighbours closer than radius
    :param points: nxd points
    :param radius: max distance between points in a cluster
    :return: list with one entry per connected component of the
             neighbour graph, as produced by connected_components
    author: revised by weiwei
    date: 20210120
    """
    neighbour_pairs = KDTree(points).query_pairs(radius)
    # every close pair is an edge; the components are the clusters
    return list(connected_components(from_edgelist(neighbour_pairs)))
Example #12
0
def cleanup_pairs_KDTree(xyz, kind, data_shape, dmin, grad):
    """Return a boolean mask of the points to keep after removing close pairs.

    A KDTree with ``boxsize=data_shape[0]`` is built over ``xyz`` and queried
    for all pairs within ``dmin`` using ``p=np.inf``. The decision of which
    points to skip is delegated to ``_cleanup_pairs_KDTree``; this function
    returns the negation of that result.

    Raises ``Exception`` when the entries of ``data_shape`` are not all equal.
    """
    # unpacking also enforces that xyz is two-dimensional
    _npoint, _ndim = xyz.shape
    N = data_shape[0]
    logger.debug('Building KDTree')
    # TODO: support non square domains
    if np.any(np.asarray(data_shape) != N):
        raise Exception('All axis should have the same dimension.')
    if len(xyz) == 0:
        # nothing to clean up: empty mask
        return np.ones(0, dtype=bool)

    close_pairs = KDTree(xyz, boxsize=N, copy_data=True).query_pairs(
        dmin, p=np.inf, output_type='ndarray')
    logger.debug('Removing close pairs')
    # shift each coordinate by 0.5, round, and shift back
    xc = np.round(xyz + 0.5) - 0.5
    grad_norm = np.linalg.norm(grad, axis=1)
    skip = _cleanup_pairs_KDTree(xyz, xc, kind, close_pairs, N, data_shape,
                                 grad_norm)
    return ~skip.astype(bool)
Example #13
0
def clusters(points, radius):
    '''
    Group points into clusters linked by neighbours closer than radius

    Arguments
    ---------
    points: (n, d) points (of dimension d)
    radius: max distance between points in a cluster

    Returns:
    groups: (m) sequence of indices for points

    '''
    neighbour_pairs = KDTree(points).query_pairs(radius)
    # every close pair is an edge; the components are the clusters
    return list(connected_components(from_edgelist(neighbour_pairs)))
Example #14
0
def clusters(points, radius):
    '''
    Group points into clusters linked by neighbours closer than radius

    Arguments
    ---------
    points: (n, d) points (of dimension d)
    radius: max distance between points in a cluster

    Returns:
    groups: (m) sequence of indices for points

    '''
    neighbour_pairs = KDTree(points).query_pairs(radius)
    # every close pair is an edge; the components are the clusters
    return list(connected_components(from_edgelist(neighbour_pairs)))
Example #15
0
def dedup(particles, radius):
    """Remove duplicate particles, micrograph by micrograph.

    Records are grouped by their 'rlnMicrographName' field. Within each
    group, pairs closer than ``radius`` are found with a KDTree over
    ``positions(group)``, and ``connected_components`` selects the indices
    of the records to keep.

    Returns a new array with the same dtype as ``particles``.
    """
    by_micrograph = ddict(list)
    for record in particles:
        by_micrograph[record['rlnMicrographName']].append(tuple(record))

    kept_rows = []
    for rows in by_micrograph.values():
        group = np.array(rows, dtype=particles.dtype)
        close_pairs = KDTree(positions(group)).query_pairs(radius)
        survivors = connected_components(len(group), close_pairs)
        kept_rows.extend(tuple(group[idx]) for idx in survivors)
    return np.array(kept_rows, dtype=particles.dtype)
def clusters(points, radius):
    """
    Group points into clusters linked by neighbours closer than radius

    Parameters
    ---------
    points: (n, d) points (of dimension d)
    radius: max distance between points in a cluster

    Returns
    ----------
    groups: (m) sequence of indices for points

    """
    from . import graph

    neighbour_pairs = KDTree(points).query_pairs(radius)
    # each connected component of the neighbour graph is one cluster
    return graph.connected_components(neighbour_pairs)
Example #17
0
def geometric_edges(G, radius, p):
    """Returns edge list of node pairs within `radius` of each other

    Node positions are read from the "pos" node data and compared with the
    Minkowski distance metric `p`. A scipy KDTree does the search when scipy
    can be imported; otherwise every pair of nodes is tested directly.
    """
    nodes_pos = G.nodes(data="pos")
    try:
        from scipy.spatial import cKDTree as KDTree
    except ImportError:
        KDTree = None

    if KDTree is None:
        # no scipy: compare p-th powers so no root has to be taken
        limit = radius ** p
        found = []
        for (u, pu), (v, pv) in combinations(nodes_pos, 2):
            if sum(abs(a - b) ** p for a, b in zip(pu, pv)) <= limit:
                found.append((u, v))
        return found

    # split into parallel sequences: the tree cannot be fed a generator
    labels, points = zip(*nodes_pos)
    index_pairs = KDTree(points).query_pairs(radius, p)
    return [(labels[i], labels[j]) for i, j in index_pairs]
Example #18
0
class Spade:
    """ Class implementing Peng Qiu's SPADE algorithm, following S8 in the
    supplemental methods of his Nature Paper.

    Only the density-dependent downsampling step is implemented: estimate a
    distance threshold, count the neighbours of every point within that
    threshold, then thin out the dense regions.
    """
    nsamples = 2000
    distance_metric = 1
    distance_threshold = None
    alpha = 5  # if distance_threshold is none, then distance_threshold = median_min_dist * alpha

    def __init__(self, data, use_KD_tree=True):
        """Store the transposed ``data`` and optionally index it.

        We assume that data comes in the format stored in Flowdata class.
        After the transpose every row of ``self.data`` is one point.
        """
        self.data = data.transpose()
        self.use_KD_tree = use_KD_tree
        # always defined, so the attribute exists whatever the flag is
        self.kd_tree = None

        if self.use_KD_tree:
            self._init_KD_tree()

    def run(self):
        """
            Apply SPADE algorithm
        """
        # Step 1: apply density dependent downsampling
        self.estimate_median_dist()
        self.compute_local_density()
        self.downsample()

    def _init_KD_tree(self):
        """Build the KD-tree over the rows of ``self.data``."""
        self.kd_tree = KDTree(self.data)

    def _distances_from(self, point):
        """Return the ``distance_metric``-norm distance from ``point`` to every row."""
        diff = np.abs(point - self.data).astype(float)
        p = self.distance_metric
        if np.isinf(p):
            return diff.max(axis=1)
        return (diff ** p).sum(axis=1) ** (1.0 / p)

    def estimate_median_dist(self):
        """Estimate the median nearest-neighbour distance between points.

        At most ``nsamples`` randomly chosen points are used. The result is
        stored in ``self.median_dist`` and, when ``distance_threshold`` is
        still None, used to set it to ``alpha * median_dist``.

        Returns the median distance.
        """
        npoints = self.data.shape[0]
        # Randomly selected indices; sampling without replacement is only
        # possible when there are more points than requested samples
        if self.nsamples < npoints:
            index = np.random.choice(npoints, self.nsamples, replace=False)
            x = self.data[index, :]
        else:
            index = np.arange(npoints)
            x = self.data

        if self.use_KD_tree:
            # We need to take the first two points (k=2), since distance of the point
            # to itself is zero.
            (dist, i) = self.kd_tree.query(x, k=2, p=self.distance_metric)
            dist = dist[:, 1]
        else:
            dist = np.zeros(len(x))
            for j in range(len(x)):
                d = self._distances_from(x[j])
                # give infinite distance to the point with itself
                d[index[j]] = float('inf')
                dist[j] = d.min()

        self.median_dist = np.median(dist)

        if self.distance_threshold is None:
            self.distance_threshold = self.alpha * self.median_dist

        return self.median_dist

    def compute_local_density_using_pairs(self):
        """Count neighbours within ``distance_threshold`` from a pair query.

        Prints the number of pairs and the largest count, and returns the
        per-point counts (all zero when the KD-tree is disabled).
        """
        local_density = np.zeros(self.data.shape[0])

        if self.use_KD_tree:
            pairs = self.kd_tree.query_pairs(self.distance_threshold,
                                             p=self.distance_metric)
            print("Found {} pairs".format(len(pairs)))
            for first, second in pairs:
                local_density[first] += 1
                local_density[second] += 1

        print(local_density.max())
        return local_density

    def compute_local_density(self):
        """Count, for every point, the other points within ``distance_threshold``.

        Uses one ball query per point when the KD-tree is enabled and a
        direct distance computation otherwise. The counts are stored in
        ``self.local_density`` and returned.
        """
        print(self.distance_threshold)

        local_density = np.zeros(self.data.shape[0])
        for j in range(self.data.shape[0]):
            if self.use_KD_tree:
                # one query per point keeps the memory requirements low
                within = len(self.kd_tree.query_ball_point(
                    self.data[j],
                    self.distance_threshold,
                    p=self.distance_metric))
            else:
                within = np.count_nonzero(
                    self._distances_from(self.data[j]) <=
                    self.distance_threshold)
            # the point itself is always within the threshold
            local_density[j] = within - 1

        print(local_density)
        self.local_density = local_density
        return local_density

    def downsample(self):
        """Downsample the data according to ``self.local_density``.

        Points with density in [outlier_density, target_density) are all
        kept. Points denser than the target are drawn at random, without
        replacement, with weight ``target_density / local_density``. The
        result is stored in ``self.downsampled_data`` with one row per point.
        """
        target_density = 10
        outlier_density = 3
        local_density = np.asarray(self.local_density, dtype=float)

        # events between the outlier bound and the target density are kept
        # NOTE(review): a density exactly equal to target_density matches
        # neither group and is dropped -- confirm this is intended
        keep = (outlier_density <= local_density) & (local_density <
                                                     target_density)
        downsampled_data = self.data[keep, :]

        # events that are in high density regions
        weights = np.where(local_density > target_density,
                           target_density / (local_density + 1e-14), 0.0)
        total = weights.sum()
        if total > 0:
            # every weight is below one, so ceil(total) never exceeds the
            # number of candidates with a non-zero weight
            downsample_index = np.random.choice(self.data.shape[0],
                                                int(math.ceil(total)),
                                                replace=False,
                                                p=weights / total)
            # axis=0 appends rows instead of flattening both arrays
            downsampled_data = np.append(downsampled_data,
                                         self.data[downsample_index, :],
                                         axis=0)
        print(downsampled_data.shape)

        self.downsampled_data = downsampled_data
Example #19
0
def merge_tips(mesh,
               all_paths,
               roots,
               tot_path_lengths,
               large_skel_path_threshold=5000,
               max_tip_d=2000):
    """Connect nearby tips of separate skeleton components.

    The first vertex of every path and the root of every component are
    treated as tips. Tips of one component are linked to that component's
    root, tips of different components closer than ``max_tip_d`` are linked
    to each other, and ``pcst_fast.pcst_fast`` selects a subset of those
    links. Selected links that join two different components are returned
    together with the edges of the original paths.

    Parameters
    ----------
    mesh : object whose ``vertices`` can be indexed as ``vertices[i, :]``
    all_paths : one list of paths per component; ``path[0]`` is a tip index
    roots : one root vertex index per component
    tot_path_lengths : one collection of path lengths per component
    large_skel_path_threshold : total path length above which a component
        earns a prize instead of paying a cost
    max_tip_d : maximum distance between tips that may be linked

    Returns
    -------
    Array of edges in mesh vertex indices: the edges from
    ``utils.paths_to_edges`` followed by the newly selected tip links.
    """
    # tip coordinates and mesh vertex indices per component, root last
    skel_tips = []
    tip_vertex_ids = []
    for paths, root in zip(all_paths, roots):
        vertex_ids = [path[0] for path in paths]
        vertex_ids.append(root)
        skel_tips.append(np.vstack([mesh.vertices[v, :] for v in vertex_ids]))
        tip_vertex_ids.append(np.array(vertex_ids))
    # one matrix with every tip, and the mesh vertex index of each row
    all_tips = np.vstack(skel_tips)
    all_tip_indices = np.concatenate(tip_vertex_ids)

    # component id of every tip
    tip_component = np.zeros(all_tips.shape[0])

    # prize collecting steiner forest inputs
    prize_blocks = []
    edge_blocks = []
    weight_blocks = []

    # position of the current component's first tip in the overall matrix
    offset = 0
    for k, (tips, path_lengths) in enumerate(zip(skel_tips,
                                                 tot_path_lengths)):
        ntips = tips.shape[0]
        path_len = np.sum(np.array(path_lengths))
        # large components earn their path length as prize ...
        prize = path_len if path_len > large_skel_path_threshold else 0
        # ... small ones pay it as the cost of traveling inside them
        cost = path_len if path_len <= large_skel_path_threshold else 0
        prize_blocks.append(prize * np.ones(ntips))

        # star of edges from the component's root (its last tip) to each tip
        comp_tips = np.arange(offset, offset + ntips, dtype=np.int64)
        root_tips = np.full(ntips, offset + ntips - 1, dtype=np.int64)
        edge_blocks.append(np.column_stack((root_tips, comp_tips)))
        weight_blocks.append(cost * np.ones(ntips))

        tip_component[comp_tips] = k
        offset += ntips
    tip_prizes = np.concatenate(prize_blocks)

    # pairs of tips near one another, restricted to different components
    close_tips = KDTree(all_tips).query_pairs(max_tip_d,
                                              output_type='ndarray')
    from_other_component = (tip_component[close_tips[:, 0]] !=
                            tip_component[close_tips[:, 1]])
    filt_close_tips = close_tips[from_other_component]

    # these become edges weighted by their euclidean length
    edge_blocks.append(filt_close_tips)
    first_tip = all_tips[filt_close_tips[:, 0], :]
    second_tip = all_tips[filt_close_tips[:, 1]]
    weight_blocks.append(np.linalg.norm(first_tip - second_tip, axis=1))

    inter_tip_weights = np.concatenate(weight_blocks)
    inter_tip_edges = np.concatenate(edge_blocks)

    # run the prize collecting steiner forest optimization
    mst_verts, mst_edges = pcst_fast.pcst_fast(inter_tip_edges, tip_prizes,
                                               inter_tip_weights, -1, 1, 'gw',
                                               1)
    # keep only the selected edges that bridge two components
    chosen = inter_tip_edges[mst_edges]
    bridges = tip_component[chosen[:, 0]] != tip_component[chosen[:, 1]]
    good_inter_tip_edges = inter_tip_edges[mst_edges[bridges], :]
    # translate tip positions back into mesh vertex indices
    new_edges_orig_ind = all_tip_indices[good_inter_tip_edges]

    # edges of all the original paths, already in mesh vertex indices
    orig_edges = np.vstack(
        [utils.paths_to_edges(paths) for paths, _ in zip(all_paths, roots)])

    return np.vstack([orig_edges, new_edges_orig_ind])
Example #20
0
def Execute_Correspondences_CreateInputs(candidates, normalized_images, im_th,
                                         cycle, channels, nbit):
    """Build per-signal intensity records and 5x5 input windows for one cycle.

    Candidate pixels of channels ``2:channels`` are merged and labelled into
    connected components. Every component yields one record when all of its
    pixels lie close together, and otherwise one record per distinct (x, y)
    position. A record stores, for each of the four channels at index
    ``ch + 2``, the ``im_th`` intensity and the pixel coordinates. The
    channel with the highest intensity then selects the centre of a 5x5
    window cut from ``normalized_images``.

    Parameters
    ----------
    candidates : array indexed as ``[cycle, channel, y, x]``
    normalized_images : array indexed as ``[cycle, channel, y, x]``
    im_th : array indexed as ``[cycle, channel, y, x]``
    cycle : int
        Cycle to process.
    channels : int
        Upper bound of the channel slice ``2:channels`` used for labelling.
    nbit
        Not used by this function.

    Returns
    -------
    dict
        ``'max_df'``: one row per record with columns I_T, I_G, I_C, I_A,
        x_T, y_T, ..., x_A, y_A, cycle.
        ``'inputs_df'``: one row per record with columns cycle, ch, x, y and
        Intensities_window_5x5.
    """
    inputs_df = pd.DataFrame(
        columns=['cycle', 'ch', 'x', 'y', 'Intensities_window_5x5'])
    max_df = pd.DataFrame(columns=[
        'I_T', 'I_G', 'I_C', 'I_A', 'x_T', 'y_T', 'x_G', 'y_G', 'x_C', 'y_C',
        'x_A', 'y_A', 'cycle'
    ])

    # label connected components on the per-pixel maximum over the channels
    # NOTE(review): `label` is presumably skimage.measure.label -- confirm
    cc, n_c = label(np.amax(candidates[cycle, 2:channels, :, :], axis=0),
                    return_num=True,
                    connectivity=1)
    # spread the component labels back onto each of the four channels
    conn_components = np.zeros((4, candidates.shape[-2], candidates.shape[-1]))
    for ch in range(4):
        conn_components[ch, :, :] = np.multiply(
            cc, candidates[cycle, ch + 2, :, :])

    for i in tqdm(range(1, n_c + 1)):
        # channel and pixel coordinates of every entry carrying label i
        ch, y, x = np.where(conn_components == i)
        kdT_tmp = KDTree(np.array([x, y]).T)
        # compact component: every pair of its points is within Manhattan
        # distance 2, so it is handled as a single signal
        if len(list(itertools.combinations(np.arange(len(x)), 2))) == len(
                kdT_tmp.query_pairs(2, p=1)
        ):  # otherwise the component likely covers more signals and is split below
            df = pd.Series(
                data={
                    'I_T': np.nan,
                    'I_G': np.nan,
                    'I_C': np.nan,
                    'I_A': np.nan,
                    'x_T': np.nan,
                    'y_T': np.nan,
                    'x_G': np.nan,
                    'y_G': np.nan,
                    'x_C': np.nan,
                    'y_C': np.nan,
                    'x_A': np.nan,
                    'y_A': np.nan,
                    'cycle': cycle
                })
            # fix the field order: positions 0-3 hold the intensities and
            # positions 4 + 2*ch, 5 + 2*ch the x, y of channel ch
            df = df[[
                'I_T', 'I_G', 'I_C', 'I_A', 'x_T', 'y_T', 'x_G', 'y_G', 'x_C',
                'y_C', 'x_A', 'y_A', 'cycle'
            ]]
            for j in range(len(x)):
                df.iloc[ch[j]] = im_th[cycle, ch[j] + 2, y[j], x[j]]
                df.iloc[ch[j] * 2 + 4] = x[j]
                df.iloc[ch[j] * 2 + 4 + 1] = y[j]
            I = df['I_T':'I_A']
            col = I[I == np.nanmax(I)].index[0]  # name of the brightest channel's intensity field
            tomove = df.index.get_loc(
                col)  # its position, used to reach that channel's coordinates
            x_ch = int(df[tomove * 2 + 4])
            y_ch = int(df[tomove * 2 + 4 + 1])
            ch_idx = tomove
            # note: this rebinds the `cycle` parameter to the stored value
            cycle = int(df['cycle'])
            # 5x5 window centred on the brightest channel's pixel
            rect = normalized_images[cycle, ch_idx + 2, y_ch - 2:y_ch + 3,
                                     x_ch - 2:x_ch + 3]
            if not rect.size == 0:
                # min-max scale the window, then centre it on its mean
                rect = (rect - np.amin(rect)) / (np.amax(rect) - np.amin(rect))
                rect = rect - np.mean(rect)
            row = pd.Series(
                data={
                    'cycle': cycle,
                    'ch': ch_idx + 2,
                    'x': x_ch,
                    'y': y_ch,
                    'Intensities_window_5x5': rect
                })

            inputs_df = inputs_df.append(row, ignore_index=True)
            max_df = max_df.append(df, ignore_index=True)
        else:
            # spread-out component: one record per distinct (x, y) position
            coords = np.vstack((x, y))
            coords_unique = np.unique(coords, axis=1)
            for j in range(coords_unique.shape[-1]):
                coords_tmp = coords_unique[:, j][:, np.newaxis]
                # indices of the entries (one per channel) at this position
                coords_idx = np.argwhere(np.all(coords == coords_tmp,
                                                axis=0)).reshape((-1, ))
                df = pd.Series(
                    data={
                        'I_T': np.nan,
                        'I_G': np.nan,
                        'I_C': np.nan,
                        'I_A': np.nan,
                        'x_T': np.nan,
                        'y_T': np.nan,
                        'x_G': np.nan,
                        'y_G': np.nan,
                        'x_C': np.nan,
                        'y_C': np.nan,
                        'x_A': np.nan,
                        'y_A': np.nan,
                        'cycle': cycle
                    })
                # same field order as in the compact case
                df = df[[
                    'I_T', 'I_G', 'I_C', 'I_A', 'x_T', 'y_T', 'x_G', 'y_G',
                    'x_C', 'y_C', 'x_A', 'y_A', 'cycle'
                ]]
                for k in coords_idx:
                    df.iloc[ch[k]] = im_th[cycle, ch[k] + 2, y[k], x[k]]
                    df.iloc[ch[k] * 2 + 4] = x[k]
                    df.iloc[ch[k] * 2 + 4 + 1] = y[k]
                I = df['I_T':'I_A']
                col = I[I == np.nanmax(I)].index[0]  # name of the brightest channel's intensity field
                tomove = df.index.get_loc(
                    col
                )  # its position, used to reach that channel's coordinates
                x_ch = int(df[tomove * 2 + 4])
                y_ch = int(df[tomove * 2 + 4 + 1])
                ch_idx = tomove
                # note: this rebinds the `cycle` parameter to the stored value
                cycle = int(df['cycle'])
                # 5x5 window centred on the brightest channel's pixel
                rect = normalized_images[cycle, ch_idx + 2, y_ch - 2:y_ch + 3,
                                         x_ch - 2:x_ch + 3]
                if not rect.size == 0:
                    # min-max scale the window, then centre it on its mean
                    rect = (rect - np.amin(rect)) / (np.amax(rect) -
                                                     np.amin(rect))
                    rect = rect - np.mean(rect)
                row = pd.Series(
                    data={
                        'cycle': cycle,
                        'ch': ch_idx + 2,
                        'x': x_ch,
                        'y': y_ch,
                        'Intensities_window_5x5': rect
                    })

                inputs_df = inputs_df.append(row, ignore_index=True)
                max_df = max_df.append(df, ignore_index=True)

    return {'max_df': max_df, 'inputs_df': inputs_df}
Example #21
0
class Spade:
    """ Class implementing Peng Qiu's SPADE algorithm, following S8 in the
    supplemental methods of his Nature Paper.

    Only the density-dependent downsampling step is implemented: estimate a
    distance threshold, count the neighbours of every point within that
    threshold, then thin out the dense regions.
    """
    nsamples = 2000
    distance_metric = 1
    distance_threshold = None
    alpha = 5   # if distance_threshold is none, then distance_threshold = median_min_dist * alpha

    def __init__(self, data, use_KD_tree=True):
        """Store the transposed ``data`` and optionally index it.

        We assume that data comes in the format stored in Flowdata class.
        After the transpose every row of ``self.data`` is one point.
        """
        self.data = data.transpose()
        self.use_KD_tree = use_KD_tree
        # always defined, so the attribute exists whatever the flag is
        self.kd_tree = None

        if self.use_KD_tree:
            self._init_KD_tree()

    def run(self):
        """
            Apply SPADE algorithm
        """
        # Step 1: apply density dependent downsampling
        self.estimate_median_dist()
        self.compute_local_density()
        self.downsample()

    def _init_KD_tree(self):
        """Build the KD-tree over the rows of ``self.data``."""
        self.kd_tree = KDTree(self.data)

    def _distances_from(self, point):
        """Return the ``distance_metric``-norm distance from ``point`` to every row."""
        diff = np.abs(point - self.data).astype(float)
        p = self.distance_metric
        if np.isinf(p):
            return diff.max(axis=1)
        return (diff ** p).sum(axis=1) ** (1.0 / p)

    def estimate_median_dist(self):
        """Estimate the median nearest-neighbour distance between points.

        At most ``nsamples`` randomly chosen points are used. The result is
        stored in ``self.median_dist`` and, when ``distance_threshold`` is
        still None, used to set it to ``alpha * median_dist``.

        Returns the median distance.
        """
        npoints = self.data.shape[0]
        # Randomly selected indices; sampling without replacement is only
        # possible when there are more points than requested samples
        if self.nsamples < npoints:
            index = np.random.choice(npoints, self.nsamples, replace=False)
            x = self.data[index, :]
        else:
            index = np.arange(npoints)
            x = self.data

        if self.use_KD_tree:
            # We need to take the first two points (k=2), since distance of the point
            # to itself is zero.
            (dist, i) = self.kd_tree.query(x, k=2, p=self.distance_metric)
            dist = dist[:, 1]
        else:
            dist = np.zeros(len(x))
            for j in range(len(x)):
                d = self._distances_from(x[j])
                # give infinite distance to the point with itself
                d[index[j]] = float('inf')
                dist[j] = d.min()

        self.median_dist = np.median(dist)

        if self.distance_threshold is None:
            self.distance_threshold = self.alpha * self.median_dist

        return self.median_dist

    def compute_local_density_using_pairs(self):
        """Count neighbours within ``distance_threshold`` from a pair query.

        Prints the number of pairs and the largest count, and returns the
        per-point counts (all zero when the KD-tree is disabled).
        """
        local_density = np.zeros(self.data.shape[0])

        if self.use_KD_tree:
            pairs = self.kd_tree.query_pairs(self.distance_threshold,
                                             p=self.distance_metric)
            print("Found {} pairs".format(len(pairs)))
            for first, second in pairs:
                local_density[first] += 1
                local_density[second] += 1

        print(local_density.max())
        return local_density

    def compute_local_density(self):
        """Count, for every point, the other points within ``distance_threshold``.

        Uses one ball query per point when the KD-tree is enabled and a
        direct distance computation otherwise. The counts are stored in
        ``self.local_density`` and returned.
        """
        print(self.distance_threshold)

        local_density = np.zeros(self.data.shape[0])
        for j in range(self.data.shape[0]):
            if self.use_KD_tree:
                # one query per point keeps the memory requirements low
                within = len(self.kd_tree.query_ball_point(
                    self.data[j],
                    self.distance_threshold,
                    p=self.distance_metric))
            else:
                within = np.count_nonzero(
                    self._distances_from(self.data[j]) <=
                    self.distance_threshold)
            # the point itself is always within the threshold
            local_density[j] = within - 1

        print(local_density)
        self.local_density = local_density
        return local_density

    def downsample(self):
        """Downsample the data according to ``self.local_density``.

        Points with density in [outlier_density, target_density) are all
        kept. Points denser than the target are drawn at random, without
        replacement, with weight ``target_density / local_density``. The
        result is stored in ``self.downsampled_data`` with one row per point.
        """
        target_density = 10
        outlier_density = 3
        local_density = np.asarray(self.local_density, dtype=float)

        # events between the outlier bound and the target density are kept
        # NOTE(review): a density exactly equal to target_density matches
        # neither group and is dropped -- confirm this is intended
        keep = (outlier_density <= local_density) & (local_density <
                                                     target_density)
        downsampled_data = self.data[keep, :]

        # events that are in high density regions
        weights = np.where(local_density > target_density,
                           target_density / (local_density + 1e-14), 0.0)
        total = weights.sum()
        if total > 0:
            # every weight is below one, so ceil(total) never exceeds the
            # number of candidates with a non-zero weight
            downsample_index = np.random.choice(self.data.shape[0],
                                                int(math.ceil(total)),
                                                replace=False,
                                                p=weights / total)
            # axis=0 appends rows instead of flattening both arrays
            downsampled_data = np.append(downsampled_data,
                                         self.data[downsample_index, :],
                                         axis=0)
        print(downsampled_data.shape)

        self.downsampled_data = downsampled_data
Example #22
0
class KDicTree(dict):
    '''
    Wrapper around the scipy.spatial.KDTree for labelled points.
    Use like dict to register or update points:

    tree = KDicTree({'1':(0,0), 2:(2,2), '3':(45,45)})
    tree['1'] = (1, 1)
    tree['2'] = (5, 5)
    tree['3'] = (50, 50)

    Then use KDTree queries; results are reported as dict keys, in the
    order the underlying tree returns them:

    tree.query_ball_point( (3, 3), 10 )
        ('1', 2, '2')

    The underlying tree is rebuilt lazily: every mutation marks the
    container as altered and the next query rebuilds the tree first.

    Parameters
    ----------
    data : labelled (N,K) dict
        The data points to be indexed, labelled in a dictionary.
    leafsize : int, optional
        The number of points at which the algorithm switches over to
        brute-force. Has to be positive.

    See Also
    --------
    scipy.spatial.KDTree
    scipy.spatial.cKDTree
    '''
    def __init__(self, data, leafsize=16):
        self.tree = None
        self.ids = []  # position of a point in the tree -> dict key
        self.altered = True
        self.leafsize = leafsize
        super().__init__(data)

    def __setitem__(self, key, point):
        '''Set point for self[key]'''
        super().__setitem__(key, point)
        self.altered = True

    def __delitem__(self, key):
        '''Delete self[key].'''
        super().__delitem__(key)
        self.altered = True

    # The built-in dict mutators below do not go through __setitem__ /
    # __delitem__, so each one has to invalidate the cached tree itself.

    def update(self, *args, **kwargs):
        '''Update points from a mapping/iterable and keyword arguments.'''
        super().update(*args, **kwargs)
        self.altered = True

    def __ior__(self, other):
        '''Implement ``self |= other`` in terms of update().'''
        self.update(other)
        return self

    def setdefault(self, key, default=None):
        '''Return self[key], registering ``default`` first if key is new.'''
        if key not in self:
            self.altered = True
        return super().setdefault(key, default)

    def pop(self, key, *default):
        '''Remove key and return its point (or ``default`` if given).'''
        point = super().pop(key, *default)
        self.altered = True
        return point

    def popitem(self):
        '''Remove and return a (key, point) pair.'''
        item = super().popitem()
        self.altered = True
        return item

    def clear(self):
        '''Remove all points.'''
        super().clear()
        self.altered = True

    def build_tree(self):
        '''Gets called automatically by a query.'''
        if not self.altered:
            return
        self.tree = KDTree(list(self.values()), leafsize=self.leafsize)
        # keys() and values() iterate in the same order, so position i of
        # the tree corresponds to self.ids[i].
        self.ids = list(self.keys())
        self.altered = False

    def map_ids(self, ids):
        '''
        Maps the result of queries to dict keys.

        Sequences (tuple, list, ndarray) are mapped recursively and
        returned as tuples. A single index equal to the number of points
        is the marker KDTree.query uses for a missing neighbour; it is
        mapped to None.
        '''
        if isinstance(ids, (tuple, list, ndarray)):
            return tuple(map(self.map_ids, ids))
        if ids == len(self.ids):
            return None
        return self.ids[ids]

    def query(self, x, k=1, eps=0, p=2, distance_upper_bound=float("inf")):
        '''
        Query the kd-tree for nearest neighbors.

        Returns the pair (distances, keys); arguments are forwarded to
        KDTree.query unchanged. Neighbours that do not exist (fewer than
        k points, or none within distance_upper_bound) have key None.
        '''
        self.build_tree()
        dists, ids = self.tree.query(
            x, k=k, eps=eps, p=p, distance_upper_bound=distance_upper_bound)
        return (dists, self.map_ids(ids))

    def query_ball_point(self, x, r, p=2., eps=0):
        '''Find all points within distance r of point(s) x.'''
        self.build_tree()
        return self.map_ids(self.tree.query_ball_point(x, r, p=p, eps=eps))

    def query_pairs(self, r, p=2., eps=0):
        '''Find all pairs of points within a distance r.'''
        self.build_tree()
        return [
            self.map_ids(pair)
            for pair in self.tree.query_pairs(r, p=p, eps=eps)
        ]
Example #23
0
def extract_edges_in_block(db_name, db_host, soft_mask_container,
                           soft_mask_dataset, distance_threshold,
                           evidence_threshold, graph_number, block):
    """Connect nearby candidate nodes of one block and store the edges.

    Reads the graph for ``block.read_roi`` (clipped to the soft-mask ROI),
    pairs up all candidates closer than ``distance_threshold``, has
    ``cpp_get_evidence`` weight every pair against the soft mask, adds the
    weighted edges to the graph and writes the edges for
    ``block.write_roi``.  The block is then marked as done via
    ``write_done``.

    Parameters
    ----------
    db_name, db_host
        Passed on to ``MongoDbGraphProvider``.
    soft_mask_container, soft_mask_dataset
        Passed on to ``daisy.open_ds`` to open the soft mask.  Its data
        must be uint8, or float32/float64 within [0, 1] (scaled by 255).
    distance_threshold
        Radius handed to ``KDTree.query_pairs`` (Euclidean, ``p=2``), in
        the units of the nodes' ``z``/``y``/``x`` attributes.
    evidence_threshold
        ``None`` to use the soft mask as is; otherwise the mask is
        binarised to 0 / 255 at ``evidence_threshold * 255``.
    graph_number
        Selects the edge collection ``'edges_g<graph_number>'``.
    block
        Object providing ``read_roi`` and ``write_roi``.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    ValueError
        If the soft mask has an unsupported dtype, or is floating point
        with values outside [0, 1].
    """
    edges_collection = 'edges_g{}'.format(graph_number)

    graph_provider = MongoDbGraphProvider(
        db_name,
        db_host,
        mode='r+',
        position_attribute=['z', 'y', 'x'],
        directed=False,
        edges_collection=edges_collection)

    # NOTE(review): check_function presumably reports that this block was
    # already processed for this collection - confirm against its
    # definition.
    if check_function(graph_provider.database, block, edges_collection):
        return 0

    logger.debug("Finding edges in %s, reading from %s", block.write_roi,
                 block.read_roi)

    start = time.time()

    soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset)

    graph = graph_provider[block.read_roi.intersect(soft_mask_array.roi)]

    if graph.number_of_nodes() == 0:
        logger.info("No nodes in roi %s. Skipping", block.read_roi)
        write_done(graph_provider.database, block, edges_collection)
        return 0

    logger.debug("Read %d candidates in %.3fs", graph.number_of_nodes(),
                 time.time() - start)

    start = time.time()

    # One row per candidate: [node id, z, y, x].  Nodes without a 'z'
    # attribute are ignored.
    candidates = np.array(
        [[candidate_id] + [data[d] for d in ['z', 'y', 'x']]
         for candidate_id, data in graph.nodes(data=True) if 'z' in data],
        dtype=np.uint64)

    kdtree_start = time.time()
    if candidates.shape[0] == 0:
        # The graph has nodes, but none with a position; KDTree cannot be
        # built from an empty point set, so treat this as "no pairs".
        pairs = set()
    else:
        kdtree = KDTree(candidates[:, 1:4])
        pairs = kdtree.query_pairs(distance_threshold, p=2.0, eps=0)
    logger.debug("Query pairs in %.3fs", time.time() - kdtree_start)

    # The soft mask opened above is reused here.
    voxel_size = np.array(soft_mask_array.voxel_size, dtype=np.uint32)
    soft_mask_roi = block.read_roi.snap_to_grid(
        voxel_size=voxel_size).intersect(soft_mask_array.roi)
    soft_mask_array_data = soft_mask_array.to_ndarray(roi=soft_mask_roi)

    # Bring the soft mask to the 0-255 value range.
    sm_dtype = soft_mask_array_data.dtype
    if sm_dtype == np.uint8:  # standard pipeline pm 0-255
        pass
    elif sm_dtype == np.float32 or sm_dtype == np.float64:
        if not (soft_mask_array_data.min() >= 0
                and soft_mask_array_data.max() <= 1):
            raise ValueError(
                "Provided soft_mask has dtype float but not in range [0,1], abort"
            )
        else:
            soft_mask_array_data *= 255
    else:
        raise ValueError("Soft mask dtype {} not understood".format(sm_dtype))

    soft_mask_array_data = soft_mask_array_data.astype(np.float64)

    if evidence_threshold is not None:
        # Binarise: voxels at or above the threshold become 255, all
        # others 0.
        soft_mask_array_data = (soft_mask_array_data >= evidence_threshold *
                                255).astype(np.float64) * 255

    # Offset of the soft-mask ROI expressed in voxels.
    offset = np.array(np.array(soft_mask_roi.get_offset()) / voxel_size,
                      dtype=np.uint64)
    evidence_start = time.time()

    if pairs:
        pairs = np.array(list(pairs), dtype=np.uint64)
        # NOTE(review): cpp_get_evidence is opaque from here; its result is
        # consumed as (u, v, weight) triples below - confirm.
        evidence_array = cpp_get_evidence(candidates, pairs,
                                          soft_mask_array_data, offset,
                                          voxel_size)
        graph.add_weighted_edges_from(evidence_array, weight='evidence')

        logger.debug("Accumulate evidence in %.3fs",
                     time.time() - evidence_start)

        logger.debug("Found %d edges", graph.number_of_edges())

        logger.debug("Extracted edges in %.3fs", time.time() - start)

        start = time.time()

        graph.write_edges(block.write_roi)

        logger.debug("Wrote edges in %.3fs", time.time() - start)
    else:
        logger.debug("No pairs in block, skip")

    write_done(graph_provider.database, block, edges_collection)
    return 0