Example #1
def get_centroids(picklepath, radius=0.2):
    """This will cluster all vdms by iFG location (backbone rel_vdms) or by sidechain + iFG location (sc rel_vdms)
    and output new pickle files to a directory picklepath/clustered."""
    if picklepath[-1] != '/':
        picklepath += '/'
    if os.path.isdir(picklepath):
        for pickletype in listdir(picklepath):
            if pickletype == 'PHI_PSI':
                for phipsi_type in listdir(picklepath + pickletype):
                    for picklefile in listdir(picklepath + pickletype + '/' + phipsi_type + '/pickle/'):
                        with open(picklepath + pickletype + '/' + phipsi_type + '/pickle/'
                                  + picklefile, 'rb') as infile:
                            pick = pickle.load(infile)
                            # Create the output directory up front so it exists for both branches below.
                            outpath = picklepath + 'clustered/' + pickletype + '/' + phipsi_type + '/pickle/'
                            try:
                                os.makedirs(outpath)
                            except OSError:
                                pass
                            if len(pick.shape) == 1:
                                with open(outpath + picklefile, 'wb') as outfile:
                                    pickle.dump(pick, outfile)
                            else:
                                ifg_flat = [coords.flatten() for coords in pick[:, -2]]
                                nbrs = NearestNeighbors(metric='euclidean', radius=radius)
                                nbrs.fit(ifg_flat)
                                adj_mat = nbrs.radius_neighbors_graph(ifg_flat)
                                mems, cents = cluster_adj_mat(adj_mat)
                                with open(outpath + picklefile, 'wb') as outfile:
                                    pickle.dump(pick[cents, :], outfile)
            else:
                for picklefile in listdir(picklepath + pickletype + '/pickle/'):
                    with open(picklepath + pickletype + '/pickle/' + picklefile, 'rb') as infile:
                        pick = pickle.load(infile)
                        outpath = picklepath + 'clustered/' + pickletype + '/pickle/'
                        try:
                            os.makedirs(outpath)
                        except OSError:
                            pass
                        if len(pick.shape) == 1:
                            with open(outpath + picklefile, 'wb') as outfile:
                                pickle.dump(pick, outfile)
                        else:
                            if pickletype == 'SC':
                                sc_flat = [coords.flatten() for coords in pick[:, -3]]
                                ifg_flat = [coords.flatten() for coords in pick[:, -2]]
                                sc_ifg_flat = np.hstack((sc_flat, ifg_flat))
                                nbrs = NearestNeighbors(metric='euclidean', radius=radius)
                                nbrs.fit(sc_ifg_flat)
                                adj_mat = nbrs.radius_neighbors_graph(sc_ifg_flat)
                                mems, cents = cluster_adj_mat(adj_mat)
                                with open(outpath + picklefile, 'wb') as outfile:
                                    pickle.dump(pick[cents, :], outfile)
                            else:
                                ifg_flat = [coords.flatten() for coords in pick[:, -2]]
                                nbrs = NearestNeighbors(metric='euclidean', radius=radius)
                                nbrs.fit(ifg_flat)
                                adj_mat = nbrs.radius_neighbors_graph(ifg_flat)
                                mems, cents = cluster_adj_mat(adj_mat)
                                with open(outpath + picklefile, 'wb') as outfile:
                                    pickle.dump(pick[cents, :], outfile)
Example #2
File: nn_old.py Project: PeterZZQ/CellPath
def RadiusNeighborhoodGraph(X, r):
    neighbor = NearestNeighbors(radius=r)
    neighbor.fit(X)
    adj_matrix = neighbor.radius_neighbors_graph(X)
    dist_matrix = neighbor.radius_neighbors_graph(X, mode='distance')

    # symmetric matrix
    adj_matrix = adj_matrix.toarray()
    dist_matrix = dist_matrix.toarray()

    return adj_matrix, dist_matrix
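A minimal usage sketch for the helper above; the random 2-D points and the radius value are illustrative assumptions, not part of the original project.

import numpy as np
from sklearn.neighbors import NearestNeighbors

# Toy data: 10 points in the unit square (illustrative only).
X = np.random.rand(10, 2)

adj_matrix, dist_matrix = RadiusNeighborhoodGraph(X, r=0.25)

# adj_matrix[i, j] is 1 when point j lies within radius 0.25 of point i,
# and dist_matrix holds the corresponding Euclidean distances.
print(adj_matrix.shape, dist_matrix.shape)   # (10, 10) (10, 10)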
Example #3
def test_cnn_sparse_precomputed_different_eps():
    # test that precomputed neighbors graph is filtered if computed with
    # a radius larger than eps.
    lower_eps = 0.2
    nn = NearestNeighbors(radius=lower_eps).fit(X)
    D_sparse = nn.radius_neighbors_graph(X, mode="distance")
    cnn_lower = commonnn(D_sparse, eps=lower_eps, metric="precomputed")

    higher_eps = lower_eps + 0.7
    nn = NearestNeighbors(radius=higher_eps).fit(X)
    D_sparse = nn.radius_neighbors_graph(X, mode="distance")
    cnn_higher = commonnn(D_sparse, eps=lower_eps, metric="precomputed")

    assert_array_equal(cnn_lower, cnn_higher)
Example #4
def test_dbscan_sparse_precomputed_different_eps():
    # test that precomputed neighbors graph is filtered if computed with
    # a radius larger than DBSCAN's eps.
    lower_eps = 0.2
    nn = NearestNeighbors(radius=lower_eps).fit(X)
    D_sparse = nn.radius_neighbors_graph(X, mode='distance')
    dbscan_lower = dbscan(D_sparse, eps=lower_eps, metric='precomputed')

    higher_eps = lower_eps + 0.7
    nn = NearestNeighbors(radius=higher_eps).fit(X)
    D_sparse = nn.radius_neighbors_graph(X, mode='distance')
    dbscan_higher = dbscan(D_sparse, eps=lower_eps, metric='precomputed')

    assert_array_equal(dbscan_lower[0], dbscan_higher[0])
    assert_array_equal(dbscan_lower[1], dbscan_higher[1])
Example #5
 def _neigh_internal(hs_reps1, hs_reps2, dist_cst, tol):
     nbrs_high = NearestNeighbors(metric='euclidean', radius=dist_cst + tol)
     nbrs_high.fit(hs_reps2)
     adj_mat_high = nbrs_high.radius_neighbors_graph(hs_reps1, mode='distance')
     lowtol = dist_cst - tol
     wh = (adj_mat_high > lowtol).nonzero()
     return zip(wh[0], wh[1])
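A small illustrative call of the helper above, assuming hs_reps1 and hs_reps2 are plain (n, d) coordinate arrays; the toy points and thresholds below are not from the original project.

import numpy as np

# Two tiny point sets; only reps1[0] and reps2[0] are about 1.0 apart.
reps1 = np.array([[0.0, 0.0], [5.0, 0.0]])
reps2 = np.array([[1.0, 0.0], [9.0, 0.0]])

# Pairs (i, j) whose distance falls in (dist_cst - tol, dist_cst + tol].
pairs = list(_neigh_internal(reps1, reps2, dist_cst=1.0, tol=0.2))
print(pairs)   # one pair: reps1[0] with reps2[0]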
Example #6
 def make_adj_mat_no_superpose(self):
     num_atoms = len(self.pdb_coords[0])
     nbrs = NearestNeighbors(radius=self.rmsd_cutoff * np.sqrt(num_atoms))
     nbrs_coords = np.array([s.getCoords().flatten() for s in self.pdb_coords])
     nbrs.fit(nbrs_coords)
     self.adj_mat = nbrs.radius_neighbors_graph(nbrs_coords)
     self._adj_mat = True
Example #7
File: main.py Project: UnrealAI/GameOfLife
def assign_species_names():
    global agents, n_born, color_changer

    X = [agent.dna for agent in agents]
    """for agent in agents:
        print(agent)
    print(X)"""
    nn = NearestNeighbors(metric='cosine', algorithm='brute')
    nn.fit(X)
    nbrs_graph = nn.radius_neighbors_graph(X, radius=0.001)
    _, connected_components = scipy.sparse.csgraph.connected_components(
        nbrs_graph)
    #print('HI')
    #print(connected_components)
    for agent, cluster in zip(agents, connected_components):
        agent.update_species_name('species%d' % cluster)
    if n_born == 0:
        color_changer = Pipeline([('pca',
                                   KernelPCA(n_components=3, kernel="cosine")),
                                  ('minmax', MinMaxScaler())])
        color_changer.fit(X)

    colors = (
        MinMaxScaler().fit_transform(color_changer.transform(X) * 255.) * 255.
    ).astype(
        int
    )  #(MinMaxScaler().fit_transform(KernelPCA(n_components=3,kernel="cosine").fit_transform(X))*255).astype(int).tolist()
    #print(colors)
    for agent, color in zip(agents, colors):
        print(color)
        agent.color = color
Example #8
    def _build_graph(self, X):
        """Construction of connectivity graph G"""

        neighbors = NearestNeighbors(algorithm=self.neighbors_algorithm,
                                     metric=self.metric,
                                     n_jobs=self.n_jobs).fit(X)

        if self.neighborhood_method == "knn":
            # TODO: assert n_neighbors < number of points
            G = neighbors.kneighbors_graph(n_neighbors=self.n_neighbors,
                                           mode="distance")
        elif self.neighborhood_method == "eps_ball":
            # TODO: assert eps is not None
            G = neighbors.radius_neighbors_graph(radius=self.eps,
                                                 mode="distance")
        else:
            raise ValueError(
                "Unrecognized method of neighborhood selection='{0}'"
                "".format(self.neighborhood_method))

        G_sym = csr_matrix.maximum(G, G.T.tocsr())

        G_sym.data = self._kernel(G_sym.data, sigma=self.sigma)

        G.data = self._kernel(G.data, sigma=self.sigma)

        return G_sym
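The method above relies on estimator attributes; a self-contained sketch of the same eps-ball construction, symmetrization, and kernel step might look like this. The data, eps, and the Gaussian kernel with width sigma are assumptions for illustration, standing in for self.eps, self.sigma, and self._kernel.

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

X = np.random.rand(30, 3)     # toy data (illustrative only)
eps, sigma = 0.5, 0.25        # assumed radius and kernel width

neighbors = NearestNeighbors().fit(X)
G = neighbors.radius_neighbors_graph(radius=eps, mode="distance")

# Symmetrize as in _build_graph (a no-op for the eps-ball graph,
# but required for the asymmetric knn branch).
G_sym = csr_matrix.maximum(G, G.T.tocsr())

# Turn the stored distances into Gaussian affinities (stand-in for self._kernel).
G_sym.data = np.exp(-G_sym.data**2 / (2 * sigma**2))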
Example #9
    def clusterHMM(self, minMagnitude = 10, treeR = 22, leafNum = 190, neighborR = 22, timeScale = 10, eps = 18, minPts = 170):
        
        if os.path.exists(self.clusterDirectory + 'Labels.npy') and not self.rewrite:
            print('Cluster label file already exists. Will not recalculate it unless rewrite flag is True')
            return

        try:
            self.obj
        except AttributeError:
            self.obj = HMMdata(filename = self.hmmFile)

        print('Identifying raw coordinate positions for cluster analysis', file = sys.stderr)
        if os.path.isfile(self.clusterDirectory + 'RawCoords.npy'):
            self.coords = np.load(self.clusterDirectory + 'RawCoords.npy')
        else:
            self.coords = self.obj.retDBScanMatrix(minMagnitude)
            np.save(self.clusterDirectory + 'RawCoords.npy', self.coords)
            
        print('Calculating nearest neighbors and pairwise distances between clusters', file = sys.stderr)
        if os.path.isfile(self.clusterDirectory + 'PairwiseDistances.npz'):
            dist = np.load(self.clusterDirectory + 'PairwiseDistances.npz')
        else:
            self.coords[:,0] = self.coords[:,0]*timeScale
            X = NearestNeighbors(radius=treeR, metric='minkowski', p=2, algorithm='kd_tree',leaf_size=leafNum,n_jobs=24).fit(self.coords)
            dist = X.radius_neighbors_graph(self.coords, neighborR, 'distance')
            scipy.sparse.save_npz(self.clusterDirectory + 'PairwiseDistances.npz', dist)
            
        label = DBSCAN(eps=eps, min_samples=minPts, metric='precomputed', n_jobs=24).fit_predict(dist)
        np.save(self.clusterDirectory + 'Labels.npy', label)
Example #10
def test_dbscan_sparse_precomputed():
    D = pairwise_distances(X)
    nn = NearestNeighbors(radius=0.9).fit(X)
    D_sparse = nn.radius_neighbors_graph(mode="distance")
    # Ensure it is sparse not merely on diagonals:
    assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
    core_sparse, labels_sparse = dbscan(D_sparse, eps=0.8, min_samples=10, metric="precomputed")
    core_dense, labels_dense = dbscan(D, eps=0.8, min_samples=10, metric="precomputed")
    assert_array_equal(core_dense, core_sparse)
    assert_array_equal(labels_dense, labels_sparse)
Example #11
def test_cnn_sparse_precomputed(include_self):
    D = pairwise_distances(X)
    nn = NearestNeighbors(radius=0.9).fit(X)
    X_ = X if include_self else None
    D_sparse = nn.radius_neighbors_graph(X=X_, mode="distance")
    # Ensure it is sparse not merely on diagonals:
    assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
    labels_sparse = commonnn(D_sparse,
                             eps=0.8,
                             min_samples=5,
                             metric="precomputed")
    labels_dense = commonnn(D, eps=0.8, min_samples=5, metric="precomputed")
    assert_array_equal(labels_dense, labels_sparse)
Example #12
def test_dbscan_sparse_precomputed():
    D = pairwise_distances(X)
    nn = NearestNeighbors(radius=.9).fit(X)
    D_sparse = nn.radius_neighbors_graph(mode='distance')
    # Ensure it is sparse not merely on diagonals:
    assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
    core_sparse, labels_sparse = dbscan(D_sparse,
                                        eps=.8,
                                        min_samples=10,
                                        metric='precomputed')
    core_dense, labels_dense = dbscan(D, eps=.8, min_samples=10,
                                      metric='precomputed')
    assert_array_equal(core_dense, core_sparse)
    assert_array_equal(labels_dense, labels_sparse)
Example #13
    def add_exclusion(self,
                      object_1,
                      object_2,
                      margin=1,
                      distance_metric='l1'):
        """Add exclsion constraint edges forcing object_1 and object_2 not to overlap."""

        if object_1 == object_2:
            raise ValueError('object_1 and object_2 is the same object.')

        # Get nodeids.
        object_1_nodeids = self.get_nodeids(object_1)
        object_2_nodeids = self.get_nodeids(object_2)

        # Get points.
        object_1_points = object_1.sample_points
        object_2_points = object_2.sample_points

        # TODO Avoid searching for neighbours if possible.
        if margin == 0 and object_1_points.shape == object_2_points.shape and np.all(
                object_1_points == object_2_points):
            # Add containment edges.
            self.add_pairwise_terms(object_1_nodeids, object_2_nodeids, 0, 0,
                                    0, self.inf_cap)
        else:
            # Create nearest neighbors tree.
            neigh = NearestNeighbors(radius=margin, metric=distance_metric)
            neigh.fit(object_1_points.reshape(-1, object_1_points.shape[-1]))

            # Create neighbors graph.
            # Get connectivity for all within margin.
            radius_neighbors_graph = neigh.radius_neighbors_graph(
                object_2_points.reshape(-1, object_2_points.shape[-1]))

            # Get indices for all combined graph connections.
            indices_2, indices_1, _ = sparse.find(radius_neighbors_graph)

            if indices_1.size == 0:
                # If there are no neighbors, return.
                return

            # Take the ids to add pairwise terms to.
            object_1_ids = np.take(object_1_nodeids, indices_1)
            object_2_ids = np.take(object_2_nodeids, indices_2)

            # Add exclusion edges.
            self.add_pairwise_terms(object_1_ids, object_2_ids, 0, 0, 0,
                                    self.inf_cap)
Example #14
    def add_containment(self,
                        outer_object,
                        inner_object,
                        margin=1,
                        distance_metric='l1'):
        """Add containment constraint edges forcing inner_object to be within outer_object."""

        if outer_object == inner_object:
            raise ValueError(
                'outer_object and inner_object is the same object.')

        if margin is None:
            return

        # Get nodeids.
        outer_nodeids = self.get_nodeids(outer_object)
        inner_nodeids = self.get_nodeids(inner_object)

        # Get points.
        outer_points = outer_object.sample_points
        inner_points = inner_object.sample_points

        # TODO Avoid searching for neighbours if possible.
        if margin == 0 and outer_points.shape == inner_points.shape and np.all(
                outer_points == inner_points):
            # Add containment edges.
            self.add_pairwise_terms(outer_nodeids, inner_nodeids, 0,
                                    self.inf_cap, 0, 0)
        else:
            # Create nearest neighbors tree.
            neigh = NearestNeighbors(radius=margin, metric=distance_metric)
            neigh.fit(outer_points.reshape(-1, outer_points.shape[-1]))

            # Create neighbors graph.
            # Get connectivity for all within margin.
            radius_neighbors_graph = neigh.radius_neighbors_graph(
                inner_points.reshape(-1, inner_points.shape[-1]))

            # Get indices for all combined graph connections.
            indices_2, indices_1, _ = sparse.find(radius_neighbors_graph)

            outer_ids = np.take(outer_nodeids, indices_1)
            inner_ids = np.take(inner_nodeids, indices_2)

            # Add containment edges.
            self.add_pairwise_terms(outer_ids, inner_ids, 0, self.inf_cap, 0,
                                    0)
Example #15
File: utils.py Project: redzhepdx/IWC-Net
def positional_sparse_matrix(row_size, col_size, radius):
    #p2 means euclidean distance
    nn   = NearestNeighbors(radius=radius, p=2)

    rows = np.arange(row_size)
    cols = np.arange(col_size)

    mesh_grid         = np.empty((row_size, col_size, 2), dtype=np.intp)
    mesh_grid[..., 0] = rows[:, None]
    mesh_grid[..., 1] = cols
    #print("MESH_GRID : \n", mesh_grid)
    mesh_grid         = mesh_grid.reshape(-1, 2)
    #print("MESH_GRID_RESHAPE : \n", mesh_grid)
    nn.fit(mesh_grid)

    pos_sparse_matrix = nn.radius_neighbors_graph(mesh_grid, radius=radius, mode='distance')

    return pos_sparse_matrix
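A quick illustrative call of the function above; the grid size and radius are arbitrary values chosen for the example.

# Pairwise-distance graph over all (row, col) positions of a 4x4 grid.
pos = positional_sparse_matrix(4, 4, radius=1.5)
print(pos.shape)   # (16, 16): one row/column per grid position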
Example #16
def visualize_graph_partition(points, k=-1):
    """ Separate visualization of k normalized clusters created by region growing algorithm 
        where the colors show the best partition of the nearest neighbor graph in each cluster
    """ 
    # import graph and partition
    import networkx as nx
    import community

    # region growing
    curvatures, normals = estimate_curvature_and_normals(points, n_neighbors=50)
    cluster_mask = region_growing(points, normals, curvatures, min_points=100)

    # get cluster indices of interest, ignoring outliers
    cluster_idx = np.unique(cluster_mask)
    cluster_idx = cluster_idx[(cluster_idx!=-1)]
    cluster_idx = cluster_idx[:k] if k > -1 else cluster_idx

    # create visualizer
    vis = Visualizer(background=(1, 1, 1))
    # visualize each cluster
    for i in cluster_idx:
        # get points of current cluster
        cluster = normalize_pc(points[(cluster_mask==i)])
        # build adjacency matrix for neighbor graph
        tree = NearestNeighbors(algorithm='ball_tree', radius=0.03, n_jobs=-1).fit(cluster)
        graph_matrix = tree.radius_neighbors_graph(cluster, mode='distance')
        # build graph from sparse adjacency matrix and compute best partition
        G = nx.from_scipy_sparse_matrix(graph_matrix)
        partition = community.best_partition(G)
        # color each partition in a random color
        community_colors, colors = {}, np.empty_like(cluster)
        for i, c in partition.items():
            # get random color
            if c not in community_colors:
                community_colors[c] = np.random.uniform(0, 1, size=3)
            # set color
            colors[i, :] = community_colors[c]

        # add to visualizer
        vis.add_by_features(cluster, colors)

    # show
    vis.run()
Example #17
    def make_ConformationalNetwork(self):

        neigh = NearestNeighbors(radius=1, metric='chebyshev')
        neigh.fit(self.ijk_centers)
        net_centers = nx.from_scipy_sparse_matrix(
            neigh.radius_neighbors_graph())
        del (neigh)

        net_rotations = nx.Graph()
        net_rotations.add_nodes_from(range(self.num_rotations))
        for ii in range(self.num_rotations):
            neighs = hp.get_all_neighbours(self.nside, ii, nest=False)
            neighs[neighs == -1] = 0
            net_rotations.add_edges_from(
                zip(np.full(neighs.shape[0], ii), neighs))
        del (neighs)

        net = nx.cartesian_product(net_centers, net_rotations)

        del (net_rotations, net_centers)

        return net
Example #18
    def createClusters(self, minMagnitude = 0, treeR = 22, leafNum = 190, neighborR = 22, timeScale = 10, eps = 18, minPts = 90, delta = 1.0):
        #self.loadVideo()
        self.loadHMM()
        self._print('Created ' + self.labeledCoordsFile)
        coords = self.obj.retDBScanMatrix(minMagnitude)
        np.save(self.localClusterDirectory + 'RawCoords.npy', coords)
        #subprocess.call(['rclone', 'copy', self.localClusterDirectory + 'RawCoordsFile.npy', self.cloudClusterDirectory], stderr = self.fnull)
               

        sortData = coords[coords[:,0].argsort()][:,0:3] #sort data by time for batch processing, throwing out 4th column (magnitude)
        numBatches = int(sortData[-1,0]/delta/3600) + 1 #delta is number of hours to batch together. Can be fraction.

        sortData[:,0] = sortData[:,0]*timeScale #scale time so that time distances between transitions are comparable to spatial differences
        labels = np.zeros(shape = (sortData.shape[0],1), dtype = sortData.dtype)

        #Calculate clusters in batches to avoid RAM overuse
        curr_label = 0 #Labels for each batch start from zero - need to offset these
            
        print('Calculating clusters in ' + str(numBatches) + ' total batches', file = sys.stderr)
        for i in range(numBatches):
            print('Batch: ' + str(i), file = sys.stderr)
            min_time, max_time = i*delta*timeScale*3600, (i+1)*delta*timeScale*3600 # Have to deal with rescaling of time. 3600 = # seconds in an hour
            hour_range = np.where((sortData[:,0] > min_time) & (sortData[:,0] <= max_time))
            min_index, max_index = hour_range[0][0], hour_range[0][-1] + 1
            X = NearestNeighbors(radius=treeR, metric='minkowski', p=2, algorithm='kd_tree',leaf_size=leafNum,n_jobs=24).fit(sortData[min_index:max_index])
            dist = X.radius_neighbors_graph(sortData[min_index:max_index], neighborR, 'distance')
            sub_label = DBSCAN(eps=eps, min_samples=minPts, metric='precomputed', n_jobs=24).fit_predict(dist)
            new_labels = int(sub_label.max()) + 1
            sub_label[sub_label != -1] += curr_label
            labels[min_index:max_index,0] = sub_label
            curr_label += new_labels

        sortData[:,0] = sortData[:,0]/timeScale
        self.labeledCoords = np.concatenate((sortData, labels), axis = 1).astype('int64')
        np.save(self.localClusterDirectory + self.labeledCoordsFile, self.labeledCoords)
        subprocess.call(['rclone', 'copy', self.localClusterDirectory + self.labeledCoordsFile, self.cloudClusterDirectory], stderr = self.fnull)
Example #19
def hcg_cluster(
    features,
    timestamps=None,
    linkage='ward',
    distance_thr: tuple = (0.5, 0.5),
    timestamp_thr: float = 0,
    edge_thr: float = 0.7
):
    dist_neigh = NearestNeighbors(radius=distance_thr[0])
    dist_neigh.fit(features)
    dist_graph = dist_neigh.radius_neighbors_graph(mode='connectivity')

    if timestamps is not None and timestamp_thr > 0:
        time_neigh = NearestNeighbors(radius=timestamp_thr)
        time_neigh.fit(timestamps)
        time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
        dist_graph = dist_graph.multiply(time_graph)
        dist_graph.eliminate_zeros()

    dist_graph = nx.from_scipy_sparse_matrix(dist_graph)

    components = nx.connected_components(dist_graph)

    clusters = []

    clustering = None
    if distance_thr[1] > 0:
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=distance_thr[1],
            affinity='euclidean',
            linkage=linkage
        )

    for component in components:
        n = len(component)
        if n > 3 and clustering is not None:
            sub_graph = dist_graph.subgraph(component).copy()
            component = list(component)
            if sub_graph.size() >= edge_thr * (n * (n - 1) / 2):
                clusters.append(component)
            else:
                clustering.fit(features[component])
                clusters.extend(cluster_labels(clustering.labels_, component))
        else:
            clusters.append(list(component))

    return clusters


# def cluster_features(
#     features,
#     timestamps=None,
#     distance_thr: float = 0.5,
#     timestamp_thr: float = 0,
#     grouped: bool = True
# ):
#     dist_neigh = NearestNeighbors(radius=distance_thr)
#     dist_neigh.fit(features)
#     dist_graph = dist_neigh.radius_neighbors_graph(mode='connectivity')
#
#     if timestamps is not None and timestamp_thr > 0:
#         time_neigh = NearestNeighbors(radius=timestamp_thr)
#         time_neigh.fit(timestamps)
#         time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
#         dist_graph = dist_graph.multiply(time_graph)
#         dist_graph.eliminate_zeros()
#
#     clustering = AgglomerativeClustering(
#         n_clusters=None,
#         distance_threshold=distance_thr,
#         affinity='euclidean',
#         linkage='ward',
#         connectivity=dist_graph
#     )
#
#     clustering.fit(features)
#
#     labels = clustering.labels_
#
#     if not grouped:
#         return labels, None
#
#     labels_set = set(labels) - {-1}
#
#     clusters = {label: [] for label in labels_set}
#     outliers = []
#
#     for ind, label in enumerate(labels):
#         if label != -1:
#             clusters[label].append(ind)
#         else:
#             outliers.append([ind])
#
#     return labels, list(clusters.values()) + outliers

# import networkx as nx
# from .clustering import cluster_features as aro_cluster
#
# def cluster_features(
#     features,
#     timestamps=None,
#     distance_thr: float = 0.5,
#     timestamp_thr: float = 0,
#     grouped: bool = True
# ):
#     n_features = len(features)
#     dist_neigh = NearestNeighbors(radius=1.13)
#     dist_neigh.fit(features)
#     dist_graph = dist_neigh.radius_neighbors_graph(mode='connectivity')
#
#     if timestamps is not None and timestamp_thr > 0:
#         time_neigh = NearestNeighbors(radius=timestamp_thr)
#         time_neigh.fit(timestamps)
#         time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
#         dist_graph = dist_graph.multiply(time_graph)
#         dist_graph.eliminate_zeros()
#
#     dist_graph = nx.from_scipy_sparse_matrix(dist_graph)
#
#     components = nx.connected_components(dist_graph)
#
#     clusters = []
#
#     for component in components:
#         clusters.append(list(component))
#         n = len(component)
#         if n > 3:
#             sub_graph = dist_graph.subgraph(component).copy()
#             component = list(component)
#             if sub_graph.size() == (n * (n - 1) / 2):
#                 clusters.append(component)
#             else:
#                 sub_clusters = aro_cluster(
#                     features[component],
#                     distance_thr=distance_thr,
#                     n_neighbors=min(n, 10)
#                 )
#                 for sub_cluster in sub_clusters:
#                     clusters.append([component[i] for i in sub_cluster])
#         else:
#             clusters.append(list(component))
#
#     return clusters


# def cluster_features(
#     features,
#     timestamps=None,
#     distance_thr: float = 0.5,
#     timestamp_thr: float = 0,
#     grouped: bool = True
# ):
#     n_features = len(features)
#     dist_neigh = NearestNeighbors(radius=distance_thr)
#     dist_neigh.fit(features)
#     dist_graph = dist_neigh.radius_neighbors_graph(mode='connectivity')
#
#     if timestamps is not None and timestamp_thr > 0:
#         time_neigh = NearestNeighbors(radius=timestamp_thr)
#         time_neigh.fit(timestamps)
#         time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
#         dist_graph = dist_graph.multiply(time_graph)
#         dist_graph.eliminate_zeros()
#
#     _, labels = connected_components(
#         csgraph=dist_graph,
#         directed=False,
#         return_labels=True
#     )
#
#     clusters_high = cluster_labels(labels, range(0, n_features))
#
#     # Second level clustering
#     clusters_low = []
#     clustering = AgglomerativeClustering(
#         n_clusters=None,
#         distance_threshold=1.2,
#         affinity='euclidean',
#         linkage='single'
#     )
#
#     # clustering = OPTICS(
#     #     max_eps=distance_thr,
#     #     min_samples=3,
#     #     metric='euclidean'
#     # )
#
#     for cluster in clusters_high:
#         if len(cluster) > 3:
#             clustering.fit(features[cluster])
#             clusters_low.extend(
#                 cluster_labels(clustering.labels_, cluster)
#             )
#         else:
#             clusters_low.append(cluster)
#
#     return clusters_low


# import networkx as nx
#
# def cluster_features(
#     features,
#     timestamps=None,
#     distance_thr: float = 0.5,
#     timestamp_thr: float = 0,
#     grouped: bool = True
# ):
#     n_features = len(features)
#     dist_neigh = NearestNeighbors(radius=distance_thr)
#     dist_neigh.fit(features)
#     dist_graph = dist_neigh.radius_neighbors_graph(mode='connectivity')
#
#     if timestamps is not None and timestamp_thr > 0:
#         time_neigh = NearestNeighbors(radius=timestamp_thr)
#         time_neigh.fit(timestamps)
#         time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
#         dist_graph = dist_graph.multiply(time_graph)
#         dist_graph.eliminate_zeros()
#
#     dist_graph = nx.from_scipy_sparse_matrix(dist_graph)
#
#     components = nx.connected_components(dist_graph)
#
#     clusters = []
#
#     for component in components:
#         if len(component) > 3:
#             sub_graph = dist_graph.subgraph(component).copy()
#             sub_components = nx.k_edge_components(sub_graph, k=2)
#             clusters.extend([
#                 list(sub_component) for sub_component in sub_components
#             ])
#         else:
#             clusters.append(list(component))
#
#     return clusters
Example #20
    def _createClusters(self):
        print('  Creating clusters from HMM transitions,,Time: ' +
              str(datetime.datetime.now()))

        # Load in HMM data
        hmmObj = HA(self.videoObj.localHMMFile)

        # Convert into coords object and save it
        coords = hmmObj.retDBScanMatrix(self.projFileManager.minMagnitude)
        np.save(self.videoObj.localRawCoordsFile, coords)

        # Run data in batches to avoid RAM override
        sortData = coords[coords[:, 0].argsort(
        )][:, 0:
           3]  #sort data by time for batch processing, throwing out 4th column (magnitude)
        numBatches = int(
            sortData[-1, 0] / self.projFileManager.delta / 3600
        ) + 1  #delta is number of hours to batch together. Can be fraction.

        sortData[:,
                 0] = sortData[:,
                               0] * self.projFileManager.timeScale  #scale time so that time distances between transitions are comparable to spatial differences
        labels = np.zeros(shape=(sortData.shape[0], 1),
                          dtype=sortData.dtype)  # Initialize labels

        #Calculate clusters in batches to avoid RAM overuse
        curr_label = 0  #Labels for each batch start from zero - need to offset these
        print('   ' + str(numBatches) + ' total batches. On batch: ',
              end='',
              flush=True)
        for i in range(numBatches):
            print(str(i) + ',', end='', flush=True)

            min_time, max_time = i * self.projFileManager.delta * self.projFileManager.timeScale * 3600, (
                i + 1
            ) * self.projFileManager.delta * self.projFileManager.timeScale * 3600  # Have to deal with rescaling of time. 3600 = # seconds in an hour
            hour_range = np.where((sortData[:, 0] > min_time)
                                  & (sortData[:, 0] <= max_time))
            min_index, max_index = hour_range[0][0], hour_range[0][-1] + 1
            X = NearestNeighbors(radius=self.projFileManager.treeR,
                                 metric='minkowski',
                                 p=2,
                                 algorithm='kd_tree',
                                 leaf_size=self.projFileManager.leafNum,
                                 n_jobs=24).fit(sortData[min_index:max_index])
            dist = X.radius_neighbors_graph(sortData[min_index:max_index],
                                            self.projFileManager.neighborR,
                                            'distance')
            sub_label = DBSCAN(eps=self.projFileManager.eps,
                               min_samples=self.projFileManager.minPts,
                               metric='precomputed',
                               n_jobs=self.workers).fit_predict(dist)
            new_labels = int(sub_label.max()) + 1
            sub_label[sub_label != -1] += curr_label
            labels[min_index:max_index, 0] = sub_label
            curr_label += new_labels
        print()
        # Concatenate and save information
        sortData[:, 0] = sortData[:, 0] / self.projFileManager.timeScale
        labeledCoords = np.concatenate((sortData, labels),
                                       axis=1).astype('int64')
        np.save(self.videoObj.localLabeledCoordsFile, labeledCoords)
        print('  Concatenating and summarizing clusters,,Time: ' +
              str(datetime.datetime.now()))

        df = pd.DataFrame(labeledCoords, columns=['T', 'X', 'Y', 'LID'])
        clusterData = df.groupby('LID').apply(
            lambda x: pd.Series({
                'projectID': self.lp.projectID,
                'videoID': self.videoObj.baseName,
                'N': x['T'].count(),
                't': int(x['T'].mean()),
                'X': int(x['X'].mean()),
                'Y': int(x['Y'].mean()),
                't_span': int(x['T'].max() - x['T'].min()),
                'X_span': int(x['X'].max() - x['X'].min()),
                'Y_span': int(x['Y'].max() - x['Y'].min()),
                'ManualAnnotation': 'No',
                'ManualLabel': '',
                'ClipCreated': 'No',
                'DepthChange': np.nan,
            }))
        clusterData['TimeStamp'] = clusterData.apply(
            lambda row:
            (self.videoObj.startTime + datetime.timedelta(seconds=int(row.t))),
            axis=1)
        clusterData['ClipName'] = clusterData.apply(lambda row: '__'.join([
            str(x) for x in [
                self.lp.projectID, self.videoObj.baseName, row.name, row.N, row
                .t, row.X, row.Y
            ]
        ]),
                                                    axis=1)
        # Identify clusters to make clips for
        #self._print('Identifying clusters to make clips for', log = False)
        delta_xy = self.projFileManager.delta_xy
        delta_t = self.projFileManager.delta_t
        smallClips, clipsCreated = 0, 0  # keep track of clips with small number of pixel changes
        for row in clusterData.sample(n=clusterData.shape[0]).itertuples(
        ):  # Randomly go through the dataframe
            LID, N, t, x, y, time = row.Index, row.N, row.t, row.X, row.Y, row.TimeStamp
            if x - delta_xy < 0 or x + delta_xy >= self.videoObj.height or y - delta_xy < 0 or y + delta_xy >= self.videoObj.width:
                continue
            # Check temporal compatability (part a):
            elif self.videoObj.framerate * t - delta_t < 0 or LID == -1:
                continue
            # Check temporal compatability (part b):
            elif time < self.lightsOnTime or time > self.lightsOffTime:
                continue
            else:
                clusterData.loc[clusterData.index == LID,
                                'ClipCreated'] = 'Yes'
                if N < self.projFileManager.smallLimit:
                    if smallClips > self.videoObj.nManualLabelClips / 20:
                        continue
                    smallClips += 1
                if clipsCreated < self.videoObj.nManualLabelClips:
                    clusterData.loc[clusterData.index == LID,
                                    'ManualAnnotation'] = 'Yes'
                    clipsCreated += 1

        clusterData.to_csv(self.videoObj.localLabeledClustersFile, sep=',')
        self.clusterData = clusterData
Example #21
relvdm_backbone.load_rel_vdms_pickle(sample, subset=no_charge)
relvdm_backbone.set_rel_vdm_bb_coords()
relvdm_backbone.set_rois_rot_trans(sample)
relvdm_backbone.set_rel_vdm_tags(sample)
print('moving vdMs')
relvdm_backbone.move_rel_vdms(sample)
print('removing clashing vdMs')
relvdm_backbone.remove_clash(sample)
relvdm_backbone.reshape_ifgs()
all_ifgs = functools.reduce(lambda a, b: np.vstack((a, b)),
                            [val for val in relvdm_backbone._ifgs.values()])
print('finding hotspots preproline carbonyl')
nbrs = NearestNeighbors(metric='euclidean', radius=1.0, algorithm='kd_tree')
nbrs.fit(all_ifgs)
adj_mat = nbrs.radius_neighbors_graph(all_ifgs)
print('clustering...')

mems, cents = combs.Cluster.greedy_cluster_pc(adj_mat, pc=0.8)

all_resnum_chid = functools.reduce(lambda a, b: np.vstack((a, b)), [
    np.array([tuple(key)] * len(val), dtype=object)
    for key, val in relvdm_backbone._vdm_tags.items()
])
all_vdm_tags = functools.reduce(lambda a, b: np.vstack(
    (a, b)), [val for val in relvdm_backbone._vdm_tags.values()])
all_resn = functools.reduce(lambda a, b: np.hstack((a, b)),
                            [val for val in relvdm_backbone._resn.values()])
all_type = functools.reduce(lambda a, b: np.hstack((a, b)),
                            [val for val in relvdm_backbone._type.values()])
all_indices = functools.reduce(lambda a, b: np.hstack(
Example #22
    def cc_regions(self,
                   selected_components=None,
                   n_neighbors=None,
                   radius=None,
                   expansion_factor=None):
        embedding = self.embedding
        '''
        if embedding.shape[1] == 2:
            selected_components = [0,1]
        elif selected_components is not None:
            if type(selected_components) == list:
                selected_components = selected_components
            else:
                raise ValueError("selected_components parameter must be od type list.")
        else:
            raise ValueError("'DimensionReductionRegions' was initialized with more than two components. Provide selected_components as list.")
        '''
        #embedding = embedding[:, selected_components]

        if n_neighbors is None:
            n_neighbors = 0

        if radius is None:
            radius = 0

        if expansion_factor is not None:
            embedding = embedding**expansion_factor * np.sign(embedding)

        nn = NearestNeighbors(n_neighbors=n_neighbors, radius=radius)
        nn.fit(embedding)

        knn = nn.kneighbors_graph()
        knn_nr_components, knn_cc_labels = connected_components(knn,
                                                                directed=False)
        print("Number of knn components: %i" % (knn_nr_components))
        knn_cc = [
            tuple(np.where(knn_cc_labels == lbl)[0])
            for lbl in range(max(knn_cc_labels) + 1)
        ]

        rnn = nn.radius_neighbors_graph()
        rnn_nr_components, rnn_cc_labels = connected_components(rnn,
                                                                directed=False)
        print("Number of rnn components: %i" % (rnn_nr_components))
        rnn_cc = [
            tuple(np.where(rnn_cc_labels == lbl)[0])
            for lbl in range(max(rnn_cc_labels) + 1)
        ]

        regions_idx = list(set(knn_cc + rnn_cc))

        print("Total number of components: %i" % (len(regions_idx)))

        regions = []

        for idx in regions_idx:
            idx = list(idx)
            img = create_empty_img(self.height, self.width, False)
            img[(self.gy[idx], self.gx[idx])] = 1
            regions.append(img)
        regions = np.array(regions)
        regions_dict = {}
        for idx, region in enumerate(regions):
            regions_dict[idx] = region
        region_sum = np.sum(regions, axis=0)

        return region_sum, regions_dict
Example #23
    ]).T
    # data = np.array([rawdata['X'], rawdata['Y'], rawdata['Z']]).T

    # get the true labels and group names
    labels_true = np.array(rawdata['row'])
    groups = np.array(rawdata['group'])

    # convert data to 32 bit
    data = np.array(data, dtype=np.float32)

    # get nearest neighbours
    printlog('Work out nearest neighbours...')
    start = time.time()
    neigh = NearestNeighbors(radius=20, metric='euclidean')
    neigh.fit(data)
    neighbours = neigh.radius_neighbors_graph(data, mode='distance')
    end = time.time()
    printlog('\t Time taken = {0} s'.format(end - start))

    # ----------------------------------------------------------------------
    # DBscan example from :
    #      scikit-learn.org/stable/modules/clustering.html#dbscan
    #      http://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan
    #          .html#sphx-glr-auto-examples-cluster-plot-dbscan-py
    printlog("Calculating clustering using 'DBSCAN'...")
    start = time.time()

    sargs = dict(eps=10, min_samples=50, metric='precomputed')
    db = DBSCAN(**sargs).fit(neighbours)
    end = time.time()
    # get mask and labels
Example #24
    def add_layered_exclusion(self,
                              object_1,
                              object_2,
                              margin=1,
                              distance_metric='l1',
                              reduce_redundancy=True):
        """Add exclsion constraint edges forcing object_1 and object_2 not to overlap.
        This function assumes a layered boundary cost has been applied to the objects.
        """

        if object_1 == object_2:
            raise ValueError('object_1 and object_2 is the same object.')

        # Get nodeids.
        object_1_nodeids = self.get_nodeids(object_1)
        object_2_nodeids = self.get_nodeids(object_2)

        # Get points.
        object_1_points = object_1.sample_points
        object_2_points = object_2.sample_points

        # TODO Avoid searching for neighbours if possible.

        # Create nearest neighbors tree.
        neigh = NearestNeighbors(radius=margin, metric=distance_metric)
        neigh.fit(object_1_points.reshape(-1, object_1_points.shape[-1]))

        # Create neighbors graph.
        # Get connectivity for all within margin.
        radius_neighbors_graph = neigh.radius_neighbors_graph(
            object_2_points.reshape(-1, object_2_points.shape[-1]))

        # Get indices for all combined graph connections.
        indices_2, indices_1, _ = sparse.find(radius_neighbors_graph)

        if indices_1.size == 0:
            # If there are no neighbors, return.
            return

        # Remove redundant neighbors.
        if reduce_redundancy:

            # Find sizes of the columns.
            column_size_1 = np.product(object_1_nodeids.shape[1:])
            column_size_2 = np.product(object_2_nodeids.shape[1:])

            # Get the column indices of the node indices.
            column_indices_2 = indices_2 % column_size_2
            # Get first unique combination of columns.
            _, unique_column_indices = np.unique([indices_1, column_indices_2],
                                                 return_index=True,
                                                 axis=1)

            # Filter indices to have only one edge between each column.
            indices_1 = indices_1[unique_column_indices]
            indices_2 = indices_2[unique_column_indices]

            # Get the column indices of the node indices.
            column_indices_1 = indices_1 % column_size_1
            # Get first unique combination of columns.
            _, unique_column_indices = np.unique([column_indices_1, indices_2],
                                                 return_index=True,
                                                 axis=1)

            # Filter indices to have only one edge between each column.
            indices_1 = indices_1[unique_column_indices]
            indices_2 = indices_2[unique_column_indices]

        # Add exclusion terms.
        self.add_pairwise_terms(object_1_nodeids.flat[indices_1],
                                object_2_nodeids.flat[indices_2], 0, 0, 0,
                                self.inf_cap)
Example #25
File: _gcg.py Project: altest-com/dnfal
def gcg_cluster(features,
                timestamps=None,
                distance_thr: float = 0.5,
                timestamp_thr: float = 0,
                edge_thr: float = 0.5):
    n_features = len(features)
    dist_neigh = NearestNeighbors(radius=distance_thr)
    dist_neigh.fit(features)
    dist_graph = dist_neigh.radius_neighbors_graph(mode='distance')

    if timestamps is not None and timestamp_thr > 0:
        time_neigh = NearestNeighbors(radius=timestamp_thr)
        time_neigh.fit(timestamps)
        time_graph = time_neigh.radius_neighbors_graph(mode='connectivity')
        dist_graph = dist_graph.multiply(time_graph)
        dist_graph.eliminate_zeros()

    dist_graph = nx.from_scipy_sparse_matrix(dist_graph)

    # Dict of nodes that do not yet have a cluster label assigned
    no_labels = {u: u for u in range(n_features)}

    # Init the clusters list with a random one-element cluster
    clusters = {0: [0]}  # TODO: really do this random?

    # Remove node 0 from the no-labels dict
    del no_labels[0]

    # Current growing cluster
    cur_label = 0

    node_boundary = nx.algorithms.boundary.node_boundary

    while True:
        cur_cluster = clusters[cur_label]
        boundary = node_boundary(dist_graph, cur_cluster, no_labels.keys())
        cluster_grow = False

        if len(boundary):
            scores = []
            for node in boundary:
                node_neighs = dist_graph[node]
                inside_edges = [
                    node_neighs[u]['weight'] for u in node_neighs
                    if u in cur_cluster
                ]
                scores.append((len(inside_edges), sum(inside_edges), node))

            best_score = sorted(scores, key=lambda s: (-s[0], s[1]))[0]
            if best_score[0] >= int(edge_thr * len(cur_cluster) + 1):
                best_node = best_score[2]
                clusters[cur_label].append(best_node)
                del no_labels[best_node]
                cluster_grow = True

        if not len(no_labels):
            break

        if not cluster_grow:
            # Increase current cluster label
            cur_label += 1
            # Take the next no labeled node and seed a new cluster with it
            next_node = no_labels.popitem()[0]
            clusters[cur_label] = [next_node]

            if not len(no_labels):
                break

    return list(clusters.values())
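A minimal illustrative call of gcg_cluster; the random embeddings and threshold values are assumptions, not taken from the dnfal project.

import numpy as np

feats = np.random.rand(40, 128).astype(np.float32)   # toy feature vectors
clusters = gcg_cluster(feats, distance_thr=0.6, edge_thr=0.5)
print(len(clusters))   # each cluster is a list of row indices into feats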
Example #26
class Kernel(object):
    """
    Class abstracting the evaluation of kernel functions on the dataset.

    Parameters
    ----------
    kernel_type : string, optional
        Type of kernel to construct. Currently the only option is 'gaussian', but more will be implemented.
    epsilon : string, optional
        Method for choosing the epsilon.  Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) or 'bgh' (Berry, Giannakis and Harlim).
    k : int, optional
        Number of nearest neighbors over which to construct the kernel.
    neighbor_params : dict or None, optional
        Optional parameters for the nearest Neighbor search. See scikit-learn NearestNeighbors class for details.
    metric : string, optional
        Distance metric to use in constructing the kernel.  This can be selected from any of the scipy.spatial.distance metrics, or a callable function returning the distance.
    metric_params : dict or None, optional
        Optional parameters required for the metric given.
    """
    def __init__(self,
                 kernel_type='gaussian',
                 epsilon='bgh',
                 k=64,
                 neighbor_params=None,
                 metric='euclidean',
                 metric_params=None,
                 nearest_neighbors_algo='knearest'):
        self.type = kernel_type
        self.epsilon = epsilon
        self.k = k
        self.metric = metric
        self.metric_params = metric_params
        if neighbor_params is None:
            neighbor_params = {}
        self.neighbor_params = neighbor_params
        self.d = None
        self.epsilon_fitted = None
        self.nearest_neighbors_algo = nearest_neighbors_algo

    def fit(self, X):
        """
        Fits the kernel to the data X, constructing the nearest neighbor tree.

        Parameters
        ----------
        X : array-like, shape (n_query, n_features)
            Data upon which to fit the nearest neighbor tree.

        Returns
        -------
        self : the object itself
        """
        self.k0 = min(self.k, np.shape(X)[0])
        self.data = X
        # Construct Nearest Neighbor Tree
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=
                "Parameter p is found in metric_params. The corresponding parameter from __init__ is ignored."
            )
            self.neigh = NearestNeighbors(n_neighbors=self.k,
                                          radius=1.0,
                                          metric=self.metric,
                                          metric_params=self.metric_params,
                                          **self.neighbor_params)
        self.neigh.fit(X)
        self.choose_optimal_epsilon()
        return self

    def compute(self, Y=None):
        """
        Computes the sparse kernel matrix.

        Parameters
        ----------
        Y : array-like, shape (n_query, n_features), optional.
            Data against which to calculate the kernel values.  If not provided, calculates against the data provided in the fit.

        Returns
        -------
        K : array-like, shape (n_query_X, n_query_Y)
            Values of the kernel matrix.

        """
        if Y is None:
            Y = self.data
        # perform k nearest neighbour search on X and Y and construct sparse matrix
        if self.nearest_neighbors_algo == 'knearest':
            K = self.neigh.kneighbors_graph(Y, mode='distance')
        elif self.nearest_neighbors_algo == 'radius':
            radius = 10.0 * np.sqrt(self.epsilon_fitted)
            K = self.neigh.radius_neighbors_graph(Y, radius, mode='distance')
        else:
            raise ValueError(
                'Did not understand nearest neighbors method. Choose from knearest or radius.'
            )
        # retrieve all nonzero elements and apply kernel function to it
        v = K.data
        if (self.type == 'gaussian'):
            K.data = np.exp(-v**2 / self.epsilon_fitted)
        else:
            raise ("Error: Kernel type not understood.")
        return K

    def choose_optimal_epsilon(self, epsilon=None):
        """
        Chooses the optimal value of epsilon and automatically detects the
        dimensionality of the data.

        Parameters
        ----------
        epsilon : string or scalar, optional
            Method for choosing the epsilon.  Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) or 'bgh' (Berry, Giannakis and Harlim).

        Returns
        -------
        self : the object itself
        """
        if epsilon is None:
            epsilon = self.epsilon

        # Choose Epsilon according to method provided.
        if isinstance(epsilon, numbers.Number):  # if user provided.
            self.epsilon_fitted = epsilon
            return self
        elif epsilon == 'bgh':  # Berry, Giannakis Harlim method.
            dists = self.neigh.kneighbors_graph(self.data,
                                                mode='distance').data
            sq_distances = dists**2
            if (self.metric !=
                    'euclidean'):  # TODO : replace with call to scipy metrics.
                warnings.warn(
                    'The BGH method for choosing epsilon assumes a euclidean metric.  However, the metric being used is %s.  Proceed at your own risk...'
                    % self.metric)
            self.epsilon_fitted, self.d = choose_optimal_epsilon_BGH(
                sq_distances)
        else:
            raise ValueError(
                "Method for automatically choosing epsilon was given as %s, but this was not recognized"
                % epsilon)
        return self
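A hedged usage sketch for the Kernel class above, passing a scalar epsilon so the BGH selection step is skipped; the data and parameter values are illustrative only.

import numpy as np

X = np.random.rand(100, 3)   # toy data (illustrative only)

ker = Kernel(kernel_type='gaussian', epsilon=0.5, k=16,
             nearest_neighbors_algo='radius')
ker.fit(X)                   # builds the NearestNeighbors tree, fixes epsilon
K = ker.compute()            # sparse (100, 100) Gaussian kernel matrix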
Example #27
    def add_layered_containment(self,
                                outer_object,
                                inner_object,
                                min_margin=0,
                                max_margin=None,
                                distance_metric='l2',
                                reduce_redundancy=True):
        """Add layered containment."""

        if outer_object == inner_object:
            raise ValueError(
                'outer_object and inner_object is the same object.')

        # Get nodeids.
        outer_nodeids = self.get_nodeids(outer_object)
        inner_nodeids = self.get_nodeids(inner_object)

        # Get points.
        outer_points = outer_object.sample_points
        inner_points = inner_object.sample_points

        if outer_points.ndim != inner_points.ndim or outer_points.shape[
                -1] != inner_points.shape[-1]:
            raise ValueError(
                'outer_object points and inner_object points must have the same number of dimensions and the same size last dimension.'
            )

        # Check if the points are identical.
        if outer_points.shape == inner_points.shape and np.all(
                outer_points == inner_points):
            # If shapes and points match, this is the fast way.

            if max_margin is not None and outer_nodeids.shape[0] > max_margin:
                # Add max margin edges.
                if max_margin == 0:
                    self.add_pairwise_terms(inner_nodeids, outer_nodeids, 0,
                                            self.inf_cap, 0, 0)
                else:
                    self.add_pairwise_terms(inner_nodeids[:-max_margin],
                                            outer_nodeids[max_margin:], 0,
                                            self.inf_cap, 0, 0)
                    self.add_pairwise_terms(inner_nodeids[-max_margin:],
                                            outer_nodeids[-1], 0, self.inf_cap,
                                            0, 0)

            if min_margin is not None and outer_nodeids.shape[0] > min_margin:
                # Add min margin edges.
                if min_margin == 0:
                    self.add_pairwise_terms(outer_nodeids, inner_nodeids, 0,
                                            self.inf_cap, 0, 0)
                else:
                    self.add_pairwise_terms(outer_nodeids[min_margin:],
                                            inner_nodeids[:-min_margin], 0,
                                            self.inf_cap, 0, 0)
                    self.add_pairwise_terms(outer_nodeids[:min_margin],
                                            inner_nodeids[0], 0, self.inf_cap,
                                            0, 0)

        # Else we need to find nodes to connect.
        else:
            # Create flattened arrays of points.
            outer_points_flat = outer_points.reshape(-1,
                                                     outer_points.shape[-1])
            inner_points_flat = inner_points.reshape(-1,
                                                     inner_points.shape[-1])

            # Find sizes of the columns.
            outer_columns_size = np.product(outer_nodeids.shape[1:])
            inner_column_size = np.product(inner_nodeids.shape[1:])

            # Create nearest neighbors tree.
            neigh = NearestNeighbors(metric=distance_metric)
            neigh.fit(outer_points_flat)

            if max_margin is not None:

                # Find direction of points.
                outer_point_gradients = np.gradient(outer_points, axis=0)
                inner_point_gradients = np.gradient(inner_points, axis=0)

                # Move inner points in the direction of the gradient.
                # The distance moved is the max margin.
                inner_points_moved = inner_points + \
                    (max_margin * inner_point_gradients /
                     np.sqrt(np.sum(inner_point_gradients**2, axis=-1)[..., np.newaxis]))
                inner_points_moved_flat = inner_points_moved.reshape(
                    -1, outer_points.shape[-1])

                # Find the 4 nearest neighbours for moved points. This should be enough.
                radius_neighbors_graph = neigh.kneighbors_graph(
                    inner_points_moved_flat,
                    n_neighbors=4,
                    mode='connectivity')

                # Get indices for all combined graph connections.
                inner_indices, outer_indices, _ = sparse.find(
                    radius_neighbors_graph)
                if outer_indices.size > 0:
                    # The following code filters out redundant terms before adding terms and tries to ensure a meaningful max margin.

                    # Find distances between neighbours.
                    # Create a mask for neighbours further than max margin away.
                    distance_mask = np.sum(
                        (outer_points_flat[outer_indices] -
                         inner_points_flat[inner_indices])**2,
                        axis=-1) > max_margin**2

                    # Only keep edges longer than max margin.
                    outer_indices = outer_indices[distance_mask]
                    inner_indices = inner_indices[distance_mask]

                    # Find the dot products between gradients (a proxy for their angles).
                    angles = np.einsum(
                        'ij,ij->i',
                        outer_point_gradients.reshape(
                            -1, outer_points.shape[-1])[outer_indices],
                        inner_point_gradients.reshape(
                            -1, outer_points.shape[-1])[inner_indices])
                    angle_mask = angles > 0

                    # Only keep edges where the gradients point in the same direction.
                    outer_indices = outer_indices[angle_mask]
                    inner_indices = inner_indices[angle_mask]
                    angles = angles[angle_mask]

                    # Reverse indices to get bigger indices first.
                    outer_indices = np.flip(outer_indices)
                    inner_indices = np.flip(inner_indices)
                    angles = np.flip(angles)

                    # Get the column indices of the node indices.
                    inner_column_indices = inner_indices % inner_column_size
                    # Get the first unique combination of columns.
                    _, unique_column_indices = np.unique(
                        [outer_indices, inner_column_indices],
                        return_index=True,
                        axis=1)

                    # Filter indices to have only one from an outer node to each inner column.
                    outer_indices = outer_indices[unique_column_indices]
                    inner_indices = inner_indices[unique_column_indices]
                    angles = angles[unique_column_indices]

                    # Get sort indices, large angles (dot product) first.
                    angle_sort = np.argsort(-angles)

                    # Sort indices.
                    outer_indices = outer_indices[angle_sort]
                    inner_indices = inner_indices[angle_sort]

                    # For each outer node, find the connection with the largest dot product.
                    _, unique_column_indices = np.unique(outer_indices,
                                                         return_index=True)

                    # Only keep, for each outer node, the connection with the largest dot product.
                    outer_indices = outer_indices[unique_column_indices]
                    inner_indices = inner_indices[unique_column_indices]

                    outer_ids = np.take(outer_nodeids, outer_indices)
                    inner_ids = np.take(inner_nodeids, inner_indices)

                    # Add containment edges.
                    self.add_pairwise_terms(inner_ids, outer_ids, 0,
                                            self.inf_cap, 0, 0)

            if min_margin is not None:

                radius_neighbors_graph = neigh.radius_neighbors_graph(
                    inner_points_flat, radius=min_margin)
                # Removed k-neighbours search for now.
                # Adding them may improve stability when resolution is low.
                # kneighbors_graph = neigh.kneighbors_graph(inner_points_flat, n_neighbors=2, mode='connectivity')
                # radius_neighbors_graph += kneighbors_graph

                # Get indices for all combined graph connections.
                inner_indices, outer_indices, _ = sparse.find(
                    radius_neighbors_graph)
                if outer_indices.size > 0:

                    # Remove redundant neighbors.
                    if reduce_redundancy:

                        # Filter out redundant edges before adding pairwise terms.

                        # Get the column indices of the node indices.
                        inner_column_indices = inner_indices % inner_column_size
                        # Get the first unique combination of columns.
                        _, unique_column_indices = np.unique(
                            [outer_indices, inner_column_indices],
                            return_index=True,
                            axis=1)

                        # Filter indices to have only one edge between each column.
                        outer_indices = outer_indices[unique_column_indices]
                        inner_indices = inner_indices[unique_column_indices]

                        # Reverse indices.
                        outer_indices = np.flip(outer_indices)
                        inner_indices = np.flip(inner_indices)

                        # Get the column indices of the node indices.
                        outer_column_indices = outer_indices % outer_columns_size
                        # Get the first unique combination of columns.
                        _, unique_column_indices = np.unique(
                            [outer_column_indices, inner_indices],
                            return_index=True,
                            axis=1)

                        # Filter indices to have only one edge between each column.
                        outer_indices = outer_indices[unique_column_indices]
                        inner_indices = inner_indices[unique_column_indices]

                    outer_ids = np.take(outer_nodeids, outer_indices)
                    inner_ids = np.take(inner_nodeids, inner_indices)

                    # Add containment edges.
                    self.add_pairwise_terms(outer_ids, inner_ids, 0,
                                            self.inf_cap, 0, 0)
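The max-margin branch above shifts each inner point along the gradient of the point grid before querying the nearest-neighbour tree fitted on the outer points. Below is a minimal standalone sketch of that shifting step; the array shape, the random sample data, and the max_margin value are illustrative only.

import numpy as np

inner_points = np.random.rand(50, 10, 3)   # hypothetical (slice, column, xyz) sample points
max_margin = 2.0

# Direction of travel along the first axis of the point grid.
gradients = np.gradient(inner_points, axis=0)
norms = np.sqrt(np.sum(gradients ** 2, axis=-1))[..., np.newaxis]

# Move each point max_margin units along its (normalized) gradient,
# then flatten for the nearest-neighbour query against the outer points.
inner_points_moved = inner_points + max_margin * gradients / norms
inner_points_moved_flat = inner_points_moved.reshape(-1, inner_points.shape[-1])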
Example #28
0
    def createClusters(self,
                       minMagnitude=0,
                       treeR=22,
                       leafNum=190,
                       neighborR=22,
                       timeScale=10,
                       eps=18,
                       minPts=90,
                       delta=1.0,
                       Nclips=200,
                       delta_xy=100,
                       delta_t=60,
                       smallLimit=500):
        self.loadVideo()
        self.loadHMM()
        self._print('Clustering HMM transitions using DBSCAN')
        coords = self.obj.retDBScanMatrix(minMagnitude)
        np.save(self.localClusterDirectory + 'RawCoords.npy', coords)
        #subprocess.call(['rclone', 'copy', self.localClusterDirectory + 'RawCoordsFile.npy', self.cloudClusterDirectory], stderr = self.fnull)

        # Sort data by time for batch processing, dropping the 4th column (magnitude).
        sortData = coords[coords[:, 0].argsort()][:, 0:3]
        # delta is the number of hours to batch together (can be a fraction).
        numBatches = int(sortData[-1, 0] / delta / 3600) + 1

        # Scale time so that distances between transitions are comparable to spatial distances.
        sortData[:, 0] = sortData[:, 0] * timeScale
        labels = np.zeros(shape=(sortData.shape[0], 1), dtype=sortData.dtype)

        #Calculate clusters in batches to avoid RAM overuse
        curr_label = 0  #Labels for each batch start from zero - need to offset these

        print('Calculating clusters in ' + str(numBatches) + ' total batches',
              file=sys.stderr)
        for i in range(numBatches):
            print('Batch: ' + str(i), file=sys.stderr)
            # Account for the rescaling of time; 3600 = seconds in an hour.
            min_time = i * delta * timeScale * 3600
            max_time = (i + 1) * delta * timeScale * 3600
            hour_range = np.where((sortData[:, 0] > min_time)
                                  & (sortData[:, 0] <= max_time))
            min_index, max_index = hour_range[0][0], hour_range[0][-1] + 1
            X = NearestNeighbors(radius=treeR,
                                 metric='minkowski',
                                 p=2,
                                 algorithm='kd_tree',
                                 leaf_size=leafNum,
                                 n_jobs=24).fit(sortData[min_index:max_index])
            dist = X.radius_neighbors_graph(sortData[min_index:max_index],
                                            neighborR, 'distance')
            sub_label = DBSCAN(eps=eps,
                               min_samples=minPts,
                               metric='precomputed',
                               n_jobs=24).fit_predict(dist)
            new_labels = int(sub_label.max()) + 1
            sub_label[sub_label != -1] += curr_label
            labels[min_index:max_index, 0] = sub_label
            curr_label += new_labels

        sortData[:, 0] = sortData[:, 0] / timeScale
        self.labeledCoords = np.concatenate((sortData, labels),
                                            axis=1).astype('int64')
        np.save(self.localClusterDirectory + self.labeledCoordsFile,
                self.labeledCoords)
        subprocess.call([
            'rclone', 'copy', self.localClusterDirectory +
            self.labeledCoordsFile, self.cloudClusterDirectory
        ],
                        stderr=self.fnull)

        uniqueLabels = set(self.labeledCoords[:, 3])
        uniqueLabels.discard(-1)  # discard avoids a KeyError if no transitions were labelled as noise
        print(
            str(self.labeledCoords[self.labeledCoords[:, 3] != -1].shape[0]) +
            ' HMM transitions assigned to ' + str(len(uniqueLabels)) +
            ' clusters',
            file=sys.stderr)

        df = pd.DataFrame(self.labeledCoords, columns=['T', 'X', 'Y', 'LID'])
        clusterData = df.groupby('LID').apply(
            lambda x: pd.Series({
                'projectID': self.projectID,
                'videoID': self.baseName,
                'N': x['T'].count(),
                't': int(x['T'].mean()),
                'X': int(x['X'].mean()),
                'Y': int(x['Y'].mean()),
                't_span': int(x['T'].max() - x['T'].min()),
                'X_span': int(x['X'].max() - x['X'].min()),
                'Y_span': int(x['Y'].max() - x['Y'].min()),
                'ManualAnnotation': 'No',
                'ManualLabel': '',
                'MLLabel': ''
            }))

        clusterData['X_depth'] = df.apply(
            lambda row: (self.transM[0][0] * row.X + self.transM[0][1] * row.Y
                         + self.transM[0][2]) /
            (self.transM[2][0] * row.X + self.transM[2][1] * row.Y + self.
             transM[2][2]),
            axis=1)
        clusterData['Y_depth'] = df.apply(
            lambda row: (self.transM[1][0] * row.X + self.transM[1][1] * row.Y
                         + self.transM[1][2]) /
            (self.transM[2][0] * row.X + self.transM[2][1] * row.Y + self.
             transM[2][2]),
            axis=1)

        clusterData.to_csv(self.localClusterDirectory + self.clusterFile,
                           sep='\t')
        clusterData = pd.read_csv(self.localClusterDirectory +
                                  self.clusterFile,
                                  sep='\t',
                                  header=0)

        # Identify rows for manual labeling
        manualClips = 0
        smallClips = 0

        cap = cv2.VideoCapture(self.localMasterDirectory + self.videofile)
        framerate = cap.get(cv2.CAP_PROP_FPS)

        for row in clusterData.sample(n=clusterData.shape[0]).itertuples():
            if manualClips > Nclips:
                break

            LID, N, t, x, y = row.LID, row.N, row.t, row.X, row.Y
            if (x - delta_xy < 0 or x + delta_xy >= self.height
                    or y - delta_xy < 0 or y + delta_xy >= self.width
                    or LID == -1
                    or framerate * t - delta_t < 0
                    or framerate * t + delta_t >= self.frames):
                continue
            if smallClips > Nclips / 20:
                continue
            clusterData.loc[clusterData.LID == LID, 'ManualAnnotation'] = 'Yes'
            manualClips += 1
            if N < smallLimit:
                smallClips += 1

        clusterData.to_csv(self.localClusterDirectory + self.clusterFile,
                           sep='\t')
        subprocess.call([
            'rclone', 'sync', self.localClusterDirectory,
            self.cloudClusterDirectory
        ],
                        stderr=self.fnull)
        self.clusterData = clusterData
        self.createClusterClips()
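The clustering above is done batch-by-batch: a sparse radius-neighbours distance graph is built for each batch and handed to DBSCAN with metric='precomputed'. A minimal sketch of that per-batch pattern, using synthetic blobs and illustrative parameters rather than real HMM transition coordinates, might look like:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

# Two synthetic blobs standing in for one batch of (t, x, y) transition coordinates.
rng = np.random.default_rng(0)
batch = np.vstack([rng.normal(loc=c, scale=2.0, size=(200, 3))
                   for c in ((0, 0, 0), (50, 50, 50))])

nn = NearestNeighbors(radius=6.0, algorithm='kd_tree').fit(batch)
dist = nn.radius_neighbors_graph(batch, radius=6.0, mode='distance')

# Sparse precomputed distances keep memory bounded; entries not stored are treated as "far".
sub_label = DBSCAN(eps=6.0, min_samples=10, metric='precomputed').fit_predict(dist)
print('clusters found:', int(sub_label.max()) + 1)   # noise points are labelled -1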
Example #29
0
def dbscan(adata, basis='tsne', n_comps=2, eps=None, min_samples=None, n_jobs=None, copy=False):
    """Cluster cells using DBSCAN

    This wraps sklearn.cluster.DBSCAN and shares most of the parameters.

    Parameters
    ----------
    eps : float or None, optional
        The maximum distance between two samples for them to be considered part
        of the same neighborhood. Clusters are "grown" from samples that have
        more than min_samples points in their neighborhood. Increasing eps
        therefore allows clusters to spread over wider regions.
    min_samples : int or None, optional
        The number of samples (or total weight) in a neighborhood for a point
        to be considered as a core point. This includes the point itself.
    n_jobs : int (default: None)
        Number of threads to use. Defaults to sett.n_jobs.
    copy : bool (default: False)
        Return a copy of adata instead of writing the results to it.

    References
    ----------
    Ester et al. (1996), "A Density-Based Algorithm for Discovering Clusters in
    Large Spatial Databases with Noise".
    In: Proceedings of the 2nd International Conference on Knowledge Discovery
    and Data Mining, Portland, OR, AAAI Press, pp. 226-231.

    Pedregosa et al. (2011) ...
    """
    logg.m('starting DBSCAN', r=True)
    adata = adata.copy() if copy else adata
    if basis not in {'tsne', 'pca'}:
        raise ValueError('`basis` needs to be "tsne" or "pca"')
    if 'X_tsne' in adata.smp and basis == 'tsne':
        X = adata.smp['X_tsne'][:, :n_comps]
    elif 'X_pca' in adata.smp and basis == 'pca':
        X = adata.smp['X_pca'][:, :n_comps]
    else:
        raise ValueError('Run {} first.'.format(basis))
    n_jobs = sett.n_jobs if n_jobs is None else n_jobs
    range_1 = np.max(X[:, 0]) - np.min(X[:, 0])
    range_2 = np.max(X[:, 1]) - np.min(X[:, 1])

    if eps is None:
        if n_comps == 2:
            avg_area_per_point = (range_1 * range_2 / X.shape[0])
            logg.m('... the "drawing range" is', range_1, '×', range_2,
                   'with the average area per point', avg_area_per_point)
            eps = 1.7 * np.sqrt(avg_area_per_point)
        else:
            eps = 5
    if min_samples is None: min_samples = 30
    logg.m('... using eps =', eps, end=', ')
    logg.m('min_samples =', min_samples, end=', ')
    logg.m('basis =', basis, end=', ')
    logg.m('n_comps =', n_comps, end=', ')
    logg.m('n_jobs =', n_jobs)  #, end=', ')
    logg.m('increase `min_samples` if you find too many clusters', v='hint')
    logg.m('reduce eps if "everything is connected"', v='hint')
    from sklearn.cluster import DBSCAN
    from sklearn.neighbors import NearestNeighbors
    nn = NearestNeighbors(n_neighbors=min_samples, n_jobs=n_jobs)
    nn.fit(X)
    D = nn.radius_neighbors_graph(radius=eps, mode='distance')
    db = DBSCAN(eps=eps, min_samples=min_samples,
                n_jobs=n_jobs, metric='precomputed').fit(D)
    labels = db.labels_
    dont_know = labels == -1
    labels = labels.astype(str)
    labels[dont_know] = '?'
    # loop_over_labels = (label for label in np.unique(labels) if label >= 0)
    adata.smp['dbscan_groups'] = labels
    from natsort import natsorted
    adata.add['dbscan_groups_order'] = np.array(natsorted(np.unique(labels)))[:-1]
    logg.m('    finished', t=True, end=' ')
    logg.m('and found', len(np.unique(labels))-1, 'clusters, added\n'
           '    "dbscan_groups", the cluster labels (adata.smp)\n'
           '    "dbscan_groups_order", the unique cluster labels (adata.add)')
    return adata if copy else None
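When `eps` is not given and two components are used, the function above derives it from the average "drawing area" occupied by each point in the embedding. A small standalone illustration of that heuristic (the 2-D embedding here is synthetic):

import numpy as np

X = np.random.rand(2000, 2) * [40.0, 30.0]   # hypothetical 2-D t-SNE coordinates

range_1 = np.max(X[:, 0]) - np.min(X[:, 0])
range_2 = np.max(X[:, 1]) - np.min(X[:, 1])

# Larger, sparser embeddings get a larger eps; denser embeddings a smaller one.
avg_area_per_point = range_1 * range_2 / X.shape[0]
eps = 1.7 * np.sqrt(avg_area_per_point)
print('auto eps:', eps)   # roughly 1.3 for this synthetic layout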
print "dimension : ", len(dimen)

X = np.array(X_tmp)

from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import LSHForest  # LSHForest was removed in scikit-learn 0.21; this requires an older release

for n in range(2, 10):
    print("[[[[[" + str(n) + "]]]]]")
    start = time.time()
    # nbrs = NearestNeighbors(n_neighbors=n, algorithm='ball_tree').fit(X)
    # print(nbrs.kneighbors_graph(X).toarray())
    neigh = NearestNeighbors(n_neighbors=n)
    neigh.fit(X)
    a = neigh.radius_neighbors_graph(X).toarray()
    print(a)
    # a = neigh.kneighbors_graph(X).toarray()
    pc.dump(a, open("knn" + str(n) + ".txt", "wb"))  # pickling requires a binary file handle
    end = time.time()
    print("NearestNeighbors", end - start)

    start = time.time()
    lshf = LSHForest(n_neighbors=n, random_state=10000)
    lshf.fit(X)
    # distances, indices = lshf.kneighbors(X, n_neighbors=n)
    # print(lshf.kneighbors_graph(X).toarray())
    a = lshf.radius_neighbors_graph(X).toarray()
    print(a)
    pc.dump(a, open("lsh" + str(n) + ".txt", "wb"))
    end = time.time()
Example #31
0
relvdm_amino.rel_vdm_path = '/Users/npolizzi/Projects/combs/results/amino/rel_vdms_hbond/20171025/'
relvdm_amino.load_rel_vdms_pickle(sample)
relvdm_amino.set_rel_vdm_bb_coords()
relvdm_amino.set_rois_rot_trans(sample)
relvdm_amino.set_rel_vdm_tags(sample)
print('moving vdMs')
relvdm_amino.move_rel_vdms(sample)
print('removing clashing vdMs')
relvdm_amino.remove_clash(sample)
relvdm_amino.reshape_ifgs()
all_ifgs_amino = functools.reduce(lambda a, b: np.vstack((a, b)),
                                  [val for val in relvdm_amino._ifgs.values()])
print('finding hotspots amino')
nbrs = NearestNeighbors(metric='euclidean', radius=1.1, algorithm='kd_tree')
nbrs.fit(all_ifgs_amino)
adj_mat = nbrs.radius_neighbors_graph(all_ifgs_amino)
print('clustering...')
mems, cents = combs.analysis.cluster.greedy_cluster_pc(adj_mat, pc=0.7)

all_resnum_chid = functools.reduce(lambda a, b: np.vstack((a, b)), [
    np.array([tuple(key)] * len(val), dtype=object)
    for key, val in relvdm_amino._vdm_tags.items()
])
all_vdm_tags = functools.reduce(lambda a, b: np.vstack(
    (a, b)), [val for val in relvdm_amino._vdm_tags.values()])
all_resn = functools.reduce(lambda a, b: np.hstack((a, b)),
                            [val for val in relvdm_amino._resn.values()])
all_type = functools.reduce(lambda a, b: np.hstack((a, b)),
                            [val for val in relvdm_amino._type.values()])
all_indices = functools.reduce(lambda a, b: np.hstack(
    (a, b)), [val for val in relvdm_amino._indices.values() if len(val) > 0])
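combs.analysis.cluster.greedy_cluster_pc is not shown here; the sketch below only illustrates the general idea of greedy clustering on a radius-neighbours adjacency matrix (repeatedly take the node with the most unclaimed neighbours as a centroid and claim its members). The random data, the radius, and the stopping rule are illustrative, and the actual combs routine (including its pc cutoff) may differ.

import numpy as np
from sklearn.neighbors import NearestNeighbors

points = np.random.rand(300, 3)   # stand-in for flattened iFG coordinate vectors
nbrs = NearestNeighbors(metric='euclidean', radius=0.2).fit(points)
adj = nbrs.radius_neighbors_graph(points).toarray().astype(bool)   # includes self-connections

mems, cents = [], []
remaining = np.ones(adj.shape[0], dtype=bool)
while remaining.any():
    # Pick the unclaimed node with the most unclaimed neighbours as the next centroid.
    counts = (adj & remaining).sum(axis=1) * remaining
    cent = int(np.argmax(counts))
    members = np.nonzero(adj[cent] & remaining)[0]
    cents.append(cent)
    mems.append(members)
    # Claim the members so they cannot join (or seed) another cluster.
    remaining[members] = False

print(len(cents), 'greedy clusters')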
Example #32
0
class LSAD(object):
    
    def __init__(self,name,hop,dim_list,K):

        assert len(dim_list) >= 2, 'specify input output dimension'
        self.name = name
        self.dim_list = dim_list
        self.data = None
        self.neigh = None
        self.r = None
        with tf.variable_scope(self.name):
            self.center = tf.placeholder(tf.float32, name="center")
            self.X = tf.placeholder(tf.float32,   [None,dim_list[0] ])
            self.expanded_X = tf.expand_dims(self.X,2)
            #print(self.expanded_X.get_shape())
            self.A = tf.sparse_placeholder(tf.float32)
            self.P_Indices = tf.placeholder(tf.int32, [None])
            self.U_Indices = tf.placeholder(tf.int32, [None])
            
            self.Ws =[]
            self.layers = []

            

            self.layers.append(self.X)
            with tf.variable_scope("MLP_Configuration"):
                for idx,dim in enumerate(dim_list):
                    if idx==0: continue
                    W = tf.get_variable('%s'%(idx) ,shape=[dim_list[idx-1],dim],initializer=tf.contrib.layers.xavier_initializer())
                    layer = tf.matmul(self.layers[idx-1],W)
                        
                    # Single zero-initialized bias for this layer.
                    bias = tf.get_variable('b%s' % (idx), shape=[dim], initializer=tf.zeros_initializer())
                    layer = tf.nn.bias_add(layer, bias)
                    if idx < len(dim_list)-1 and idx >= 1 :
                        layer = tf.nn.relu(layer)

                    self.Ws.append(tf.reduce_sum(tf.square(W)))
                    self.layers.append(layer)

            
            self.vectors = [tf.nn.l2_normalize(self.layers[-1],axis=1)]    
            
            if K <=0:
                for i in range(hop):
                    self.vectors.append(tf.sparse_tensor_dense_matmul(self.A,self.vectors[i]))
                for i in range(1,hop+1):
                    self.vectors[i] = tf.nn.l2_normalize(self.vectors[i],axis=1)
            else:
                for i in range(hop):
                    self.vectors.append(tf.sparse_tensor_dense_matmul(self.A,self.vectors[i]))
                # (optional)
#                 for i in range(1,hop+1):
#                     self.vectors[i] = tf.nn.l2_normalize(self.vectors[i],axis=1)
                    
            self.P = []
            self.U = []
            for i in range(hop+1):
                self.P.append( tf.nn.embedding_lookup(self.vectors[i], self.P_Indices) )
                self.U.append( tf.nn.embedding_lookup(self.vectors[i], self.U_Indices) )
          
            self.expanded_P = []
            self.expanded_U = []
            for i in range(hop+1):
                self.expanded_P.append( tf.expand_dims(self.P[i],1))
                self.expanded_U.append( tf.expand_dims(self.U[i],1))
            
            assert hop >= 1, "hop is less than 1"
             
            self.P_ref = self.expanded_P[1]
            self.U_ref = self.expanded_U[1]
            for h in range(2,hop+1):
                self.P_ref = tf.concat((self.P_ref,self.expanded_P[h]),1)
                self.U_ref = tf.concat((self.U_ref,self.expanded_U[h]),1)
                
            
            self.P_loss = tf.reduce_mean( ( tf.reduce_mean( (tf.reduce_sum(self.expanded_P[0]*self.P_ref,2)),1) ) )
            
            self.U_loss = tf.reduce_mean( -tf.reduce_mean( (tf.reduce_sum(self.expanded_U[0]*self.U_ref,2)),1))
            self.scores = -tf.reduce_mean(tf.reduce_sum(self.expanded_U[0]*self.U_ref,2),1)#1d
    

    def build_optimizer(self,learning_rate):
        
        self.loss = self.P_loss + self.U_loss
        self.optimizer = tf.train.AdamOptimizer(learning_rate)
        self.trainStep = self.optimizer.minimize(self.loss)
    
    def TPA(self,data,params,method='closest-K'):
        
        print("Converting a set of data points to simplicial complexes")
        if method=='persistent-homology':
            return self.rGraph(X=data,params=params)
        elif method=='closest-K':
            return self.kGraph(X=data,params=params)
        
    def rGraph(self,X,params):
         
        X= np.array(X)
        TRAIN = params['TRAIN']
        if TRAIN ==True:
            rip = ripser(X)
            zero_dimensional_homology = rip['dgms'][0][:,1][:-1]
            one_dimensional_homology = rip['dgms'][1][:,1][:-1]#optional
            mu = np.mean(zero_dimensional_homology)
            sigma = np.std(zero_dimensional_homology)
            self.r = mu+2.0*sigma
        
            self.data = X
            self.neigh = NearestNeighbors(radius=self.r, n_jobs=-1)
            self.neigh.fit(X)
            train_graph = self.neigh.radius_neighbors_graph(X,self.r)
            coo = train_graph.tocoo()
            return np.mat([coo.row, coo.col]).transpose()
        else:
            coo = self.neigh.radius_neighbors_graph(X,self.r).tocoo()
            return np.mat([coo.row+len(self.data), coo.col]).transpose()
            
    def kGraph(self,X,params):
        
        X= np.array(X)
        k = params['K']
        TRAIN = params['TRAIN']
        if TRAIN == True:
            self.data = X
            self.neigh = NearestNeighbors(n_neighbors=(k+1),n_jobs=-1)
            self.neigh.fit(X)
            train_graph = self.neigh.kneighbors_graph(X)
            coo = train_graph.tocoo()
            return np.mat([coo.row, coo.col]).transpose()
        else:
            coo = self.neigh.kneighbors_graph(X).tocoo()
            return np.mat([coo.row+len(self.data), coo.col ]).transpose()
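For context, the closest-K graph that kGraph builds is scikit-learn's k-neighbours connectivity graph converted to an edge list (the extra neighbour accounts for each point matching itself, and test-time queries in the class above additionally offset the row indices by the training-set size). A standalone sketch with illustrative data:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.rand(500, 16)   # hypothetical training features
k = 5

neigh = NearestNeighbors(n_neighbors=k + 1, n_jobs=-1).fit(X)
coo = neigh.kneighbors_graph(X).tocoo()

# One (row, col) node-index pair per edge; shape is (500 * (k + 1), 2).
edges = np.column_stack((coo.row, coo.col))
print(edges.shape)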