class EmbeddingNetworkBuilder:
    """Build nearest-neighbour graphs over an embedding with an LSH forest.

    Basically a wrapper around sklearn's LSH forest.
    """

    def __init__(self, lsh_init=None):
        """Create the builder.

        Args:
            lsh_init: An already-constructed forest to wrap. When ``None``,
                a new ``LSHForest(n_estimators=25, n_candidates=1000)`` is
                created.
        """
        # Identity check: ``== None`` would invoke a custom ``__eq__`` on
        # whatever object the caller passed in.
        if lsh_init is None:
            self._lsh_forest = LSHForest(n_estimators=25, n_candidates=1000)
        else:
            self._lsh_forest = lsh_init
        # Set by fit_lsh_forest(); None until then.
        self._embedding = None
        # NOTE(review): nothing in this class assigns these after
        # construction, so get_node_to_word() returns None unless a caller
        # sets ``iw`` directly -- confirm whether fit_lsh_forest() was meant
        # to populate them from the embedding.
        self.iw = None
        self.m = None

    def fit_lsh_forest(self, embedding):
        """Fit the forest on ``embedding.m`` and remember the embedding.

        Args:
            embedding: Object exposing the data to index as attribute ``m``.
        """
        self._lsh_forest.fit(embedding.m)
        self._embedding = embedding

    def extract_nn_network(self, nn=20):
        """Return the forest's k-neighbours graph for the fitted embedding.

        Args:
            nn: Number of neighbours wanted per point. The forest is queried
                for ``nn + 1`` neighbours (presumably because each point is
                returned as its own nearest neighbour -- confirm).

        Returns:
            Whatever ``kneighbors_graph`` returns for the fitted data.

        Raises:
            AttributeError: If called before fit_lsh_forest().
        """
        dir_graph_mat = self._lsh_forest.kneighbors_graph(
            X=self._embedding.m, n_neighbors=nn + 1)
        return dir_graph_mat

    def make_undirected(self, dir_graph_mat):
        """Collect the node set and edge set of an adjacency matrix.

        Args:
            dir_graph_mat: 2-D adjacency matrix (e.g. the result of
                extract_nn_network()); a non-zero entry at ``(i, j)`` is
                read as an edge from ``i`` to ``j``.

        Returns:
            ``(nodes, edges)`` where ``nodes`` is the set of row indices and
            ``edges`` is a set of ``(i, j)`` integer tuples, one per non-zero
            entry. Pairs keep the orientation found in the matrix; the
            reverse pair is neither added nor merged.
        """
        nodes = set(range(dir_graph_mat.shape[0]))
        # The previous code looped over ``shape[0]`` itself (an int), which
        # raised TypeError. Reading all non-zero coordinates at once also
        # avoids slicing the matrix row by row.
        rows, cols = dir_graph_mat.nonzero()
        edges = {(int(i), int(j)) for i, j in zip(rows, cols)}
        return nodes, edges

    def get_forest(self):
        """Return the wrapped LSH forest."""
        return self._lsh_forest

    def get_node_to_word(self):
        """Return ``self.iw`` (``None`` unless set by the caller)."""
        return self.iw
class EmbeddingNetworkBuilder:
    """Build nearest-neighbour graphs over an embedding with an LSH forest.

    Basically a wrapper around sklearn's LSH forest.
    """

    def __init__(self, lsh_init=None):
        """Create the builder.

        Args:
            lsh_init: An already-constructed forest to wrap. When ``None``,
                a new ``LSHForest(n_estimators=25, n_candidates=1000)`` is
                created.
        """
        # Identity check: ``== None`` would invoke a custom ``__eq__`` on
        # whatever object the caller passed in.
        if lsh_init is None:
            self._lsh_forest = LSHForest(n_estimators=25, n_candidates=1000)
        else:
            self._lsh_forest = lsh_init
        # Set by fit_lsh_forest(); None until then.
        self._embedding = None
        # NOTE(review): nothing in this class assigns these after
        # construction, so get_node_to_word() returns None unless a caller
        # sets ``iw`` directly -- confirm whether fit_lsh_forest() was meant
        # to populate them from the embedding.
        self.iw = None
        self.m = None

    def fit_lsh_forest(self, embedding):
        """Fit the forest on ``embedding.m`` and remember the embedding.

        Args:
            embedding: Object exposing the data to index as attribute ``m``.
        """
        self._lsh_forest.fit(embedding.m)
        self._embedding = embedding

    def extract_nn_network(self, nn=20):
        """Return the forest's k-neighbours graph for the fitted embedding.

        Args:
            nn: Number of neighbours wanted per point. The forest is queried
                for ``nn + 1`` neighbours (presumably because each point is
                returned as its own nearest neighbour -- confirm).

        Returns:
            Whatever ``kneighbors_graph`` returns for the fitted data.

        Raises:
            AttributeError: If called before fit_lsh_forest().
        """
        dir_graph_mat = self._lsh_forest.kneighbors_graph(
            X=self._embedding.m, n_neighbors=nn + 1)
        return dir_graph_mat

    def make_undirected(self, dir_graph_mat):
        """Collect the node set and edge set of an adjacency matrix.

        Args:
            dir_graph_mat: 2-D adjacency matrix (e.g. the result of
                extract_nn_network()); a non-zero entry at ``(i, j)`` is
                read as an edge from ``i`` to ``j``.

        Returns:
            ``(nodes, edges)`` where ``nodes`` is the set of row indices and
            ``edges`` is a set of ``(i, j)`` integer tuples, one per non-zero
            entry. Pairs keep the orientation found in the matrix; the
            reverse pair is neither added nor merged.
        """
        nodes = set(range(dir_graph_mat.shape[0]))
        # The previous code looped over ``shape[0]`` itself (an int), which
        # raised TypeError. Reading all non-zero coordinates at once also
        # avoids slicing the matrix row by row.
        rows, cols = dir_graph_mat.nonzero()
        edges = {(int(i), int(j)) for i, j in zip(rows, cols)}
        return nodes, edges

    def get_forest(self):
        """Return the wrapped LSH forest."""
        return self._lsh_forest

    def get_node_to_word(self):
        """Return ``self.iw`` (``None`` unless set by the caller)."""
        return self.iw
def test_graphs():
    """Smoke-test the graph-building methods of LSHForest.

    For several sample counts, fit a forest on random data and check that
    the k-neighbours graph and the radius-neighbours graph are both square
    with one row per sample.
    """
    feature_count = 3
    random_state = np.random.RandomState(42)
    for n_samples in (5, 10, 20):
        data = random_state.rand(n_samples, feature_count)
        forest = LSHForest(min_hash_match=0)
        forest.fit(data)
        # Build both graphs before asserting anything about either.
        graphs = (
            forest.kneighbors_graph(data),
            forest.radius_neighbors_graph(data),
        )
        for graph in graphs:
            assert_equal(graph.shape[0], n_samples)
            assert_equal(graph.shape[1], n_samples)
def test_graphs():
    # Smoke test: the graph-building methods must return square matrices
    # with one row and one column per fitted sample.
    num_features = 3
    generator = np.random.RandomState(42)
    for n_samples in (5, 10, 20):
        samples = generator.rand(n_samples, num_features)
        forest = LSHForest(min_hash_match=0)
        # Fit through ignore_warnings so warnings raised while fitting are
        # suppressed.
        quiet_fit = ignore_warnings(forest.fit)
        quiet_fit(samples)
        knn_graph = forest.kneighbors_graph(samples)
        radius_graph = forest.radius_neighbors_graph(samples)
        for graph in (knn_graph, radius_graph):
            n_rows, n_cols = graph.shape[0], graph.shape[1]
            assert_equal(n_rows, n_samples)
            assert_equal(n_cols, n_samples)
# Script: read a sparse matrix from the Matrix Market file "sub_matrix",
# build a k-nearest-neighbour graph over its rows with an LSH forest, and
# write that graph to "src_dst_lsh.csv" as an edge list.
import numpy as np
# mmread was called below without ever being imported (NameError).
from scipy.io import mmread
from sklearn.neighbors import NearestNeighbors, LSHForest
from igraph import Graph, EdgeSeq
from timeit import timeit
import random

# Seeds only the stdlib ``random`` module; the forest below is given its own
# ``random_state``.
random.seed(100)

#robjects.r['load']('../processed_sub_Data.RData')

# Single-argument print(...) calls produce the same output under the
# Python 2 print statement and the Python 3 print function.
print("Reading sparce matrix...")
matrix = mmread("sub_matrix")

print("Converting matrix to dense format...")
a = np.array(matrix.todense())
print(a.shape)

print("Initialize LSH...")
lshf = LSHForest(n_neighbors=10, random_state=1, n_estimators=10)

print("fit LSH...")
lshf.fit(a)
# No n_neighbors is passed here, so the graph uses the forest's default.
K = lshf.kneighbors_graph(a)

print("convert into adjacency matrix...")
K = K.toarray()
# Graph.Adjacency is given the dense matrix as a list of rows.
g = Graph.Adjacency(K.tolist())

print("writing graph edgelist...")
g.write_edgelist("src_dst_lsh.csv")