def edge_weight(self, node, edge='default', generalize=False):
    """Return the weight of the edge from this node to ``node``.

    The weight is the cosine between this node's row vector for ``edge``
    and the labeled id vector of ``node``, with negative values clipped
    to 0.0.

    Args:
        node: The target node. Its ``id_vec`` is passed to
            ``self.graph.vector_model.label`` together with ``edge``.
        edge: Name of the edge type; must be in ``self.graph.edges``.
        generalize: Falsy to use the stored row vector unchanged, or a
            ``(form, factor)`` pair. ``form`` is ``'dynamic'`` (use
            ``self.dynamic_row_vecs``) or ``'similarity'`` (combine every
            node's row vector, weighted by ``self.similarity``).
            ``factor`` is the share given to the generalized vector when
            it is blended with the original; the original gets
            ``1 - factor``.

    Returns:
        The clipped cosine, never below 0.0.

    Raises:
        ValueError: If ``generalize`` names an unknown form.
    """
    assert edge in self.graph.edges
    # Graphs without per-edge rows keep a single shared row under '_row'.
    row = edge if self.graph.EDGE_ROWS else '_row'
    row_vec = self.row_vecs[row]

    if generalize:
        form, factor = generalize
        # Reject unknown forms explicitly; otherwise gen_vec would be left
        # unbound and the blend below would fail with UnboundLocalError.
        if form not in ('dynamic', 'similarity'):
            raise ValueError(
                "generalize form must be 'dynamic' or 'similarity', "
                "got %r" % (form,))

        normalize = vectors.normalize  # optimization

        # Get gen_vec, a generalized form of row_vec.
        if form == 'dynamic':
            gen_vec = self.dynamic_row_vecs[row]
        else:
            sims = np.array([self.similarity(n) for n in self.graph.nodes])
            all_row_vecs = np.array([normalize(n.row_vecs[row])
                                     for n in self.graph.nodes])
            gen_vec = sims @ all_row_vecs  # matrix multiplication

        # Add the generalized row_vec to the original row_vec.
        row_vec = (normalize(row_vec) * (1 - factor)
                   + normalize(gen_vec) * factor)

    labeled_id = self.graph.vector_model.label(node.id_vec, edge)
    weight = vectors.cosine(row_vec, labeled_id)
    return max(weight, 0.0)
def test_dynamic_generalize():
    """Check that dynamic generalization links nodes with similar neighbours.

    A and B share the targets D and E; A also links to C and B to F.
    The test expects the direct edges to have weight above 0.3, and the
    generalized B -> C weight to exceed 0.2 even though B never links to
    C directly.
    """
    # NOTE(review): this relies on edge_weight accepting a `dynamic`
    # keyword -- confirm against the current edge_weight signature.
    graph = VectorGraph(['edge'], DIM=1000, PERCENT_NON_ZERO=.01)
    _add_nodes(graph)
    a, b, c, d, e, f = [graph[name] for name in 'ABCDEF']

    # Every direct link is bumped by the same amount.
    direct_links = [(a, c), (a, d), (a, e), (b, d), (b, e), (b, f)]
    for source, target in direct_links:
        source.bump_edge(target, 'edge', 5)

    # Pairs to report: the direct links plus the indirect b -> c.
    reported = [
        ('a -> c', a, c),
        ('a -> d', a, d),
        ('a -> e', a, e),
        ('b -> d', b, d),
        ('b -> e', b, e),
        ('b -> f', b, f),
        ('b -> c', b, c),
    ]

    print('--- NORMAL WEIGHTS ---')
    for label, source, target in reported:
        print(label, source.edge_weight(target, 'edge'))

    print('--- GENERALIZED WEIGHTS ---')
    for label, source, target in reported:
        print(label, source.edge_weight(target, 'edge', dynamic=True))

    for source, target in direct_links:
        assert source.edge_weight(target, 'edge') > 0.3

    # A links to C, so C's dynamic vec should link to A.
    c_dynamic = c.dynamic_row_vecs['edge']
    assert vectors.cosine(c_dynamic, a.row_vecs['edge']) > 0.4

    # B is connected to C because A is connected to C
    # and B is connected to similar nodes as A.
    assert b.edge_weight(c, 'edge', dynamic=True) > 0.2
def similarity_matrix(model, round_to=None, num=None) -> pd.DataFrame:
    """Build the pairwise cosine matrix of all node row vectors.

    Args:
        model: Object whose ``graph`` exposes ``nodes`` and
            ``string_to_index``.
        round_to: If not None, number of decimals the matrix is rounded to.
        num: Restricting the matrix to a subset of nodes is not
            implemented; any truthy value raises.

    Returns:
        A square DataFrame whose rows and columns are both labeled with
        the keys of ``graph.string_to_index``.

    Raises:
        ValueError: If the graph has no nodes.
        NotImplementedError: If ``num`` is truthy.
    """
    graph = model.graph
    if not graph.nodes:
        raise ValueError("Graph is empty, can't make distance matrix.")
    if num:
        raise NotImplementedError()

    vecs = [node.row_vec for node in graph.nodes]
    size = len(graph.nodes)
    sims = np.zeros((size, size))

    # Cosine is symmetric, so compute the upper triangle and mirror it.
    for row, left in enumerate(vecs):
        for col in range(row, size):
            value = vectors.cosine(left, vecs[col])
            sims[row, col] = value
            sims[col, row] = value

    if round_to is not None:
        sims = np.around(sims, round_to)

    names = graph.string_to_index.keys()
    return pd.DataFrame(sims, columns=names, index=names)
def word_sim(model, word1, word2):
    """Return the cosine between the row vectors of two words.

    Both words are looked up in ``model.graph`` by indexing.
    """
    graph = model.graph
    first_vec = graph[word1].row_vec
    second_vec = graph[word2].row_vec
    return vectors.cosine(first_vec, second_vec)
def similarity(self, node):
    """Geometric mean of the per-row cosine similarities with ``node``.

    For every row in ``self.row_vecs`` the cosine with the matching row
    of ``node`` is computed and negative values are clipped to 0.0. The
    geometric mean of those values is capped at 1.0.
    """
    clipped = []
    for row in self.row_vecs:
        cosine = vectors.cosine(self.row_vecs[row], node.row_vecs[row])
        clipped.append(max(0.0, cosine))

    mean = stats.gmean(clipped)
    # Floating-point error can push the mean slightly above 1.
    return min(1.0, mean)