def edge_weight(self, node, edge='default', generalize=False):
        """Returns the weight of an edge to another node.

        The weight is the cosine between this node's row vector for `edge`
        and `node.id_vec` as labeled for `edge` by the graph's vector model,
        clipped so that the result lies between 0 and 1 inclusive.

        Args:
            node: the target node; only its `id_vec` is read here.
            edge: name of the edge; must be a member of `self.graph.edges`.
            generalize: falsy to use the stored row vector unchanged, or a
                `(form, factor)` pair. `form` is 'dynamic' (blend with
                `self.dynamic_row_vecs`) or 'similarity' (blend with the
                similarity-weighted sum of every node's normalized row
                vector). `factor` is the share given to the generalized
                vector; the original keeps `1 - factor`.

        Raises:
            AssertionError: if `edge` is not in `self.graph.edges`.
            ValueError: if `generalize` names an unknown form.
        """
        assert edge in self.graph.edges

        # With EDGE_ROWS disabled every edge reads the single '_row' entry.
        row = edge if self.graph.EDGE_ROWS else '_row'
        row_vec = self.row_vecs[row]

        if generalize:
            form, factor = generalize
            # Reject unknown forms up front: otherwise gen_vec would never be
            # bound and the blend below would fail with UnboundLocalError.
            if form not in ('dynamic', 'similarity'):
                raise ValueError(
                    "unknown generalize form %r; expected 'dynamic' or "
                    "'similarity'" % (form,))

            normalize = vectors.normalize  # optimization

            # Get gen_vec, a generalized form of row_vec.
            if form == 'dynamic':
                gen_vec = self.dynamic_row_vecs[row]
            else:  # form == 'similarity'
                sims = np.array([self.similarity(n) for n in self.graph.nodes])
                all_row_vecs = np.array([normalize(n.row_vecs[row])
                                         for n in self.graph.nodes])
                # Similarity-weighted sum of the normalized row vectors
                # (assumes equal-length 1-D row vectors -- TODO confirm).
                gen_vec = sims @ all_row_vecs  # matrix multiplication

            # Add the generalized row_vec to the original row_vec.
            row_vec = (normalize(row_vec) * (1 - factor)
                       + normalize(gen_vec) * factor)

        labeled_id = self.graph.vector_model.label(node.id_vec, edge)
        weight = vectors.cosine(row_vec, labeled_id)
        # Negative cosines count as "no edge"; the upper clip guards against
        # floating-point overshoot so the documented [0, 1] range holds.
        return min(max(weight, 0.0), 1.0)
Esempio n. 2
0
def test_dynamic_generalize():
    """Check that dynamic generalization links B to C through shared targets.

    A is bumped towards C, D and E; B is bumped towards D, E and F. The plain
    weights of those six edges must exceed 0.3, C's dynamic row vector must
    point back at A, and the dynamically generalized weight B -> C must
    exceed 0.2 even though that edge was never bumped.

    NOTE(review): the generalized calls pass `dynamic=True`; confirm this
    keyword matches the `edge_weight` signature in the revision under test.
    """
    graph = VectorGraph(['edge'], DIM=1000, PERCENT_NON_ZERO=.01)
    _add_nodes(graph)
    a, b, c, d, e, f = [graph[name] for name in 'ABCDEF']

    # Edges that are explicitly reinforced, five bumps each.
    trained = [(a, c), (a, d), (a, e), (b, d), (b, e), (b, f)]
    for src, dst in trained:
        src.bump_edge(dst, 'edge', 5)

    # Everything that gets printed: the trained edges plus the untrained b -> c.
    shown = [
        ('a -> c', a, c),
        ('a -> d', a, d),
        ('a -> e', a, e),
        ('b -> d', b, d),
        ('b -> e', b, e),
        ('b -> f', b, f),
        ('b -> c', b, c),
    ]

    print('--- NORMAL WEIGHTS ---')
    for label, src, dst in shown:
        print(label, src.edge_weight(dst, 'edge'))

    print('--- GENERALIZED WEIGHTS ---')
    for label, src, dst in shown:
        print(label, src.edge_weight(dst, 'edge', dynamic=True))

    for src, dst in trained:
        assert src.edge_weight(dst, 'edge') > 0.3

    # A links to C, so C's dynamic vec should link to A.
    back_link = vectors.cosine(c.dynamic_row_vecs['edge'], a.row_vecs['edge'])
    assert back_link > 0.4

    # B is connected to C because A is connected to C
    # and B is connected to similar nodes as A.
    assert b.edge_weight(c, 'edge', dynamic=True) > 0.2
def similarity_matrix(model, round_to=None, num=None) -> pd.DataFrame:
    """Build the pairwise cosine-similarity table of all nodes in the graph.

    Args:
        model: object whose `graph` exposes `nodes` (each with a `row_vec`)
            and a `string_to_index` mapping used for the labels.
        round_to: if not None, number of decimals the values are rounded to.
        num: restricting the table to a subset of nodes is not implemented;
            any truthy value raises.

    Returns:
        A square DataFrame whose rows and columns are both labeled with the
        keys of `graph.string_to_index`.

    Raises:
        ValueError: if the graph has no nodes.
        NotImplementedError: if `num` is truthy.
    """
    graph = model.graph
    if not graph.nodes:
        raise ValueError("Graph is empty, can't make distance matrix.")
    if num:
        # A top-`num` selection (e.g. np.argpartition over graph.counts)
        # was sketched here but never implemented.
        raise NotImplementedError()

    vecs = [node.row_vec for node in graph.nodes]
    size = len(graph.nodes)
    sims = np.zeros((size, size))

    # Cosine is symmetric, so compute the upper triangle only and mirror it.
    for i in range(size):
        vec_i = vecs[i]
        for j in range(i, size):
            value = vectors.cosine(vec_i, vecs[j])
            sims[i, j] = value
            sims[j, i] = value

    if round_to is not None:
        sims = np.around(sims, decimals=round_to)

    labels = graph.string_to_index.keys()
    return pd.DataFrame(sims, index=labels, columns=labels)
def word_sim(model, word1, word2):
    """Return the cosine between the row vectors of two words in the graph.

    Both words are looked up by indexing `model.graph`.
    """
    graph = model.graph
    first_vec = graph[word1].row_vec
    second_vec = graph[word2].row_vec
    return vectors.cosine(first_vec, second_vec)
 def similarity(self, node):
     """Geometric mean of the per-row cosine similarities to `node`.

     Each row's cosine is floored at 0.0 before averaging, and the final
     result is capped at 1.0.
     """
     floored = []
     for row in self.row_vecs:
         cos = vectors.cosine(self.row_vecs[row], node.row_vecs[row])
         floored.append(max(0.0, cos))

     mean = stats.gmean(floored)
     return min(1.0, mean)  # clip precision error