def graph_propagate(embeddings, positive_seeds, negative_seeds, **kwargs):
    """
    Graph propagation method adapted from Velikovich, Leonid, et al. "The viability of web-derived polarity lexicons."
    http://www.aclweb.org/anthology/N10-1119
    Should be used with arccos=True

    Arguments:
        embeddings -- object exposing ``wi`` (a word -> matrix index mapping);
            it is also handed to ``similarity_matrix``.
        positive_seeds, negative_seeds -- re-iterable collections of hashable
            seed words. Every seed is looked up in ``embeddings.wi``, so a seed
            missing from the vocabulary raises at that lookup.
        **kwargs -- forwarded to ``similarity_matrix`` and to the propagation
            step; ``T`` (default 1) is the number of propagation sweeps.

    Returns a dict mapping every word of ``embeddings.wi`` to
    ``pos - beta * neg``, where ``beta`` is the ratio of the summed positive
    scores to the summed negative scores. Seed words keep fixed scores
    (1.0 for their own polarity, 0.0 for the opposite one).
    NOTE(review): nothing guards against the negative scores summing to zero.
    """
    def run_graph_propagate(seeds, alpha_mat, trans_mat, T=1, **kwargs):
        # Updates alpha_mat in place (and returns it): for each seed, the
        # score of every node reachable from the frontier is raised to the
        # best "score of source * edge weight" seen so far.
        def get_rel_edges(ind_set):
            # (node, neighbour) pairs for each non-zero entry in the rows of
            # the nodes in ind_set. The set is grown in place; the previous
            # union() call copied the whole set once per node.
            rel_edges = set()
            for node in ind_set:
                rel_edges.update(
                    (node, other) for other in trans_mat[node, :].nonzero()[1])
            return rel_edges

        for seed in seeds:
            frontier = set([seed])
            for _ in range(T):
                # The edge set is materialised before the loop, so growing
                # the frontier below is safe.
                for src, dst in get_rel_edges(frontier):
                    alpha_mat[seed, dst] = max(
                        alpha_mat[seed, dst],
                        alpha_mat[seed, src] * trans_mat[src, dst])
                    frontier.add(dst)
        return alpha_mat

    index = embeddings.wi
    M = similarity_matrix(embeddings, **kwargs)
    M = (M + M.T) / 2  # symmetrise the similarity matrix
    # Single-argument print(...) gives the same output under Python 2 and 3.
    print("Getting positive scores..")
    # Each polarity propagates on its own copy, so M itself is not modified.
    pos_alpha = M.copy()
    neg_alpha = M.copy()
    M = csr_matrix(M)
    pos_seed_ids = [index[seed] for seed in positive_seeds]
    pos_alpha = run_graph_propagate(pos_seed_ids, pos_alpha, M, **kwargs)
    pos_alpha = pos_alpha + pos_alpha.T
    print("Getting negative scores..")
    neg_seed_ids = [index[seed] for seed in negative_seeds]
    neg_alpha = run_graph_propagate(neg_seed_ids, neg_alpha, M, **kwargs)
    neg_alpha = neg_alpha + neg_alpha.T
    print("Computing final scores...")
    pos_pols = {w: 1.0 for w in positive_seeds}
    for w in negative_seeds:
        pos_pols[w] = 0.0
    neg_pols = {w: 1.0 for w in negative_seeds}
    for w in positive_seeds:
        neg_pols[w] = 0.0
    # One set for the per-word seed test instead of scanning both seed
    # collections for every vocabulary word.
    seed_words = set(positive_seeds).union(negative_seeds)
    for w in util.logged_loop(index):
        if w not in seed_words:
            row = index[w]
            # Seed indices were resolved above, so they are reused here
            # rather than being looked up again for every word.
            pos_pols[w] = sum(pos_alpha[row, col] for col in pos_seed_ids)
            neg_pols[w] = sum(neg_alpha[row, col] for col in neg_seed_ids)
    # list(...) keeps np.sum working when .values() is a view (Python 3).
    beta = np.sum(list(pos_pols.values())) / np.sum(list(neg_pols.values()))
    polarities = {}
    for w in index:
        polarities[w] = pos_pols[w] - beta * neg_pols[w]
    return polarities
def dist(embeds, positive_seeds, negative_seeds, **kwargs):
    """
    Score every non-seed word of ``embeds.iw`` by similarity to the seeds.

    A word's score is the sum of its similarity-matrix entries against the
    positive seeds minus the same sum against the negative seeds. Seed words
    themselves are left out of the returned dict. ``**kwargs`` is forwarded
    to ``similarity_matrix``.
    """
    sim_mat = similarity_matrix(embeds, **kwargs)
    scores = {}
    for col, word in enumerate(embeds.iw):
        # Seeds are not scored.
        if word in positive_seeds or word in negative_seeds:
            continue
        toward_positive = sum(
            sim_mat[embeds.wi[seed], col] for seed in positive_seeds)
        toward_negative = sum(
            sim_mat[embeds.wi[seed], col] for seed in negative_seeds)
        scores[word] = toward_positive - toward_negative
    return scores
def dist(embeds, positive_seeds, negative_seeds, **kwargs):
    """
    Return {word: polarity} for each word in ``embeds.iw`` that is not a seed.

    Polarity is the total similarity to the positive seeds minus the total
    similarity to the negative seeds, read from the matrix produced by
    ``similarity_matrix(embeds, **kwargs)``.
    """
    result = {}
    similarities = similarity_matrix(embeds, **kwargs)

    def total_similarity(seed_words, column):
        # Sum of the matrix entries (seed row, word column) over the seeds.
        return sum(similarities[embeds.wi[seed], column]
                   for seed in seed_words)

    for column, word in enumerate(embeds.iw):
        is_seed = word in positive_seeds or word in negative_seeds
        if not is_seed:
            result[word] = (total_similarity(positive_seeds, column)
                            - total_similarity(negative_seeds, column))
    return result
def graph_propagate(embeddings, positive_seeds, negative_seeds, **kwargs):
    """
    Graph propagation method adapted from Velikovich, Leonid, et al. "The viability of web-derived polarity lexicons."
    http://www.aclweb.org/anthology/N10-1119
    Should be used with arccos=True

    Arguments:
        embeddings -- object exposing ``wi`` (a word -> matrix index mapping);
            it is also handed to ``similarity_matrix``.
        positive_seeds, negative_seeds -- re-iterable collections of hashable
            seed words. Every seed is looked up in ``embeddings.wi``, so a seed
            missing from the vocabulary raises at that lookup.
        **kwargs -- forwarded to ``similarity_matrix`` and to the propagation
            step; ``T`` (default 1) is the number of propagation sweeps.

    Returns a dict mapping every word of ``embeddings.wi`` to
    ``pos - beta * neg``, where ``beta`` is the ratio of the summed positive
    scores to the summed negative scores. Seed words keep fixed scores
    (1.0 for their own polarity, 0.0 for the opposite one).
    NOTE(review): nothing guards against the negative scores summing to zero.
    """
    def run_graph_propagate(seeds, alpha_mat, trans_mat, T=1, **kwargs):
        # Updates alpha_mat in place (and returns it): for each seed, the
        # score of every node reachable from the frontier is raised to the
        # best "score of source * edge weight" seen so far.
        def get_rel_edges(ind_set):
            # (node, neighbour) pairs for each non-zero entry in the rows of
            # the nodes in ind_set. The set is grown in place; the previous
            # union() call copied the whole set once per node.
            rel_edges = set()
            for node in ind_set:
                rel_edges.update(
                    (node, other) for other in trans_mat[node, :].nonzero()[1])
            return rel_edges

        for seed in seeds:
            frontier = set([seed])
            for _ in range(T):
                # The edge set is materialised before the loop, so growing
                # the frontier below is safe.
                for src, dst in get_rel_edges(frontier):
                    alpha_mat[seed, dst] = max(
                        alpha_mat[seed, dst],
                        alpha_mat[seed, src] * trans_mat[src, dst])
                    frontier.add(dst)
        return alpha_mat

    index = embeddings.wi
    M = similarity_matrix(embeddings, **kwargs)
    M = (M + M.T) / 2  # symmetrise the similarity matrix
    # Single-argument print(...) gives the same output under Python 2 and 3.
    print("Getting positive scores..")
    # Each polarity propagates on its own copy, so M itself is not modified.
    pos_alpha = M.copy()
    neg_alpha = M.copy()
    M = csr_matrix(M)
    pos_seed_ids = [index[seed] for seed in positive_seeds]
    pos_alpha = run_graph_propagate(pos_seed_ids, pos_alpha, M, **kwargs)
    pos_alpha = pos_alpha + pos_alpha.T
    print("Getting negative scores..")
    neg_seed_ids = [index[seed] for seed in negative_seeds]
    neg_alpha = run_graph_propagate(neg_seed_ids, neg_alpha, M, **kwargs)
    neg_alpha = neg_alpha + neg_alpha.T
    print("Computing final scores...")
    pos_pols = {w: 1.0 for w in positive_seeds}
    for w in negative_seeds:
        pos_pols[w] = 0.0
    neg_pols = {w: 1.0 for w in negative_seeds}
    for w in positive_seeds:
        neg_pols[w] = 0.0
    # One set for the per-word seed test instead of scanning both seed
    # collections for every vocabulary word.
    seed_words = set(positive_seeds).union(negative_seeds)
    for w in util.logged_loop(index):
        if w not in seed_words:
            row = index[w]
            # Seed indices were resolved above, so they are reused here
            # rather than being looked up again for every word.
            pos_pols[w] = sum(pos_alpha[row, col] for col in pos_seed_ids)
            neg_pols[w] = sum(neg_alpha[row, col] for col in neg_seed_ids)
    # list(...) keeps np.sum working when .values() is a view (Python 3).
    beta = np.sum(list(pos_pols.values())) / np.sum(list(neg_pols.values()))
    polarities = {}
    for w in index:
        polarities[w] = pos_pols[w] - beta * neg_pols[w]
    return polarities
# Example no. 5
# 0
def dist(embeds, seeds_map, normalize=True, **kwargs):
    """
    Score every non-seed word of ``embeds.iw`` against each seed group.

    ``seeds_map`` maps a group key to a collection of seed words. For each
    word that appears in none of the groups, a Counter is built holding, per
    group key, the mean similarity-matrix entry between the word and that
    group's seeds. When ``normalize`` is true the Counter is passed through
    ``normalize_counter`` before being stored. ``**kwargs`` is forwarded to
    ``similarity_matrix``.
    """
    sim_mat = similarity_matrix(embeds, **kwargs)
    polarities = {}
    for col, word in enumerate(embeds.iw):
        # Words that belong to any seed group are not scored.
        if any(word in seed_list for seed_list in seeds_map.values()):
            continue
        group_scores = Counter()
        for seed_key, seed_list in seeds_map.items():
            group_scores[seed_key] = np.mean(
                [sim_mat[embeds.wi[seed], col] for seed in seed_list])
        if normalize:
            group_scores = normalize_counter(group_scores)
        polarities[word] = group_scores
    return polarities