Esempi in Python per WordEmbeddings.most_similar

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: word_embeddings

Classe/tipologia: WordEmbeddings

Metodo/funzione: most_similar

Esempi su hotexamples.com: 2

WordEmbeddings.most_similar in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python di word_embeddings.WordEmbeddings.most_similar, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

WordEmbeddings(19)

load_w2v_model(2)

load_wc2v_model(2)

most_similar(2)

__init__(1)

create_embeddings_from_file(1)

create_reduced_embeddings(1)

get_det_val_dataset(1)

get_det_vals_run(1)

get_spacy_sim_dataset(1)

get_spacy_sim_run(1)

get_sum_vals_dataset(1)

get_sum_vals_run(1)

load_models(1)

load_word_vectors(1)

similar_words(1)

Esempio n. 1

Mostra file

File: k_hop_graph.py Progetto: GalacticGlum/csc110-course-project

def build_k_hop_graph(embeddings: WordEmbeddings,
                      target_word: str,
                      k: int,
                      alpha: Optional[float] = 0.50) -> nx.Graph:
    """Build the k-hop graph around a target word in a word embeddings space.

    The graph produced by ``build_infinity_hop_graph(embeddings, alpha)`` is
    pruned so that only the nodes whose shortest-path distance from the
    target word is at most ``k`` edges are kept.

    Args:
        embeddings: The word embeddings to generate the graph for.
        target_word: The word of interest.
        k: The number of 'hops' between the word of interest and every node
            in the graph. The resultant graph has the property that the word
            of interest is reachable from any node in at most k edges.
        alpha: The similarity threshold. Words that have a cosine similarity
            of at least this threshold are kept, and the rest are discarded.
            If None, the similarity of the word most similar to
            ``target_word`` minus 0.05 is used (the same fallback that
            ``draw_k_hop_graph`` applies).

    Returns:
        The pruned graph. The target word is looked up in the graph by its
        vocabulary index, so nodes are presumably vocabulary indices.

    Raises:
        ValueError: If ``alpha`` is greater than the similarity between the
            target word and its most similar word.
    """
    # most_similar(..., k=1) yields a single (word, similarity) pair; its
    # similarity is the upper bound for alpha. Above it, per the error
    # message below, the word of interest is not included in the graph.
    max_alpha = embeddings.most_similar(target_word, k=1)[0][1]
    if alpha is None:
        # The signature allows None, but comparing None with a float raises
        # TypeError, so resolve it to a concrete threshold first.
        alpha = max_alpha - 0.05
        logger.info(
            'No alpha threshold provided. Using alpha = {}'.format(alpha))
    elif alpha > max_alpha:
        raise ValueError(
            'Alpha threshold too high! The word of interest was not included '
            'in the graph. For the given target word, '
            '\'{}\', alpha can be AT MOST {}!'.format(target_word, max_alpha))

    graph = build_infinity_hop_graph(embeddings, alpha)

    # Get the word index of the word of interest.
    T = embeddings._vocabulary[target_word]

    # Compute the shortest paths from the word of interest to all reachable nodes.
    logger.info('Computing shortest paths')
    paths = nx.single_source_shortest_path_length(graph, T)

    logger.info('Building k-hop graph')
    # Collect first and delete afterwards: the node view must not be mutated
    # while it is being iterated.
    nodes_to_delete = [
        node for node in tqdm.tqdm(graph.nodes)
        # Drop the node if the word of interest is not reachable in at most k edges.
        if node not in paths or paths[node] > k
    ]
    graph.remove_nodes_from(nodes_to_delete)

    logger.info('Generated k-hop graph (nodes: {}, edges: {})'.format(
        len(graph.nodes), len(graph.edges)))
    return graph

Esempio n. 2

Mostra file

File: k_hop_graph.py Progetto: GalacticGlum/csc110-course-project

def draw_k_hop_graph(embeddings: WordEmbeddings,
                     target_word: str,
                     k: int,
                     alpha: Optional[float] = 0.50,
                     min_node_size: Optional[float] = 20,
                     max_node_size: Optional[float] = 120,
                     min_font_size: Optional[float] = 6,
                     max_font_size: Optional[float] = 24,
                     node_alpha: Optional[float] = 1,
                     edge_alpha: Optional[float] = 0.05,
                     target_word_label_colour: Optional[str] = 'black',
                     community_colour_map: Optional[str] = 'plasma') -> None:
    """Draw the k-hop graph for the given word embeddings and interest word.
    This function DOES NOT show the matplotlib plot.

    The graph is built with ``build_k_hop_graph``, split into communities
    with Louvain community detection, laid out with ForceAtlas2 and drawn
    onto the current matplotlib axes. Nodes are coloured by community and
    sized by degree; every node is labelled with its word.

    Args:
        embeddings: The word embeddings to generate the graph for.
        target_word: The word of interest.
        k: The number of 'hops' between the word of interest and every node
            in the graph. The resultant graph has the property that the word
            of interest is reachable from any node in at most k edges.
        alpha: The similarity threshold. Words that have a cosine similarity
            of at least this threshold are kept, and the rest are discarded.
            If None, the similarity of the word most similar to
            ``target_word`` minus 0.05 is used.
        min_node_size: The minimum size of a node, in pixels.
        max_node_size: The maximum size of a node, in pixels.
        min_font_size: The minimum size of a label, in pixels.
        max_font_size: The maximum size of a label, in pixels.
        node_alpha: The alpha/transparency to draw nodes with.
        edge_alpha: The alpha/transparency to draw edges with.
        target_word_label_colour: The colour of the target word label.
            Makes the target word stand out. Useless when there are many words.
        community_colour_map: The colour map to use when assigning colours to communities.

    Returns:
        None. The drawing is added to the current matplotlib axes.
    """
    if alpha is None:
        # Fall back to a threshold just below the best similarity score.
        _, similarity = embeddings.most_similar(target_word, k=1)[0]
        alpha = similarity - 0.05
        logger.info(
            'No alpha threshold provided. Using alpha = {}'.format(alpha))

    graph = build_k_hop_graph(embeddings, target_word, k, alpha=alpha)

    logger.info('Computing best partition (Louvain community detection)')
    # Maps each node to the integer id of its community.
    partition = community_louvain.best_partition(graph)

    logger.info('Computing layout (ForceAtlas2)')
    forceatlas2 = ForceAtlas2(outboundAttractionDistribution=True,
                              edgeWeightInfluence=1.0,
                              jitterTolerance=1.0,
                              barnesHutOptimize=True,
                              barnesHutTheta=1.2,
                              scalingRatio=2.0,
                              strongGravityMode=False,
                              gravity=1.0,
                              verbose=False)

    positions = forceatlas2.forceatlas2_networkx_layout(graph)

    logger.info('Rendering graph with matplotlib')
    # One colour per community. pyplot.get_cmap accepts the same
    # (name, lut) arguments as matplotlib.cm.get_cmap, which was removed in
    # Matplotlib 3.9 (assuming `cm` in this module is matplotlib.cm).
    cmap = plt.get_cmap(community_colour_map, max(partition.values()) + 1)

    degrees = dict(graph.degree)
    # A graph without edges (e.g. k = 0 leaves only the target word) has a
    # maximum degree of 0; fall back to 1 so the division below is defined.
    max_degree = max(degrees.values()) or 1
    size_multipliers = {
        node: degree / max_degree for node, degree in degrees.items()
    }

    # Build sizes and colours from one explicit node order, so every size and
    # colour is paired with the node it was computed for even if the layout
    # and the partition enumerate the nodes differently.
    nodelist = list(partition)
    node_size = [
        max(max_node_size * size_multipliers[node], min_node_size)
        for node in nodelist
    ]
    node_color = [partition[node] for node in nodelist]

    # Draw the nodes
    nx.draw_networkx_nodes(graph,
                           positions,
                           nodelist=nodelist,
                           node_size=node_size,
                           cmap=cmap,
                           node_color=node_color,
                           alpha=node_alpha)

    # An edgeless graph has no curves to compute or draw.
    if graph.number_of_edges() > 0:
        # Draw the edges with a bezier curve
        curves = curved_edges(graph, positions)
        # Remove nan values
        curves = np.nan_to_num(curves)

        # Assign a colour to each edge, based on the community of the source node.
        edge_color = [cmap(partition[a]) for a, _ in graph.edges]
        edge_lines = LineCollection(curves,
                                    color=edge_color,
                                    cmap=cmap,
                                    alpha=edge_alpha,
                                    linewidths=1)
        plt.gca().add_collection(edge_lines)

    # Draw node labels (words)
    for i, (x, y) in positions.items():
        # The label size grows with the fourth power of the node's relative
        # degree, so only well-connected words get large labels.
        fontsize = max(max_font_size * size_multipliers[i]**4, min_font_size)
        word = embeddings.words[i]
        colour = target_word_label_colour if word == target_word else 'black'
        plt.text(x,
                 y,
                 word,
                 fontsize=fontsize,
                 ha='center',
                 va='center',
                 color=colour)