def build_k_hop_graph(embeddings: WordEmbeddings,
                      target_word: str,
                      k: int,
                      alpha: Optional[float] = 0.50) -> nx.Graph:
    """Builds the k-hop graph for a word embeddings space.

    The full similarity graph is built first (via
    ``build_infinity_hop_graph``) and then pruned so that only the nodes
    within ``k`` edges of the target word remain.

    Args:
        embeddings: The word embeddings to generate the graph for.
        target_word: The word of interest.
        k: The number of 'hops' between the word of interest and every node
            in the graph. The resultant graph has the property that the word
            of interest is reachable from any node in at most k edges.
        alpha: The similarity threshold. Words that have a cosine similarity
            of at least this threshold are kept, and the rest are discarded.
            If None, the similarity between the target word and its most
            similar word, minus 0.05, is used (the same fallback that
            ``draw_k_hop_graph`` applies).

    Returns:
        The pruned graph. Nodes are the word indices used by ``embeddings``.

    Raises:
        ValueError: If ``alpha`` is greater than the similarity between the
            target word and its most similar word.
    """
    # Similarity between the target word and its single most similar word.
    # A threshold above this value would leave the target word out of the
    # graph (see the error message below), so it is the upper bound on alpha.
    max_alpha = embeddings.most_similar(target_word, k=1)[0][1]
    if alpha is None:
        # Previously a None alpha crashed on the comparison below, even
        # though the annotation allows it; fall back to the same default
        # that draw_k_hop_graph uses.
        alpha = max_alpha - 0.05
        logger.info(
            'No alpha threshold provided. Using alpha = {}'.format(alpha))
    elif alpha > max_alpha:
        raise ValueError(
            'Alpha threshold too high! The word of interest was not included '
            'in the graph. For the given target word, '
            '\'{}\', alpha can be AT MOST {}!'.format(target_word, max_alpha))

    graph = build_infinity_hop_graph(embeddings, alpha)

    # Get the word index of the word of interest.
    # NOTE(review): this reaches into a private attribute of WordEmbeddings;
    # prefer a public accessor if one exists.
    target_index = embeddings._vocabulary[target_word]

    # Hop distance from the word of interest to every node within k edges.
    # The cutoff stops the breadth-first search at depth k, so nodes further
    # away (or unreachable) are simply absent from the mapping.
    logger.info('Computing shortest paths')
    hop_distances = nx.single_source_shortest_path_length(graph,
                                                          target_index,
                                                          cutoff=k)

    logger.info('Building k-hop graph')
    # Collect the nodes first: the graph must not be mutated while its node
    # view is being iterated. The explicit distance comparison is kept as a
    # safeguard in addition to the search cutoff.
    nodes_to_delete = [
        node for node in tqdm.tqdm(graph.nodes)
        if node not in hop_distances or hop_distances[node] > k
    ]
    graph.remove_nodes_from(nodes_to_delete)

    logger.info('Generated k-hop graph (nodes: {}, edges: {})'.format(
        len(graph.nodes), len(graph.edges)))
    return graph
def draw_k_hop_graph(embeddings: WordEmbeddings,
                     target_word: str,
                     k: int,
                     alpha: Optional[float] = 0.50,
                     min_node_size: Optional[float] = 20,
                     max_node_size: Optional[float] = 120,
                     min_font_size: Optional[float] = 6,
                     max_font_size: Optional[float] = 24,
                     node_alpha: Optional[float] = 1,
                     edge_alpha: Optional[float] = 0.05,
                     target_word_label_colour: Optional[str] = 'black',
                     community_colour_map: Optional[str] = 'plasma') -> None:
    """Draw the k-hop graph for the given word embeddings and interest word.
    This function DOES NOT show the matplotlib plot.

    Nodes are coloured by Louvain community, laid out with ForceAtlas2, and
    sized (together with their labels) by their degree relative to the
    highest degree in the graph. Everything is drawn onto the current
    matplotlib axes.

    Args:
        embeddings: The word embeddings to generate the graph for.
        target_word: The word of interest.
        k: The number of 'hops' between the word of interest and every node
            in the graph. The resultant graph has the property that the word
            of interest is reachable from any node in at most k edges.
        alpha: The similarity threshold. Words that have a cosine similarity
            of at least this threshold are kept, and the rest are discarded.
            If None, the similarity between the target word and its most
            similar word, minus 0.05, is used.
        min_node_size: The minimum size of a node (passed to networkx as
            ``node_size``, i.e. a matplotlib marker area in points^2).
        max_node_size: The maximum size of a node, in the same units as
            ``min_node_size``. Only the highest-degree node(s) get this size.
        min_font_size: The minimum size of a label (matplotlib font size,
            in points).
        max_font_size: The maximum size of a label (matplotlib font size,
            in points).
        node_alpha: The alpha/transparency to draw nodes with.
        edge_alpha: The alpha/transparency to draw edges with.
        target_word_label_colour: The colour of the target word label.
            Makes the target word stand out. Useless when there are many words.
        community_colour_map: The colour map to use when assigning colours to communities.

    Raises:
        ValueError: Propagated from ``build_k_hop_graph`` when ``alpha`` is
            too high for the target word to be part of the graph.
    """
    if alpha is None:
        # Default to slightly below the similarity of the nearest neighbour.
        _, similarity = embeddings.most_similar(target_word, k=1)[0]
        alpha = similarity - 0.05
        logger.info(
            'No alpha threshold provided. Using alpha = {}'.format(alpha))

    graph = build_k_hop_graph(embeddings, target_word, k, alpha=alpha)

    logger.info('Computing best partition (Louvain community detection)')
    # Maps every node to the integer id of its community.
    partition = community_louvain.best_partition(graph)

    logger.info('Computing layout (ForceAtlas2)')
    forceatlas2 = ForceAtlas2(outboundAttractionDistribution=True,
                              edgeWeightInfluence=1.0,
                              jitterTolerance=1.0,
                              barnesHutOptimize=True,
                              barnesHutTheta=1.2,
                              scalingRatio=2.0,
                              strongGravityMode=False,
                              gravity=1.0,
                              verbose=False)

    # Maps every node to its (x, y) coordinates.
    positions = forceatlas2.forceatlas2_networkx_layout(graph)

    logger.info('Rendering graph with matplotlib')
    # One discrete colour per community (community ids start at 0).
    # NOTE(review): cm.get_cmap is deprecated in recent matplotlib releases;
    # confirm the pinned matplotlib version before upgrading.
    cmap = cm.get_cmap(community_colour_map, max(partition.values()) + 1)

    # A single, explicit node ordering shared by the sizes, the colours and
    # the node list handed to networkx. This keeps each node paired with its
    # own size and colour without relying on the layout and partition
    # dictionaries happening to iterate in the same order.
    nodelist = list(graph.nodes)

    degrees = dict(graph.degree)
    # When no node has an edge (e.g. k = 0 leaves only the target word) the
    # maximum degree is 0; fall back to 1 to avoid dividing by zero. Every
    # multiplier is then 0, so the minimum node/font sizes are used.
    max_degree = max(degrees.values()) or 1
    size_multipliers = {
        node: degrees[node] / max_degree for node in nodelist
    }

    # Generate node sizes: proportional to relative degree, clamped from
    # below by the minimum size.
    node_size = [
        max(max_node_size * size_multipliers[node], min_node_size)
        for node in nodelist
    ]
    node_color = [partition[node] for node in nodelist]

    # Draw the nodes
    nx.draw_networkx_nodes(graph,
                           positions,
                           nodelist=nodelist,
                           node_size=node_size,
                           cmap=cmap,
                           node_color=node_color,
                           alpha=node_alpha)

    # Draw the edges with a bezier curve
    curves = curved_edges(graph, positions)
    # Remove nan values
    curves = np.nan_to_num(curves)

    # Assign a colour to each edge, based on the community of the source node.
    edge_color = [cmap(partition[source]) for source, _ in graph.edges]
    edge_lines = LineCollection(curves,
                                color=edge_color,
                                cmap=cmap,
                                alpha=edge_alpha,
                                linewidths=1)
    plt.gca().add_collection(edge_lines)

    # Draw node labels (words)
    for node, (x, y) in positions.items():
        # The label size grows with the fourth power of the node's relative
        # degree, so only the best-connected words get large labels.
        fontsize = max(max_font_size * size_multipliers[node]**4,
                       min_font_size)
        word = embeddings.words[node]
        colour = target_word_label_colour if word == target_word else 'black'
        plt.text(x,
                 y,
                 word,
                 fontsize=fontsize,
                 ha='center',
                 va='center',
                 color=colour)