Example #1
    def evaluate_distances_to_all_vertices(self,
                                           source_ix,
                                           *,
                                           batch_size=None,
                                           dijkstra_parameters=None,
                                           callback=lambda x: x,
                                           **kwargs):
        """ Computes distances from each source vertex to every vertex in the graph.
        :param source_ix: a vector of source vertex indices
        :param batch_size: number of source vertices per batch, defaults to all sources at once
        :param callback: wraps the batch iterator, e.g. tqdm for a progress bar
        :return: a matrix [len(source_ix), num_vertices] of distances
        """
        assert np.ndim(source_ix) == 1, "source_ix must be a vector of vertex indices"
        batch_size = batch_size or len(source_ix)
        if dijkstra_parameters is None:
            dijkstra_parameters = self.graph_embedding.prepare_for_dijkstra()
        source_ix = np.array(source_ix, dtype=np.int32)
        # pair every source vertex with all vertices as targets
        targets = np.broadcast_to(
            np.arange(self.graph_embedding.num_vertices, dtype=np.int32),
            (len(source_ix), self.graph_embedding.num_vertices))

        dists = np.empty((len(source_ix), self.graph_embedding.num_vertices),
                         dtype=np.float32)
        for batch_start in callback(range(0, len(source_ix), batch_size)):
            chunk_idx = slice(batch_start, batch_start + batch_size)
            # run shortest-path search for this batch, then read out the path lengths
            paths = self.graph_embedding.compute_paths(source_ix[chunk_idx],
                                                       targets[chunk_idx],
                                                       dijkstra_parameters,
                                                       **kwargs)
            dists[chunk_idx] = check_numpy(
                self.graph_embedding(**paths)['target_distances'])
        return dists
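
A minimal usage sketch for the method above, assuming a hypothetical trained wrapper object (called model here) that exposes this method together with its graph_embedding attribute; tqdm is optional and only serves as the progress callback:

import numpy as np
from tqdm import tqdm

# `model` is a hypothetical trained object exposing evaluate_distances_to_all_vertices
source_vertices = np.array([0, 5, 42], dtype=np.int32)
distances = model.evaluate_distances_to_all_vertices(
    source_vertices, batch_size=2, callback=tqdm)
print(distances.shape)  # (len(source_vertices), num_vertices)
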
Example #2
    def evaluate_norms(self, dijkstra_parameters=None, **kwargs):
        """ Computes the distance from the null vertex to every vertex, i.e. one "norm" per vertex.
        :return: a vector of [num_vertices] distances
        """
        if dijkstra_parameters is None:
            dijkstra_parameters = self.graph_embedding.prepare_for_dijkstra()

        # shortest paths from the single null vertex to all vertices at once
        norms_info = self.graph_embedding.compute_paths(
            np.array([self.null_vertex], dtype='int32'),
            np.arange(self.graph_embedding.num_vertices, dtype='int32')[None],
            dijkstra_parameters, **kwargs)
        norms = check_numpy(
            self.graph_embedding(**norms_info)['target_distances'])
        return norms.reshape([self.graph_embedding.num_vertices])
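
A short sketch of how the result might be used, again assuming the hypothetical model object from above; the method returns one distance-from-the-null-vertex value per vertex:

# `model` is the same hypothetical trained object as in the previous sketch
norms = model.evaluate_norms()
assert norms.shape == (model.graph_embedding.num_vertices,)
# e.g. list the ten vertices closest to the null vertex
closest_to_null = norms.argsort()[:10]
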
Example #3
def visualize_cluster(emb: GraphEmbedding,
                      cluster_vertex_ids,
                      dictionary,
                      coords=None,
                      vertex_labels=None,
                      deterministic=None,
                      edge_probability_threshold=0.5,
                      weighted=False,
                      scale_factor=3.0,
                      cmap=plt.get_cmap('nipy_spectral_r'),
                      default_color='#c8eda8',
                      **kwargs):
    """ Like visualize_embeddings, but visualizes only a single cluster of vertices """

    cluster_vertices = list(cluster_vertex_ids.nodes)

    # 1. assemble in-cluster edges

    from_ix, to_ix = emb.edge_sources, emb.edge_targets
    weights = F.softplus(emb.edge_weight_logits).view(-1).data.numpy()
    mean_weight = weights[1:].mean()
    num_vertices, num_edges = len(emb.slices) - 1, len(from_ix)
    edge_probabilities = torch.sigmoid(
        emb.edge_adjacency_logits.view(-1)).data.numpy()
    if deterministic:
        existence = edge_probabilities >= edge_probability_threshold
    else:
        existence = np.random.rand(num_edges) < edge_probabilities

    edge_dict = defaultdict(list)
    edge_width = defaultdict(dict)
    for edge_i in range(1, num_edges):  # skip first "technical" loop edge
        if existence[edge_i]:
            from_i, to_i, weight = from_ix[edge_i], to_ix[edge_i], weights[edge_i]
            if from_i in cluster_vertices and to_i in cluster_vertices:
                from_c, to_c = map(cluster_vertices.index, [from_i, to_i])
                edge_dict[from_c].append(to_c)
                edge_width[from_c][to_c] = scale_factor / (weight / mean_weight + 1e-3) \
                    if weighted else 1.0

    # 2. compute pairwise distances
    vertex_ids = torch.as_tensor(list(cluster_vertex_ids.nodes),
                                 dtype=torch.int32)
    targets = torch.as_tensor(np.repeat(vertex_ids[None],
                                        len(vertex_ids),
                                        axis=0),
                              dtype=torch.int32)

    with training_mode(emb, is_train=False):
        pairwise_distances = check_numpy(
            GraphEmbedding.forward(emb, vertex_ids, targets,
                                   soft=False)['target_distances'])

    pairwise_distances[np.isinf(pairwise_distances)] = np.max(
        pairwise_distances[np.isfinite(pairwise_distances)])
    # ^-- [num_vertices x num_vertices]
    if coords is None:
        coords = TSNE(metric='precomputed').fit_transform(pairwise_distances)

    # 3. assemble graph metadata
    cluster_size = len(vertex_ids)

    if vertex_labels is not None:
        vertex_color = (vertex_labels -
                        np.min(vertex_labels)) * 1.0 / np.max(vertex_labels)
        vertex_color = rgba_to_hex(cmap(vertex_color)[:, :3])
    else:
        vertex_color = default_color

    vertex_stats = dict(
        vertex_id=np.arange(cluster_size),
        num_edges=np.array([
            np.sum(check_numpy(emb.get_edges(i).p_adjacent) >= edge_probability_threshold)
            for i in vertex_ids
        ], dtype='int32'))

    assert coords.shape == (cluster_size, 2)
    if vertex_labels is not None:
        assert vertex_labels.shape == (cluster_size, )
        vertex_stats['label'] = vertex_labels

    ix_to_token = {i: t for t, i in dictionary.items()}
    tokens = list(map(ix_to_token.get, map(int, vertex_ids)))

    # ... and finally, draw the resulting graph
    return draw_graph(*coords.T,
                      edges=edge_dict,
                      vertex_text=tokens,
                      edge_width=edge_width,
                      token=tokens,
                      vertex_color=vertex_color,
                      **vertex_stats,
                      vertex_alpha=1.0,
                      edge_alpha=0.25,
                      **kwargs)
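
A usage sketch under stated assumptions: emb is a trained GraphEmbedding, dictionary maps tokens to vertex indices, and cluster_vertex_ids only needs to expose a .nodes collection, so a networkx graph is used here purely for illustration:

from bokeh.io import output_notebook
import networkx as nx

output_notebook()  # draw_graph renders via bokeh, so set an output target first

# hypothetical cluster: any object with a .nodes collection of vertex ids works
cluster = nx.Graph()
cluster.add_nodes_from([3, 17, 25, 98])

visualize_cluster(emb, cluster, dictionary, deterministic=True, weighted=True)
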
Example #4
def visualize_embeddings(emb: GraphEmbedding,
                         coords=None,
                         vertex_labels=None,
                         deterministic=None,
                         edge_probability_threshold=0.5,
                         weighted=False,
                         scale_factor=3.0,
                         cmap=plt.get_cmap('nipy_spectral_r'),
                         **kwargs):
    """
    Draws learned graph using bokeh and some magic. Please set bokeh output (notebook / file / etc.) in advance
    :type emb: GraphEmbedding
    :param coords: a matrix[num_vertices, 2] of 2d point vertex coordinates, defaults to TSNE on pairwise distances
    :param vertex_labels: if given, assigns a label to each vertex and paints it to the respective color
    :param deterministic: if True, only use edges with p >= 0.5, otherwise sample edges with learned probability
    :param weighted: if True, edge widths are inversely proportional to their weights, default = all widths are equal
    :param scale_factor: multiplies edge widths by this number
    :param cmap: a callable(array) -> rgb(a) matrix used to paint vertices if vertex_labels are specified
    :param kwargs: see utils.draw_graph
    """
    if deterministic is None:
        deterministic = emb.training

    # handle edges
    from_ix, to_ix = emb.edge_sources, emb.edge_targets
    weights = F.softplus(emb.edge_weight_logits).view(-1).data.numpy()
    mean_weight = weights[1:].mean()
    num_vertices, num_edges = len(emb.slices) - 1, len(from_ix)
    edge_probabilities = torch.sigmoid(
        emb.edge_adjacency_logits.view(-1)).data.numpy()
    if deterministic:
        existence = edge_probabilities >= edge_probability_threshold
    else:
        existence = np.random.rand(num_edges) < edge_probabilities

    edge_dict = defaultdict(list)
    edge_width = defaultdict(dict)
    for edge_i in range(1, num_edges):  # skip first "technical" loop edge
        if existence[edge_i]:
            from_i, to_i, weight = from_ix[edge_i], to_ix[edge_i], weights[edge_i]
            edge_dict[from_i].append(to_i)
            edge_width[from_i][to_i] = scale_factor / (weight / mean_weight + 1e-3) \
                if weighted else 1.0

    # handle vertices
    if coords is None:
        pairwise_distances = emb.compute_pairwise_distances(
            edge_threshold=edge_probability_threshold)
        pairwise_distances[np.isinf(pairwise_distances)] = np.max(
            pairwise_distances[np.isfinite(pairwise_distances)])
        # ^-- [num_vertices x num_vertices]
        coords = TSNE(metric='precomputed').fit_transform(pairwise_distances)

    if vertex_labels is not None:
        vertex_color = (vertex_labels -
                        np.min(vertex_labels)) * 1.0 / np.max(vertex_labels)
        vertex_color = rgba_to_hex(cmap(vertex_color)[:, :3])
    else:
        vertex_color = 'blue'

    vertex_stats = dict(
        vertex_id=np.arange(num_vertices),
        num_edges=np.array([
            np.sum(check_numpy(emb.get_edges(i).p_adjacent) >= edge_probability_threshold)
            for i in range(emb.num_vertices)
        ], dtype='int32'))

    assert coords.shape == (num_vertices, 2)
    if vertex_labels is not None:
        assert vertex_labels.shape == (num_vertices, )
        vertex_stats['label'] = vertex_labels

    return draw_graph(*coords.T,
                      edges=edge_dict,
                      edge_width=edge_width,
                      vertex_color=vertex_color,
                      **vertex_stats,
                      **kwargs)
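
A usage sketch, assuming emb is a trained GraphEmbedding; the labels array is made up here just to demonstrate vertex coloring:

from bokeh.io import output_notebook
import numpy as np

output_notebook()  # set bokeh output in advance, as the docstring asks

# hypothetical labels: one integer class id per vertex, used only for coloring
labels = np.random.randint(0, 5, size=emb.num_vertices)
visualize_embeddings(emb, vertex_labels=labels, deterministic=True, weighted=True)
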