Example 1
def rng_next_goal_rejection_sampling(start_node_ids, batch_size, gtG, rng,
                                     max_dist, min_dist, max_dist_to_compute,
                                     sampling_d, target_d, nodes, n_ori,
                                     step_size, bins, M):
    sample_start_nodes = start_node_ids is None
    dists = []
    pred_maps = []
    end_node_ids = []
    start_node_ids_ = []
    hardnesss = []
    gt_dists = []
    num_nodes = gtG.num_vertices()
    for i in range(batch_size):
        done = False
        while not done:
            if sample_start_nodes:
                start_node_id = rng.choice(num_nodes)
            else:
                start_node_id = start_node_ids[i]

            gt_dist = gt.topology.shortest_distance(gt.GraphView(
                gtG, reversed=False),
                                                    source=start_node_id,
                                                    target=None,
                                                    max_dist=max_dist)
            gt_dist = np.array(gt_dist.get_array())
            ind = np.where(
                np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0]
            ind = rng.permutation(ind)
            gt_dist = gt_dist[ind] * 1.
            h_dist = heuristic_fn_vec(nodes[ind, :], nodes[[start_node_id], :],
                                      n_ori, step_size)[:, 0]
            hardness = 1. - h_dist / gt_dist
            sampled_ind = _rejection_sampling(rng, sampling_d, target_d, bins,
                                              hardness, M)
            if sampled_ind < ind.size:
                # print sampled_ind
                end_node_id = ind[sampled_ind]
                hardness = hardness[sampled_ind]
                gt_dist = gt_dist[sampled_ind]
                done = True

        # Compute distance from end node to all nodes, to return.
        dist, pred_map = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True),
            source=end_node_id,
            target=None,
            max_dist=max_dist_to_compute,
            pred_map=True)
        dist = np.array(dist.get_array())
        pred_map = np.array(pred_map.get_array())

        hardnesss.append(hardness)
        dists.append(dist)
        pred_maps.append(pred_map)
        start_node_ids_.append(start_node_id)
        end_node_ids.append(end_node_id)
        gt_dists.append(gt_dist)

    paths = None  # this sampler does not compute explicit paths
    return start_node_ids_, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists
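Example 1 pairs a forward distance field (to find candidate goals within [min_dist, max_dist]) with a reversed one for the returned distance-to-goal map; `heuristic_fn_vec` and `_rejection_sampling` are project helpers not shown here. A minimal, self-contained sketch of the candidate-selection half on a toy chain graph:

import numpy as np
import graph_tool as gt
import graph_tool.topology  # makes gt.topology available

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4)])  # chain 0 -> 4

max_dist, min_dist = 3, 1
gt_dist = gt.topology.shortest_distance(
    gt.GraphView(g, reversed=False), source=g.vertex(0),
    target=None, max_dist=max_dist)
gt_dist = np.array(gt_dist.get_array())  # nodes beyond max_dist get a large sentinel

# Candidate goals: nodes whose distance lies in [min_dist, max_dist].
ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0]
print(ind)  # [1 2 3]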
Example 2
def _get_gt_graph(g, directed, weights, combine_weights=None,
                  return_all=False):
    ''' Returns the correct graph(view) given the options '''
    import graph_tool as gt
    from graph_tool.stats import label_parallel_edges

    directed = g.is_directed() if directed is None else directed

    weights = "weight" if weights is True else weights
    weights = None if weights is False else weights

    if not directed and g.is_directed():
        if weights is not None:
            u, weights = _to_undirected(g, weights, combine_weights)

            if return_all:
                return u, u.graph, u.edge_attributes[weights]

            return u.graph

        graph = gt.GraphView(g.graph, directed=False)
        graph = gt.GraphView(graph, efilt=label_parallel_edges(graph).fa == 0)

        if return_all:
            return g, graph, None

        return graph

    if return_all:
        return g, g.graph, weights

    return g.graph
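The undirected branch above filters out the parallel edges that appear when a directed graph is viewed as undirected (`_to_undirected` is a helper of the surrounding project). A small sketch of just that step, using the same `graph_tool.stats` import as the example (newer graph-tool releases expose `label_parallel_edges` from `graph_tool.generation` instead):

import graph_tool as gt
from graph_tool.stats import label_parallel_edges

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 0), (1, 2)])  # 0<->1 becomes a parallel pair

ug = gt.GraphView(g, directed=False)
# Keep only the first copy of each parallel edge (labelled 0).
ug = gt.GraphView(ug, efilt=label_parallel_edges(ug).fa == 0)
print(ug.num_edges())  # 2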
Example 3
def get_distance_node_list(gtG, source_nodes, direction, weights=None):
  gtG_ = gt.Graph(gtG)
  v = gtG_.add_vertex()

  if weights is not None:
    weights = gtG_.edge_properties[weights]

  for s in source_nodes:
    e = gtG_.add_edge(s, int(v))
    if weights is not None:
      weights[e] = 0.

  if direction == 'to':
    dist = gt.topology.shortest_distance(
        gt.GraphView(gtG_, reversed=True), source=gtG_.vertex(int(v)),
        target=None, weights=weights)
  elif direction == 'from':
    dist = gt.topology.shortest_distance(
        gt.GraphView(gtG_, reversed=False), source=gtG_.vertex(int(v)),
        target=None, weights=weights)
  dist = np.array(dist.get_array())
  dist = dist[:-1]
  if weights is None:
    dist = dist-1
  return dist
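Example 3 answers multi-source distance queries with a single call by wiring an auxiliary vertex to every source. A self-contained sketch of the same trick (toy graph; the final -1 removes the artificial hop, as in the unweighted branch above):

import numpy as np
import graph_tool as gt
import graph_tool.topology

# Distance from each node *to* the nearest of {0, 3}.
g = gt.Graph(directed=True)
g.add_edge_list([(1, 0), (2, 1), (2, 3)])

aux = g.add_vertex()
for s in [0, 3]:
    g.add_edge(s, aux)  # would carry weight 0 in the weighted case

dist = gt.topology.shortest_distance(
    gt.GraphView(g, reversed=True), source=aux)
print(np.array(dist.get_array())[:-1] - 1)  # [0 1 1 0]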
Example 4
def graph_edges_split(G, p):
    """
    Split graph edges for validation and training disjoint sets.

    Parameters
    ----------
    G: graph_tool.Graph
        Graph object from graph-tool module.
    p: float, (0, 1]
        Test proportion.
    Returns
    -------
    train_graph, test_graph: graph_tool.Graph
    
    """
    N = G.num_edges()
    K = int(N * p)

    train_mask = np.array([0] * K + [1] * (N - K), dtype=bool)
    np.random.shuffle(train_mask)
    test_mask = ~train_mask

    train_graph = gt.GraphView(G, directed=False)
    test_graph = gt.GraphView(G, directed=False)

    prop_train = train_graph.new_edge_property("bool")
    prop_train.a = train_mask

    prop_test = test_graph.new_edge_property("bool")
    prop_test.a = test_mask

    train_graph.set_edge_filter(prop_train)
    test_graph.set_edge_filter(prop_test)

    return train_graph, test_graph
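A quick usage sketch, assuming the function above is in scope (a 10-edge cycle with 20% of edges held out; the split itself is random):

import graph_tool as gt

g = gt.Graph(directed=False)
g.add_edge_list([(i, (i + 1) % 10) for i in range(10)])

train_g, test_g = graph_edges_split(g, p=0.2)
print(train_g.num_edges(), test_g.num_edges())  # 8 2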
Example 5
    def subgraph(self,
                 nodes=None,
                 vertex_filter=None,
                 edge_filter=None,
                 copy_positions=True):
        if nodes is not None:
            view = graph_tool.GraphView(
                self.network,
                vfilt=lambda x: self.id_to_label[x] in nodes).copy()
        elif vertex_filter is not None or edge_filter is not None:
            view = graph_tool.GraphView(self.network,
                                        vfilt=vertex_filter,
                                        efilt=edge_filter).copy()
        else:
            raise ValueError("at least one filter must be specified")

        old_vertex_ids = set(map(int, view.vertices()))

        if copy_positions and self.node_layout is not None:
            vertex_positions = [
                self.node_layout.get_position(v_id)
                for v_id in view.vertices()
            ]
        else:
            vertex_positions = None

        node_map_keys = valfilter(lambda x: x in old_vertex_ids,
                                  self.node_map).keys()

        # TODO: vertex ids also change
        # TODO: vertex weights

        if self.edge_weight is not None:
            edge_weight = [self.edge_weight[e_id] for e_id in view.edges()]
        else:
            edge_weight = None

        view.purge_vertices()
        view.purge_edges()

        if edge_weight is not None:
            weight_prop = view.new_edge_property("double")
            weight_prop.a = edge_weight
        else:
            weight_prop = None

        result = Network(view, edge_weight=weight_prop)
        result.node_map = dict(zip(node_map_keys, map(int, view.vertices())))
        # print(result.node_map)
        result.id_to_label = itemmap(reversed, result.node_map)

        if vertex_positions:
            result.layout_nodes(method="precomputed",
                                positions=vertex_positions)
        return result
Example 6
def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist,
                  max_dist_to_compute, node_room_ids, nodes=None,
                  compute_path=False, dists_from_start_node=None):
  # Compute the distance field from the starting location, and then pick a
  # destination in another room if possible otherwise anywhere outside this
  # room.
  dists = []; pred_maps = []; paths = []; end_node_ids = [];
  for i in range(batch_size):
    room_id = node_room_ids[start_node_ids[i]]
    # Compute distances.
    if dists_from_start_node is None:
      dist, pred_map = gt.topology.shortest_distance(
        gt.GraphView(gtG, reversed=False), source=gtG.vertex(start_node_ids[i]),
        target=None, max_dist=max_dist_to_compute, pred_map=True)
      dist = np.array(dist.get_array())
    else:
      dist = dists_from_start_node[i]

    # Randomly sample nodes which are within max_dist.
    near_ids = dist <= max_dist
    near_ids = near_ids[:, np.newaxis]
    # Check whether there is a node in another room (and not in a hallway)
    # that is close enough.
    non_same_room_ids = node_room_ids != room_id
    non_hallway_ids = node_room_ids != -1
    good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
    good2_ids = np.logical_and(near_ids, non_hallway_ids)
    good3_ids = near_ids
    if np.any(good1_ids):
      end_node_id = rng.choice(np.where(good1_ids)[0])
    elif np.any(good2_ids):
      end_node_id = rng.choice(np.where(good2_ids)[0])
    elif np.any(good3_ids):
      end_node_id = rng.choice(np.where(good3_ids)[0])
    else:
      logging.error('Did not find any good nodes.')

    # Compute distance to this new goal for doing distance queries.
    dist, pred_map = gt.topology.shortest_distance(
        gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
        target=None, max_dist=max_dist_to_compute, pred_map=True)
    dist = np.array(dist.get_array())
    pred_map = np.array(pred_map.get_array())

    dists.append(dist)
    pred_maps.append(pred_map)
    end_node_ids.append(end_node_id)

    path = None
    if compute_path:
      path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
    paths.append(path)

  return start_node_ids, end_node_ids, dists, pred_maps, paths
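The goal choice above falls back through three masks: a different room, any non-hallway node, then anything within range. That cascade, reduced to plain NumPy on made-up room labels:

import numpy as np

rng = np.random.RandomState(0)
dist = np.array([0, 1, 2, 3, 4])
node_room_ids = np.array([5, 5, -1, 7, 7])  # -1 marks hallways
room_id, max_dist = 5, 3

near_ids = dist <= max_dist
good1 = np.logical_and(near_ids, (node_room_ids != room_id) & (node_room_ids != -1))
good2 = np.logical_and(near_ids, node_room_ids != -1)
for mask in (good1, good2, near_ids):
    if np.any(mask):
        end_node_id = rng.choice(np.where(mask)[0])
        break
print(end_node_id)  # 3: another room, within range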
Example 7
def get_hardness_distribution(gtG, max_dist, min_dist, rng, trials, bins,
                              nodes, n_ori, step_size):
    heuristic_fn = lambda node_ids, node_id: \
      heuristic_fn_vec(nodes[node_ids, :], nodes[[node_id], :], n_ori, step_size)
    num_nodes = gtG.num_vertices()
    gt_dists = []
    h_dists = []
    for i in range(trials):
        end_node_id = rng.choice(num_nodes)
        gt_dist = gt.topology.shortest_distance(gt.GraphView(gtG,
                                                             reversed=True),
                                                source=gtG.vertex(end_node_id),
                                                target=None,
                                                max_dist=max_dist)
        gt_dist = np.array(gt_dist.get_array())
        ind = np.where(np.logical_and(gt_dist <= max_dist,
                                      gt_dist >= min_dist))[0]
        gt_dist = gt_dist[ind]
        h_dist = heuristic_fn(ind, end_node_id)[:, 0]
        gt_dists.append(gt_dist)
        h_dists.append(h_dist)
    gt_dists = np.concatenate(gt_dists)
    h_dists = np.concatenate(h_dists)
    hardness = 1. - h_dists * 1. / gt_dists
    hist, _ = np.histogram(hardness, bins)
    hist = hist.astype(np.float64)
    hist = hist / np.sum(hist)
    return hist
Example 8
def connect_task(graph, task, task_vtx):
    target = task.get('slot_id', False)
    if target:
        found = False
        for v in graph.vertices():
            if graph.vp.slot_id[v] == target:
                add_edge_type(graph, task_vtx, v, 'slot')
                found = True
        if not found:
            raise RuntimeError("no matching slot-id found")
    else:
        # Derive the task slot: executable leaves (one parent, no children).
        executable = gt.GraphView(graph, vfilt=graph.vp.executable)
        e_leaves = gt.GraphView(executable,
                                vfilt=lambda v: v.in_degree() == 1 and v.out_degree() == 0)
        for v in e_leaves.vertices():
            add_edge_type(graph, task_vtx, v, 'slot')
Example 9
    def __init__(self,
                 graph: Graph,
                 vfilt_name: str = None,
                 efilt_name: str = None):
        super(SubGraph, self).__init__()

        # Vertex and/or edge filters define the sub-graph; by default they are treated as bool properties.
        assert vfilt_name is not None or efilt_name is not None, "At least one of vfilt/efilt must be given."
        self._vfilt_name = vfilt_name
        self._efilt_name = efilt_name
        gt_vfilt = gt_efilt = None

        # Add (internalize) vertex and edge filters to graph

        if vfilt_name is not None:
            graph.add_vertex_property(name=vfilt_name,
                                      of_type="bool",
                                      default=False)
            gt_vfilt = graph._graph.vertex_properties[vfilt_name]

        if efilt_name is not None:
            graph.add_edge_property(name=efilt_name,
                                    of_type="bool",
                                    default=True)
            gt_efilt = graph._graph.edge_properties[efilt_name]

        # Update internal graph representation
        self._graph = gt.GraphView(g=graph._graph,
                                   vfilt=gt_vfilt,
                                   efilt=gt_efilt)
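The wrapper internalizes bool property maps and hands them to GraphView; `add_vertex_property` and `_graph` belong to the wrapper class and are not shown. The bare graph-tool pattern looks like this:

import graph_tool as gt

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2)])

vfilt = g.new_vertex_property("bool")   # all False by default
g.vertex_properties["in_sub"] = vfilt   # internalize, as the class does
vfilt[g.vertex(0)] = True
vfilt[g.vertex(1)] = True

sub = gt.GraphView(g, vfilt=g.vertex_properties["in_sub"])
print(sub.num_vertices(), sub.num_edges())  # 2 1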
Example 10
def local_eff(g):
    """Returns the local efficiency of a graph g."""
    n = g.num_vertices()
    eff_sum = 0
    for node in range(n):
        # Extract the neighbors of node_i
        vfilt = g.new_vertex_property('bool')
        neighbor = g.vertex(node).all_neighbors()

        # Create a sub graph containing those neighbors
        # and calculate their shortest distances
        for n_node in neighbor:
            vfilt[n_node] = True
        sub = gt.GraphView(g, vfilt)
        sub = Graph(sub, prune=True)  # prune for true copy
        sub_dist = gt.topology.shortest_distance(sub)

        # Calculate the local efficiency of node_i
        eff_sum_i = 0
        for dist_row in sub_dist:
            for dist in dist_row:
                if dist != 0:
                    eff_sum_i += 1 / dist

        deg = g.vertex(node).out_degree()
        if deg > 1:
            eff_sum_i = eff_sum_i / (deg * (deg - 1))
        eff_sum += eff_sum_i

    return eff_sum / n
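The core move in local_eff is building a neighborhood subgraph: mark the neighbors in a bool map, view, then prune-copy into a standalone graph. A minimal sketch of just that step:

import graph_tool as gt

g = gt.Graph(directed=False)
g.add_edge_list([(0, 1), (0, 2), (1, 2), (2, 3)])

vfilt = g.new_vertex_property('bool')
for nb in g.vertex(0).all_neighbors():
    vfilt[nb] = True  # neighbors of vertex 0: 1 and 2

sub = gt.Graph(gt.GraphView(g, vfilt), prune=True)  # standalone pruned copy
print(sub.num_vertices(), sub.num_edges())  # 2 1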
Example 11
    def find_subgraphs(self, Q_synset, syn_graph):
        nodes = dict()

        for syn_id, activation_value in Q_synset.items():
            if activation_value > self.tau_3:
                try:
                    node = syn_graph.get_node_for_synset_id(syn_id)
                    nodes[node.use_graph_tool()] = node
                except Exception:
                    continue

        filt = syn_graph.use_graph_tool().new_vertex_property('boolean')
        for vertex, node in nodes.items():
            filt[vertex] = True

        g = gt.GraphView(syn_graph.use_graph_tool(), filt)

        lead_nodes = []
        for graph in self.subgraphs(g):
            lead = None
            for vertex in graph.vertices():
                if not lead:
                    lead = nodes[vertex]
                else:
                    lead_id = lead.synset.synset_id
                    new_id = nodes[vertex].synset.synset_id
                    if Q_synset[lead_id] < Q_synset[new_id]:
                        lead = nodes[vertex]
            lead_nodes.append(lead)

        return lead_nodes
Example 12
def gt_subgraph(g: gt.Graph, vertices):
    filter_option = g.new_vertex_property('bool')
    for v in vertices:
        filter_option[v] = True
    sub = gt.GraphView(g, filter_option).copy()

    return sub
Example 13
def prepare_cora(directed=False):
    print("directed", directed)
    edges = np.genfromtxt('res/cora/cora.edges', dtype=int,
                          delimiter=',')[:, :2] - 1
    labels = np.genfromtxt('res/cora/cora.node_labels',
                           dtype=int,
                           delimiter=',')[:, 1]

    g = graph_tool.Graph(directed=directed)

    g.add_edge_list(edges)
    vfilt = graph_tool.topology.label_largest_component(g, directed=False)

    labels = labels[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    weight_prop = g.new_edge_property("int", val=1)
    #spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open("res/cora/largest_comp_new_spc_" + str(directed) + ".p", "rb"))

    print("spc", len(spc))

    pickle.dump(
        spc, open("res/cora/largest_comp_new_spc_" + str(directed) + ".p",
                  "wb"))

    #spc = pickle.load(open("res/cora/largest_comp_new_spc_" + str(directed) + ".p", "rb"))

    return g, labels, spc
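The graph-tool core of this loader, restricting a graph to its largest connected component, isolated on a toy graph (the example additionally calls purge_vertices() to compact vertex indices):

import graph_tool as gt
import graph_tool.topology

g = gt.Graph(directed=False)
g.add_edge_list([(0, 1), (1, 2), (3, 4)])  # two components

vfilt = gt.topology.label_largest_component(g, directed=False)
lcc = gt.GraphView(g, vfilt=vfilt)
print(lcc.num_vertices(), lcc.num_edges())  # 3 2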
Example 14
def rng_target_dist_field(batch_size, gtG, rng, max_dist, max_dist_to_compute,
                          nodes=None, compute_path=False):
  # Sample a single node, compute distance to all nodes less than max_dist,
  # sample nodes which are a particular distance away.
  dists = []; pred_maps = []; paths = []; start_node_ids = []
  end_node_ids = rng.choice(gtG.num_vertices(), size=(batch_size,),
                            replace=False).tolist()

  for i in range(batch_size):
    dist, pred_map = gt.topology.shortest_distance(
        gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_ids[i]),
        target=None, max_dist=max_dist_to_compute, pred_map=True)
    dist = np.array(dist.get_array())
    pred_map = np.array(pred_map.get_array())
    dists.append(dist)
    pred_maps.append(pred_map)

    # Randomly sample nodes which are within max_dist.
    near_ids = np.where(dist <= max_dist)[0]
    start_node_id = rng.choice(near_ids, size=(1,), replace=False)[0]
    start_node_ids.append(start_node_id)

    path = None
    if compute_path:
      path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
    paths.append(path)

  return start_node_ids, end_node_ids, dists, pred_maps, paths
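The reversed GraphView with pred_map=True is the recurring idiom in these samplers: one call yields every node's distance *to* the goal plus the predecessor map used later for path extraction (get_path_ids is a project helper). A minimal sketch:

import numpy as np
import graph_tool as gt
import graph_tool.topology

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2), (2, 3)])

dist, pred = gt.topology.shortest_distance(
    gt.GraphView(g, reversed=True), source=g.vertex(3),
    target=None, pred_map=True)
print(np.array(dist.get_array()))  # [3 2 1 0]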
Example 15
    def subgraph(self, vertices: List[int]) -> 'DWGraph':
        filter_option = self.new_vertex_property('bool')
        for v in vertices:
            filter_option[v] = True
        sub = gt.GraphView(self, filter_option)
        new_g = DWGraph.from_graph(sub, self.is_directed_prop, self.is_weighted_prop)

        return new_g
Example 16
    def subgraphs(self, g):
        if g.num_vertices() == 0:
            return  # PEP 479: raising StopIteration inside a generator is an error
        prop = label_largest_component(g, False)
        filt = g.new_vertex_property('boolean')
        for v in g.vertices():
            if prop[v] > 0:
                filt[v] = True
        yield gt.GraphView(g, filt)

        filt = g.new_vertex_property('boolean')
        for v in g.vertices():
            if prop[v] <= 0:
                filt[v] = True
        gv = gt.GraphView(g, filt)

        for sgv in self.subgraphs(gv):
            yield sgv
Example 17
    def make(self, target):
        # flow from receptive_field_list to receptive_field_graph
        receptive_field_list = self.make_receptive_field(target)
        receptive_field_graph = graph_tool.GraphView(
            self.graph, vfilt=lambda x: x in receptive_field_list)
        receptive_field_graph = self.register_distance(target,
                                                       receptive_field_graph)
        canonized_receptive_field = self.canonize_receptive_field(
            receptive_field_graph)
        return (canonized_receptive_field[:self.receptive_field_size],
                receptive_field_graph)
Example 18
def save_root_children(g: gt.Graph, root_id) -> gt.Graph:
    zero_id = vp_map(g, 'zero_id', 'int')
    roots = set()
    for v in g.vertices():
        if zero_id[v] == root_id:
            roots.add(v)

    leave_prop = g.new_vertex_property('bool')
    for v in roots:
        gt_top.label_out_component(g, v, leave_prop)

    sub = gt.GraphView(g, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=False)
    return new_g
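vp_map and create_q_graph are project helpers; the graph-tool call underneath marks everything reachable from a root. A sketch:

import graph_tool as gt
import graph_tool.topology as gt_top

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2), (3, 4)])

reach = g.new_vertex_property('bool')
gt_top.label_out_component(g, g.vertex(0), reach)  # marks 0, 1, 2
print(gt.GraphView(g, reach).num_vertices())  # 3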
Example 19
def subsample_archive_from_matching(a: gt.Graph, mg: gt.Graph,
                                    t_graph: gt.Graph, e_idx):
    leave_prop = a.new_vertex_property('bool')
    one_prop = vp_map(mg, 'one_id', 'int')

    to_save = set([one_prop[n] for n in mg.vertices()])
    for v in a.vertices():
        leave_prop[v] = a.vertex_index[v] in to_save

    # get rid of all the nodes in A that aren't there
    sub = gt.GraphView(a, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=True)

    return new_g
Example 20
def prepare_coil(weighted=False):
    print("weighted", weighted)
    dataset = 3
    X = np.genfromtxt('res/benchmark/SSL,set=' + str(dataset) + ',X.tab')
    # X = (X - np.min(X, axis=0)) / (np.max(X, axis=0) - np.min(X, axis=0))
    y = (np.genfromtxt('res/benchmark/SSL,set=' + str(dataset) + ',y.tab'))
    n = X.shape[0]

    dists = scipy.spatial.distance.cdist(X, X)
    y = y[:n]

    W = dists[:n, :n]  # np.exp(-(dists) ** 2 / (2 * sigma ** 2))
    k = 10

    for i in range(n):
        # Keep only the k nearest neighbors (k + 1 accounts for the zero-weight diagonal).
        W[i, np.argsort(W[i])[(k + 1):]] = np.inf

    np.fill_diagonal(W, 0)

    weights = W[(W < np.inf) & (W > 0)].flatten()
    edges = np.array(np.where((W < np.inf) & (W > 0))).T
    edges = edges[edges[:, 0] < edges[:, 1]]

    g = graph_tool.Graph(directed=False)

    # construct actual graph
    g.add_vertex(n)
    g.add_edge_list(edges)

    vfilt = graph_tool.topology.label_largest_component(g, directed=False)

    y = y[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    if weighted:
        weight_prop = g.new_edge_property("double", vals=weights)
    else:
        weight_prop = g.new_edge_property("int", val=1)

    #spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open("res/coil/largest_comp_10knn_spc_" + "_" + str(weighted) + ".p",
             "rb"))
    #pickle.dump(spc, open("res/coil/largest_comp_10knn_spc_" + "_" + str(weighted) + ".p", "wb"))

    print(len(spc))

    return g, y, spc
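The k-nearest-neighbour sparsification used above, isolated on toy points (k and the data are illustrative):

import numpy as np
import scipy.spatial

X = np.random.RandomState(0).rand(6, 2)
W = scipy.spatial.distance.cdist(X, X)
k = 2

for i in range(len(W)):
    W[i, np.argsort(W[i])[(k + 1):]] = np.inf  # keep self + k nearest
np.fill_diagonal(W, 0)

edges = np.array(np.where((W < np.inf) & (W > 0))).T
edges = edges[edges[:, 0] < edges[:, 1]]  # one orientation per pair, as above
print(len(edges))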
Example 21
    def build_community_graph(self):
        from aves.visualization.networks import NodeLink

        (
            tree,
            membership,
            order,
        ) = graph_tool.inference.nested_blockmodel.get_hierarchy_tree(
            self.state, empty_branches=False)
        self.nested_graph = tree
        self.nested_graph.set_directed(False)

        self.radial_positions = np.array(
            list(
                graph_tool.draw.radial_tree_layout(
                    self.nested_graph,
                    self.nested_graph.num_vertices() - 1)))

        self.node_angles = np.degrees(
            np.arctan2(self.radial_positions[:, 1], self.radial_positions[:, 0]))
        self.node_angles_dict = dict(
            zip(map(int, self.nested_graph.vertices()), self.node_angles))
        self.node_ratio = np.sqrt(
            np.dot(self.radial_positions[0], self.radial_positions[0]))

        self.network.layout_nodes(
            method="precomputed",
            positions=self.radial_positions[:self.network.num_vertices()],
            angles=self.node_angles,
            ratios=np.sqrt(
                np.sum(self.radial_positions * self.radial_positions, axis=1)),
        )

        self.community_graph = Network(
            graph_tool.GraphView(
                self.nested_graph,
                directed=True,
                vfilt=lambda x: x >= self.network.num_vertices(),
            ))
        self.community_nodelink = NodeLink(self.community_graph)
        self.community_nodelink.layout_nodes(
            method="precomputed",
            positions=self.radial_positions[self.network.num_vertices():],
            angles=self.node_angles,
            ratios=self.node_ratio,
        )
        self.community_nodelink.set_node_drawing(method="plain")
        self.community_nodelink.set_edge_drawing(method="plain")
Example 22
def dense_activations_to_graph(model, x_in, thresh=1.0E-5):
    G = dense_model_to_graph(model)
    input_layer = model.layers[0]
    dense_layers = [l for l in model.layers if isinstance(l, Dense)]
    dense_outputs = [layer.output for layer in dense_layers]
    # initialize keras op that includes all layer outputs
    dense_func = K.function(inputs=model.inputs, outputs=dense_outputs)
    # apply function to input
    layer_outputs = dense_func(x_in)
    # add axis to all outputs (including input layer)
    outputs = [
        np.expand_dims(output, axis=-1) for output in [x_in] + layer_outputs
    ]
    # compute output masks
    masks = [output > thresh for output in outputs]
    # for each layer, starting with the input, broadcast the output of shape (batch, d, 1) to (batch, d, k)
    # where k is the dimension of the next layer; i.e. repeat outputs for each node
    Ws = [np.broadcast_to(output, (output.shape[0], output.shape[1], layer.get_weights()[0].shape[-1])) \
          for layer, output in zip(dense_layers, outputs)]

    # 1. apply masks to outputs
    # 2. reshape/flatten masks from (batch, d, k) to (batch, d*k)
    def apply_mask(W, mask):
        return (W * mask).reshape((-1, np.prod(W.shape[1:])))

    # 3. concatenate all masked outputs together to get full edge list
    Ws_masked = np.concatenate(
        [apply_mask(W, mask) for W, mask in zip(Ws, masks)], axis=1)
    # create mask over final outputs for graph-tool edge filter
    edge_masks = ~np.isclose(Ws_masked, 0.0, atol=thresh)
    # initialize graph views for all outputs in batch
    Gs = [
        gt.GraphView(G, efilt=G.new_ep('bool', vals=mask))
        for mask in edge_masks
    ]
    layer_sizes = [x_in.shape[-1]] + [layer.units for layer in dense_layers]
    labels = reduce(lambda cum, n: cum + n,
                    [[i] * size for i, size in enumerate(layer_sizes)])
    activations = np.concatenate(outputs, axis=1).squeeze()
    # add degree and layer properties to graph
    for i, (g, acts) in enumerate(zip(Gs, Ws_masked)):
        g.ep['activation'] = g.new_ep('float', vals=acts)
        g.vp['activation'] = g.new_vp('float', vals=activations[i])
        g.vp['degree'] = g.degree_property_map('total',
                                               weight=g.ep['activation'])
        g.vp['layer'] = g.new_vp('int', labels)
    return G, Gs
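Stripped of the Keras plumbing, the per-sample graphs are just edge filters built from boolean masks. An isolated sketch:

import numpy as np
import graph_tool as gt

G = gt.Graph(directed=True)
G.add_edge_list([(0, 1), (1, 2), (0, 2)])

masks = np.array([[True, False, True],
                  [True, True, False]])  # one row per sample
Gs = [gt.GraphView(G, efilt=G.new_ep('bool', vals=m)) for m in masks]
print([gv.num_edges() for gv in Gs])  # [2, 2]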
Example 23
def rng_room_to_room(batch_size, gtG, rng, max_dist, max_dist_to_compute,
                     node_room_ids, nodes=None, compute_path=False):
  # Sample one of the rooms, compute the distance field. Pick a destination in
  # another room if possible otherwise anywhere outside this room.
  dists = []; pred_maps = []; paths = []; start_node_ids = []; end_node_ids = [];
  room_ids = np.unique(node_room_ids[node_room_ids[:,0] >= 0, 0])
  for i in range(batch_size):
    room_id = rng.choice(room_ids)
    end_node_id = rng.choice(np.where(node_room_ids[:,0] == room_id)[0])
    end_node_ids.append(end_node_id)

    # Compute distances.
    dist, pred_map = gt.topology.shortest_distance(
        gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
        target=None, max_dist=max_dist_to_compute, pred_map=True)
    dist = np.array(dist.get_array())
    pred_map = np.array(pred_map.get_array())
    dists.append(dist)
    pred_maps.append(pred_map)

    # Randomly sample nodes which are within max_dist.
    near_ids = dist <= max_dist
    near_ids = near_ids[:, np.newaxis]

    # Check whether there is a node in another room (and not in a hallway)
    # that is close enough.
    non_same_room_ids = node_room_ids != room_id
    non_hallway_ids = node_room_ids != -1
    good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
    good2_ids = np.logical_and(near_ids, non_hallway_ids)
    good3_ids = near_ids
    if np.any(good1_ids):
      start_node_id = rng.choice(np.where(good1_ids)[0])
    elif np.any(good2_ids):
      start_node_id = rng.choice(np.where(good2_ids)[0])
    elif np.any(good3_ids):
      start_node_id = rng.choice(np.where(good3_ids)[0])
    else:
      logging.error('Did not find any good nodes.')

    start_node_ids.append(start_node_id)

    path = None
    if compute_path:
      path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
    paths.append(path)

  return start_node_ids, end_node_ids, dists, pred_maps, paths
Example 24
def ext_local_eff(g, d):
    """Returns the extended local efficiency of a network g up to a
    neighbor depth d.

    The measure is experimental, not an established concept.
    Motivated by the concept of extended clustering.
    """
    n = g.num_vertices()
    eff_sum = 0
    # Extract the neighbors of node_i up to depth d and
    # create a sub graph containing those neighbors
    for node in range(n):
        vfilt = g.new_vertex_property('bool')
        neighbors = []
        node_n1 = node
        j = -1

        # Code is a bit convoluted to have variable depth
        for _ in range(d):
            k = len(neighbors)
            while j < k:
                for node_n2 in g.vertex(node_n1).all_neighbors():
                    neighbors.append(int(node_n2))
                    vfilt[node_n2] = True
                j += 1
                node_n1 = neighbors[j]

        # Calculate the neighbor distances up to depth d
        deg = g.vertex(node).out_degree()
        sub = gt.GraphView(g, vfilt)
        sub_dist = []
        for i in range(deg):
            sub_dist = sub_dist + list(
                gt.topology.shortest_distance(
                    sub,
                    source=g.vertex(neighbors[i]),
                    target=[g.vertex(m) for m in neighbors[:deg]]))

        # Calculate the local efficiency of node_i
        eff_sum_i = 0
        for dist in sub_dist:
            if dist != 0:
                eff_sum_i += 1 / dist

        if deg > 1:  # guard against division by zero for degree <= 1 nodes
            eff_sum += eff_sum_i / (deg * (deg - 1))

    return eff_sum / n
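As with local_eff, a complete graph is a handy sanity check: with depth d=1 the measure should reduce to plain local efficiency, which is 1.0 on K4. A hedged usage sketch, assuming the snippet's own imports (gt, gt.topology) are in scope:

import graph_tool as gt
from itertools import combinations

g = gt.Graph(directed=False)
g.add_edge_list(combinations(range(4), 2))  # K4
print(ext_local_eff(g, d=1))  # 1.0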
Example 25
def prepare_citeseer(directed=False, weighted=False):
    print("directed", directed)
    attributes_df = pd.read_csv('res/citeseer/citeseer.content',
                                sep="\t",
                                header=None,
                                dtype=str)
    features = attributes_df.iloc[:, 1:-1].to_numpy(dtype=int)
    labels, _ = pd.factorize(attributes_df.iloc[:, -1])
    new_ids, old_ids = pd.factorize(attributes_df.iloc[:, 0])

    edges_df = pd.read_csv('res/citeseer/citeseer.cites',
                           sep="\t",
                           header=None,
                           dtype=str)
    edges_df = edges_df[edges_df.iloc[:, 0].apply(lambda x: x in old_ids)]
    edges_df = edges_df[edges_df.iloc[:, 1].apply(lambda x: x in old_ids)]
    renamed = edges_df.replace(old_ids, new_ids)
    edges = renamed.to_numpy(dtype=int)
    edges = np.fliplr(edges)
    g = graph_tool.Graph(directed=directed)

    g.add_edge_list(edges)

    vfilt = graph_tool.topology.label_largest_component(g, directed=False)

    labels = labels[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    weight = np.sum(np.abs(features[edges[:, 0]] - features[edges[:, 1]]),
                    axis=1)
    if weighted:
        weight_prop = g.new_edge_property("int", vals=weight)
    else:
        weight_prop = g.new_edge_property("int", val=1)

    #spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open(
            "res/citeseer/largest_comp_new_spc_directed_" + str(directed) +
            "_weighted_" + str(weighted) + ".p", "rb"))

    print("spc", len(spc))

    #pickle.dump(spc, open("res/citeseer/largest_comp_new_spc_directed_"+str(directed)+"_weighted_"+str(weighted)+".p", "wb"))

    return g, labels, spc
Example 26
def clear_unconnected(g: gt.Graph, key_node) -> gt.Graph:
    # Get rid of any component that doesn't contain the key node.
    label_map, hist = gt_top.label_components(g, directed=False)

    zero_id = vp_map(g, 'zero_id', 'int')
    key_comps = set()
    for v in g.vertices():
        if zero_id[v] == key_node:
            key_comps.add(label_map[v])

    leave_prop = g.new_vertex_property('bool')
    for v in g.vertices():
        leave_prop[v] = label_map[v] in key_comps

    sub = gt.GraphView(g, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=False)

    return new_g
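label_components does the heavy lifting here; a sketch of keeping only the components that contain designated vertices:

import graph_tool as gt
import graph_tool.topology as gt_top

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (2, 3), (4, 5)])  # three components

label_map, hist = gt_top.label_components(g, directed=False)
keep = {label_map[g.vertex(0)], label_map[g.vertex(4)]}

leave_prop = g.new_vertex_property('bool')
for v in g.vertices():
    leave_prop[v] = label_map[v] in keep
print(gt.GraphView(g, leave_prop).num_vertices())  # 4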
Example 27
def gt_net(model):

    # Extract nodes
    mets = [m.id for m in model.metabolites]
    rxns = [r.id for r in model.reactions]
    nodes = mets + rxns

    # make dict: {node_id: index}
    dd = {nodes[n]: n for n in range(len(nodes))}

    # Extract edges
    substrates = []
    products = []

    for r in model.reactions:
        for s in r.reactants:
            substrates.append((s.id, r.id))
        for p in r.products:
            products.append((r.id, p.id))

    edges = substrates + products

    # Translate edges_id -> edges_index
    edges_index = [(dd[e[0]], dd[e[1]]) for e in edges]

    # Initialize graph
    G = gt.Graph()

    # Populate graph
    G.add_edge_list(edges_index)

    # Extract the largest network component
    l = gt.topology.label_largest_component(G)
    GG = gt.GraphView(G, vfilt=l)

    return GG
Example 28
def sgm_match(t_graph: gt.Graph, g_graph: gt.Graph, delta, tau, n_idx,
              e_idx) -> gt.Graph:
    # T is a query tree
    # G is a query graph
    # Delta is the score delta that we can accept from perfect match
    # tau is how far off this tree is from the graph, at most.
    # nIdx is an index containing node attributes
    # eIdx is an index containing edge attributes
    # root_match = [n for n, d in list(T.in_degree().items()) if d == 0]

    root_match = [v for v in t_graph.vertices() if v.in_degree() == 0]
    root = root_match[0]
    n_keys = list(n_idx.keys())[0]
    e_keys = list(e_idx.keys())[0]

    #    print 'Building matching graph'

    print('Printing MDST Graph')
    print(root)
    print_graph(t_graph)

    # Step 1: Get all the matches for the nodes
    node_matches = dict()
    for v in t_graph.vertices():
        if t_graph.vp[n_keys][v] in list(n_idx[n_keys].keys()):
            node_matches[v] = n_idx[n_keys][t_graph.vp[n_keys][v]]
        else:
            node_matches[v] = set()

    # Step 2: Get all the edge matches for the node
    edge_matches = dict()
    for e in t_graph.edges():
        if t_graph.ep[e_keys][e] in list(e_idx[e_keys].keys()):
            edge_matches[e] = e_idx[e_keys][t_graph.ep[e_keys][e]]
        else:
            edge_matches[e] = set()
        # Make sure you count just the ones that have matching nodes too.
        edge_matches[e] = set([
            em for em in edge_matches[e] if em[0] in node_matches[e.source()]
            and em[1] in node_matches[e.target()]
        ])

    # Scoring, initially, is going to be super-simple:
    # You get a 1 if you match, and a 0 if you don't.  Everything's created equal.

    # Score everything and put it in a graph.

    for k in list(edge_matches.keys()):
        if len(edge_matches[k]) == 0:
            pass
            # stop_here = 1

    match_graph = gt.Graph(directed=True)
    #    for nT in T.nodes():
    #        for nG in node_matches[nT]:
    #            MatchGraph.add_node(tuple([nT,nG]),score=1,solo_score=1)
    mg_edges = set()
    mg_vertices = set()
    mg_vertices_to_index = {}
    for eT in t_graph.edges():
        for eG in edge_matches[eT]:
            v1 = (eT.source(), eG[0])
            v2 = (eT.target(), eG[1])
            mg_vertices.add(v1)
            mg_vertices.add(v2)
            mg_edges.add((v1, v2))

    # match_graph.add_edge([(eT.source(), eG.source()), (eT.target(), eG.target())])
    zero_id = vp_map(match_graph, 'zero_id')
    one_id = vp_map(match_graph, 'one_id')

    for tup in mg_vertices:
        v = match_graph.add_vertex()
        zero_id[v], one_id[v] = tup
        mg_vertices_to_index[tup] = v

    # it = iter(mg_vertices)
    # for v in match_graph.vertices():
    #     tup = next(it)
    #     zero_id[v], one_id[v] = tup
    #     mg_vertices_to_index[tup] = v

    for t1, t2 in mg_edges:
        match_graph.add_edge(mg_vertices_to_index[t1],
                             mg_vertices_to_index[t2])

    # debug_match_graph(match_graph)

    solo_score_vp = vp_map(match_graph, 'solo_score', 'int')
    score_vp = vp_map(match_graph, 'score_v', 'int')
    score_ep = ep_map(match_graph, 'score_e', 'int')
    path_vp = vp_map(match_graph, 'path', 'object')

    g_graph_original = original_vp(g_graph)
    t_graph_original = original_vp(t_graph)

    for v in match_graph.vertices():
        solo_score_vp[v] = 1
        score_vp[v] = 1

        # Here we insert original nodes
        d = coll.deque()
        d.append((t_graph_original[zero_id[v]], g_graph_original[one_id[v]]))
        path_vp[v] = d

    for e in match_graph.edges():
        score_ep[e] = 1

    # gt_draw.graph_draw(match_graph, vprops={'text': zero_id})

    # Get rid of anybody flying solo
    match_graph = clear_unconnected(match_graph,
                                    root)  # this is clearly not working.

    # Now acquire/organize all hypotheses with scores above Max_Score - tau - delta

    # Figure out how much score you could possibly get at every node in the query.
    max_score_v = vp_map(t_graph, 'max_score_v', 'int')
    max_score_e = ep_map(t_graph, 'max_score_e', 'int')
    score_vp = vp_map(match_graph, 'score_v', 'int')
    score_ep = ep_map(match_graph, 'score_e', 'int')
    path_vp = vp_map(match_graph, 'path', 'object')
    zero_id = vp_map(match_graph, 'zero_id')

    # gt_draw.graph_draw(match_graph, vprops={'text': zero_id})

    for n in t_graph.vertices():
        max_score_v[n] = 1
    for e in t_graph.edges():
        max_score_e[e] = 1

    bfs_edges = list(gt_s.bfs_iterator(t_graph, source=root))
    reversed_bfs_edges = list(reversed(bfs_edges))

    t_index = t_graph.vertex_index

    # debug_match_graph(match_graph)

    for e in reversed_bfs_edges:  # Reverse BFS search - should do leaf nodes first.
        # What's the best score we could get at this node?
        v1, v2 = e

        max_score_v[v1] += max_score_v[v2] + max_score_e[e]

        # Find all the edges equivalent to this one in the match graph
        edge_matches = [
            (eG1, eG2) for eG1, eG2 in match_graph.edges()
            if zero_id[eG1] == t_index[v1] and zero_id[eG2] == t_index[v2]
        ]

        parent_nodes = set([eM1 for eM1, eM2 in edge_matches])

        for p in parent_nodes:
            child_nodes = [eM2 for eM1, eM2 in edge_matches if eM1 == p]
            # Keep the best-scoring child and splice its path into the parent's.
            best_score = 0
            best_path = None
            for c in child_nodes:
                c_edge = match_graph.edge(p, c)
                c_score = score_vp[c] + score_ep[c_edge]

                if c_score > best_score:
                    best_score = c_score
                    best_path = path_vp[c]
            score_vp[p] += best_score
            if best_path is not None:
                for pathNode in best_path:
                    path_vp[p].appendleft(pathNode)

    leave_prop = match_graph.new_vertex_property('bool')

    # CLEAN IT UP.
    for n in match_graph.vertices():
        leave_prop[n] = score_vp[n] >= max_score_v[t_graph.vertex(
            zero_id[n])] - delta

    sub = gt.GraphView(match_graph, leave_prop)
    new_match_graph = create_q_graph(sub, add_back_reference=False)

    # Get rid of anybody flying solo
    match_graph = save_root_children(new_match_graph, root)
    zero_id = vp_map(match_graph, 'zero_id')
    one_id = vp_map(match_graph, 'one_id')
    path_list_vp = vp_map(match_graph, 'path_list', 'object')
    for n in match_graph.vertices():
        d = coll.deque()
        d.append((t_graph_original[zero_id[n]], g_graph_original[one_id[n]]))
        path_list_vp[n] = [d]

    # Get a list of solutions alive in the graph
    for e in reversed_bfs_edges:
        v1, v2 = e
        edge_matches = [
            (eG1, eG2) for eG1, eG2 in match_graph.edges()
            if zero_id[eG1] == t_index[v1] and zero_id[eG2] == t_index[v2]
        ]

        parent_nodes = set([eM1 for eM1, eM2 in edge_matches])

        for p in parent_nodes:
            child_nodes = [eM2 for eM1, eM2 in edge_matches if eM1 == p]
            # First, check if the bottom node has a score
            tmpList = []
            for c in child_nodes:
                for _p in path_list_vp[p]:
                    for _c in path_list_vp[c]:
                        tmpList.append(_p + _c)
            path_list_vp[p] = tmpList

    # debug_match_graph(match_graph)

    # Score the root solutions
    return match_graph
Example 29
def calculate_mdst_v2(g: gt.Graph, n_idx, e_idx, used_stuff=set()):
    # Step 1: Figure out the weights.
    n_att_name = list(n_idx.keys())[0]
    e_att_name = list(e_idx.keys())[0]

    # Create an MDSTWeight vector on the nodes and edges.
    v_weight = vp_map(g, 'MDST_v_weight', 'float')
    e_weight = ep_map(g, 'MDST_e_weight', 'float')

    v_attribute_list = list(n_idx[n_att_name].keys())
    e_attribute_list = list(e_idx[e_att_name].keys())

    v_a_map = vp_map(g, n_att_name)
    e_a_map = ep_map(g, e_att_name)

    for n in g.vertices():
        if v_a_map[n] in v_attribute_list:
            v_weight[n] = len(
                n_idx[n_att_name][v_a_map[n]]) / n_idx[n_att_name]['size']
        else:
            v_weight[n] = 0

    for e in g.edges():
        if e in used_stuff:
            e_weight[e] = 1
        else:
            if e_a_map[e] in e_attribute_list:
                e_weight[e] = len(
                    e_idx[e_att_name][e_a_map[e]]) / e_idx[e_att_name]['size']
            else:
                e_weight[e] = 0

    #    for e1,e2 in G.edges():
    #        G.adj[e1][e2]['Nonsense'] = 5

    # Step 2: Calculate the MST.
    # gt.draw.graph_draw(g, vertex_text=g.vp['old'], vertex_font_size=18, output_size=(300, 300), output='G.png')
    t_map = gt_top.min_spanning_tree(g, e_weight, g.vertex(0))

    # T = nx.algorithms.minimum_spanning_tree(G,weight='Nonsense')
    # Step 3: Figure out which root results in us doing the least work.

    t = gt.GraphView(g, efilt=t_map, directed=False)
    # gt.draw.graph_draw(t, vertex_text=t.vp['old'], vertex_font_size=18, output_size=(300, 300), output='T.png')

    best_t = None
    best_score = np.inf
    for root in t.vertices():
        # Generate a new tree

        it = gt_s.bfs_iterator(t, root)
        nodes = []
        edges = []
        for e in it:
            edges.append(e)
            nodes.extend([e.source(), e.target()])
        nodes = np.unique(nodes)
        new_t = create_q_graph(t, q_nodes=nodes, q_edges=edges, directed=True)

        new_t_score = mdst_score_v2(t, root)
        if new_t_score < best_score:
            # print(best_score)
            best_t = new_t
            best_score = new_t_score

    return best_t, best_score
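The MST-to-tree-view step in isolation (toy triangle; weights chosen so the heavy edge is excluded):

import graph_tool as gt
import graph_tool.topology as gt_top

g = gt.Graph(directed=False)
g.add_edge_list([(0, 1), (1, 2), (0, 2)])
w = g.new_edge_property('double', vals=[1.0, 1.0, 5.0])

tree_map = gt_top.min_spanning_tree(g, weights=w)
t = gt.GraphView(g, efilt=tree_map)
print(t.num_edges())  # 2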
Example 30
    gt.draw.graph_draw(g,
                       pos=pos,
                       vertex_fill_color=bv,
                       edge_pen_width=be,
                       output="filtered-bt.png")

    g.set_edge_filter(None)
    bv, be = gt.centrality.betweenness(g)
    be.a /= (be.a.max() / 5)
    gt.draw.graph_draw(g,
                       pos=pos,
                       vertex_fill_color=bv,
                       edge_pen_width=be,
                       output="nonfiltered-bt.png")

    assert not g.is_directed(), ""

    ug = gt.GraphView(g, directed=True)
    assert ug.is_directed(), ""

    tv = gt.GraphView(g, efilt=tree)

    bv, be = gt.centrality.betweenness(tv)
    be.a /= (be.a.max() / 5)
    gt.draw.graph_draw(tv,
                       pos=pos,
                       vertex_fill_color=bv,
                       edge_pen_width=be,
                       output="mst-view.png")

    bv, be = gt.centrality.betweenness(g)
    u = gt.GraphView(g, efilt=lambda e: be[e] > be.a.max() / 2)
    be.a /= (be.a.max() / 5)