Python Graph.set_vertex_filter Exemples, graph_tool.Graph.set_vertex_filter Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : sample.py Projet : ffrankies/GraphChallenge

    def __init__(self, state: SampleState, graph: Graph,
                 old_true_block_assignment: np.ndarray) -> None:
        """Build a Sample from a sampling state and the graph it was drawn from.

        Stores the sampled subgraph, the mapping from original vertex ids to
        sampled vertex ids, and the true block membership of the sampled
        vertices, renumbered so the block ids are consecutive.

        Parameters
        ----------
        state : SampleState
            sampling state; the last `sample_size` entries of `sample_idx` are
            used as the sampled vertices
        graph : Graph
            the graph being sampled. A vertex filter is set on it temporarily
            and all of its filters are cleared before returning.
        old_true_block_assignment : np.ndarray[int]
            vertex-to-community assignment of the full graph. Assumes
            non-overlapping communities.
        """
        self.state = state
        sampled_vertices = sorted(state.sample_idx[-state.sample_size:])
        # original vertex id -> position in the sorted sample
        self.vertex_mapping = {
            original: position
            for position, original in enumerate(sampled_vertices)
        }

        # Mark the sampled vertices, filter the graph down to them, and copy
        # the filtered view into a standalone pruned graph.
        keep = np.zeros(graph.num_vertices())
        keep[sampled_vertices] = 1
        keep_property = graph.new_vertex_property("bool", keep)
        graph.set_vertex_filter(keep_property)
        # If ordering is wacky, may need to play around with vorder
        self.graph = Graph(graph, prune=True)
        graph.clear_filters()

        true_block_assignment = old_true_block_assignment[sampled_vertices]
        # The sample may miss some blocks, so the surviving block ids can have
        # gaps; relabel them to consecutive ids.
        self.true_blocks_mapping = {
            block: new_id
            for new_id, block in enumerate(set(true_block_assignment))
        }
        relabelled = [
            self.true_blocks_mapping[block] for block in true_block_assignment
        ]
        self.true_block_assignment = np.asarray(relabelled)
        self.sample_num = len(self.vertex_mapping)

Exemple #2

0

Afficher le fichier

Fichier : steiner_tree.py Projet : xiaohan2012/reconstructing-cascade

def build_closure(g, terminals, debug=False, verbose=False):
    """Build the closure graph over `terminals`.

    For every terminal a search is run with `pbfs_search`, and the weighted
    edges reported by `get_edges` are collected into an undirected graph with
    as many vertices as `g`, filtered down to the terminals.

    Returns a tuple `(gc, eweight, r2pred)`: the filtered closure graph, its
    integer edge-weight property map, and a dict mapping each terminal to the
    predecessor data (`vis.pred`) of the search rooted there.

    `verbose` is accepted but not used.
    """
    terminals = list(terminals)

    closure = Graph(directed=False)
    closure.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}
    for root in terminals:
        if debug:
            print('root {}'.format(root))
        visitor = init_visitor(g, root)
        pbfs_search(g, source=root, terminals=terminals, visitor=visitor)
        found = set(get_edges(visitor.dist, root, terminals))
        if debug:
            print('new edges {}'.format(found))
        weighted_edges.update(found)
        r2pred[root] = visitor.pred

    # The set is not modified between the two passes below, so edges and
    # weights are produced in the same order.
    for src, dst, _ in weighted_edges:
        closure.add_edge(src, dst)

    eweight = closure.new_edge_property('int')
    eweight.set_2d_array(np.array([cost for _, _, cost in weighted_edges]))

    # Only the terminals stay visible in the returned graph
    terminal_filter = closure.new_vertex_property('bool')
    terminal_filter.a = False
    for t in terminals:
        terminal_filter[t] = True
    closure.set_vertex_filter(terminal_filter)

    return closure, eweight, r2pred

Exemple #3

0

Afficher le fichier

Fichier : test_graph_tool.py Projet : moskytw/graph-tool-lab

def compose_graph(uid_pid_pairs):
    """Build a directed transition graph from `(uid, pid)` pairs.

    Each distinct pid becomes a vertex; an edge is added from the previous
    vertex seen for a uid to the current one. Occurrences are counted on the
    vertices (`vp['count']`) and on the edges (`ep['count']`). A closeness
    value (`vp['closeness']`) and an above-mean-count flag (`vp['picked']`)
    are also stored. The returned graph has no active vertex filter.
    """
    g = Graph()

    pid_prop = g.new_vertex_property('string')
    g.vp['pid'] = pid_prop
    visit_count = g.new_vertex_property('int')
    g.vp['count'] = visit_count
    transition_count = g.new_edge_property('int')
    g.ep['count'] = transition_count

    vertex_by_pid = {}
    last_vertex_by_uid = {}
    edge_by_pair = {}

    for uid, pid in uid_pid_pairs:
        # look up, or create, the vertex for this pid
        vertex = vertex_by_pid.get(pid)
        if vertex is None:
            vertex = g.add_vertex()
            pid_prop[vertex] = pid
            visit_count[vertex] = 0
            vertex_by_pid[pid] = vertex
        visit_count[vertex] += 1

        # link it to the vertex this uid was at previously, if any
        previous = last_vertex_by_uid.get(uid)
        last_vertex_by_uid[uid] = vertex
        if previous is not None:
            pair = (previous, vertex)
            edge = edge_by_pair.get(pair)
            if edge is None:
                edge = g.add_edge(previous, vertex)
                transition_count[edge] = 0
                edge_by_pair[pair] = edge
            transition_count[edge] += 1

    # closeness, weighted by (max count - count)
    closeness_prop = g.new_vertex_property('float')
    g.vp['closeness'] = closeness_prop
    inverse_count = g.new_edge_property('int')
    inverse_count.a = transition_count.a.max() - transition_count.a
    debug('e_inverse_count_p.a: {}', inverse_count.a)
    closeness(g, weight=inverse_count, vprop=closeness_prop)
    debug('v_closeness_p.a    : {}', closeness_prop.a)
    closeness_prop.a = nan_to_num(closeness_prop.a)
    debug('v_closeness_p.a    : {}', closeness_prop.a)

    # filter: flag vertices whose count is above the mean
    picked = g.new_vertex_property('bool')
    g.vp['picked'] = picked
    debug('v_count_p.a.mean() : {}', visit_count.a.mean())
    picked.a = visit_count.a > visit_count.a.mean()
    debug('v_picked_p.a       : {}', picked.a)
    # The filter is applied and then removed straight away
    g.set_vertex_filter(picked)
    g.set_vertex_filter(None)

    return g

Exemple #4

0

Afficher le fichier

Fichier : steiner_tree_greedy.py Projet : xiaohan2012/active-infection-source-finding

def steiner_tree_greedy(
        g, root, infection_times, source, obs_nodes,
        debug=False,
        verbose=True):
    """Greedily grow a directed tree from `root` that spans the observed nodes.

    Observed nodes are processed in increasing `infection_times` order (the
    earliest one is skipped). Each is attached to the current tree through the
    closest tree node found by `cpbfs_search`, with the not-yet-connected
    observed nodes passed as forbidden nodes.

    Parameters:
        g: the input graph
        root: the node the tree is grown from
        infection_times: indexable by node; used only to order `obs_nodes`
        source: accepted but not used
        obs_nodes: iterable of observed nodes to connect
        debug: print progress information
        verbose: accepted but not used

    Returns a directed Graph with `g.num_vertices()` vertices, the collected
    tree edges, and a vertex filter that keeps only the tree nodes.

    Raises AssertionError when an observed node cannot reach the tree.
    """
    # root = min(obs_nodes, key=infection_times.__getitem__)
    # NOTE(review): the earliest observation is dropped, presumably because it
    # is expected to be `root` - confirm against callers.
    sorted_obs = list(sorted(obs_nodes, key=infection_times.__getitem__))[1:]
    tree_nodes = {root}
    tree_edges = set()
    for u in sorted_obs:
        if u in tree_nodes:
            # Already connected (e.g. `root` was not the earliest observation).
            # Connecting it again would add redundant edges or trip the
            # assertion below.
            continue

        # connect u to the tree
        vis = init_visitor(g, u)
        if debug:
            print('connect {} to tree'.format(u))
            print('nodes connectable: {}'.format(tree_nodes))
        forbidden_nodes = list(set(obs_nodes) - tree_nodes)
        cpbfs_search(g, u, visitor=vis,
                     terminals=list(tree_nodes),
                     forbidden_nodes=forbidden_nodes,
                     count_threshold=1)

        # add edge: tree nodes that got a positive distance from u
        reachable_nodes = set(np.nonzero(vis.dist > 0)[0]).intersection(tree_nodes)

        if debug:
            print('reachable_nodes: {}'.format(reachable_nodes))

        assert len(reachable_nodes) > 0
        sorted_ancestors = sorted(reachable_nodes, key=vis.dist.__getitem__)
        ancestor = sorted_ancestors[0]  # closest tree node

        if debug:
            print('ancestor: {}'.format(ancestor))
            print('dist to reachable: {}'.format(vis.dist[sorted_ancestors]))

        new_edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        new_edges = {(v, u) for u, v in new_edges}  # needs to reverse the order

        if debug:
            print('new_edges: {}'.format(new_edges))

        tree_edges |= set(new_edges)
        tree_nodes |= {v for e in new_edges for v in e}

    # Same vertex set as g, so node ids are shared between the two graphs
    t = Graph(directed=True)
    for _ in range(g.num_vertices()):
        t.add_vertex()

    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    t.set_vertex_filter(vfilt)

    return t

Exemple #5

0

Afficher le fichier

Fichier : minimum_steiner_tree.py Projet : xiaohan2012/cascade-reconstruction-by-tree-samples

def build_closure(g, terminals, p=None, debug=False, verbose=False):
    """Build the transitive closure on terminals.

    For each terminal `r`, `shortest_distance` is run from `r` to all other
    terminals (with `p` as edge weights), and an edge `(r, t, distance)` is
    recorded for every other terminal `t` whose distance is not -1.

    Returns `(gc, eweight, r2pred)`: an undirected graph with as many vertices
    as `g`, filtered to the terminals; its integer edge-weight property map;
    and a dict mapping each terminal to the predecessor map of its search.

    `verbose` is accepted but not used.
    """
    terminals = list(terminals)

    closure = Graph(directed=False)
    closure.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}  # root to predecessor map

    # shortest path from every terminal to all the other terminals
    for root in terminals:
        if debug:
            print('root {}'.format(root))

        targets = list(set(terminals) - {root})
        distances, pred_map = shortest_distance(g,
                                                source=root,
                                                target=targets,
                                                weights=p,
                                                pred_map=True)
        dist_of = dict(zip(targets, distances))
        # NOTE(review): -1 is treated as "unreachable" here - confirm that
        # shortest_distance really reports unreachable targets that way.
        new_edges = {(root, t, dist_of[t])
                     for t in targets if dist_of[t] != -1 and t != root}

        if debug:
            print('new edges {}'.format(new_edges))
        weighted_edges.update(new_edges)
        r2pred[root] = pred_map

    # The set is not modified between the two passes below, so edges and
    # weights are produced in the same order.
    for src, dst, _ in weighted_edges:
        closure.add_edge(src, dst)

    # edge weights
    # NOTE(review): the weight map is 'int'; with a weighted `p` the distances
    # are presumably non-integer - verify this is intended.
    eweight = closure.new_edge_property('int')
    eweight.set_2d_array(np.array([cost for _, _, cost in weighted_edges]))

    # only the terminals stay visible
    terminal_filter = closure.new_vertex_property('bool')
    terminal_filter.a = False
    for t in terminals:
        terminal_filter[t] = True
    closure.set_vertex_filter(terminal_filter)
    return closure, eweight, r2pred

Exemple #6

0

Afficher le fichier

Fichier : sample.py Projet : ffrankies/GraphChallenge

    def create_sample(graph: Graph, old_true_block_assignment: np.ndarray,
                      args: argparse.Namespace,
                      prev_state: SampleState) -> 'Sample':
        """Sample `graph` using the sampling method named by `args.sample_type`.

        Vertices whose total degree is 2 or less are filtered out before
        sampling. The sampled indices are then mapped back to the ids of the
        original graph and wrapped in a Sample.

        Raises NotImplementedError for an unknown `args.sample_type`.

        TODO: either re-write how this method is used, or get rid of it - it seems to be a code smell.
        """
        # Keep only vertices with total degree > 2
        degrees = graph.get_total_degrees(np.arange(graph.num_vertices()))
        degree_filter = degrees > 2
        # position in the filtered graph -> vertex id in the original graph
        mapping = np.where(degree_filter)[0]
        keep_property = graph.new_vertex_property("bool", degree_filter)
        graph.set_vertex_filter(keep_property)
        filtered_graph = Graph(graph, prune=True)
        print(filtered_graph.num_vertices())
        graph.clear_filters()

        # Every supported type X is implemented by the method Sample.X_sample
        supported_types = (
            "degree_weighted",
            "expansion_snowball",
            "forest_fire",
            "max_degree",
            "random_jump",
            "random_node_neighbor",
            "random_walk",
            "uniform_random",
        )
        if args.sample_type not in supported_types:
            raise NotImplementedError(
                "Sample type: {} is not implemented!".format(args.sample_type))
        sampler = getattr(Sample, args.sample_type + "_sample")
        state = sampler(filtered_graph, graph.num_vertices(), prev_state, args)

        # Translate the sampled indices back to original-graph vertex ids
        state.sample_idx = mapping[state.sample_idx]
        return Sample(state, graph, old_true_block_assignment)

Exemple #7

0

Afficher le fichier

def tree1():
    """Return a small directed test tree with one filtered-out singleton.

    Five vertices are created; 0->1, 1->2 and 1->3 form the tree, and vertex 4
    (no edges) is hidden by the vertex filter.
    """
    tree = Graph(directed=True)
    tree.add_vertex(5)  # vertex 4 is left without edges
    tree_edges = [(0, 1), (1, 2), (1, 3)]
    tree.add_edge_list(tree_edges)

    # show everything except vertex 4, to test that 4 is not included
    visible = tree.new_vertex_property('bool')
    visible.set_value(True)
    visible[4] = False
    tree.set_vertex_filter(visible)
    return tree

Exemple #8

0

Afficher le fichier

Fichier : minimum_steiner_tree.py Projet : xiaohan2012/gt_min_steiner_tree

def build_closure(g, terminals, debug=False, verbose=False):
    """Build the transitive closure on terminals.

    A search (`bfs_search`) is run from every terminal. For each other
    terminal whose recorded distance is not -1, an edge weighted by that
    distance is added to the closure graph.

    Returns `(gc, eweight, r2pred)`: an undirected graph with as many vertices
    as `g`, filtered to the terminals; its integer edge-weight property map;
    and a dict mapping each terminal to the `pred` data of its search.

    `verbose` is accepted but not used.
    """
    terminals = list(terminals)

    closure = Graph(directed=False)
    closure.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}  # root to predecessor map (from bfs)

    # bfs to all other nodes
    for root in terminals:
        if debug:
            print('root {}'.format(root))
        visitor = init_visitor(g, root)
        bfs_search(g, source=root, visitor=visitor)
        dist = visitor.dist
        # edges from root to every other terminal it reached (-1: not reached)
        new_edges = {(root, t, dist[t])
                     for t in terminals if dist[t] != -1 and t != root}
        if debug:
            print('new edges {}'.format(new_edges))
        weighted_edges.update(new_edges)
        r2pred[root] = visitor.pred

    # The set is not modified between the two passes below, so edges and
    # weights are produced in the same order.
    for src, dst, _ in weighted_edges:
        closure.add_edge(src, dst)

    # edge weights
    eweight = closure.new_edge_property('int')
    eweight.set_2d_array(np.array([cost for _, _, cost in weighted_edges]))

    # only the terminals stay visible
    terminal_filter = closure.new_vertex_property('bool')
    terminal_filter.a = False
    for t in terminals:
        terminal_filter[t] = True
    closure.set_vertex_filter(terminal_filter)
    return closure, eweight, r2pred

Exemple #9

0

Afficher le fichier

Fichier : steiner_tree.py Projet : xiaohan2012/active-infection-source-finding

def build_closure(g, terminals,
                  debug=False,
                  verbose=False):
    """Build the closure graph over `terminals`.

    Runs `pbfs_search` from each terminal and gathers the weighted edges that
    `get_edges` reports into an undirected graph that has one vertex per
    vertex of `g` and shows only the terminals.

    Returns `(gc, eweight, r2pred)`: the filtered closure graph, its integer
    edge-weight property map, and a dict from terminal to the `pred` data of
    the search rooted at that terminal.

    `verbose` is accepted but not used.
    """
    terminals = list(terminals)

    # build closure: same number of vertices as g, added one at a time
    gc = Graph(directed=False)
    vertices_needed = g.num_vertices()
    for _ in range(vertices_needed):
        gc.add_vertex()

    collected = set()
    r2pred = {}

    for root in terminals:
        if debug:
            print('root {}'.format(root))
        vis = init_visitor(g, root)
        pbfs_search(g, source=root, terminals=terminals, visitor=vis)
        fresh = set(get_edges(vis.dist, root, terminals))
        if debug:
            print('new edges {}'.format(fresh))
        collected |= fresh
        r2pred[root] = vis.pred

    # `collected` is iterated twice without changes in between, so the weights
    # line up with the edges in insertion order.
    for a, b, _ in collected:
        gc.add_edge(a, b)

    eweight = gc.new_edge_property('int')
    costs = np.array([cost for _, _, cost in collected])
    eweight.set_2d_array(costs)

    # hide everything except the terminals
    shown = gc.new_vertex_property('bool')
    shown.a = False
    for terminal in terminals:
        shown[terminal] = True
    gc.set_vertex_filter(shown)
    return gc, eweight, r2pred

Exemple #10

0

Afficher le fichier

Fichier : cascade_generator.py Projet : xiaohan2012/cascade-reconstruction-by-tree-samples

def si(g, p, source=None, stop_fraction=0.5):
    """
    Simulate an infection cascade in discrete rounds.

    g: the graph
    p: edge-wise infection probability, either a PropertyMap indexed by edge
       or a single number in (0, 1] used for every edge
    source: starting node; drawn at random from all vertex indices when None
    stop_fraction: stopping if more than N x stop_fraction nodes are infected

    Returns (source, infection_times, tree): `infection_times` holds the round
    in which each node was infected (-1 if never, 0 for the source), and
    `tree` is a directed graph of the infection edges filtered to the nodes
    that appear in them.

    NOTE(review): the loop only exits once the stop fraction is reached, so it
    presumably never terminates if that fraction cannot be reached - confirm.
    """
    weighted = isinstance(p, PropertyMap)
    if not weighted:
        # is float and uniform
        assert 0 < p and p <= 1

    if source is None:
        source = random.choice(np.arange(g.num_vertices()))
    infection_times = np.full(g.num_vertices(), -1.0)
    infection_times[source] = 0
    infected = {source}
    edges = []
    time = 0

    stop = False
    while not stop:
        # nodes infected in this round must not spread until the next round
        spreaders = copy(infected)
        time += 1
        for i in spreaders:
            for e in g.vertex(i).all_edges():
                inf_proba = p[e] if weighted else p
                j = int(e.target())
                # one draw per edge, whether or not j is already infected
                rand = random.random()
                if j not in infected and rand <= inf_proba:
                    infected.add(j)
                    infection_times[j] = time
                    edges.append((i, j))

                    # stop when enough nodes have been infected
                    if (len(infected) / g.num_vertices()) >= stop_fraction:
                        stop = True
                        break
            if stop:
                break

    # cascade tree on the same vertex set as g
    tree = Graph(directed=True)
    for _ in range(g.num_vertices()):
        tree.add_vertex()

    vertex_nodes = set()
    for u, v in edges:
        tree.add_edge(u, v)
        vertex_nodes.update((u, v))

    # show only the nodes that take part in an infection edge
    vfilt = tree.new_vertex_property('bool')
    vfilt.set_value(False)
    vfilt.a[list(vertex_nodes)] = True
    tree.set_vertex_filter(vfilt)

    return source, infection_times, tree

Exemple #11

0

Afficher le fichier

Fichier : greedy.py Projet : xiaohan2012/reconstructing-cascade

def find_tree_greedy(g,
                     root,
                     infection_times,
                     source,
                     obs_nodes,
                     debug=False,
                     verbose=True):
    """Greedily grow a directed tree from `root` that spans the observed nodes.

    Observed nodes are visited in increasing `infection_times` order (the
    earliest one is skipped). A node that is not yet in the tree is attached
    through the closest tree node found by `cpbfs_search`, with the
    not-yet-connected observed nodes passed as forbidden nodes.

    Returns a directed Graph with `g.num_vertices()` vertices, the collected
    tree edges, and a vertex filter that keeps only the tree nodes.
    Raises AssertionError when an observed node cannot reach the tree.

    `source` and `verbose` are accepted but not used.
    """
    # root = min(obs_nodes, key=infection_times.__getitem__)
    by_time = sorted(obs_nodes, key=infection_times.__getitem__)
    remaining_obs = list(by_time)[1:]
    tree_nodes = {root}
    tree_edges = set()

    for u in remaining_obs:
        if u in tree_nodes:
            continue

        # connect u to the tree
        vis = init_visitor(g, u)
        if debug:
            print('connect {} to tree'.format(u))
            print('nodes connectable: {}'.format(tree_nodes))
        not_yet_connected = list(set(obs_nodes) - tree_nodes)
        cpbfs_search(g,
                     u,
                     visitor=vis,
                     terminals=list(tree_nodes),
                     forbidden_nodes=not_yet_connected,
                     count_threshold=1)

        # tree nodes with a positive recorded distance from u
        positive_dist = {k for k in vis.dist if vis.dist[k] > 0}
        reachable_nodes = positive_dist.intersection(tree_nodes)

        if debug:
            print('reachable_nodes: {}'.format(reachable_nodes))

        assert len(reachable_nodes) > 0
        sorted_ancestors = sorted(reachable_nodes, key=vis.dist.__getitem__)
        ancestor = sorted_ancestors[0]  # closest tree node

        if debug:
            print('ancestor: {}'.format(ancestor))
            # NOTE(review): indexes vis.dist with a list, while the lookup
            # above iterates vis.dist as keys - verify this works for the
            # actual type of vis.dist.
            print('dist to reachable: {}'.format(vis.dist[sorted_ancestors]))

        path_edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        # needs to reverse the order
        new_edges = {(head, tail) for tail, head in path_edges}

        if debug:
            print('new_edges: {}'.format(new_edges))

        tree_edges |= set(new_edges)
        for edge in new_edges:
            tree_nodes.update(edge)

    # same vertex set as g, so node ids are shared between the two graphs
    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())

    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for node in tree_nodes:
        vfilt[t.vertex(node)] = True

    for tail, head in tree_edges:
        t.add_edge(t.vertex(tail), t.vertex(head))

    t.set_vertex_filter(vfilt)

    return t

Exemple #12

0

Afficher le fichier

class graphtool():
    """Builds a device-topology graph and searches paths between devices.

    Relies on names defined elsewhere in the file: the `Device`, `System` and
    `Stpdomins` document collections, `graph_tool`, and the `passive`,
    `supported` and `dev_type_dict` lookups.
    """

    def get_edges(self):
        """Fill `self.edges` with `[neighbour id, device id]` pairs.

        One pair is added for every port whose 'acc' flag is falsy. When a
        link was recorded in both orientations, only one orientation is kept.
        """
        self.edges = []
        for dev in Device.objects:
            for port in dev['ports']:
                if not port['acc']:
                    self.edges.append([int(port['dev']), int(dev['devid'])])
        # Iterate over a snapshot: removing from the list that is being
        # iterated skips elements and can leave both orientations of a link.
        for edge in list(self.edges):
            if edge[::-1] in self.edges:
                self.edges.remove(edge)

    def create_graph(self):
        """Build `self.g` as an undirected graph from the current edges."""
        self.get_edges()
        self.g = Graph(directed=False)
        self.g.add_edge_list(self.edges)

    def load_graph(self):
        """Load `self.g` from the pickled graph of the first System document."""
        # NOTE(review): pickle.loads executes arbitrary code if the stored
        # blob is not trusted - confirm only this application writes it.
        self.g = pickle.loads(System.objects.first().graph.read())

    def shortestpath(self, source, dest):
        """Collect candidate paths between two devices given by their uri.

        Gathers the shortest path, a second shortest path computed with the
        first edge of the former hidden, and all shortest paths, then removes
        duplicates and empty paths. When the paths touch devices listed in
        `Stpdomins` more than once, every path between the first and last of
        those devices (within the sub-graph of those devices) is added too.

        Returns `self.all_paths` (lists of vertex indices), or a message
        string when `source == dest`.

        NOTE(review): when either uri lookup is empty, `self.all_paths` is
        returned as left by an earlier call (AttributeError if never set).
        """
        if source == dest:
            return ('нужны разные пипишники')
        #ip to id
        source = Device.objects(uri=source)
        dest = Device.objects(uri=dest)
        if len(source) > 0 and len(dest) > 0:
            source = self.g.vertex(source[0].devid)
            dest = self.g.vertex(dest[0].devid)
            # shortest_path result: [0] is the vertex list, [1] the edge list
            result = graph_tool.topology.shortest_path(self.g, source, dest)
            path = [self.g.vertex_index[x] for x in result[0]]
            # Hide the first edge of that path (inverted filter: edges marked
            # True are excluded) to get an alternative route
            filteredge = self.g.new_edge_property('bool')
            filteredge[result[1][0]] = True
            self.g.set_edge_filter(filteredge, inverted=True)
            result = graph_tool.topology.shortest_path(self.g, source, dest)
            second_path = [self.g.vertex_index[x] for x in result[0]]
            self.g.clear_filters()
            another_paths = []
            all_shortest = graph_tool.topology.all_shortest_paths(
                self.g, source, dest)
            for i in all_shortest:
                another_paths.append([self.g.vertex_index[j] for j in i])

            # Deduplicate via tuples in a set; drop empty paths
            self.all_paths = [path] + [second_path] + another_paths
            self.all_paths = [tuple(t) for t in self.all_paths]
            self.all_paths = [t for t in self.all_paths if len(t) > 0]
            self.all_paths = list(set(self.all_paths))
            self.all_paths = [list(t) for t in self.all_paths]

            # Collect the members of every STP domain touched by the paths;
            # `count` is the number of path devices found in a domain
            dev_from_stp = []
            count = 0
            for path in self.all_paths:
                for dev in path:
                    dev = Device.objects(devid=dev).first().uri
                    if Stpdomins.objects(devices__=dev):
                        count += 1
                        for x in Stpdomins.objects(
                                devices__=dev).first().devices:
                            if x not in dev_from_stp:
                                dev_from_stp.append(x)

            if len(dev_from_stp) > 0 and count > 1:
                print('stp domains')
                # Restrict the graph to the collected domain devices
                filtevertex = self.g.new_vertex_property('bool')
                for x in dev_from_stp:
                    filtevertex[self.g.vertex(
                        Device.objects(uri=x).first().devid)] = True
                self.g.set_vertex_filter(filtevertex)

                source = self.g.vertex(
                    Device.objects(uri=dev_from_stp[0]).first().devid)
                dest = self.g.vertex(
                    Device.objects(uri=dev_from_stp[-1]).first().devid)
                result = graph_tool.topology.all_paths(self.g, source, dest)
                for x in result:
                    self.all_paths.append([int(self.g.vertex(i)) for i in x])
                self.g.clear_filters()
                # Sort so equal paths are adjacent, then keep one of each run
                self.all_paths.sort()
                self.all_paths = [
                    unique for unique, _ in itertools.groupby(self.all_paths)
                ]
                self.all_paths = [
                    path for path in self.all_paths if len(path) > 0
                ]

        return self.all_paths

    def fancy_shortest(self):
        """Describe every path in `self.all_paths` as `[uri, addr, type]` rows.

        Devices whose `devtype` is in `passive` are left out. The result is
        stored in `self.fancy_paths` and returned.
        """
        self.fancy_paths = []
        for path in self.all_paths:
            fancy = []
            for i in path:
                d = Device.objects(devid=i).first()
                if d.devtype not in passive:
                    fancy.append([d.uri, d.addr, dev_type_dict[d.devtype]])
            self.fancy_paths.append(fancy)
        return self.fancy_paths

    def paths_ports(self):
        """Collect, per device, the port numbers used along `self.all_paths`.

        For each pair of consecutive devices in a path, both ends are
        inspected. An end whose `devtype` is in `supported` contributes the
        number of its first port pointing at the other end, or 0 when it has
        no such port.

        Returns `(g_fancy_output, g_output)`. Both map device uri to
        `{'type': ..., 'ports': [...]}` with duplicate ports removed; in the
        first one the type is translated through `dev_type_dict`.
        """
        output = []
        for path in self.all_paths:
            for i, j in zip(path, path[1:]):

                dev = Device.objects(devid=i).first()
                if dev.devtype in supported:
                    ports = [x['num'] for x in dev.ports if x['dev'] == j]
                    if len(ports) == 0:
                        ports = 0
                    else:
                        ports = ports[0]
                    output.append([dev.uri, dev.devtype, ports])

                dev = Device.objects(devid=j).first()
                if dev.devtype in supported:
                    ports = [x['num'] for x in dev.ports if x['dev'] == i]
                    if len(ports) == 0:
                        ports = 0
                    else:
                        ports = ports[0]
                    output.append([dev.uri, dev.devtype, ports])

        g_fancy_output = dict()
        g_output = dict()
        # `output` is not sorted, so the same uri can come up in several
        # groups; the ports of later groups are merged into the first entry
        for key, group in groupby(output, lambda x: x[0]):
            ports = []
            for i in group:
                ports.append(i[2])
            if key in g_output:
                # print (g_output[key]['ports'], ports)
                g_output[key]['ports'] = g_output[key]['ports'] + ports
            else:
                # `i` is the last row of the group here
                g_output[key] = {'type': i[1], 'ports': ports}

        for key in g_output:
            g_output[key]['ports'] = list(set(g_output[key]['ports']))

        g_fancy_output = copy.deepcopy(g_output)
        for i in g_fancy_output:
            g_fancy_output[i]['type'] = dev_type_dict[g_fancy_output[i]
                                                      ['type']]
        return g_fancy_output, g_output

Exemple #13

0

Afficher le fichier

Fichier : quickAndDirty.py Projet : optas/graph_roles

def import_e_coli_ppi(save=False, export=False):
    '''
    Imports the dataset E_Coli and saves it as a graph (Snap, GTools and Greach format).

    The edge list file is read into an undirected graph (one vertex per
    distinct protein name), the labels file is used to fill the
    "essentiality" vertex property, vertices labelled neither 'N' nor 'E' are
    purged, and the "lethality" vertex property flags the 'E' vertices. The
    graph is then passed through graph_analysis.IO.make_simple_graph.

    save:   when true, store the graph with graph_analysis.IO.save_data
    export: when true, call exportToSnapAndGreach

    Returns the graph produced by make_simple_graph.
    '''
    saveLoadFolder = "E_Coli"
    graphName = "E_Coli"
    graphFile = "../Data/Graphs/" + saveLoadFolder + "/E_Coli_Edge_List.txt"
    labelsFile = "../Data/Graphs/" + saveLoadFolder + "/E_Coli_Labels.csv"

    g = Graph(directed=False)

    # protein name -> vertex index, filled as names are first met
    proteinNameToNode = dict()
    with open(graphFile, "r") as inF:
        for line in inF:
            # each line holds exactly two whitespace-separated protein names
            fromNode, toNode = line.strip().split()

            if fromNode not in proteinNameToNode:
                newNode = g.add_vertex()
                proteinNameToNode[fromNode] = int(newNode)
            if toNode not in proteinNameToNode:
                newNode = g.add_vertex()
                proteinNameToNode[toNode] = int(newNode)
            source = proteinNameToNode[fromNode]
            target = proteinNameToNode[toNode]
            g.add_edge(g.vertex(source), g.vertex(target))

    essentiality = g.new_vertex_property("short")
    essentiality.a = 0
    # every code is an int (the '?' code used to be the string '2')
    symbolToInt = {'N': 0, 'E': 1, '?': 2, 'X': 3}

    print(g)

    import csv
    with open(labelsFile, "r") as inFile:
        # count of label rows that could not be applied
        count = 0
        data = [row for row in csv.reader(inFile.read().splitlines())]
        for pair in data:
            proteinName, attribute = pair
            try:
                essentiality.a[proteinNameToNode[
                    proteinName.lower()]] = symbolToInt[attribute]
            except KeyError:
                # protein is not in the graph, or the label symbol is unknown
                count += 1
        print(count)

    g.vp["essentiality"] = essentiality

    # keep only vertices labelled 'N' (0) or 'E' (1); unlabelled vertices
    # still hold the initial 0 and are kept as well
    lethalOrNot = essentiality.a == 0
    lethalOrNot += essentiality.a == 1

    lethality = g.new_vertex_property("boolean")
    lethality.a = lethalOrNot

    g.set_vertex_filter(lethality)
    g.purge_vertices()
    print(g)
    # NOTE(review): `p` is not defined in this function - presumably a debug
    # helper defined elsewhere; confirm it is still wanted.
    p()

    # reuse the property map: now it flags only the 'E' vertices
    lethality.a = 0
    lethality.a[essentiality.a == 1] = 1

    g.vp["lethality"] = lethality

    g = graph_analysis.IO.make_simple_graph(g, undirected=True, gcc=True)

    if save:
        graph_analysis.IO.save_data(
            "../Data/Graphs/" + saveLoadFolder + "/" + graphName + ".GT.graph",
            g)
    if export:
        exportToSnapAndGreach(graphName, saveLoadFolder)

    return g

Exemple #14

0

Afficher le fichier

Fichier : graph.py Projet : hopefulp/sandbox

class graph:
    """
    Graph addon for a mol object, backed by a graph_tool ``Graph``.

    The molecular graph is stored in ``self.molg`` (created by ``make_graph``);
    ``self.vert2atom`` maps each vertex index back to the atom index in the
    parent mol.
    """

    def __init__(self, mol):
        """
        instantiate a graph object which will be attached to the parent mol

        :Parameter:

             - mol : a mol type object (can be a derived type like bb or topo as well)
        """
        self._mol = mol
        logger.debug("generated the graph addon")
        return

    def make_graph(self, idx=None, hashes=True):
        """
        generate a graph for the mol object (atoms should be typed)
        we use the atomtype name with the "_" and everything after it (rule=2) truncated.
        in other words the vertex property is the element plus the coordination number

        :Parameter:

            - idx    : iterable of atom indices to consider [default: all atoms of the mol]
            - hashes : if True, a vertex type ending in "1" (coordination number one)
                       is replaced by "#"

        Atoms whose element is "x" are left out of the graph; the result is
        stored in self.molg, self.vert2atom and self.nvertices.
        """
        # identity test: "idx == None" is evaluated element-wise for array-like idx
        if idx is None:
            idx = range(self._mol.natoms)
        self.molg = Graph(directed=False)
        # now add vertices
        self.molg.vp.type = self.molg.new_vertex_property("string")
        # vert2atom maps vertex indices to the real atoms because some atoms are omitted
        self.vert2atom = []
        # reverse map (atom -> first vertex created for it) for O(1) lookups below
        atom2vert = {}
        for i in idx:
            if self._mol.elems[i] == "x":
                continue
            ig = len(self.vert2atom)
            self.molg.add_vertex()
            self.vert2atom.append(i)
            atom2vert.setdefault(i, ig)
            vtype = self._mol.atypes[i]
            # extract element and coordination number
            if "_" in vtype:
                vtype = vtype.split("_")[0]
            # if the coordination number is one replace the element by a #
            # (endswith also copes with an empty type string)
            if hashes and vtype.endswith("1"):
                vtype = "#"
            self.molg.vp.type[ig] = vtype
        self.nvertices = len(self.vert2atom)
        logger.info("generated a graph for a mol object with %d vertices",
                    self.nvertices)
        # now add edges ... only bonds between vertices
        for i, ia in enumerate(self.vert2atom):
            for ja in self._mol.conn[ia]:
                # we need a .le. here for those atoms/vertices connected to itself twice in different boxes
                if ja >= ia and ja in atom2vert:
                    self.molg.add_edge(self.molg.vertex(i),
                                       self.molg.vertex(atom2vert[ja]))
        return

    def plot_graph(self,
                   fname,
                   g=None,
                   size=1000,
                   fsize=16,
                   vsize=8,
                   ptype="pdf",
                   method='arf'):
        """
        plot the graph (needs more tuning options)

        :Parameter:
            - fname  : filename (will write fname.ptype)
            - g      : graph to draw [default: self.molg]
            - size   : outputsize will be (size, size) in px [default 1000]
            - fsize  : font size [default 16]
            - vsize  : vertex size [default 8]
            - ptype  : output file extension [default pdf]
            - method : placement method to draw graph, can be one of
                       arf
                       frucht
                       radtree
                       sfdp
                       random
                       any other value lets graph_draw choose the positions
        """
        import graph_tool.draw as gt_draw

        draw_g = g if g is not None else self.molg
        if method == 'arf':
            pos = gt_draw.arf_layout(draw_g, max_iter=0)
        elif method == 'frucht':
            pos = gt_draw.fruchterman_reingold_layout(draw_g, n_iter=1000)
        elif method == 'radtree':
            pos = gt_draw.radial_tree_layout(draw_g, draw_g.vertex(0))
        elif method == 'sfdp':
            pos = gt_draw.sfdp_layout(draw_g)
        elif method == 'random':
            # this branch used to test 'sfdp' a second time and was unreachable
            pos = gt_draw.random_layout(draw_g)
        else:
            pos = None
        gt_draw.graph_draw(draw_g,
                           pos=pos,
                           vertex_text=draw_g.vp.type,
                           vertex_font_size=fsize,
                           vertex_size=vsize,
                           output_size=(size, size),
                           output=fname + "." + ptype,
                           bg_color=[1, 1, 1, 1])
        return

    def find_subgraph(self, graph, subg):
        """
        use graph_tools subgraph_isomorphism tool to find substructures

        :Parameter:

            - graph : parent graph to be searched
            - subg  : graph to be found

        :Returns:

            a list of lists with the vertex indices of the substructure, in the
            order delivered by the isomorphism map; matches covering the same
            set of vertices are reported only once (first one wins)
        """
        maps = subgraph_isomorphism(subg,
                                    graph,
                                    vertex_label=(subg.vp.type, graph.vp.type))
        subs = []
        seen = set()
        for m in maps:
            sl = list(m)
            # the sorted tuple identifies the vertex set independent of the mapping order
            key = tuple(sorted(sl))
            if key not in seen:
                seen.add(key)
                subs.append(sl)
        return subs

    def find_sub(self, subg):
        """
        use graph_tools subgraph_isomorphism tool to find substructures

        :Parameter:

            - subg : graph object (from another molsys) to be searched

        :Returns:

            a list of lists with the vertex indices of the substructure
            (see find_subgraph)
        """
        return self.find_subgraph(self.molg, subg.molg)

    def find_fragment(self, frag, add_hydrogen=False):
        """
        find a complete fragment (including the hydrogen atoms not included in the graph)
        Note that the fragment found can be different from the fragment by the number of hydrogen atoms!!

        :Parameter:

            - frag         : mol object with graph addon to be found
            - add_hydrogen : if True, atoms of element "h" bonded to a matched atom are appended

        :Returns:

            a list of lists with the atom indices of the fragment in the full system
        """
        subs = self.find_sub(frag.graph)
        frags = []
        for s in subs:
            # loop over all vertices
            f = []
            for v in s:
                a = self.vert2atom[v]
                f.append(a)
                # check all atoms connected to this atom if they are hydrogen
                if add_hydrogen:
                    f.extend(ca for ca in self._mol.conn[a]
                             if self._mol.elems[ca] == "h")
            frags.append(f)
        return frags

    def util_graph(self, vertices, conn):
        """
        generate a graph with vertices and connectivity in conn

        :Parameter:

            - vertices : sequence of vertex type strings, one per vertex
            - conn     : conn[i] lists the vertex indices bonded to vertex i

        :Returns:

            an undirected Graph with a "type" vertex property
        """
        g = Graph(directed=False)
        # now add vertices
        g.vp.type = g.new_vertex_property("string")
        for i, v in enumerate(vertices):
            g.add_vertex()
            g.vp.type[i] = v
        # now add edges ... each bond only once (j >= i)
        for i, _ in enumerate(vertices):
            for j in conn[i]:
                if j >= i:
                    g.add_edge(g.vertex(i), g.vertex(j))
        return g

    def filter_graph(self, idx):
        """
        filters all atoms besides the given out of the graph
        :Parameters:
            - idx (list): indices of atoms to keep
              (currently used directly as vertex indices of self.molg)
        """
        # TODO use vert2atom
        assert isinstance(idx, list)
        self.molg.clear_filters()
        # named vfilter so the builtin filter() is not shadowed
        vfilter = self.molg.new_vertex_property("bool")
        vfilter.set_value(False)
        for i in idx:
            vfilter[self.molg.vertex(i)] = True
        self.molg.set_vertex_filter(vfilter)
        return

Exemple #15

0

Afficher le fichier

Fichier : core.py Projet : xiaohan2012/reconstructing-cascade

def find_tree_by_closure(g,
                         root,
                         infection_times,
                         terminals,
                         closure_builder=build_closure_with_order,
                         strictly_smaller=True,
                         return_closure=False,
                         k=-1,
                         debug=False,
                         verbose=True):
    """Find a Steiner tree from `root` to `terminals` by transitive closure.

    Steps:
      1. build a closure graph with `closure_builder`,
      2. take a minimum branching of the closure rooted at `root`,
      3. order the terminals by (infection time, BFS order in that branching),
      4. for each terminal, search in `g` for a path to an already collected
         tree node and add the path's edges to the result.

    Parameters
    ----------
    g : graph searched for the actual tree edges
    root : node used as the root of the branching and the first tree node
    infection_times : mapping indexed by node, values are compared with `>`
    terminals : iterable of nodes that should be connected
    closure_builder : callable returning ``(closure_graph, edge_weights)``
    strictly_smaller, k, debug, verbose : forwarded to `closure_builder`
        (`debug` and `verbose` also enable printing here)
    return_closure : if True, also return the closure graph and the branching view

    Returns
    -------
    ``t`` or ``(t, gc, mst_tree)``: ``t`` is a directed Graph with as many
    vertices as `g`, vertex-filtered to the end points of the collected edges.

    Raises
    ------
    TreeNotFound
        if some terminal is not reached by the BFS over the branching.
    """
    gc, eweight = closure_builder(g,
                                  root,
                                  terminals,
                                  infection_times,
                                  strictly_smaller=strictly_smaller,
                                  k=k,
                                  return_r2pred=False,
                                  debug=debug,
                                  verbose=verbose)

    # get the minimum spanning arborescence
    # graph_tool does not provide minimum_spanning_arborescence
    if verbose:
        print('getting mst')
    tree_edges = find_minimum_branching(gc, [root], weights=eweight)

    # edge mask on the closure graph: True only for edges of the branching
    efilt = gc.new_edge_property('bool')
    efilt.a = False
    for u, v in tree_edges:
        efilt[gc.edge(u, v)] = True

    mst_tree = GraphView(gc, efilt=efilt)

    if verbose:
        print('extract edges from original graph')

    # extract the edges from the original graph

    # sort observations by time
    # and also topological order
    # why doing this: we want to start collecting the edges
    # for nodes with higher order
    topological_index = {}
    for i, e in enumerate(bfs_iterator(mst_tree, source=root)):
        topological_index[int(e.target())] = i

    try:
        sorted_obs = sorted(set(terminals) - {root},
                            key=lambda o:
                            (infection_times[o], topological_index[o]))
    except KeyError:
        # a terminal without a topological index was not visited by the BFS above
        raise TreeNotFound(
            "it's likely that the input cannot produce a feasible solution, " +
            "because the topological sort on terminals does not visit all terminals"
        )

    # next, we start reconstructing the minimum steiner arborescence
    tree_nodes = {root}
    # tree_edges is reused: from here on it collects edges of the original graph
    tree_edges = set()
    # print('root', root)
    for u in sorted_obs:
        if u in tree_nodes:
            if debug:
                print('{} covered already'.format(u))
            continue
        # print(u)
        # first in-edge of u in the branching: (v, u)
        v, u = map(int, next(mst_tree.vertex(u).in_edges()))  # v is ancestor
        tree_nodes.add(v)

        # terminals infected strictly later than u are passed as forbidden nodes
        late_nodes = [
            n for n in terminals if infection_times[n] > infection_times[u]
        ]
        vis = init_visitor(g, u)
        # from child to any tree node, including v

        cpbfs_search(g,
                     source=u,
                     terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)
        # dist, pred = shortest_distance(g, source=u, pred_map=True)
        # nodes recorded by the visitor with a positive distance from u
        node_set = {v for v, d in vis.dist.items() if d > 0}
        reachable_tree_nodes = node_set.intersection(tree_nodes)
        # closest already-collected tree node
        # NOTE(review): min() raises ValueError if no tree node was reached -- confirm intended
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        edges = {(j, i) for i, j in edges}  # need to reverse it
        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(v, u))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))
        # every end point of the new edges becomes a tree node
        tree_nodes |= {u for e in edges for u in e}

        tree_edges |= edges

    # result graph keeps the vertex ids of g
    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    # only vertices that are an end point of some tree edge stay visible
    tree_nodes = {u for e in tree_edges for u in e}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    else:
        return t

Exemple #16

0

Afficher le fichier

Fichier : core.py Projet : xiaohan2012/reconstructing-cascade

def build_closure_with_order(g,
                             cand_source,
                             terminals,
                             infection_times,
                             k=-1,
                             strictly_smaller=True,
                             return_r2pred=False,
                             debug=False,
                             verbose=False):
    """
    Build a transitive closure graph that respects the infection order.

    Arguments:

    g: gt.Graph(directed=False)
    cand_source: int
    terminals: list of int
    infection_times: dict int -> float
    k: passed to the search as count_threshold; the larger the k,
       the denser the closure graph
    strictly_smaller: if True a terminal only links to terminals infected
       strictly later, otherwise equal infection times are allowed as well
    return_r2pred: also return, per search root, the predecessor map of its search

    A search is run from cand_source and from every terminal; the edges and
    costs reported for each search are collected into the closure graph.
    Vertex ids are preserved (no re-mapping to consecutive integers); vertices
    that are not an end point of any closure edge are filtered out.

    return:

    (gt.Graph(directed=True), edge weight property map), followed by the
    r2pred dict when return_r2pred is set
    """
    terminals = list(terminals)
    closure_edges = {}
    # only filled (and only returned) when the caller asked for it
    r2pred = {} if return_r2pred else None

    # search from cand_source towards the terminals
    source_vis = init_visitor(g, cand_source)
    cpbfs_search(g,
                 source=cand_source,
                 visitor=source_vis,
                 terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    if return_r2pred:
        r2pred[cand_source] = source_vis.pred
    for tail, head, cost in get_edges(source_vis.dist, cand_source, terminals):
        closure_edges[(tail, head)] = cost

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(closure_edges))

    terminals_iter = tqdm(terminals) if verbose else terminals
    if verbose:
        print('building closure graph')

    # search from every terminal towards the terminals infected after it
    for root in terminals_iter:
        root_time = infection_times[root]
        if strictly_smaller:
            later = [t for t in terminals if infection_times[t] > root_time]
        else:
            # respect what the paper presents
            later = [t for t in terminals if infection_times[t] >= root_time]

        # no one can connect to cand_source
        late_terminals = set(later) - {cand_source}
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))

        root_vis = init_visitor(g, root)
        not_late = list(set(terminals) - set(late_terminals))
        cpbfs_search(g,
                     source=root,
                     visitor=root_vis,
                     terminals=list(late_terminals),
                     forbidden_nodes=not_late,
                     count_threshold=k)

        if return_r2pred:
            r2pred[root] = root_vis.pred

        for tail, head, cost in get_edges(root_vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(tail, head))
            closure_edges[(tail, head)] = cost

    if verbose:
        print('returning closure graph')

    # same number of vertices as g so that vertex ids carry over
    gc = Graph(directed=True)
    gc.add_vertex(g.num_vertices())

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for tail, head in closure_edges:
        gc.add_edge(tail, head)
        vfilt[tail] = True
        vfilt[head] = True

    # weights are written in dict order, i.e. the order the edges were added in
    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(closure_edges.values())))
    gc.set_vertex_filter(vfilt)

    if return_r2pred:
        return gc, eweight, r2pred
    return gc, eweight

Exemple #17

0

Afficher le fichier

Fichier : graph.py Projet : lkopocinski/paintball

class BaseGraph(object):
    """
    Class representing a graph. We do not use pure graph_tool.Graph for we want
    to be able to easily change this library. Neither we use inheritance
    as graph_tool has inconvenient licence.
    """
    def __init__(self):
        # underlying graph_tool.Graph; set by init_graph / unpickle /
        # copy_graph_from / merge_graphs
        self._g = None
        # node name -> BaseNode, filled by add_node
        self._node_dict = {}
        # lazily built caches, see the corresponding methods
        self._syn_to_vertex_map = None
        self._lemma_to_nodes_dict = None
        self._lu_on_vertex_dict = None

    def use_graph_tool(self):
        """
        Returns underlying graph_tool.Graph. It should be avoided at all costs.
        """
        return self._g

    def get_node_for_synset_id(self, syn_id):
        """
        Return the node whose synset has the identifier ``syn_id``, or None
        when there is no such node.

        The map of synset identifiers to nodes is built lazily on the first
        call and reused afterwards (it is rebuilt as long as it is empty).
        """
        if not self._syn_to_vertex_map:
            self._syn_to_vertex_map = {}
            for node in self.all_nodes():
                if node.synset:
                    synset_id = node.synset.synset_id
                    self._syn_to_vertex_map[synset_id] = node
        return self._syn_to_vertex_map.get(syn_id)

    def pickle(self, filename):
        """ Save the underlying graph to ``filename`` """
        self._g.save(filename)

    def unpickle(self, filename):
        """ Load the underlying graph from ``filename`` """
        self._g = load_graph(filename)

    def init_graph(self, drctd=False):
        """ Create a fresh, empty underlying graph """
        self._g = Graph(directed=drctd)

    def copy_graph_from(self, g):
        """ Replace the underlying graph with a copy of the one held by ``g`` """
        self._g = g._g.copy()

    def set_directed(self, drctd):
        self._g.set_directed(drctd)

    def is_directed(self):
        return self._g.is_directed()

    def merge_graphs(self, g1, g2):
        """ Set the underlying graph to the union of those of ``g1`` and ``g2`` """
        self._g = graph_union(g1._g, g2._g, internal_props=True)

    # Node operations:
    def all_nodes(self):
        """ Yield a BaseNode for every vertex of the underlying graph """
        for node in self._g.vertices():
            yield BaseNode(self._g, node)

    def create_node_attribute(self, name, kind, value=None):
        """ Create node attribute ``name`` of type ``kind`` unless it exists """
        if not self.has_node_attribute(name):
            node_attr = self._g.new_vertex_property(kind, value)
            self._g.vertex_properties[name] = node_attr

    def create_node_attributes(self, node_attributes_list):
        """ Create node attributes from a list of (name, kind) pairs """
        for attr in node_attributes_list:
            if not self.has_node_attribute(attr[0]):
                node_attr = self._g.new_vertex_property(attr[1])
                self._g.vertex_properties[attr[0]] = node_attr

    def has_node_attribute(self, name):
        """ Checks if a node attribute already exists """
        return name in self._g.vertex_properties

    def delete_node_attribute(self, name):
        """ Delete node attribute """
        del self._g.vertex_properties[name]

    def add_node(self, name, node_attributes_list=None):
        """
        Return the node registered under ``name``, creating it first if needed.

        ``node_attributes_list`` holds (attribute name, value) pairs; they are
        applied only when the node is newly created.
        """
        if node_attributes_list is None:
            node_attributes_list = []

        if name not in self._node_dict:
            new_node = self._g.add_vertex()
            self._node_dict[name] = BaseNode(self._g, new_node)
            for attr in node_attributes_list:
                self._g.vertex_properties[attr[0]][new_node] = attr[1]
        return self._node_dict[name]

    def get_node(self, name):
        """ Return the node registered under ``name`` (KeyError if unknown) """
        return self._node_dict[name]

    def remove_node(self, name):
        """ Remove the node registered under ``name`` from graph and registry """
        self._g.remove_vertex(self._node_dict[name]._node)
        del self._node_dict[name]

    def nodes_filter(self,
                     nodes_to_filter_set,
                     inverted=False,
                     replace=False,
                     soft=False):
        """
        Filters out nodes from set

        Args:
          nodes_to_filter_set (Iterable): Nodes which fill be filtered out.
          inverted (bool): If True, nodes NOT in set will be filtered out.
            Defaults to False.
          replace (bool): Replace current filter instead of combining the two.
            Defaults to False.
          soft (bool): Hide nodes without removing them so they can be restored
            with reset_nodes_filter. Defaults to False.
        """
        def predicate(node):
            return node not in nodes_to_filter_set

        self.nodes_filter_conditional(predicate, inverted, replace, soft)

    def nodes_filter_conditional(self,
                                 predicate,
                                 inverted=False,
                                 replace=False,
                                 soft=False):
        """
        Filters node based on a predicate

        Args:
          predicate (Callable): Predicate returning False for nodes that should be
            filtered out.
          inverted (bool): Invert condition. Defaults to False.
          replace (bool): Replace current filter instead of combining the two.
            Defaults to False.
          soft (bool): Hide nodes without removing them so they can be restored
            with reset_nodes_filter. Defaults to False.
        """

        (old_filter, old_inverted) = self._g.get_vertex_filter()
        new_filter = self._g.new_vertex_property("bool")

        for node in self.all_nodes():
            # "!= inverted" flips the predicate result when inverted is True
            kept = predicate(node) != inverted
            if not replace and old_filter:
                # a node stays only if the previous filter kept it as well
                old_kept = bool(old_filter[node._node]) != old_inverted
                kept = kept and old_kept
            new_filter[node._node] = kept

        self._g.set_vertex_filter(new_filter, False)
        if not soft:
            self.apply_nodes_filter()

    def apply_nodes_filter(self):
        """ Removes nodes that are currently filtered out """
        self._g.purge_vertices()

    def reset_nodes_filter(self):
        """ Clears node filter """
        self._g.set_vertex_filter(None)

    # Edge operations:
    def num_edges(self):
        return self._g.num_edges()

    def all_edges(self):
        """ Yield a BaseEdge for every edge of the underlying graph """
        for e in self._g.edges():
            yield BaseEdge(self._g, e)

    def get_edges_between(self, source, target):
        """
        Return all edges between source and target. Source and target can be either
        BaseNode or integer.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        for e in self._g.edge(source, target, all_edges=True):
            yield BaseEdge(self._g, e)

    def get_edge(self, source, target, add_missing=False):
        """
        Return some edge between source and target. Source and target can be either
        BaseNode or integer. Returns None when the graph reports no such edge.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        e = self._g.edge(source, target, add_missing)
        if e is None:
            return None
        return BaseEdge(self._g, e)

    def create_edge_attribute(self, name, kind, value=None):
        """ Create edge attribute ``name`` of type ``kind`` unless it exists """
        if not self.has_edge_attribute(name):
            edge_attr = self._g.new_edge_property(kind, value)
            self._g.edge_properties[name] = edge_attr

    def alias_edge_attribute(self, name, alias):
        """ Make edge attribute ``name`` available under ``alias`` too """
        self._g.edge_properties[alias] = self._g.edge_properties[name]

    def create_edge_attributes(self, edge_attributes_list):
        """ Create edge attributes from a list of (name, kind) pairs """
        for attr in edge_attributes_list:
            if not self.has_edge_attribute(attr[0]):
                edge_attr = self._g.new_edge_property(attr[1])
                self._g.edge_properties[attr[0]] = edge_attr

    def has_edge_attribute(self, name):
        """ Checks if an edge attribute already exists """
        return name in self._g.edge_properties

    def delete_edge_attribute(self, name):
        """ Delete edge attribute """
        del self._g.edge_properties[name]

    def add_edge(self, parent, child, edge_attributes_list=None):
        """
        Add an edge from ``parent`` to ``child`` (both BaseNode) and return it.

        ``edge_attributes_list`` holds (attribute name, value) pairs to set on
        the new edge.
        """
        if edge_attributes_list is None:
            edge_attributes_list = []

        new_edge = self._g.add_edge(parent._node, child._node)
        for attr in edge_attributes_list:
            self._g.edge_properties[attr[0]][new_edge] = attr[1]

        return BaseEdge(self._g, new_edge)

    def edges_filter(self, edges_to_filter_set):
        """ Permanently remove every edge contained in ``edges_to_filter_set`` """
        edge_filter = self._g.new_edge_property("bool")

        for e in self.all_edges():
            # True keeps the edge, False marks it for purging
            edge_filter[e._edge] = e not in edges_to_filter_set

        self._g.set_edge_filter(edge_filter)
        self._g.purge_edges()

    def ungraph_tool(self, thingy, lemma_on_only_synset_node_dict):
        """
        Converts given data structure so that it no longer have any graph_tool dependencies.

        Args:
          thingy: A dict (converted recursively, keys and values alike) or a
            graph_tool property map (converted to a plain dict).
          lemma_on_only_synset_node_dict (dict): Its values are collections of
            nodes; only these nodes are translated for vertex property maps.

        Returns:
          The converted dict, or None when ``thingy`` is neither a dict nor a
          property map.

        Raises:
          NotImplementedError: For a property map that is keyed neither by
            vertices nor by edges.
        """
        logger = logging.getLogger(__name__)

        if isinstance(thingy, dict):
            return {
                self.ungraph_tool(k, lemma_on_only_synset_node_dict):
                self.ungraph_tool(thingy[k], lemma_on_only_synset_node_dict)
                for k in thingy
            }

        nodes_to_translate = set()
        for vset in lemma_on_only_synset_node_dict.values():
            nodes_to_translate.update(vset)

        # isinstance also accepts the PropertyMap subclasses graph_tool hands out
        if isinstance(thingy, gt.PropertyMap):
            dct = {}
            if thingy.key_type() == 'v':
                for node in nodes_to_translate:
                    dct[node] = thingy[node.use_graph_tool()]
            elif thingy.key_type() == 'e':
                for edge in self.all_edges():
                    dct[edge] = thingy[edge.use_graph_tool()]
            else:
                logger.error('Unknown property type %s', thingy.key_type())
                # NotImplemented is a constant, not an exception; raising it
                # would produce a TypeError instead of the intended error
                raise NotImplementedError(
                    'Unknown property type %s' % thingy.key_type())
            return dct

        # NOTE(review): any other value (including plain dict keys such as
        # strings) is mapped to None -- confirm this is intended
        return None

    def generate_lemma_to_nodes_dict_synsets(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects. It is expensive in terms of time
        needed to generate the dictionary. It should therefore be executed
        at the beginning of the runtime and later its results should be reused
        as many times as needed without re-executing the function.

        The lemmas are taken, lower-cased, from the lexical units of each
        node's synset; nodes whose synset lookup raises KeyError are skipped.
        """
        lemma_to_nodes_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                lu_set = node.synset.lu_set
            except KeyError:
                continue

            for lu in lu_set:
                lemma = lu.lemma.lower()
                lemma_to_nodes_dict[lemma].add(node)

        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    def generate_lemma_to_nodes_dict_lexical_units(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects. It is expensive in terms of time
        needed to generate the dictionary. It should therefore be executed
        at the beginning of the runtime and later its results should be reused
        as many times as needed without re-executing the function.

        The lemma is taken, lower-cased, from each node's lexical unit; nodes
        for which that fails are skipped.
        """
        lemma_to_nodes_dict = defaultdict(set)

        for node in self.all_nodes():
            try:
                lemma = node.lu.lemma.lower()
                lemma_to_nodes_dict[lemma].add(node)
            except Exception:
                # best effort: skip the node, but let KeyboardInterrupt and
                # SystemExit propagate (a bare except would swallow them)
                continue

        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    @property
    def lemma_to_nodes_dict(self):
        """ Dictionary built by one of the generate_lemma_to_nodes_dict_* methods """
        return self._lemma_to_nodes_dict

    def _make_lu_on_v_dict(self):
        """
        Makes dictionary lu on vertex: lexical unit id -> node.
        Nodes without an accessible or truthy lexical unit are skipped.
        """
        # NOTE(review): the default factory is set although the stored values
        # are nodes, so a missing id yields an empty set -- kept unchanged
        lu_on_vertex_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                nl = node.lu
            except Exception:
                continue

            if nl:
                lu_on_vertex_dict[nl.lu_id] = node

        self._lu_on_vertex_dict = lu_on_vertex_dict

Exemple #18

0

Afficher le fichier

Fichier : build_question_graph.py Projet : erichan2046/multi-label-text-classification-2

# Build a question graph from the projection matrix, restrict it to its
# largest connected component and re-index that component into a new graph.
# NOTE(review): `m` and `id2q_map` are defined outside this excerpt;
# presumably `m` is a sparse question-by-label matrix -- confirm.
qm = m * m.T  # question adj matrix via unipartite projection

# one edge per non-zero entry of the projected matrix
g = Graph()
edges = zip(*qm.nonzero())
g.add_edge_list(edges)

# vertex mask marking the largest component, and the share of vertices in it
vfilt = label_largest_component(g)
f = np.sum(vfilt.a) / len(vfilt.a)
print('fraciton of nodes in largest cc: {}'.format(f))


# question id per vertex of the full graph, taken from the values of id2q_map
# NOTE(review): assumes the values are ordered by vertex index -- confirm
prop_question_id = g.new_vertex_property('int')
prop_question_id.a = np.array(list(id2q_map.values()))

# focus on largest CC
g.set_vertex_filter(vfilt)

# re-index the graph
# SO question: https://stackoverflow.com/questions/46264296/graph-tool-re-index-vertex-ids-to-be-consecutive-integers
# n2i: vertex of the filtered graph -> new consecutive index; i2n is the inverse
n2i = {n: i for i, n in enumerate(g.vertices())}
i2n = dict(zip(n2i.values(), n2i.keys()))

# new graph holding only the edges of the filtered graph, with the new indices
new_g = Graph()
new_g.add_edge_list([(n2i[e.source()], n2i[e.target()]) for e in g.edges()])


# update question ids
# look up each new vertex's question id through its original vertex
new_prop_question_id = new_g.new_vertex_property('int')
new_prop_question_id.a = [prop_question_id[i2n[i]] for i in range(new_g.num_vertices())]
new_g.vertex_properties['question_id'] = new_prop_question_id