Example #1
0
def session_draw_bis_melty(sessions_id, weblog, weblog_columns_dict):
    """
    Draw the graph of the session whose id is given and save it as a PNG.

    Every distinct page of the session (requested page or referrer page)
    becomes a vertex, and every log row of the session becomes an edge going
    from its referrer page to its requested page. Pages flagged as external
    are drawn with a halo.

    Parameters:
    sessions_id - value matched against ``weblog.session_id``
    weblog - pandas.DataFrame holding ``session_id``, ``requested_external``
        and ``referrer_external`` columns plus the two page columns named in
        ``weblog_columns_dict``
    weblog_columns_dict - dict giving the names of the page columns under the
        keys 'requested_page_column' and 'referrer_page_column'

    Returns:
    None. The picture is written to "./_session<sessions_id>.png".
    """
    import pandas as pd
    from graph_tool.all import Graph
    from graph_tool.all import graph_draw
    session = weblog[weblog.session_id == sessions_id]
    session = session.rename(
        index=str,
        columns={
            weblog_columns_dict['requested_page_column']: 'requested_page',
            weblog_columns_dict['referrer_page_column']: 'referrer_page',
        })
    requested = session[['requested_page', 'requested_external']]
    referrers = session[['referrer_page', 'referrer_external']].rename(
        index=str,
        columns={
            'referrer_page': 'requested_page',
            'referrer_external': 'requested_external',
        })
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # equivalent. drop_duplicates is applied out of place so that no derived
    # frame is mutated.
    s_pages = pd.concat([requested, referrers])
    s_pages = s_pages.drop_duplicates(subset='requested_page')
    g = Graph()
    v = {}
    halo = g.new_vertex_property("bool")
    for row in s_pages.itertuples():
        vertex = g.add_vertex()
        v[row.requested_page] = vertex
        halo[vertex] = bool(row.requested_external)
    # One edge per log row, referrer -> requested page.
    for referrer, requested_page in zip(session['referrer_page'],
                                        session['requested_page']):
        g.add_edge(v[referrer], v[requested_page])
    graph_draw(g,
               vertex_halo=halo,
               output="./_session" + str(sessions_id) + ".png")
    return
Example #2
0
    def gen_graph_from_mongo(self):
        '''
        Build the whole undirected graph from the stored adjacency data.

        Each stored record contributes one edge per entry of its 'edges'
        list, going from its 'material_id' to that entry. Note that
        add_edge_list will not match vertex ids (str ids) in subsequent
        calls of the function.

        Returns:
            Graph: undirected graph built from the hashed string ids
        '''

        self.from_storage(find={'projection': {'material_id': 1, 'edges': 1}})
        material_ids = self.memory['material_id']
        neighbour_lists = self.memory['edges']

        # the instance no longer needs to hold on to the loaded data
        self.memory = None

        print('loaded data structures')

        edge_list = []
        for row in range(len(material_ids)):
            for col in range(len(neighbour_lists[row])):
                edge_list.append((material_ids[row], neighbour_lists[row][col]))

        print('generated edge list')

        # release the temporary data before the graph is built
        material_ids = None
        neighbour_lists = None

        graph = Graph(directed=False)
        graph.add_edge_list(edge_list, hashed=True, string_vals=True)

        return graph
Example #3
0
 def make_article_graph(self, layout="arf"):
     """Make an article graph and compute a layout for it.

     One vertex is created per row of ``self.db`` and the 'Cited by' column
     is stored as the vertex property 'nmb_citation'. For every entry of
     ``self._get_author_publication()`` holding at least two publications,
     all pairs of those publications are linked by an edge.

     Parameters:
     layout - one of "arf", "sfdp", "fr" or "radial". The historical
         misspelling "sfpd" is still accepted for "sfdp".

     Raises:
     ValueError - if `layout` is not one of the supported names

     The graph is stored in ``self.graph`` and the vertex positions in
     ``self.layout_pos``.
     """
     self.graph = Graph(directed=False)
     # add vertex
     self.graph.add_vertex(len(self.db))
     # add properties
     cb = self.graph.new_vertex_property("int", self.db['Cited by'].values)
     self.graph.vertex_properties['nmb_citation'] = cb
     # Add links
     auths = list(self.author_betweeness.keys())
     auth2ind = {auth: ind for ind, auth in enumerate(auths)}
     auth2pub = self._get_author_publication()
     for pubs in auth2pub.values():
         if len(pubs) < 2:
             continue
         self.graph.add_edge_list(list(itertools.combinations(pubs, 2)))
     # layout
     if layout == "arf":
         self.layout_pos = arf_layout(self.graph)
     elif layout in ("sfdp", "sfpd"):
         # "sfpd" is kept for callers that rely on the old spelling
         self.layout_pos = sfdp_layout(self.graph)
     elif layout == "fr":
         self.layout_pos = fruchterman_reingold_layout(self.graph)
     elif layout == "radial":
         # NOTE(review): the root is a hard-coded author name looked up in
         # an author index, while the vertices of this graph are rows of
         # self.db - confirm this is the intended root vertex.
         self.layout_pos = radial_tree_layout(self.graph,
                                              auth2ind['Logan, B.E.'])
     else:
         raise ValueError(
             "unknown layout {!r}; expected 'arf', 'sfdp', 'fr' or "
             "'radial'".format(layout))
def simulate_cascade(g, p, source=None, return_tree=False):
    """
    graph_tool version of simulating cascade

    The graph is sampled with `sample_graph_by_p(g, p)` and the infection
    times are computed on the sampled graph, starting from `source` (a
    random vertex index of `g` when `source` is None).

    Returns (source, times), or (source, times, tree) when `return_tree` is
    set. `times` is a np.ndarray on vertices holding the infection time in
    the cascade; an uninfected node has dist -1. `tree` is a directed Graph
    with as many vertices as `g`, made of the edges of the shortest paths
    from the source to every infected vertex.
    """
    if source is None:
        source = random.choice(np.arange(g.num_vertices(), dtype=int))
    sampled = sample_graph_by_p(g, p)

    times = get_infection_time(sampled, source)
    if not return_tree:
        return source, times

    # union of the edges of all source -> infected-vertex shortest paths
    tree_edges = set()
    for infected in np.nonzero(times != -1)[0]:
        path = shortest_path(sampled,
                             source=source,
                             target=sampled.vertex(infected))[0]
        tree_edges.update(zip(path[:-1], path[1:]))

    tree = Graph(directed=True)
    for _ in range(g.num_vertices()):
        tree.add_vertex()
    for tail, head in tree_edges:
        tree.add_edge(int(tail), int(head))
    return source, times, tree
Example #5
0
def test_feasibility(g, weights):
    """Assert that the edges returned by minimum_branching form an arborescence.

    The graph is first converted with `from_gt(g, weights)`; `[0]` is passed
    as the second argument of `minimum_branching`.
    """
    branching = minimum_branching(from_gt(g, weights), [0])

    arborescence = Graph(directed=True)
    arborescence.add_edge_list(branching)
    assert is_arborescence(arborescence)
Example #6
0
def construct_motif_graph(graph_container, motif, vertex_maps=None):
    """Build the undirected gt graph of motif co-occurrence.

    Note that graph_tool generates empty nodes to fill in the missing
    indices: adding edge (1,2) to an empty graph yields 3 nodes (0, 1, 2)
    and 1 edge (1,2). The returned `m_graph` therefore usually has a large
    number of disconnected nodes.

    The directedness of the graph returned by the container is set to match
    the motif before it is used.

    Parameters:
    graph_container - GraphContainer - Store the original network
    motif - Motif - Motif in study
    vertex_maps - optional; when None it is taken from the third value
        returned by `count_motif(graph, motif)`

    Returns:
    m_graph - gt.Graph - Undirected graph for motif cooccurence
    """
    if motif.anchors is None:
        print("Warning: Turning motif groups into cliques.")
    source_graph = graph_container.get_gt_graph()
    source_graph.set_directed(motif.gt_motif.is_directed())
    # graph_tool.Graph
    m_graph = Graph(directed=False)
    if vertex_maps is None:
        _, _, vertex_maps = count_motif(source_graph, motif)
    for group in vertex_maps:
        for vertex_map in group:
            anchored = motif.anchored_edges(source_graph,
                                            vertex_map.get_array())
            m_graph.add_edge_list(list(anchored))
    return m_graph
def alignment_graph(lengths=None, pairings=None, alignments=None):
    """Build an undirected graph linking aligned positions of sequences.

    One vertex is created per position of every sequence
    (``sum(lengths)`` vertices in total). For every non-empty
    ``alignments[i]`` its segments are concatenated along axis 0 and read as
    (position in sequence j, position in sequence k) pairs, with
    ``j, k = pairings[i]``; each pair becomes an edge.

    Parameters:
    lengths - sequence lengths; defaults to no sequences
    pairings - ``pairings[i]`` gives the two sequence indices of
        ``alignments[i]``; defaults to empty
    alignments - per pairing, a sequence of segments; defaults to empty

    Returns:
    (g, seq_index, time, alignment_index, segment_index) where `seq_index`
    and `time` are vertex maps (sequence number and position inside the
    sequence) and `alignment_index` and `segment_index` are edge maps
    (index of the alignment and of the segment the edge comes from).
    """
    # None instead of mutable [] defaults; callers passing nothing see the
    # same empty inputs as before.
    lengths = [] if lengths is None else lengths
    pairings = [] if pairings is None else pairings
    alignments = [] if alignments is None else alignments

    g = Graph(directed=False)
    seq_index = g.new_vertex_property("int")
    time = g.new_vertex_property("int")
    # add vertices; np.concatenate rejects an empty list, so the maps are
    # only filled when there is at least one sequence
    if len(lengths) > 0:
        g.add_vertex(sum(lengths))
        seq_index.a = np.concatenate(
            [np.repeat(seq, size) for seq, size in enumerate(lengths)])
        time.a = np.concatenate([np.arange(size) for size in lengths])
    # add edges (alignments)
    alignment_index = g.new_edge_property("int")
    segment_index = g.new_edge_property("int")
    for ali, segments in enumerate(alignments):
        if len(segments) > 0:
            j, k = pairings[ali]
            pairs = np.concatenate(segments, axis=0)
            # shift positions by the number of vertices that precede the
            # sequence to obtain global vertex indices
            indicesJ = (np.arange(lengths[j]) + sum(lengths[:j]))[pairs.T[0]]
            indicesK = (np.arange(lengths[k]) + sum(lengths[:k]))[pairs.T[1]]
            # distinct names: the comprehension no longer shadows the
            # alignment index and segment list of the enclosing loop
            seg_indices = np.concatenate(
                [np.repeat(seg, len(seg_pairs))
                 for seg, seg_pairs in enumerate(segments)])
            g.add_edge_list(
                np.vstack([indicesJ, indicesK,
                           np.repeat(ali, len(pairs)), seg_indices]).T,
                eprops=[alignment_index, segment_index])
    return g, seq_index, time, alignment_index, segment_index
Example #8
0
def graph_measures(graph: gt.Graph) -> pd.DataFrame:
    """Collect per-vertex labels and measures of `graph` into a DataFrame.

    The frame has one row per vertex (in `graph.vertices()` order) and one
    column per entry below; missing values are replaced by 0.
    """
    _, authority, hub = gt.hits(graph)

    # column name -> vertex property map, in output column order
    property_maps = {
        'tp_group': graph.vp.group_name,
        'tp_author': graph.vp.username,
        'tn_degree_in': graph.degree_property_map('in'),
        'tn_degree_out': graph.degree_property_map('out'),
        'tn_degree_total': graph.degree_property_map('total'),
        'tn_pagerank': gt.pagerank(graph),
        'tn_betweenness': gt.betweenness(graph)[0],
        'tn_closeness': gt.closeness(graph),
        'tn_eigenvector': gt.eigenvector(graph)[1],
        'tn_authority': authority,
        'tn_hub': hub,
        'tn_lcc': gt.local_clustering(graph)
    }

    columns = {name: [] for name in property_maps}
    for vertex in graph.vertices():
        for name, prop in property_maps.items():
            columns[name].append(prop[vertex])

    return pd.DataFrame(columns).fillna(0)
Example #9
0
    def __init__(self, edges, weights):
        """Build the graph from `edges` and `weights` and derive its routes.

        `edges['target']` fixes the number of vertices and `edges['source']`
        maps every source to its targets. `weights[i]` is stored as the
        'weights' vertex property of vertex i. Depths, accumulated weights
        and routes are then computed by the instance's own helper methods,
        starting from the last vertex.
        """
        self.edges = edges
        self.graph = Graph()
        self.size = len(edges['target'])
        self.graph.add_vertex(self.size)
        self.weights = weights

        # init weights part
        self.graph.vp.weights = self.graph.new_vertex_property('int16_t')
        for vertex in range(self.size):
            self.graph.vp.weights[vertex] = weights[vertex]

        # one graph edge per (source, target) entry
        for source in self.edges['source'].keys():
            for target in self.edges['source'][source]:
                self._add_edge(source, target)

        self.depth_per_node = dict.fromkeys(range(self.size), 0)
        self.accum_weights = dict.fromkeys(range(self.size), 0)
        self.find_depth()
        self.find_accum_weights(self.size - 1)

        # group the nodes by the depth that was found for them
        self.depth = {level: [] for level in set(self.depth_per_node.values())}
        for node, level in self.depth_per_node.items():
            self.depth[level].append(node)

        self.routes_t = {}
        self.find_routes(self.size - 1, 0, self.routes_t)

        self.routes = []
        self.transpose_routes(self.size - 1, self.routes_t[self.size - 1])
Example #10
0
    def gen_sub_graph_from_mongo(self, center, snn=1):
        '''
        Build the undirected graph surrounding one record of the storage.

        The edges of `center` are loaded first; then, `snn` times, the
        records whose ids were reached in the previous step are loaded and
        their edges added. Note that add_edge_list will not match vertex
        ids (str ids) in subsequent calls of the function.

        Args:
            center (str) mp-id of the center of the graph
            snn (int) the number of second nearest neighbors to expand to

        Returns:
            Graph: undirected graph built from the collected edges
        '''

        def load(criteria):
            # every query asks for the same two fields
            self.from_storage(
                find={
                    'filter': criteria,
                    'projection': {
                        'material_id': 1,
                        'edges': 1
                    }
                })

        load({'material_id': center})
        center_id = self.memory['material_id'][0]
        frontier = self.memory['edges'][0]

        edge_list = []
        for pos in range(len(frontier)):
            edge_list.append((center_id, frontier[pos]))

        for _ in range(snn):
            load({'material_id': {'$in': frontier}})
            sources = self.memory['material_id']
            neighbour_lists = self.memory['edges']

            # collect the new edges and, at the same time, the ids to
            # expand in the next round
            reached = []
            for row in range(len(sources)):
                for col in range(len(neighbour_lists[row])):
                    neighbour = neighbour_lists[row][col]
                    edge_list.append((sources[row], neighbour))
                    reached.append(neighbour)
            frontier = reached

        print('generated edge list')

        graph = Graph(directed=False)
        graph.add_edge_list(edge_list, hashed=True, string_vals=True)

        return graph
Example #11
0
def test_feasibility(g, weights):
    """Assert that the minimum branching found with roots=[0] is an arborescence.

    Every edge of `g` is paired with the weight at the same position of
    `weights` before being handed to `find_minimum_branching`.
    """
    weighted_edges = []
    for edge, weight in zip(g.get_edges(), weights):
        weighted_edges.append((edge[0], edge[1], weight))

    branching = find_minimum_branching(g.num_vertices(),
                                       weighted_edges,
                                       roots=[0])

    arborescence = Graph(directed=True)
    arborescence.add_edge_list(branching)
    assert is_arborescence(arborescence)
def init_graph(graphml_path):
    global g
    g = Graph(directed=True)
    t0 = time()
    g.load(graphml_path)
    t1 = time()
    print "Loaded from GraphML in", t1-t0
    print "Loaded", g.num_vertices(), "nodes"
    print "Loaded", g.num_edges(), "edges"
Example #13
0
    def __init__(self):
        """Create an empty graph with its property maps and lookup tables.

        `v_names` and `v_colors` are string vertex maps, `e_names` is a
        string edge map; `vertices` and `edges` start as empty dicts.
        """
        graph = Graph()
        self.graph = graph

        # lookup tables, empty until the graph is populated
        self.vertices = {}
        self.edges = {}

        self.v_names = graph.new_vertex_property("string")
        self.v_colors = graph.new_vertex_property("string")
        self.e_names = graph.new_edge_property("string")
Example #14
0
def get_graph(fname: str) -> Graph:
    """Load the graph stored as ``../data/<fname>`` in graph-tool 'gt' format.

    Args:
        fname: file name inside the ``../data`` directory.

    Returns:
        The loaded graph.

    Raises:
        FileNotFoundError: if the file does not exist; the message lists
            the content of the data directory.
    """
    fdir = os.path.join('..', 'data')
    # the parameter is `fname`; the previous code referenced an undefined
    # `name` and failed with NameError
    fpath = os.path.join(fdir, fname)
    if not os.path.exists(fpath):
        raise FileNotFoundError('Invalid Graph, options are:\n' +
                                '\n'.join(os.listdir(fdir)))
    g = Graph()
    g.load(file_name=fpath, fmt='gt')
    return g
Example #15
0
def build_word_graph(model_fname, limiar=0.2):
    """
    Build a graph of words weighted by the similarity between them
    according to the model.

    Word pairs are visited in `combinations` order and the loop stops once
    the pair counter exceeds 10000. Every visited word gets a vertex carrying
    its vocabulary count ('freq'); pairs whose similarity is above 0.1 are
    linked by an edge carrying that similarity ('sim').

    NOTE(review): `limiar` is not used by the body - the similarity cut-off
    is the literal 0.1; confirm which one is intended.

    :param model_fname: name of the file the word2vec model was saved to
    :return: graph object
    """
    m = Word2Vec.load(model_fname)
    g = Graph()
    freq = g.new_vertex_property("int")
    weight = g.new_edge_property("float")
    vdict = {}
    processed = 0
    for w1, w2 in combinations(m.vocab.keys(), 2):
        if w1 == '' or w2 == '':
            continue

        # fetch or create the vertex of each word and refresh its count
        endpoints = []
        for word in (w1, w2):
            if word in vdict:
                vertex = vdict[word]
            else:
                vertex = g.add_vertex()
            vdict[word] = vertex
            freq[vertex] = m.vocab[word].count
            endpoints.append(vertex)

        sim = m.similarity(w1, w2)
        if sim > 0.1:
            weight[g.add_edge(endpoints[0], endpoints[1])] = sim
        if processed > 10000:
            break
        processed += 1
    g.vertex_properties['freq'] = freq
    g.edge_properties['sim'] = weight
    return g
Example #16
0
def simulate_cascade(g, p, source=None, return_tree=False):
    """
    graph_tool version of simulating cascade

    The graph is sampled with `sample_graph_by_p(g, p)` and the infection
    times are computed on the sampled graph from `source`.

    Parameters:
    g - graph to simulate on
    p - forwarded to `sample_graph_by_p`
    source - seed vertex; when None it is drawn at random from the largest
        component of the sampled graph
    return_tree - when True, also return the cascade tree

    Returns:
    (source, times), or (source, times, tree) when `return_tree` is set.
    `times` is a np.ndarray on vertices holding the infection time in the
    cascade; an uninfected node has dist -1. `tree` is a directed Graph with
    as many vertices as `g`, filtered down to the vertices that take part
    in the tree (the source is always kept).
    """
    gv = sample_graph_by_p(g, p)

    infected_nodes = None
    if source is None:
        # consider the largest cc
        infected_nodes = np.nonzero(label_largest_component(gv).a)[0]
        source = np.random.choice(infected_nodes)

    times = get_infection_time(gv, source)

    if not return_tree:
        return source, times

    if infected_nodes is None:
        # The caller supplied the source, so no component was labelled:
        # the infected vertices are those with an infection time.
        infected_nodes = np.nonzero(times != -1)[0]

    # get the tree edges
    _, pred_map = shortest_distance(gv, source=source, pred_map=True)
    edges = [(pred_map[i], i) for i in infected_nodes if i != source]

    # create tree
    tree = Graph(directed=True)
    tree.add_vertex(g.num_vertices())
    for u, v in edges:
        tree.add_edge(int(u), int(v))

    # The filter is built once, outside the edge loop, so it also exists
    # when the cascade never left the source.
    vfilt = tree.new_vertex_property('bool')
    vfilt.a = False
    vfilt[source] = True
    for v in set(itertools.chain(*edges)):
        vfilt[v] = True
    tree.set_vertex_filter(vfilt)

    return source, times, tree
Example #17
0
def get_incompatible_segments(g, seg_index, out_edges):
    """Label groups of segments linked through adjacent out-neighbours.

    A helper graph with one vertex per segment index
    (``max(seg_index.a) + 1`` vertices) is built. For every vertex of `g`
    and every group returned by `group_adjacent` over its sorted
    out-neighbours, all distinct segments found on the matching edges are
    linked pairwise. The component label of every segment is returned.

    `out_edges[v]` is indexed as a 2-D array: column 1 is matched against
    the neighbour ids and column 2 is used to index `seg_index.a`
    (presumably rows are (source, target, edge index) - confirm with the
    caller).
    """
    incomp_graph = Graph(directed=False)
    incomp_graph.add_vertex(np.max(seg_index.a) + 1)
    for v in g.get_vertices():
        for group in group_adjacent(sorted(g.get_out_neighbors(v))):
            in_group = np.isin(out_edges[v][:, 1], group)
            edges = out_edges[v][np.where(in_group)][:, 2]
            segments = list(np.unique(seg_index.a[edges]))
            for pos, first in enumerate(segments):
                for second in segments[pos + 1:]:
                    incomp_graph.add_edge(first, second)
    return label_components(incomp_graph)[0].a
Example #18
0
 def __init__(self,
              size: Tuple[int] = (10, 10),
              field_size: Tuple[int] = (100, 100)):
     """Create a directed graph with one vertex per zone of a grid.

     `size` is read as (rows, columns) and `field_size` as (width, height);
     the size of one row and of one column of the grid is derived from
     them.
     """
     self.g = Graph(directed=True)
     rows, cols = size[0], size[1]
     width, height = field_size[0], field_size[1]

     self.n_rows = rows
     self.n_cols = cols
     self.n_zones = rows * cols
     self.fwidth = width
     self.fheight = height
     self.row_size: float = height / rows
     self.col_size: float = width / cols
     self.g.add_vertex(self.n_zones)
Example #19
0
    def reset_variables(self):
        """Reset the graph, its property maps and all lookup tables."""
        self.__graph = Graph()

        # lookup tables start out empty
        self.__students_by_id = {}
        self.__schools_by_id = {}
        self.__vertices_by_student_id = {}
        self.__vertices_by_school_id = {}

        # vertex maps, also registered on the graph under their own names
        self.__entity_id = self.__graph.new_vertex_property("int")
        self.__entity_type = self.__graph.new_vertex_property("string")
        self.__graph.vertex_properties["entity_id"] = self.__entity_id
        self.__graph.vertex_properties["entity_type"] = self.__entity_type
Example #20
0
def build_word_graph(model_fname, limiar=0.2):
    """
    Build a graph of words weighted by the similarity between them
    according to the model.

    Word pairs are visited in `combinations` order and the loop stops once
    the pair counter exceeds 10000. Every visited word gets a vertex carrying
    its vocabulary count ('freq'); pairs whose similarity is above 0.1 are
    linked by an edge carrying that similarity ('sim').

    NOTE(review): `limiar` is not used by the body - the similarity cut-off
    is the literal 0.1; confirm which one is intended.

    :param model_fname: name of the file the word2vec model was saved to
    :return: graph object
    """
    m = Word2Vec.load(model_fname)
    g = Graph()
    freq = g.new_vertex_property("int")
    weight = g.new_edge_property("float")
    vdict = {}
    processed = 0
    for w1, w2 in combinations(m.vocab.keys(), 2):
        if w1 == '' or w2 == '':
            continue

        # fetch or create the vertex of each word and refresh its count
        endpoints = []
        for word in (w1, w2):
            if word in vdict:
                vertex = vdict[word]
            else:
                vertex = g.add_vertex()
            vdict[word] = vertex
            freq[vertex] = m.vocab[word].count
            endpoints.append(vertex)

        sim = m.similarity(w1, w2)
        if sim > 0.1:
            weight[g.add_edge(endpoints[0], endpoints[1])] = sim
        if processed > 10000:
            break
        processed += 1
    g.vertex_properties['freq'] = freq
    g.edge_properties['sim'] = weight
    return g
Example #21
0
 def __init__(self):
     """Create an empty directed graph with its property maps registered."""
     self.g = Graph(directed=True)
     self.player_id_to_vertex = {}
     self.pairs = {}  # player pair: edge
     # property maps for additional information: (name, value type)
     vertex_schema = (
         ('player_id', "string"),
         ('player_coords', "vector<float>"),
         ('average_player_coords', "vector<float>"),
         ('player_n_coords', "int"),
     )
     for prop_name, value_type in vertex_schema:
         self.g.vertex_properties[prop_name] = self.g.new_vertex_property(
             value_type)
     self.g.edge_properties['weight'] = self.g.new_edge_property("float")
Example #22
0
def getDegreeValuesOf(g: gt.Graph):
    """Return the minimum, average and maximum vertex degree of `g`.

    The degree of a vertex is ``out_degree() + in_degree()``; in_degree is
    0 for undirected graphs, so the sum is the total degree in both cases.

    Returns a dict with the keys "min_deg", "avg_deg" and "max_deg". For a
    graph without vertices all three values are 0.
    """
    degrees = [v.out_degree() + v.in_degree() for v in g.vertices()]
    if not degrees:
        # nothing to aggregate (this used to divide by zero)
        return {"min_deg": 0, "avg_deg": 0, "max_deg": 0}
    # min() over the real degrees: seeding the minimum with n - 1 is wrong
    # as soon as every degree exceeds n - 1 (directed graphs, multigraphs).
    return {
        "min_deg": min(degrees),
        "avg_deg": sum(degrees) / len(degrees),
        "max_deg": max(degrees),
    }
Example #23
0
 def __init__(self, directed=True, verbose=1):
     """Create the underlying graph with graph_tool or networkx.

     The backend follows the module-level GRAPH_TOOL flag. With graph_tool
     a float edge property `weight` is created as well; with networkx a
     DiGraph or Graph is used depending on `directed`.
     """
     self.graphtool = GRAPH_TOOL
     # Initialize graph
     if not self.graphtool:
         if directed:
             print("directed graph")
             self.graph = nx.DiGraph()
         else:
             self.graph = nx.Graph()
     else:
         self.graph = Graph(directed=directed)
         self.weight = self.graph.new_edge_property("float")
     # set metaparameter
     self.verbose = verbose
     self.time_logs = {}
Example #24
0
 def __init__(self):
     """Start the runtime: load the configuration, pull the resources and build the graph.

     The file configuration is merged over a copy of the standalone
     defaults, every configured resource is pulled once from UNIS, and a
     graph with one vertex per existing node and one edge per existing link
     that has both `src` and `dst` is built.
     """
     logger.info("starting UNIS Network Runtime Environment...")
     fconf = get_file_config(nre_settings.CONFIGFILE)
     # work on a copy so the module-level defaults are not modified by the merge
     self.conf = deepcopy(nre_settings.STANDALONE_DEFAULTS)
     merge_dicts(self.conf, fconf)
     
     self.unis_url = str(self.conf['properties']['configurations']['unis_url'])
     self.ms_url = str(self.conf['properties']['configurations']['ms_url'])
     self._unis = unis_client.UNISInstance(self.conf)
     self.time_origin = int(time())
     
     self._schemas = SchemaCache()
     self._resources = self.conf['resources']
     
     self._subunisclient = {}
     
     # one attribute per resource name, holding its 'new' and 'existing' entries
     for resource in self._resources:
         setattr(self, resource, {'new': {}, 'existing': {}})
     
     # construct the hierarchical representation of the network
     for resource in self._resources:
         # only pullRuntime once at the beginning, as pubsub will only update
         # them later when resources are modified on the server
         # NOTE(review): `self` is passed explicitly to a method accessed on
         # the instance - confirm pullRuntime's signature expects it.
         self.pullRuntime(self, self._unis, self._unis.get(resource), resource, False)
     
     # construct the graph representation of the network, of which this NRE is in charge
     self.g = Graph()
     # nodebook maps the keys of the existing nodes to their graph vertex
     self.nodebook = {}
     for key in self.nodes['existing'].keys():
         self.nodebook[key] = self.g.add_vertex()
     
     # NOTE(review): dict.iteritems exists only on Python 2.
     for key, link in self.links['existing'].iteritems():
         if hasattr(link, 'src') and hasattr(link, 'dst'):
             # the end points are looked up through the selfRef of the node of each side
             self.g.add_edge(self.nodebook[link.src.node.selfRef],\
                             self.nodebook[link.dst.node.selfRef], add_missing=False)
Example #25
0
    def make_author_graph(self, layout="arf"):
        """Make an author graph and compute a layout for it.

        One vertex is created per entry of ``self.author_list``. For every
        author, each other author found in ``self.author_betweeness[author]``
        is linked to it by one edge, and the associated value is stored as
        the edge property 'weight'.

        Parameters:
        layout - one of "arf", "sfdp", "fr", "radial" or "planar". The
            historical misspelling "sfpd" is still accepted for "sfdp".

        Raises:
        ValueError - if `layout` is not one of the supported names

        The graph is stored in ``self.graph`` and the vertex positions in
        ``self.layout_pos``; for "arf", "sfdp" and "fr" the current
        ``self.layout_pos`` is passed in as the starting positions.
        """
        self.graph = Graph(directed=False)
        # add vertex
        auths = self.author_list
        self.graph.add_vertex(len(auths))
        # add links
        auth2ind = {auth: ind for ind, auth in enumerate(auths)}
        abet = []
        # work on a copy: entries are deleted while the edges are created
        authbet = copy.deepcopy(self.author_betweeness)
        for auth in auths:
            for col, weight in authbet[auth].items():
                if col == auth:
                    continue
                self.graph.add_edge(auth2ind[auth], auth2ind[col])
                del authbet[col][auth]  # ensure that edges are not doubled
                abet.append(weight)
        # add properties
        cb = self.graph.new_edge_property("int", abet)
        self.graph.edge_properties['weight'] = cb
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph,
                                         weight=self.graph.ep.weight,
                                         pos=self.layout_pos,
                                         max_iter=10000)
        elif layout in ("sfdp", "sfpd"):
            # "sfpd" is kept for callers that rely on the old spelling
            self.layout_pos = sfdp_layout(self.graph,
                                          eweight=self.graph.ep.weight,
                                          pos=self.layout_pos)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(
                self.graph,
                weight=self.graph.ep.weight,
                circular=True,
                pos=self.layout_pos)
        elif layout == "radial":
            # the root is the author with the largest total citation value
            nc = self.get_total_citation()
            main_auth_ind = np.argmax(list(nc.values()))
            main_auth = list(nc.keys())[main_auth_ind]
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind[main_auth])
        elif layout == "planar":
            self.layout_pos = planar_layout(self.graph)
        else:
            raise ValueError(
                "unknown layout {!r}; expected 'arf', 'sfdp', 'fr', "
                "'radial' or 'planar'".format(layout))
Example #26
0
def test_graphtool():
    """Check gt2edges_and_weights on a directed 4-cycle with weights 1 to 4."""
    expected = [(0, 1, 1), (1, 2, 2), (2, 3, 3), (3, 0, 4)]

    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(src, dst) for src, dst, _ in expected])
    weight = g.new_edge_property('float')
    for src, dst, value in expected:
        weight[g.edge(src, dst)] = value

    assert set(gt2edges_and_weights(g, weight)) == set(expected)
Example #27
0
class StackGraph(object):
    def __init__(self):
        self.g = None

    def load(self, filename):
        # Initialize the graph
        self.g = Graph()
        # Each node will store a FunctionWrapper() class instance.
        self.g.vertex_properties["functions"] = self.g.new_vertex_property("object")
        self.g.vertex_properties["display"] = self.g.new_vertex_property("string")
        # Each edge will store a [ ..tbd.. ] .
        self.g.edge_properties["calls"] = self.g.new_edge_property("object")

        # Load the log file and build the graph
        i = 0
        f = open(filename, "rb")
        for line in f:
            i += 1
            try:
                # Skip any informational lines
                if "*" in line:     continue
                # Extract a call stack snapshot
                words = line.split()
                time = words[0][2:]
                depth = words[1][2:]
                stack = [FunctionWrapper(instring=item) for item in words[2].split("->")]

                # Add the top 2 functions to the graph, if necessary.  Format: f1()->f2()
                f1, f2 = stack[-2], stack[-1]
                v1, v2 = None, None
                    # Search for the vertices
                for v in self.g.vertices():
                    if self.g.vp.functions[v] == f1:    v1 = v
                    if self.g.vp.functions[v] == f2:    v2 = v
                    if v1 != None and v2 != None:       break

                    # Add new vertices if needed
                if v1 == None:
                    v1 = self.g.add_vertex()
                    self.g.vp.functions[v1] = f1
                    self.g.vp.display[v1] = f1.graphDisplayString()
                if v2 == None:
                    v2 = self.g.add_vertex()
                    self.g.vp.functions[v2] = f2
                    self.g.vp.display[v2] = f2.graphDisplayString()

                # Add the edge if necessary, and then add data to it
                if not self.g.edge(v1, v2):
                    e = self.g.add_edge(v1, v2)
                    self.g.ep.calls[e] = CallList(v1, v2)

                self.g.ep.calls[e].addCall(time, depth)
            except Exception as e:
                print "Exception on line", i, ":", e
                print [str(x) for x in stack]
                exit()
Example #28
0
 def get_pagerank_values(self):
     """Yield one (original id, pagerank) pair per vertex of the graph.

     The graph is built from ``self.__v.get_graph_edges()`` with hashed
     integer ids; the time needed to build it and to compute the pagerank
     is logged. Being a generator, nothing runs before the first item is
     requested.
     """
     started = time.time()
     logger.info('Started call to get_pagerank')
     g = Graph()
     # the returned map gives back the original id of every vertex
     original_ids = g.add_edge_list(self.__v.get_graph_edges(),
                                    hashed=True,
                                    hash_type='int')
     build_seconds = time.time() - started
     logger.info('Delta time to build graph: {}s'.format(
         timedelta(seconds=build_seconds)))
     started = time.time()
     ranks = pagerank(g)
     rank_seconds = time.time() - started
     logger.info('Delta time to compute pagerank: {}s'.format(
         timedelta(seconds=rank_seconds)))
     for vertex in g.vertices():
         yield original_ids[vertex], ranks[vertex]
Example #29
0
def load_train(name):
    '''
    Build a graph from the adjacency list yielded by iter_adj_list(name).

    Training file is numbered from 0 to n. Not all nodes in the training file
    have their own row, so the ids are collected from both the rows and their
    neighbour lists before the vertices are created. The adjacency list is
    read twice: once to count ids and rows, once to add the edges.
    '''
    g = Graph()

    # first pass: number of rows and set of all ids that occur
    node_ids = set()
    n_rows = 0
    for node_id, neighbor_ids in iter_adj_list(name):
        n_rows += 1
        node_ids.add(node_id)
        node_ids.update(neighbor_ids)
    g.add_vertex(len(node_ids))

    # second pass: one edge per (row, neighbour) entry
    for row_no, (node_id, neighbor_ids) in enumerate(iter_adj_list(name), 1):
        print('adding edge for vertex {}/{}'.format(row_no, n_rows))
        for neighbor_id in neighbor_ids:
            g.add_edge(node_id, neighbor_id)
    return g
Example #30
0
def mwgm_graph_tool(pairs, sim_mat):
    """Select a matching among `pairs` with graph_tool's max_cardinality_matching.

    Every distinct first element and every distinct second element of the
    pairs gets its own vertex, and every pair becomes one edge weighted by
    ``sim_mat[x, y]``.

    Parameters:
    pairs - iterable of (x, y) pairs; converted to a list when it is not one
    sim_mat - indexed as ``sim_mat[x, y]`` to obtain the weight of a pair

    Returns:
    set of the pairs whose edge is marked 1 in the matching result
    """
    from graph_tool.all import Graph, max_cardinality_matching
    if not isinstance(pairs, list):
        pairs = list(pairs)
    g = Graph()
    weight_map = g.new_edge_property("float")
    nodes_dict1 = dict()
    nodes_dict2 = dict()
    for x, y in pairs:
        if x not in nodes_dict1:
            nodes_dict1[x] = g.add_vertex()
        if y not in nodes_dict2:
            nodes_dict2[y] = g.add_vertex()
        e = g.add_edge(nodes_dict1[x], nodes_dict2[y])
        # Write the weight through the handle returned by add_edge: looking
        # the edge up again by its end points may return a different
        # parallel edge when a pair is repeated.
        weight_map[e] = sim_mat[x, y]
    print("graph via graph_tool", g)
    res = max_cardinality_matching(g,
                                   heuristic=True,
                                   weight=weight_map,
                                   minimize=False)
    # Edges were added in the order of `pairs`, so the position of an edge
    # in the result array is also the position of its pair.
    edge_index = np.where(res.get_array() == 1)[0].tolist()
    return {pairs[index] for index in edge_index}
Example #31
0
def analysis(name: str):
    """Load ``../data/<name>`` and print degree, component and distance reports.

    The load time, the time spent on the distances and the total time are
    printed as well.
    """
    started = clock()
    g = Graph()
    g.load(file_name=f'../data/{name}', fmt='gt')

    print(f'Dados carregados. Tempo: {clock() - started:.2f}s', end='\n')
    vertices = g.get_vertices()
    edges = g.get_edges()

    # Degree
    degree_report = report(g.get_total_degrees(vertices))

    # Connected Components
    component_data = components(g)
    component_report = report(component_data)

    # Distances (timed on their own)
    distances_started = clock()
    distance_report = report(distances(g))
    print(f'td = {clock() - distances_started}')

    print(
        f'Vértices: {len(vertices)}; Arestas: {len(edges)}; Componentes Conexas: {len(component_data)};',
        end='\n--\n')

    print(f'Grau dos vértices:\n{degree_report}', end='\n--\n')

    print(f'Tamanho das componentes conexas:\n{component_report}', end='\n--\n')

    print(f'Distâncias:\n{distance_report}', end='\n--\n')

    print(f"Tempo total: {clock() - started:.2f}s")
Example #32
0
 def init_graph(self):
     """Create an empty undirected graph with its property maps registered.

     The graph property "id" is set to 0; the vertex maps "id", "x", "y",
     "t", "f" and the edge map "d" are created empty.
     """
     graph = Graph(directed=False)
     self.graph = graph
     self.vertex_index = dict()

     graph.graph_properties["id"] = graph.new_graph_property("long")
     graph.graph_properties["id"] = 0

     # vertex maps: (name, value type)
     vertex_schema = (
         ("id", "long"),
         ("x", "double"),
         ("y", "double"),
         ("t", "long"),
         ("f", "vector<double>"),
     )
     for prop_name, value_type in vertex_schema:
         graph.vertex_properties[prop_name] = graph.new_vertex_property(
             value_type)

     graph.edge_properties["d"] = graph.new_edge_property("double")
Example #33
0
def calc_pagerank(g: gt.Graph) -> List[Tuple[int, str, float]]:
    """
    Compute the pagerank of every vertex of `g`.

    Return: list of tuples sorted by decreasing pagerank,
    [(vertex_idx, wk_title, pagerank_value), ....]
    """
    titles = g.vp['_graphml_vertex_id']  # same as wktitle
    scores = gt.pagerank(g)
    ranks = []
    for vertex in g.vertices():
        ranks.append((g.vertex_index[vertex], titles[vertex], scores[vertex]))
    # negated key: highest pagerank first
    ranks.sort(key=lambda entry: -entry[-1])
    return ranks
Example #34
0
def main():
    """Score every motif of the requested size on a dataset and save the scores.

    Reads the command-line arguments from the module-level ``parser``,
    loads the dataset as a graph-tool graph, picks the motif catalogue that
    matches the motif size (3 or 4) and the graph's directedness, and then,
    for each motif, scores it on a copy of the graph whose vertices have
    been randomly reordered.  With ``num_shuffles <= 0`` the score comes
    from ``motifs``; otherwise it comes from ``motif_significance``.
    Each score is printed and written to ``args.output + str(num_shuffles)``.

    Raises:
        AssertionError: if ``args.motif_size`` is neither 3 nor 4.
    """
    args = parser.parse_args()

    print("Reading data...")
    set_dataloc(args.dloc)
    # The return value is not needed here; the call is kept in case it has
    # side effects (e.g. loading/caching metadata used by find_meta).
    get_metadata()
    graph = GraphContainer(find_meta(args.dataset), args.dloc)

    print("Creating gt.Graph...")
    gt_graph = graph.get_gt_graph()

    assert args.motif_size == 4 or args.motif_size == 3  # Only motif 3 and 4

    # Pick the motif catalogue matching motif size and graph directedness.
    directed = gt_graph.is_directed()
    if args.motif_size == 3:
        all_motif = all_3 if directed else all_u3
    else:
        all_motif = all_4 if directed else all_u4

    output = args.output + str(args.num_shuffles)

    print("Writing scores to file...")
    with open(output, "w") as ofile:
        info = "Dataset: {d} - Motif size: {m} - Directed: {di}\n".format(
                    d=args.dataset, m=args.motif_size,
                    di=str(directed))
        ofile.write(info)

        for i, mc in enumerate(all_motif):
            # Work on a copy of the graph with a random vertex ordering.
            idx = gt_graph.vertex_index.copy("int")
            shuffle(idx.a)
            g = Graph(gt_graph, vorder=idx)
            # Both calls are restricted to the single motif ``mc``; ``[1][0]``
            # takes the first entry of the second returned sequence.
            if args.num_shuffles <= 0:
                score = motifs(g, k=args.motif_size,
                               motif_list=[mc.gt_motif])[1][0]
            else:
                score = motif_significance(g, k=args.motif_size,
                                           n_shuffles=args.num_shuffles,
                                           motif_list=[mc.gt_motif])[1][0]
            r = "Motif index {}: {}\n".format(i, score)
            print(r)
            ofile.write(r)

    print("Motif analysis for {} is completed.".format(args.dataset))
    def __init__(self, sentence, directed=False, graph=None):
        """Wrap an existing graph, or build a fresh one for ``sentence``.

        When ``graph`` is given it is stored as ``self.sentence_graph`` and
        nothing else is done.  Otherwise a new graph is created and all
        graph, vertex, edge and edge-filter property maps are registered
        under their module-level key constants.
        """
        # Create a SentenceGraph from an existing graph tool graph
        if graph is not None:
            self.sentence_graph = graph
            return

        # Create a new SentenceGraph from scratch
        g = Graph(directed=directed)
        self.sentence_graph = g

        # Graph property holding the sentence text
        g.graph_properties[SENTENCE_KEY] = g.new_graph_property("string", sentence)

        # Vertex properties as (key, value type), in registration order
        vertex_layout = (
            (WORD_KEY, "string"),
            (PART_OF_SPEECH_KEY, "string"),
            (VERTEX_COLOR_KEY, "vector<double>"),
        )
        for key, value_type in vertex_layout:
            g.vertex_properties[key] = g.new_vertex_property(value_type)

        # Edge properties followed by the boolean edge-filter properties
        edge_layout = (
            (SENTENCE_EDGE_KEY, "string"),
            (DEFINITION_EDGE_KEY, "string"),
            (PARSED_DEPENDENCIES_EDGE_KEY, "string"),
            (INTER_SENTENCE_EDGE_KEY, "string"),
            (EDGE_COLOR_KEY, "vector<double>"),
            (PARSE_TREE_DEPENDENCY_VALUE_KEY, "string"),
            (FILTER_DEFINITION_EDGE_KEY, "bool"),
            (FILTER_INTER_SENTENCE_EDGE_KEY, "bool"),
            (FILTER_PARSED_DEPENDENCIES_EDGE_KEY, "bool"),
            (FILTER_SENTENCE_EDGE_KEY, "bool"),
        )
        for key, value_type in edge_layout:
            g.edge_properties[key] = g.new_edge_property(value_type)
    class __Graph__:
        """Graph of cookies: one vertex per cookie, edges from its parents."""

        def __init__(self):
            """Create the empty graph and start the cookie receiver."""
            self.graph = GT_Graph()
            self.cookies = {}  # cookie id -> vertex
            receiver = CookieRecvr(self)
            self.cookierecvr = receiver
            receiver.start()

        def new_cookie(self, cookie):
            """Add ``cookie`` as a vertex and link it from each known parent.

            ``cookie`` is indexed with ``'cid'`` and ``'parents'``.  Parents
            that have no vertex yet are logged and skipped.
            """
            vertex = self.graph.add_vertex()
            cid = cookie['cid']
            self.cookies[cid] = vertex
            logging.info('added cookie {} to graph'.format(cid))

            for parent in cookie['parents']:
                try:
                    parent_vertex = self.cookies[parent]
                    self.graph.add_edge(parent_vertex, self.cookies[cid])
                    message = 'added eddge from cookie {} to graph'.format(
                        parent)
                    logging.info(message)
                except KeyError:
                    logging.info('parent not known in graph')
Example #37
0
def graph_from_matrix(matrix, directed=False):
    """Build a weighted graph from the non-zero entries of ``matrix``.

    One vertex is added per row of ``matrix`` (``len(matrix)``), and one
    edge per non-zero entry, with the entry's value stored in the returned
    edge property map.

    Returns:
        ``(graph, weights)`` — the graph and its "float" edge property map.
    """
    g = Graph(directed=directed)
    g.add_vertex(len(matrix))
    weights = g.new_ep("float")

    # Stack the index arrays of the non-zero entries with their values, so
    # each column reads (index..., value).
    nonzero = np.nonzero(matrix)
    values = matrix[nonzero]
    table = np.append(nonzero, [values], axis=0)
    edge_rows = [tuple(column) for column in table.T]

    g.add_edge_list(edge_rows, eprops=[weights])
    #graph_draw(g, output_size=(1000, 1000), output="results/structure.pdf")
    return g, weights
Example #38
0
def to_gt(db):
    """Convert db to a directed graph-tool graph.

    Every object returned by the vertex query becomes a vertex; every
    object returned by the edge query becomes an edge between the vertices
    created for its start and end uids.
    """
    from graph_tool.all import Graph

    graph = Graph(directed=True)

    # uid of the native vertex -> index of the graph-tool vertex
    index_by_uid = {}
    for node in db.query(vertices, get)():
        index_by_uid[node.uid] = graph.vertex_index[graph.add_vertex()]

    for link in db.query(edges, get)():
        source = index_by_uid[link.start().uid]
        target = index_by_uid[link.end().uid]
        graph.add_edge(source, target)

    return graph
def rysuj_graf_wejsciowy(g, output=None, size=(600, 600), bez_napisow=False):
    """Draw a copy of the input graph ``g``.

    Each vertex of the copy gets a label of the form
    ``"<vertex>: <liczba_kolorow value>"`` stored in the
    ``'wyswietlany_tekst'`` vertex property.  The labels are drawn unless
    ``bez_napisow`` is true.  ``output`` and ``size`` are forwarded to
    ``graph_draw`` as ``output`` and ``output_size``.
    """
    gx = Graph(g)
    gx.vertex_properties['wyswietlany_tekst'] = gx.new_vertex_property('string')
    labels = gx.vertex_properties['wyswietlany_tekst']

    for v in gx.vertices():
        colour_count = gx.vertex_properties['liczba_kolorow'][v]
        labels[v] = str(v) + ': ' + str(colour_count)

    # Options shared by both drawing modes; the label text is optional.
    draw_options = {
        'bg_color': [255., 255., 255., 1],
        'output_size': size,
        'output': output,
    }
    if not bez_napisow:
        draw_options['vertex_text'] = gx.vertex_properties['wyswietlany_tekst']

    graph_draw(gx, **draw_options)
def build_closure(g, cand_source, terminals, infection_times, k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    Build a closure graph connecting cand_source and the terminals.

    A search is run from cand_source towards all terminals, and then from
    every terminal towards the terminals infected later than it (strictly
    later when `strictly_smaller` is true, later-or-equal otherwise).
    The number of neighbors of each node is determined by k:
    the larger the k, the denser the graph.

    Returns a tuple (gc, eweight, r2pred): the directed closure graph with
    as many vertices as `g`, an 'int' edge property holding the edge
    costs, and a dict mapping each search root to its visitor's `pred`.
    """
    terminals = list(terminals)
    r2pred = {}
    edges = {}

    def _search(start, targets, blocked):
        # Run one search from `start` and remember its predecessor map.
        visitor = init_visitor(g, start)
        cpbfs_search(g, source=start, visitor=visitor, terminals=targets,
                     forbidden_nodes=blocked,
                     count_threshold=k)
        r2pred[start] = visitor.pred
        return visitor

    # from cand_source to terminals
    vis = _search(cand_source, terminals, terminals)
    edges.update(((u, v), c)
                 for u, v, c in get_edges(vis.dist, cand_source, terminals))

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:
        root_time = infection_times[root]
        if strictly_smaller:
            late_terminals = {t for t in terminals
                              if infection_times[t] > root_time}
        else:
            # respect what the paper presents
            late_terminals = {t for t in terminals
                              if infection_times[t] >= root_time}

        # no one can connect to cand_source
        late_terminals = late_terminals - {cand_source}
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))

        vis = _search(root,
                      list(late_terminals),
                      list(set(terminals) - set(late_terminals)))
        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)

    for _ in range(g.num_vertices()):
        gc.add_vertex()

    for endpoints in edges:
        gc.add_edge(endpoints[0], endpoints[1])

    # Costs are written in the same order the edges were added above.
    eweight = gc.new_edge_property('int')
    costs = np.array(list(edges.values()))
    eweight.set_2d_array(costs)
    return gc, eweight, r2pred
def steiner_tree_mst(g, root, infection_times, source, terminals,
                     closure_builder=build_closure,
                     strictly_smaller=True,
                     return_closure=False,
                     k=-1,
                     debug=False,
                     verbose=True):
    """Build a tree over `terminals` rooted at `root` via a closure-graph arborescence.

    Steps, as performed below:
      1. Build the closure graph with `closure_builder` (using `root` as the
         candidate source).
      2. Convert it to networkx with `gt2nx` and compute a minimum spanning
         arborescence there.
      3. Copy the arborescence into a graph-tool graph (`mst_tree`).
      4. For each non-root terminal, ordered by infection time and then by
         BFS order in `mst_tree`, search the original graph `g` from that
         terminal towards the nodes already in the tree and add the edges of
         the path to the closest reachable tree node.

    NOTE(review): the `source` parameter is not referenced anywhere in the
    body — confirm whether it is kept only for signature compatibility.

    Returns:
        `t`, a directed graph with `g.num_vertices()` vertices, the collected
        tree edges, and a vertex filter that keeps only edge endpoints.
        With `return_closure=True`, returns `(t, gc, mst_tree)` instead.
        If networkx cannot find an arborescence, returns `None`
        (or `(None, gc, None)` with `return_closure=True`).
    """
    gc, eweight, r2pred = closure_builder(g, root, terminals,
                                          infection_times,
                                          strictly_smaller=strictly_smaller,
                                          k=k,
                                          debug=debug,
                                          verbose=verbose)

    # get the minimum spanning arborescence
    # graph_tool does not provide minimum_spanning_arborescence
    if verbose:
        print('getting mst')
    gx = gt2nx(gc, root, terminals, edge_attrs={'weight': eweight})
    try:
        nx_tree = nx.minimum_spanning_arborescence(gx, 'weight')
    except nx.exception.NetworkXException:
        # no arborescence could be computed: signal failure with None
        if debug:
            print('fail to find mst')
        if return_closure:
            return None, gc, None
        else:
            return None

    if verbose:
        print('returning tree')

    # copy the networkx arborescence back into a graph-tool graph that has
    # the same number of vertices as the original graph
    mst_tree = Graph(directed=True)
    for _ in range(g.num_vertices()):
        mst_tree.add_vertex()

    for u, v in nx_tree.edges():
        mst_tree.add_edge(u, v)

    if verbose:
        print('extract edges from original graph')

    # extract the edges from the original graph

    # sort observations by time
    # and also topological order
    topological_index = {}
    # position of each edge target in the BFS traversal of mst_tree from root
    for i, e in enumerate(bfs_iterator(mst_tree, source=root)):
        topological_index[int(e.target())] = i
    sorted_obs = sorted(
        set(terminals) - {root},
        key=lambda o: (infection_times[o], topological_index[o]))

    tree_nodes = {root}
    tree_edges = set()
    # print('root', root)
    for u in sorted_obs:
        if u in tree_nodes:
            if debug:
                print('{} covered already'.format(u))
            continue
        # print(u)
        # first in-edge of u in mst_tree; this rebinds u to the edge's
        # second endpoint as an int
        v, u = map(int, next(mst_tree.vertex(u).in_edges()))  # v is ancestor
        tree_nodes.add(v)

        # terminals infected strictly later than u are passed as forbidden
        # nodes to the search below
        late_nodes = [n for n in terminals if infection_times[n] > infection_times[u]]
        vis = init_visitor(g, u)
        # from child to any tree node, including v

        cpbfs_search(g, source=u, terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)
        # dist, pred = shortest_distance(g, source=u, pred_map=True)
        # nodes with a positive recorded distance
        # NOTE(review): presumably these are the nodes the search reached — confirm
        node_set = {v for v, d in vis.dist.items() if d > 0}
        reachable_tree_nodes = node_set.intersection(tree_nodes)
        # closest tree node according to the visitor's distances;
        # min() raises ValueError if no tree node was reached
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        edges = {(j, i) for i, j in edges}  # need to reverse it
        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(v, u))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))
        # every endpoint of the new path edges now belongs to the tree
        tree_nodes |= {u for e in edges for u in e}

        tree_edges |= edges

    # assemble the result graph on the same vertex set as g
    t = Graph(directed=True)
    for _ in range(g.num_vertices()):
        t.add_vertex()

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    # keep only vertices that are an endpoint of some tree edge
    tree_nodes = {u for e in tree_edges for u in e}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    else:
        return t
Example #42
0
class UNISrt(object):
    '''
    This class represents UNIS in the local runtime environment (local to the apps).
    All UNIS models defined in periscope/settings.py are represented as
    a corresponding item of the 'resources' list in this class.
    At the initialization phase, UNISrt creates a cache of the UNIS db (and
    keeps it consistent in a best-effort manner).

    NOTE: this class is written for Python 2 (print statements, dict.iteritems).
    '''
    
    # should move this methods to utils
    def validate_add_defaults(self, data):
        '''
        validate data against the schema named by its "$schema" key and fill
        in the schema defaults in place; does nothing (returns None) when
        data has no "$schema" key
        '''
        if "$schema" not in data:
            return None
        schema = self._schemas.get(data["$schema"])
        validictory.validate(data, schema)
        add_defaults(data, schema)
        
    def __init__(self):
        '''
        load the configuration, connect to UNIS, pull every configured
        resource once, and build the graph of existing nodes and links
        '''
        logger.info("starting UNIS Network Runtime Environment...")
        # file configuration is merged on top of the standalone defaults
        fconf = get_file_config(nre_settings.CONFIGFILE)
        self.conf = deepcopy(nre_settings.STANDALONE_DEFAULTS)
        merge_dicts(self.conf, fconf)
        
        self.unis_url = str(self.conf['properties']['configurations']['unis_url'])
        self.ms_url = str(self.conf['properties']['configurations']['ms_url'])
        self._unis = unis_client.UNISInstance(self.conf)
        self.time_origin = int(time())
        
        self._schemas = SchemaCache()
        self._resources = self.conf['resources']
        
        # clients for other UNIS instances, keyed by instance url
        self._subunisclient = {}
        
        # every resource gets an attribute holding its 'new' and 'existing' objects
        for resource in self._resources:
            setattr(self, resource, {'new': {}, 'existing': {}})
        
        # construct the hierarchical representation of the network
        for resource in self._resources:
            # only pullRuntime once at the beginning, as pubsub will only update
            # them later when resources are modified on the server
            self.pullRuntime(self, self._unis, self._unis.get(resource), resource, False)
        
        # construct the graph representation of the network, of which this NRE is in charge
        self.g = Graph()
        # key of an existing node -> its vertex in self.g
        self.nodebook = {}
        for key in self.nodes['existing'].keys():
            self.nodebook[key] = self.g.add_vertex()
        
        # only links that carry both endpoints become edges
        for key, link in self.links['existing'].iteritems():
            if hasattr(link, 'src') and hasattr(link, 'dst'):
                self.g.add_edge(self.nodebook[link.src.node.selfRef],\
                                self.nodebook[link.dst.node.selfRef], add_missing=False)
        
    def pullRuntime(self, mainrt, currentclient, data, resource_name, localnew):
        '''
        this function should convert the input data into Python runtime objects

        - data with both 'redirect' and 'instances' keys: recurse into every
          listed instance (except the hard-coded skipped ones) through a
          per-instance client
        - data that is a non-empty list: build one model object per record,
          oldest first, then start a subscription thread for the resource
        '''
        model = resources_classes[resource_name]
        
        print resource_name
        if data and 'redirect' in data and 'instances' in data:
            if len(data['instances']) == 0:
                return
            
            for instance_url in data['instances']:
                # TODO: needs SSL, not figured out yet, pretend it does not exist for now
                if instance_url == 'https://dlt.crest.iu.edu:9000' or instance_url == 'http://iu-ps01.crest.osris.org:8888'\
                                    or instance_url == 'http://dev.crest.iu.edu:8888' or instance_url == 'http://unis.crest.iu.edu:8890'\
                                    or instance_url == 'http://monitor.crest.iu.edu:9000' or instance_url == 'http://sc-ps01.osris.org:8888': 
                    continue
                
                # create the client for this instance once and reuse it afterwards
                if instance_url not in self._subunisclient:
                    conf_tmp = deepcopy(self.conf)
                    conf_tmp['properties']['configurations']['unis_url'] = instance_url
                    conf_tmp['properties']['configurations']['ms_url'] = instance_url # assume ms is the same as unis
                    self._subunisclient[instance_url] = unis_client.UNISInstance(conf_tmp)
                
                unis_tmp = self._subunisclient[instance_url]
                
                self.pullRuntime(mainrt, unis_tmp, unis_tmp.get(resource_name), resource_name, False)
                    
        elif data and isinstance(data, list):
            # sorting: in unisrt res dictionaries, a newer record of same index will be saved
            data.sort(key=lambda x: x.get('ts', 0), reverse=False)
            for v in data:
                model(v, mainrt, currentclient, localnew)
                
            # NOTE(review): the thread is named after currentclient but is handed
            # self._unis as the client to subscribe with — confirm this is intended
            threading.Thread(name=resource_name + '@' + currentclient.config['unis_url'],\
                             target=self.subscribeRuntime, args=(resource_name, self._unis,)).start()
        
    def pushRuntime(self, resource_name):
        '''
        this function uploads the 'new' entries of the specified resource to
        UNIS, removing each entry from the 'new' dictionary as it is pushed
        '''
        def pushEntry(k, entry):
            data = entry.prep_schema()
            # the first three '/'-separated parts of selfRef identify the UNIS instance
            groups = data['selfRef'].split('/')
            unis_str = '/'.join(groups[:3])
            if unis_str in self._subunisclient:
                uc = self._subunisclient[unis_str]
            else:
                uc = self._unis
            
            # use attribute "ts" to indicate an object downloaded from UNIS, and
            # only UPDATE the values of this kind of objects.
            if hasattr(entry, 'ts'):
                url = '/' + resource_name + '/' + getattr(entry, 'id')
                uc.put(url, data)
            else:
                url = '/' + resource_name
                uc.post(url, data)
                
        # drain the 'new' dictionary; popitem raises KeyError once it is empty
        while True:
            try:
                key, value = getattr(self, resource_name)['new'].popitem()
                
                if not isinstance(value, list):
                    pushEntry(key, value)
                else:
                    for item in value:
                        pushEntry(key, item)
                    
            except KeyError:
                return
    
    def subscribeRuntime(self, resource_name, currentclient):
        '''
        subscribe a channel(resource) to UNIS, and listen for any new updates on that channel
        '''
        #name = resources_subscription[resource_name]
        name = resource_name
        model = resources_classes[resource_name]
        
        #url = self.unis_url.replace('http', 'ws', 1)
        unis_url = currentclient.config['unis_url']
        # replace the first 'http' of the url with 'ws' for the websocket endpoint
        url = unis_url.replace('http', 'ws', 1)
        url = url + '/subscribe/' + name
        
        ws = create_connection(url)
        
        # build a model object from every message until an empty one is received
        data = ws.recv()
        while data:
            model(json.loads(data), self, currentclient, False)
            data = ws.recv()
        ws.close()

    def poke_data(self, query):
        '''
        try to address this issue:
        - ms stores lots of data, and may be separated from unis
        - this data is accessible via /data url. They shouldn't be kept on runtime environment (too much)
        - however, sometimes they may be needed. e.g. HELM schedules traceroute measurement, and needs the
          results to schedule following iperf tests
        '''
        return self._unis.get('/data/' + query)
    
    def post_data(self, data):
        '''
        same as poke_data, the other way around
        '''
        #headers = self._def_headers("data")
        print data
        return self._unis.pc.do_req('post', '/data', data)#, headers)
Example #43
0
from graph_tool.all import Vertex, Graph

class MyVertex:
    """Vertex wrapper that adds a ``halted`` flag on top of a graph vertex.

    A new vertex is added to ``g`` on construction and stored as ``v``.
    Attributes that are not found on the wrapper itself are looked up on
    the wrapped vertex.
    """

    def __init__(self, g):
        self.g = g
        self.v = g.add_vertex()
        self.halted = False

    def vote_for_halt(self):
        """Mark this vertex as halted."""
        self.halted = True

    def __getattr__(self, attr):
        """Delegate attributes missing on the wrapper to the wrapped vertex.

        Raises:
            AttributeError: if the wrapped vertex has no such attribute, or
                if the wrapper has no wrapped vertex yet.
        """
        # __getattr__ only runs after normal lookup failed.  Reading
        # ``self.v`` here when ``v`` has not been set (instance created via
        # __new__, copy or unpickling) would re-enter __getattr__ forever,
        # so the wrapped vertex is fetched straight from the instance dict.
        try:
            wrapped = self.__dict__['v']
        except KeyError:
            raise AttributeError(attr)
        return getattr(wrapped, attr)

# Demo (Python 2 print statements): wrap two vertices, connect them, and show
# that unknown attributes are forwarded to the wrapped vertex.
if __name__ == "__main__":        
    g = Graph()
    v1 = MyVertex(g)
    v2 = MyVertex(g)
    # the wrappers themselves are passed as the edge endpoints
    g.add_edge(v1, v2)
    v1.vote_for_halt()
    print v1.out_degree()  # will print 1
    print v1.halted  # will print True
    print v1.foo  # will raise error: AttributeError: 'Vertex' object has no attribute 'foo'
from graph_tool.flow import min_cut

filename = '../pairparser/results/p_pairs7.txt'
print(filename)
coefficient = 3

word_dict = {}
add_dict = {}

# Collect the first space-separated token of every line of bad.txt.
# The file is read inside a ``with`` block so its handle is closed instead
# of being leaked when ``f`` is bound to the pairs file below.
with open('bad.txt', 'r', encoding="utf-8") as bad_file:
    for s in bad_file:
        add_dict[s.split(' ')[0]] = 1

f = open(filename, 'r', encoding="utf-8")
pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("int")
ver_names = pairs_graph.new_vertex_property("string")
# Each data line is split on single spaces into: pos, neg, word1, word2.
for line in f:
    spl_line = line.split(' ')

    # lines without any space carry no pair
    if len(spl_line) == 1:
        continue

    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg

    # NOTE(review): '\uefef' may have been meant as the BOM '\ufeff' — confirm
    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')
 def __init__(self):
     """Create the empty cookie graph and start the cookie receiver."""
     self.graph = GT_Graph()
     self.cookies = {}
     receiver = CookieRecvr(self)
     self.cookierecvr = receiver
     receiver.start()
from graph_tool.flow import min_cut

filename = 'buf.txt'
print(filename)
coefficient = 3

word_dict = {}
add_dict = {}

# Collect the first space-separated token of every line of bad.txt.
# The file is read inside a ``with`` block so its handle is closed instead
# of being leaked when ``f`` is bound to the pairs file below.
with open('bad.txt', 'r', encoding="utf-8") as bad_file:
    for s in bad_file:
        add_dict[s.split(' ')[0]] = 1

f = open(filename, 'r', encoding="utf-8")
pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("int")
ver_names = pairs_graph.new_vertex_property("string")
# Each data line is split on single spaces into: pos, neg, word1, word2.
for line in f:
    spl_line = line.split(' ')

    # lines without any space carry no pair
    if len(spl_line) == 1:
        continue

    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg

    # NOTE(review): '\uefef' may have been meant as the BOM '\ufeff' — confirm
    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')
def gen_graph((repo, events)):
    graph = Graph()

    repo_on_graph = graph.new_graph_property('string')
    repo_on_graph[graph] = repo
    graph.graph_properties['repo_on_graph'] = repo_on_graph

    language_on_graph = graph.new_graph_property('string')
    language_on_graph[graph] = events[0]['language']
    graph.graph_properties['language_on_graph'] = language_on_graph

    events_on_vertices = graph.new_vertex_property('object')
    graph.vertex_properties['events_on_vertices'] = events_on_vertices

    actors_on_vertices = graph.new_vertex_property('string')
    graph.vertex_properties['actors_on_vertices'] = actors_on_vertices

    weights_on_edges = graph.new_edge_property('long double')
    graph.edge_properties['weights_on_edges'] = weights_on_edges

    # pre_vertices = []
    pre_events_map = {}
    pre_vertices_map = {}

    # owner_vertex = graph.add_vertex()
    # owner = repo.split('/')[0]
    # actors_on_vertices[owner_vertex] = owner
    # pre_vertices_map[owner] = owner_vertex

    events = sorted(events, key=lambda x: x['created_at'])

    for event in events:
        actor = event['actor']

        if actor in pre_events_map:
            continue

        created_at = event['created_at']

        vertex = graph.add_vertex()
        events_on_vertices[vertex] = event
        actors_on_vertices[vertex] = actor

        if 'actor-following' not in event:
            continue

        following = set(event['actor-following'])
        commons = following.intersection(pre_vertices_map.keys())

        # pre_vertices.append(vertex)

        # if len(commons) == 0:
        #     edge = graph.add_edge(vertex, owner_vertex)
        #     weights_on_edges[edge] = 1.0

        for pre_actor in commons:
            edge = graph.add_edge(vertex, pre_vertices_map[pre_actor])
            interval =\
                (created_at - pre_events_map[pre_actor]['created_at']).days
            weight = 1.0 / fib(interval + 2)
            weights_on_edges[edge] = weight

        pre_events_map[actor] = event
        pre_vertices_map[actor] = vertex

    return graph
def build_region_closure(g, root, regions, infection_times, obs_nodes, debug=False):
    """Return a closure graph on the regions (plus a region for the root).

    A copy of `regions` is extended with a single-node region for `root`
    whose head time is -inf.  For every pair of regions, the head of the
    region with the later head time is searched from, towards the nodes of
    the other region that are earlier than that head time; if one is
    reachable, an edge from the earlier region to the later one is recorded
    with the search distance as its weight.

    Returns (gc, eweight, original_edge_info): the directed region graph,
    its 'int' edge weights, and for each region edge a dict with the
    distance, the visitor's predecessor map and the original end points.
    """
    regions = copy(regions)
    regions[len(regions)] = {'nodes': {root},
                             'head': root,
                             'head_time': -float('inf')}

    gc = Graph(directed=True)
    for _ in range(len(regions)):
        gc.add_vertex()

    def get_pseudo_time(n):
        # the root counts as earlier than every other node
        if n == root:
            return - float('inf')
        return infection_times[n]

    # connect each region
    gc_edges = []
    original_edge_info = {}
    for i, j in combinations(regions, 2):
        # make group i the one with *later* head
        if regions[i]['head_time'] < regions[j]['head_time']:
            i, j = j, i

        if debug:
            print('i, j={}, {}'.format(i, j))

        # only need to connect head i to one of the nodes in group j
        # where nodes in j have time stamp < head i
        # then an edge from region j to region i (because j is earlier)
        late_region = regions[i]
        early_region = regions[j]
        head_i = late_region['head']

        targets = [n for n in early_region['nodes']
                   if get_pseudo_time(n) < late_region['head_time']]

        if debug:
            print('head_i: {}'.format(head_i))
            print('targets: {}'.format(targets))
            print('regions[j]["nodes"]: {}'.format(early_region['nodes']))

        if not targets:
            continue

        visitor = init_visitor(g, head_i)
        blocked_early = set(early_region['nodes']) - set(targets)
        forbidden_nodes = list(set(late_region['nodes']) | blocked_early)

        if debug:
            print('forbidden_nodes: {}'.format(forbidden_nodes))

        # NOTE: count_threshold = 1
        cpbfs_search(g, source=head_i,
                     terminals=targets,
                     forbidden_nodes=forbidden_nodes,
                     visitor=visitor,
                     count_threshold=1)

        reachable_targets = [t for t in targets if visitor.dist[t] > 0]

        if debug:
            print('reachable_targets: {}'.format(reachable_targets))

        if not reachable_targets:
            # cannot reach there
            continue

        source = min(reachable_targets, key=visitor.dist.__getitem__)
        dist = visitor.dist[source]

        assert dist > 0

        gc_edges.append((j, i, dist))
        original_edge_info[(j, i)] = {
            'dist': dist,
            'pred': visitor.pred,
            'original_edge': (source, head_i)
        }

    # first add every region edge, then attach the weights
    for tail, head, _ in gc_edges:
        gc.add_edge(tail, head)

    eweight = gc.new_edge_property('int')
    for tail, head, cost in gc_edges:
        region_edge = gc.edge(gc.vertex(tail), gc.vertex(head))
        eweight[region_edge] = cost

    return gc, eweight, original_edge_info
def vytvořím_graph_tool_graf():
    """Create a small graph-tool graph and save it as GraphML.

    Builds a graph with two vertices and one edge, creates one vertex
    property of type double, one of type vector<int>, one edge property
    holding a Python object and one boolean graph property, sets a value in
    each, and writes the graph to ``./data/graph_tool.graphml``.
    """
    from graph_tool.all import Graph
    
    graf = Graph()
    u1 = graf.add_vertex()
    u2 = graf.add_vertex()
    graf.add_edge(u1,  u2)
    
    vprop_double = graf.new_vertex_property("double")            # Double-precision floating point
    vprop_double[graf.vertex(1)] = 3.1416

    vprop_vint = graf.new_vertex_property("vector<int>")         # Vector of ints
    vprop_vint[graf.vertex(0)] = [1, 3, 42, 54]

    eprop_dict = graf.new_edge_property("object")                # Arbitrary python object. In this case, a dictionary.
    # Use the next() builtin: Python 3 iterators have no .next() method, so
    # the former graf.edges().next() raised AttributeError.
    first_edge = next(graf.edges())
    eprop_dict[first_edge] = {"foo": "bar", "gnu": 42}

    gprop_bool = graf.new_graph_property("bool")                  # Boolean
    gprop_bool[graf] = True
    
    graf.save('./data/graph_tool.graphml',  fmt='xml')
    [a, b] = get_indexes(ver_attr, low, hig)
    plt.plot(range(0, a + 1), ver_attr[:(a + 1)], 'go')
    plt.plot(range(a + 1, b + 1), ver_attr[(a + 1):(b + 1)], 'bo')
    plt.plot(range(b + 1, len(ver_attr)), ver_attr[(b + 1):], 'ro')

print("-------------------------------------------------")    
    
filename = '../pairparser/results/en_pairs(7).txt'
ftag = '7_3imp'
coefficient = 3

word_dict = {} # dict with indexes of nodes by word

f = open(filename, 'r', encoding="utf-8")

pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("double")
ver_names = pairs_graph.new_vertex_property("string")
ver_id = pairs_graph.new_vertex_property("int")
for line in f:
    spl_line = line.split(' ')

    if len(spl_line) == 1:
        continue

    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg

    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')