Example #1
def gen_pagerank(graph):
    if graph.num_edges():
        pr = pagerank(
            graph, weight=graph.edge_properties['weights_on_edges'])
    else:
        pr = pagerank(graph)

    pr.a /= pr.a.min()  # rescale in place so the smallest PageRank becomes 1.0
    graph.vertex_properties['pagerank'] = pr

    return graph
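A minimal usage sketch (an assumption: `pagerank` here is `graph_tool.centrality.pagerank`, which the call signature implies):

from graph_tool import Graph
from graph_tool.centrality import pagerank

g = Graph(directed=True)
v0, v1, v2 = g.add_vertex(), g.add_vertex(), g.add_vertex()
w = g.new_edge_property('double')
for s, t, wt in [(v0, v1, 1.0), (v1, v2, 2.0), (v2, v0, 0.5)]:
    w[g.add_edge(s, t)] = wt
g.edge_properties['weights_on_edges'] = w

g = gen_pagerank(g)
print(g.vertex_properties['pagerank'].a)  # scores rescaled so the minimum is 1.0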
Example #2
def pageRankBiDi(g):
    # calculate the product of the PageRank and reverse PageRank for each vertex
    pr = centr.pagerank(g)

    g.set_reversed(True)
    rpr = centr.pagerank(g)
    g.set_reversed(False)

    for v in g.vertices():
        pr[v] = pr[v] * rpr[v]

    return pr
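A usage sketch; `centr` is assumed to be `graph_tool.centrality`, as the calls imply:

import graph_tool.centrality as centr
from graph_tool import Graph

g = Graph(directed=True)
g.add_vertex(4)
for s, t in [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3)]:
    g.add_edge(s, t)

score = pageRankBiDi(g)
print(score.a)  # large only where a vertex ranks well in both directions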
Example #3
def get_metric(ggt, metric, n_nodes, n_edges):
    if "d" == metric:
        # Density
        if n_nodes <= 1:
            value = 0.0
        else:
            value = (2.0 * n_edges) / (n_nodes * (n_nodes - 1.0))
        ggt.gp[metric] = ggt.new_gp("float", val=value)
    elif "dg" == metric:
        # Degree
        if n_nodes <= 1:
            value = np.zeros(n_nodes, dtype=np.float32)
        else:
            value = ggt.degree_property_map('total').get_array()
        ggt.vp[metric] = ggt.new_vp("double", vals=value)
    elif "dgc" == metric:
        # Degree centrality
        if n_nodes <= 1:
            value = np.zeros(n_nodes, dtype=np.float32)
        else:
            value = ggt.degree_property_map('total').get_array() / (n_nodes - 1.0)
        ggt.vp[metric] = ggt.new_vp("double", vals=value)
    elif "cnw" == metric:
        # Clustering coefficient (non-weighted)
        value = local_clustering(ggt).get_array()
        ggt.vp[metric] = ggt.new_vp("double", vals=value)
    elif "cw" == metric:
        # Clustering coefficient (weighted)
        value = local_clustering(ggt, weight=ggt.ep.weight).get_array()
        ggt.vp[metric] = ggt.new_vp("double", vals=value)
    elif "pgr" == metric:
        # PageRank
        value = pagerank(ggt).get_array()
        ggt.vp[metric] = ggt.new_vp("double", vals=value)
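A sketch driving the dispatcher for the metrics that need no edge weights (the "cw" branch additionally expects an `ep.weight` property):

import numpy as np
from graph_tool import Graph
from graph_tool.centrality import pagerank
from graph_tool.clustering import local_clustering

ggt = Graph(directed=False)
ggt.add_vertex(4)
for s, t in [(0, 1), (1, 2), (2, 0), (2, 3)]:
    ggt.add_edge(s, t)

for metric in ('d', 'dg', 'dgc', 'cnw', 'pgr'):
    get_metric(ggt, metric, ggt.num_vertices(), ggt.num_edges())
print(ggt.gp['d'], ggt.vp['pgr'].a)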
Example #4
def pagerank(self):
    pr = {}
    dates = sorted(self.graphs.keys())  # Python 3: dict.iterkeys() no longer exists
    for date in dates:
        if self.graphs[date].num_vertices() > 0:
            pr[date] = gtc.pagerank(self.graphs[date])
    self.pr = pr
    return pr
Example #5
    def calculate_pageranks(self):
        pagerank_dict = ct.pagerank(self.graph, weight=self.ew)
        result_dict = dict()

        for anchor in self.anchor_dictionary:
            result_dict[anchor] = [(concept, pagerank_dict[self.massive_dict[concept]])
                                   for concept in self.anchor_dictionary[anchor]]
            result_dict[anchor].sort(key=lambda x: x[1], reverse=True)
        self.pagerank = result_dict
        self.pr_result = [(anchor, concept, score)
                          for anchor in self.pagerank
                          for concept, score in self.pagerank[anchor]]
Example #6
def _pagerank_centrality(weighted_projection, **kwargs):
    if isinstance(weighted_projection, nx.DiGraph):
        return nx.pagerank(weighted_projection, **kwargs)
    else:
        from graph_tool.centrality import pagerank

        G = weighted_projection
        pr = pagerank(G, weight=G.ep.weights)
        pr = {G.vp.node_labels[v]: pr[v] for v in G.vertices()}
        return pr
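A usage sketch for the NetworkX branch (the graph-tool branch additionally expects `weights` edge and `node_labels` vertex properties on the projection):

import networkx as nx

G = nx.DiGraph()
G.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 2.0), ('c', 'a', 0.5)])
print(_pagerank_centrality(G, weight='weight'))  # kwargs are forwarded to nx.pagerank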
Example #7
    def get_dataframe_all_topolog_metrics(self):
        graph = self.get_graph()
        eprop_trust = graph.new_edge_property('double')

        start_time = time.time()
        for e in graph.edges():
            v_name_s = graph.vertex_properties['name_proteins'][e.source()]
            v_number_s = self.dict_genes[v_name_s]
            v_name_t = graph.vertex_properties['name_proteins'][e.target()]
            v_number_t = self.dict_genes[v_name_t]
            eprop_trust[e] = self.adjacency_matrix[v_number_s, v_number_t]
        graph.edge_properties['trust'] = eprop_trust
        print('confidence score computed in:',
              '--- %s seconds ---' % (time.time() - start_time))

        list_metrics = [
            'betweenness', 'pagerank', 'closeness', 'katz', 'hits_authority',
            'hits_hub', 'eigenvector', 'eigentrust'
        ]  # 'trust_transitivity'

        dict_map = {}
        start_time = time.time()
        dict_map['betweenness'] = ct.betweenness(graph)[0]
        dict_map['pagerank'] = ct.pagerank(graph)
        dict_map['closeness'] = ct.closeness(graph)
        dict_map['katz'] = ct.katz(graph)
        dict_map['hits_authority'] = ct.hits(graph)[1]
        dict_map['hits_hub'] = ct.hits(graph)[2]
        dict_map['eigenvector'] = ct.eigenvector(graph)[1]
        # print('trust_transitivity')
        # dict_map['trust_transitivity'] = ct.trust_transitivity(graph, graph.edge_properties['trust'])
        print('all metrics except eigentrust computed in:',
              '--- %s seconds ---' % (time.time() - start_time))
        start_time = time.time()
        dict_map['eigentrust'] = ct.eigentrust(graph,
                                               graph.edge_properties['trust'],
                                               max_iter=10**6)
        print('eigentrust computed in:',
              '--- %s seconds ---' % (time.time() - start_time))
        start_time = time.time()
        dict_metrics = {}
        for key in list_metrics:
            dict_metrics[key] = []
        for v in graph.vertices():
            for metric in list_metrics:
                dict_metrics[metric].append(dict_map[metric][v])
        dataframe_all_topolog_metrics = pd.DataFrame(dict_metrics)
        dataframe_all_topolog_metrics.index = graph.vertex_properties[
            'name_proteins']
        print('built the metrics dataframe in:',
              '--- %s seconds ---' % (time.time() - start_time))
        return dataframe_all_topolog_metrics
Example #8
def pagerank_scores(g, obs, weight=None, eps=0.0):
    pers = g.new_vertex_property('float')
    pers.a += eps  # small uniform baseline so the restart vector is strictly positive

    for o in obs:
        pers.a[o] = 1

    pers.a /= pers.a.sum()
    rank = pagerank(g, pers=pers, weight=weight)

    if rank.a.sum() == 0:
        raise ValueError('PageRank score all zero')

    p = rank.a / rank.a.sum()
    return p
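A sketch of personalized PageRank seeded on observed vertices (eps > 0 keeps the restart vector strictly positive off the seeds):

from graph_tool import Graph
from graph_tool.centrality import pagerank

g = Graph(directed=True)
g.add_vertex(5)
for s, t in [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]:
    g.add_edge(s, t)

p = pagerank_scores(g, obs=[0, 2], eps=1e-3)
print(p)  # normalized scores, biased toward the seeds' neighborhoods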
Example #9
def PR_subgraph(graph, subgraph, eps, threshold):
    pr = gc.pagerank(subgraph, epsilon=eps)
    vec_dict = dict(enumerate(pr.a))
    pr_list = []
    norm_dict = normalize_dictionary(vec_dict)
    for poz in norm_dict:
        poz_initial = subgraph.vertex_properties["name"][poz]
        pr_list.append((poz_initial, norm_dict[poz]))
    pr_list = sorted(pr_list, key=lambda tup: tup[1], reverse=True)
    return pr_list
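The snippet relies on a `normalize_dictionary` helper that is not shown; a plausible, hypothetical stand-in that rescales the scores to sum to one:

def normalize_dictionary(d):
    # hypothetical helper: divide each value by the total so scores sum to 1
    total = sum(d.values())
    if total == 0:
        return dict(d)
    return {k: v / total for k, v in d.items()}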
Example #10
def f_pagerank(D, stats, options={'features': [], 'skip_features': []}):
    """Record the maximum PageRank value and its vertex in `stats`; optionally plot the distribution."""

    if 'pagerank' not in options['features']:
        log.debug('Skipping pagerank')
        return

    pagerank_list = pagerank(D).get_array()

    pr_max = (0.0, 0)
    idx = 0

    # iterate and collect max value and idx
    for pr_val in pagerank_list:
        pr_max = (pr_val, idx) if pr_val >= pr_max[0] else pr_max
        idx += 1

    stats['max_pagerank'], stats['max_pagerank_vertex'] = pr_max[0], str(
        D.vertex_properties['name'][pr_max[1]])

    # plot pagerank distribution
    if 'plots' in options['features'] and (
            'skip_features' not in options
            or 'plots' not in options['skip_features']):
        pagerank_list[::-1].sort()

        values_counted = collections.Counter(pagerank_list)
        values, counted = zip(*values_counted.items())

        with lock:
            fig, ax = plt.subplots()
            plt.plot(values, counted)

            plt.title('PageRank Histogram')
            plt.ylabel('Frequency')
            plt.xlabel('PageRank Value')

            ax.set_xticklabels(values)

            ax.set_xscale('log')
            ax.set_yscale('log')

            plt.tight_layout()
            plt.savefig('/'.join([
                os.path.dirname(stats['path_edgelist']),
                'distribution_pagerank.pdf'
            ]))
            log.debug('done plotting pagerank distribution')
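A usage sketch (assumptions: the graph carries a 'name' vertex property, and plotting stays disabled so no `stats['path_edgelist']` is needed):

from graph_tool import Graph
from graph_tool.centrality import pagerank

D = Graph(directed=True)
D.add_vertex(3)
names = D.new_vertex_property('string')
for i, label in enumerate(['a', 'b', 'c']):
    names[D.vertex(i)] = label
D.vertex_properties['name'] = names
for s, t in [(0, 1), (1, 2), (2, 0)]:
    D.add_edge(s, t)

stats = {}
f_pagerank(D, stats, options={'features': ['pagerank'], 'skip_features': []})
print(stats['max_pagerank'], stats['max_pagerank_vertex'])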
Example #11
def pagerank_scores(g, obs, eps=0.0, weights=None):
    pers = g.new_vertex_property('float')
    pers.a += eps  # small uniform baseline so the restart vector is strictly positive

    for o in obs:
        pers.a[o] += 1

    pers.a /= pers.a.sum()
    rank = pagerank(g, pers=pers, weight=weights)

    for o in obs:
        rank[o] = 0  # cannot select obs nodes

    if rank.a.sum() == 0:
        raise ValueError('PageRank score all zero')

    p = rank.a / rank.a.sum()
    return p
Example #12
    def __init__(self, nodes_info=None, links_info=None, file_name=None):
        self.g = Graph()

        if nodes_info and links_info:
            self.nodes_info = nodes_info
            self.links_info = links_info
            self.g.vertex_properties["name"] = self.g.new_vertex_property(
                'string')
            self.g.vertex_properties["id"] = self.g.new_vertex_property(
                'int32_t')
            self.g.edge_properties["weight"] = self.g.new_edge_property(
                'int32_t')

            self.create_network()
            self.g.vertex_properties["pagerank"] = pagerank(
                self.g, weight=self.g.edge_properties["weight"])
            self.g.vertex_properties[
                "degree_centrality"] = self.degree_centrality()

        elif file_name:
            self.load_network(file_name)
Example #13
def run(input_file: KGTKFiles, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output_stats,
        vertex_in_degree, vertex_out_degree, vertex_pagerank, vertex_auth, vertex_hubs):
    from kgtk.exceptions import KGTKException
    def infer_index(h, options=[]):
        for o in options:
            if o in h:
                return h.index(o)
        return -1

    def infer_predicate(h, options=[]):
        for o in options:
            if o in h:
                return o
        return ''

    v_prop_dict = {
        'vertex_pagerank': vertex_pagerank,
        'vertex_hubs': vertex_hubs,
        'vertex_auth': vertex_auth
    }
    try:
        # import modules locally
        import socket
        from graph_tool import load_graph_from_csv
        from graph_tool import centrality
        import kgtk.gt.analysis_utils as gtanalysis
        from pathlib import Path
        import sys
        import csv
        csv.field_size_limit(sys.maxsize)

        filename: Path = KGTKArgumentParser.get_input_file(input_file)

        # hardcoded values useful for the script. Perhaps some of them should be exposed as arguments later
        directions = ['in', 'out', 'total']
        id_col = 'name'

        with open(filename, 'r') as f:
            header = next(f).split('\t')
            header = [h.strip() for h in header]
            subj_index = infer_index(header, options=['node1', 'subject'])
            obj_index = infer_index(header, options=['node2', 'object', 'value'])
            predicate = infer_predicate(header, options=['label', 'predicate', 'relation', 'relationship'])
            p = []
            for i, header_col in enumerate(header):
                if i in [subj_index, obj_index]: continue
                p.append(header_col)
        with open(log_file, 'w') as writer:

            writer.write('loading the TSV graph now ...\n')
            G2 = load_graph_from_csv(str(filename),
                                     skip_first=True,
                                     directed=directed,
                                     hashed=True,
                                     ecols=[subj_index, obj_index],
                                     eprop_names=p,
                                     csv_options={'delimiter': '\t'})

            writer.write('graph loaded! It has %d nodes and %d edges\n' % (G2.num_vertices(), G2.num_edges()))
            writer.write('\n###Top relations:\n')
            for rel, freq in gtanalysis.get_topN_relations(G2, pred_property=predicate):
                writer.write('%s\t%d\n' % (rel, freq))

            if compute_degrees:
                writer.write('\n###Degrees:\n')
                for direction in directions:
                    degree_data = gtanalysis.compute_node_degree_hist(G2, direction)
                    max_degree = len(degree_data) - 1
                    mean_degree, std_degree = gtanalysis.compute_avg_node_degree(G2, direction)
                    writer.write(
                        '%s degree stats: mean=%f, std=%f, max=%d\n' % (direction, mean_degree, std_degree, max_degree))

            if compute_pagerank:
                writer.write('\n###PageRank\n')
                v_pr = G2.new_vertex_property('float')
                centrality.pagerank(G2, prop=v_pr)
                G2.properties[('v', 'vertex_pagerank')] = v_pr
                writer.write('Max pageranks\n')
                result = gtanalysis.get_topn_indices(G2, 'vertex_pagerank', 5, id_col)
                for n_id, n_label, pr in result:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, pr))

            if compute_hits:
                writer.write('\n###HITS\n')
                hits_eig, G2.vp['vertex_hubs'], G2.vp['vertex_auth'] = gtanalysis.compute_hits(G2)
                writer.write('HITS hubs\n')
                main_hubs = gtanalysis.get_topn_indices(G2, 'vertex_hubs', 5, id_col)
                for n_id, n_label, hubness in main_hubs:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, hubness))
                writer.write('HITS auth\n')
                main_auth = gtanalysis.get_topn_indices(G2, 'vertex_auth', 5, id_col)
                for n_id, n_label, authority in main_auth:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority))

            sys.stdout.write('node1\tlabel\tnode2\tid\n')
            id_count = 0
            if not output_stats:
                for e in G2.edges():
                    sid, oid = e
                    lbl = G2.ep[predicate][e]
                    sys.stdout.write(
                        '%s\t%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid],
                                              '{}-{}-{}'.format(G2.vp[id_col][sid], lbl, id_count)))
                    id_count += 1

            id_count = 0
            for v in G2.vertices():
                v_id = G2.vp[id_col][v]

                sys.stdout.write(
                    '{}\t{}\t{}\t{}\n'.format(v_id, vertex_in_degree, v.in_degree(),
                                              '{}-{}-{}'.format(v_id, vertex_in_degree, id_count)))
                id_count += 1
                sys.stdout.write(
                    '{}\t{}\t{}\t{}\n'.format(v_id, vertex_out_degree, v.out_degree(),
                                              '{}-{}-{}'.format(v_id, vertex_out_degree, id_count)))
                id_count += 1

                for vprop in G2.vertex_properties.keys():
                    if vprop == id_col: continue
                    sys.stdout.write(
                        '%s\t%s\t%s\t%s\n' % (v_id, v_prop_dict[vprop], G2.vp[vprop][v],
                                              '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count)))
                    id_count += 1

    except Exception as e:
        raise KGTKException('Error: ' + str(e))
Example #14
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        undirected: bool,
        compute_degrees: bool,
        compute_pagerank: bool,
        compute_hits: bool,
        log_file: str,
        statistics_only: bool,
        vertex_in_degree: str,
        vertex_out_degree: str,
        vertex_pagerank: str,
        vertex_auth: str,
        vertex_hubs: str,
        top_n: int,
        errors_to_stdout: bool,
        errors_to_stderr: bool,
        show_options: bool,
        verbose: bool,
        very_verbose: bool,
        **kwargs,  # Whatever KgtkFileOptions and KgtkValueOptions want.
):
    # import modules locally
    from pathlib import Path
    import sys

    from graph_tool import centrality
    from kgtk.exceptions import KGTKException
    import kgtk.gt.analysis_utils as gtanalysis
    from kgtk.gt.gt_load import load_graph_from_kgtk
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    v_prop_dict = {
        'vertex_pagerank': vertex_pagerank,
        'vertex_hubs': vertex_hubs,
        'vertex_auth': vertex_auth
    }
    try:

        # Select where to send error messages, defaulting to stderr.
        error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

        # Build the option structures.
        reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
        value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

        input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
        output_kgtk_file: Path = KGTKArgumentParser.get_output_file(
            output_file)

        # hardcoded values useful for the script. Perhaps some of them should be exposed as arguments later
        directions = ['in', 'out', 'total']
        id_col = 'name'
        output_columns = ["node1", "label", "node2", "id"]

        if verbose:
            print('loading the KGTK input file...\n',
                  file=error_file,
                  flush=True)
        kr: KgtkReader = KgtkReader.open(
            input_kgtk_file,
            error_file=error_file,
            options=reader_options,
            value_options=value_options,
            verbose=verbose,
            very_verbose=very_verbose,
        )
        sub: int = kr.get_node1_column_index()
        if sub < 0:
            print("Missing node1 (subject) column.",
                  file=error_file,
                  flush=True)
        pred: int = kr.get_label_column_index()
        if pred < 0:
            print("Missing label (predicate) column.",
                  file=error_file,
                  flush=True)
        obj: int = kr.get_node2_column_index()
        if obj < 0:
            print("Missing node2 (object) column", file=error_file, flush=True)
        if sub < 0 or pred < 0 or obj < 0:
            kr.close()
            raise KGTKException("Exiting due to missing columns.")

        predicate: str = kr.column_names[pred]

        G2 = load_graph_from_kgtk(kr,
                                  directed=not undirected,
                                  ecols=(sub, obj),
                                  verbose=verbose,
                                  out=error_file)
        if verbose:
            print('graph loaded! It has %d nodes and %d edges.' %
                  (G2.num_vertices(), G2.num_edges()),
                  file=error_file,
                  flush=True)

        kw: KgtkWriter = KgtkWriter.open(output_columns,
                                         output_kgtk_file,
                                         mode=KgtkWriter.Mode.EDGE,
                                         require_all_columns=True,
                                         prohibit_extra_columns=True,
                                         fill_missing_columns=False,
                                         verbose=verbose,
                                         very_verbose=very_verbose)

        with open(log_file, 'w') as writer:
            writer.write('graph loaded! It has %d nodes and %d edges\n' %
                         (G2.num_vertices(), G2.num_edges()))
            writer.write('\n###Top relations:\n')
            for rel, freq in gtanalysis.get_topN_relations(
                    G2, pred_property=predicate):
                writer.write('%s\t%d\n' % (rel, freq))

            if compute_degrees:
                writer.write('\n###Degrees:\n')
                for direction in directions:
                    degree_data = gtanalysis.compute_node_degree_hist(
                        G2, direction)
                    max_degree = len(degree_data) - 1
                    mean_degree, std_degree = gtanalysis.compute_avg_node_degree(
                        G2, direction)
                    writer.write(
                        '%s degree stats: mean=%f, std=%f, max=%d\n' %
                        (direction, mean_degree, std_degree, max_degree))

            if compute_pagerank:
                writer.write('\n###PageRank\n')
                v_pr = G2.new_vertex_property('float')
                centrality.pagerank(G2, prop=v_pr)
                G2.properties[('v', 'vertex_pagerank')] = v_pr
                writer.write('Max pageranks\n')
                result = gtanalysis.get_topn_indices(G2, 'vertex_pagerank',
                                                     top_n, id_col)
                for n_id, n_label, pr in result:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, pr))

            if compute_hits:
                writer.write('\n###HITS\n')
                hits_eig, G2.vp['vertex_hubs'], G2.vp[
                    'vertex_auth'] = gtanalysis.compute_hits(G2)
                writer.write('HITS hubs\n')
                main_hubs = gtanalysis.get_topn_indices(
                    G2, 'vertex_hubs', top_n, id_col)
                for n_id, n_label, hubness in main_hubs:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, hubness))
                writer.write('HITS auth\n')
                main_auth = gtanalysis.get_topn_indices(
                    G2, 'vertex_auth', top_n, id_col)
                for n_id, n_label, authority in main_auth:
                    writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority))

        id_count = 0
        if not statistics_only:
            for e in G2.edges():
                sid, oid = e
                lbl = G2.ep[predicate][e]
                kw.write([
                    G2.vp[id_col][sid], lbl, G2.vp[id_col][oid],
                    '{}-{}-{}'.format(G2.vp[id_col][sid], lbl, id_count)
                ])
                id_count += 1

        id_count = 0
        for v in G2.vertices():
            v_id = G2.vp[id_col][v]
            kw.write([
                v_id, vertex_in_degree,
                str(v.in_degree()), '{}-{}-{}'.format(v_id, vertex_in_degree,
                                                      id_count)
            ])
            id_count += 1
            kw.write([
                v_id, vertex_out_degree,
                str(v.out_degree()), '{}-{}-{}'.format(v_id, vertex_out_degree,
                                                       id_count)
            ])
            id_count += 1

            for vprop in G2.vertex_properties.keys():
                if vprop == id_col:
                    continue
                kw.write([
                    v_id, v_prop_dict[vprop],
                    str(G2.vp[vprop][v]),
                    '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count)
                ])
                id_count += 1

        kw.close()
        kr.close()

    except Exception as e:
        raise KGTKException('Error: ' + str(e))
Example #15
def rank_protection(g: GT.Graph, nodes, n_protected):
    return {
        nodes.id_of(n)
        for n in i_of_bests(IdNodes(list(pagerank(g))), n_protected)
    }
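The helpers `i_of_bests` and `IdNodes` are external to the snippet; a hypothetical stand-in for the first, consistent with its use here (indices of the n highest scores):

import numpy as np

def i_of_bests(scores, n):
    # hypothetical stand-in: indices of the n largest entries
    return np.argsort(np.asarray(scores))[::-1][:n]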
Example #16
def pagerank_centrality(g: Graph):
    return centrality.pagerank(g, weight=g.edge_properties['weight'])
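A usage sketch; `centrality` is assumed to be `graph_tool.centrality`, and the graph must carry a `weight` edge property:

from graph_tool import Graph, centrality

g = Graph(directed=True)
g.add_vertex(3)
w = g.new_edge_property('double')
for s, t, wt in [(0, 1, 1.0), (1, 2, 3.0), (2, 0, 1.0)]:
    w[g.add_edge(s, t)] = wt
g.edge_properties['weight'] = w

print(pagerank_centrality(g).a)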
Example #17
def save_centrality(g, node_out_fname, edge_out_fname, weight=None):
    """
    :param g: `Graph` instance
    :param node_out_fname: output CSV path for node centralities
    :param edge_out_fname: output CSV path for edge centralities
    :param weight: optional edge weight `PropertyMap`
    :return: None
    """

    df = pd.DataFrame()
    df['node'] = pd.Series(np.array([int(v) for v in g.vertices()]))

    # degree
    print('Degree')
    num_nodes = len(g.get_vertices())
    denom = num_nodes - 1

    if g.is_directed():
        unnormalized_in_degree = np.array([v.in_degree() for v in g.vertices()])
        unnormalized_out_degree = np.array([v.out_degree() for v in g.vertices()])

        df['unnormalized_in_degree'] = unnormalized_in_degree
        df['unnormalized_out_degree'] = unnormalized_out_degree
        df['in_degree'] = unnormalized_in_degree / denom
        df['out_degree'] = unnormalized_out_degree / denom

    else:
        # check whether weighted graph or not
        if weight:
            unnormalized_degree = np.zeros(num_nodes)
            edge_weights = np.array(weight.get_array())
            for edge, w in zip(g.get_edges(), edge_weights):
                for node in edge[:2]:
                    unnormalized_degree[node] += w
            df['unnormalized_degree'] = unnormalized_degree
            df['degree'] = unnormalized_degree / denom
        else:
            unnormalized_degree = np.array([v.out_degree() for v in g.vertices()])
            df['unnormalized_degree'] = unnormalized_degree
            df['degree'] = unnormalized_degree / denom

    # closeness
    print('Closeness')
    df['unnormalized_closeness'] = np.array(closeness(g, weight=weight, norm=False).get_array())
    df['closeness'] = np.array(closeness(g, weight=weight, norm=True).get_array())

    # pageRank
    print('PageRank')
    df['pagerank'] = np.array(pagerank(g, weight=weight).get_array())

    # betweenness
    print('Betweenness')
    un_node_between, un_edge_between = betweenness(g, weight=weight, norm=False)
    node_between, edge_between = betweenness(g, weight=weight, norm=True)
    df['unnormalized_betweenness'] = np.array(un_node_between.get_array())
    df['betweenness'] = np.array(node_between.get_array())

    df.to_csv(node_out_fname, index=False)

    # edge
    sources = []
    targets = []
    for e in g.edges():
        source, target = list(map(int, [e.source(), e.target()]))
        sources.append(source)
        targets.append(target)

    df = pd.DataFrame()
    df['source'] = pd.Series(np.array(sources))
    df['target'] = np.array(targets)

    # betweenness
    df['unnormalized_betweenness'] = np.array(un_edge_between.get_array())
    df['betweenness'] = np.array(edge_between.get_array())

    df.to_csv(edge_out_fname, index=False)
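A usage sketch (assuming `closeness`, `pagerank`, and `betweenness` come from graph_tool.centrality, as the calls imply; weight=None exercises the unweighted branch):

import numpy as np
import pandas as pd
from graph_tool import Graph
from graph_tool.centrality import betweenness, closeness, pagerank

g = Graph(directed=False)
g.add_vertex(4)
for s, t in [(0, 1), (1, 2), (2, 3), (3, 0)]:
    g.add_edge(s, t)

save_centrality(g, 'node_centrality.csv', 'edge_centrality.csv')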