def new_connections_predictions():
    pref_attach = list(nx.preferential_attachment(G))
    df = pd.DataFrame(index=[(x[0], x[1]) for x in pref_attach])
    df['pref_attch'] = [x[2] for x in pref_attach]
    common_neigh = [(e[0], e[1], len(list(nx.common_neighbors(G, e[0], e[1]))))
                    for e in nx.non_edges(G)]
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in common_neigh])
    df1['common_neigh'] = [x[2] for x in common_neigh]
    df = df.join(df1, how='outer')
    df['common_neigh'] = df['common_neigh'].fillna(value=0)
    del df1
    community_common_neigh = list(
        nx.cn_soundarajan_hopcroft(G, community='Department'))
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in community_common_neigh])
    df1['community_common_neigh'] = [x[2] for x in community_common_neigh]
    df = df.join(df1, how='outer')
    df['community_common_neigh'] = df['community_common_neigh'].fillna(value=0)
    del df1
    community_res_alloc = list(
        nx.ra_index_soundarajan_hopcroft(G, community='Department'))
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in community_res_alloc])
    df1['community_res_alloc'] = [x[2] for x in community_res_alloc]
    df = df.join(df1, how='outer')
    df['community_res_alloc'] = df['community_res_alloc'].fillna(value=0)
    del df1
    df['res_alloc'] = [x[2] for x in list(nx.resource_allocation_index(G))]
    df['jaccard_coeff'] = [x[2] for x in list(nx.jaccard_coefficient(G))]
    features = [
        'jaccard_coeff', 'res_alloc', 'pref_attch', 'common_neigh',
        'community_common_neigh', 'community_res_alloc'
    ]
    df = future_connections.join(df, how='outer')
    df_train = df[~pd.isnull(df['Future Connection'])]
    df_test = df[pd.isnull(df['Future Connection'])]
    X_train = df_train[features]
    X_test = df_test[features]
    y_train = df_train['Future Connection']
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)  # reuse the scaling fitted on the training set
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1, max_depth=10,
                                 random_state=0).fit(X_train_scaled, y_train)
    test_proba = clf.predict_proba(X_test_scaled)[:, 1]
    predictions = pd.Series(test_proba, X_test.index)
    # target = future_connections[pd.isnull(future_connections['Future Connection'])]
    # target['proba'] = [predictions[x] for x in target.index]
    return predictions
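# Hedged evaluation sketch (not part of the snippet above): before scoring the
# unlabeled pairs, the same feature table could be checked on a held-out split.
# `df_train` and `features` are assumed to be the intermediate objects built
# inside new_connections_predictions(); the names below are illustrative only.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(
    df_train[features], df_train['Future Connection'], random_state=0)
clf_check = RandomForestClassifier(n_estimators=100, max_depth=10, n_jobs=-1,
                                   random_state=0).fit(X_tr, y_tr)
print(roc_auc_score(y_val, clf_check.predict_proba(X_val)[:, 1]))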
def generate_positive_features():
    features = []
    count = 0
    print("Generating positive features......")
    for sample in positive_samples:
        if count % 100 == 0:
            print(count)
        count += 1
        feature = []
        try:
            preds = nx.resource_allocation_index(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.jaccard_coefficient(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.adamic_adar_index(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.preferential_attachment(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.cn_soundarajan_hopcroft(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.ra_index_soundarajan_hopcroft(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            preds = nx.within_inter_cluster(UG, [sample])
            for u, v, p in preds:
                feature.append(p)
            feature.append(1)  # label = 1
        except Exception:
            print("one error at: " + str(count))
        features.append(feature)
    print("positive features: " + str(len(features)))
    return features
def getNodeSim(node, g, metric):
    allDatasets = [i for i in g.nodes if str(i).startswith('data_')]
    pairs = generatePairs(node, [i for i in allDatasets if str(i) != node])
    if metric == 'Jaccard':
        preds = nx.jaccard_coefficient(g, pairs)
    elif metric == 'Adamic-Adar':
        preds = nx.adamic_adar_index(g, pairs)
    elif metric == 'Hopcroft':
        preds = nx.ra_index_soundarajan_hopcroft(g, pairs)
    elif metric == 'Cosine':
        return cosineSimilarity(g, pairs)
    else:
        return []
    res = []
    for u, v, p in preds:
        if p > 0.0:
            res.append((u, int(v.replace('data_', '')), p))
    df = pd.DataFrame(res, columns=['x', 'data_set_id', 'score'])
    return df[['data_set_id', 'score']].iloc[:10]
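# generatePairs() and cosineSimilarity() are helpers that are not shown above.
# A minimal sketch of generatePairs, under the assumption that it simply pairs
# the query node with every candidate dataset node (illustrative only):
def generatePairs(node, candidates):
    # Build (node, candidate) tuples suitable as the ebunch argument of the
    # NetworkX link-prediction functions used in getNodeSim().
    return [(node, candidate) for candidate in candidates]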
G.nodes[1]['community'] = 0
G.nodes[2]['community'] = 0
preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
for u, v, p in preds:
    print('(%d, %d) -> %d' % (u, v, p))

#%%
import networkx as nx

G = nx.Graph()
G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
G.nodes[0]['community'] = 0
G.nodes[1]['community'] = 0
G.nodes[2]['community'] = 1
G.nodes[3]['community'] = 0
nx.draw_networkx(G)
preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
for u, v, p in preds:
    print('(%d, %d) -> %.8f' % (u, v, p))

#%%
# ER network
import networkx as nx
nx.draw(nx.fast_gnp_random_graph(100, 0.05))

#%%
# WS small-world network
import networkx as nx
nx.draw_spring(nx.watts_strogatz_graph(10, 4, 0))

#%%
# Newman-WS small-world network
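# A plausible continuation of the cell above (assumption: the intended call is
# NetworkX's newman_watts_strogatz_graph, which adds shortcut edges instead of
# rewiring existing ones):
import networkx as nx
nx.draw_spring(nx.newman_watts_strogatz_graph(10, 4, 0.5))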
def ra_index_soundarajan_hopcroft(self):
    return list(
        nx.ra_index_soundarajan_hopcroft(
            self.graph, [(self.node_1, self.node_2)]))[0][2]
G = nx.read_edgelist("./data/drugbank_interactions.tsv", delimiter="\t",
                     nodetype=str)
partition = community.best_partition(G)
nx.set_node_attributes(G, name='community', values=partition)

# all_pairs() and cn.cnbors() are project-specific helpers defined elsewhere.
ap = list(all_pairs(G.nodes()))
cn = cn.cnbors(G, ap)
rai = nx.resource_allocation_index(G, ap)
jc = nx.jaccard_coefficient(G, ap)
aai = nx.adamic_adar_index(G, ap)
pa = nx.preferential_attachment(G, ap)
ccn = nx.cn_soundarajan_hopcroft(G, ap)
cra = nx.ra_index_soundarajan_hopcroft(G, ap)
wic = nx.within_inter_cluster(G, ap, community='community')

u, v, s1, s2, s3, s4, s5, s6, s7, s8, has_edge = ([] for i in range(11))
for m1, m2, m3, m4, m5, m6, m7, m8 in zip(cn, rai, jc, aai, pa, ccn, cra, wic):
    u.append(m1[0])
    v.append(m1[1])
    s1.append(m1[2])
    s2.append(m2[2])
    s3.append(m3[2])
    s4.append(m4[2])
    s5.append(m5[2])
    s6.append(m6[2])
    s7.append(m7[2])
    s8.append(m8[2])
    has_edge.append(int(G.has_edge(m1[0], m1[1])))
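# Hedged follow-up sketch: the parallel lists built above can be assembled into
# a single feature table; the column names here are illustrative, not from the
# original source.
import pandas as pd

feature_df = pd.DataFrame({
    'u': u, 'v': v,
    'common_neighbors': s1,
    'resource_allocation': s2,
    'jaccard': s3,
    'adamic_adar': s4,
    'preferential_attachment': s5,
    'cn_soundarajan_hopcroft': s6,
    'ra_soundarajan_hopcroft': s7,
    'within_inter_cluster': s8,
    'has_edge': has_edge,
})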
def new_connections_predictions():
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import roc_auc_score

    future_connections = pd.read_csv(path + 'Future_Connections.csv',
                                     index_col=0, converters={0: eval})

    def communities(row):
        """
        Check whether the two nodes are in the same department.
        Vectorized over rows; use with pd.DataFrame.apply(func, axis=1).
        """
        nodes = row.name
        a = nodes[0]
        b = nodes[1]
        comm_a = G.nodes[a]['Department']
        comm_b = G.nodes[b]['Department']
        if comm_a == comm_b:
            return 1
        else:
            return 0

    future_connections['same_comm'] = future_connections.apply(communities, axis=1)

    # For Soundarajan-Hopcroft algorithms.
    for node in G.nodes():
        G.nodes[node]['community'] = G.nodes[node]['Department']

    pa = list(nx.preferential_attachment(G))
    pa_df = pd.DataFrame(index=[(i[0], i[1]) for i in pa],
                         data={'pref_att': [i[2] for i in pa]})
    cn = [(e[0], e[1], len(list(nx.common_neighbors(G, e[0], e[1]))))
          for e in nx.non_edges(G)]
    cn_df = pd.DataFrame(index=[(i[0], i[1]) for i in cn],
                         data={'comm_neigh': [i[2] for i in cn]})
    cnsh = list(nx.cn_soundarajan_hopcroft(G))
    cnsh_df = pd.DataFrame(index=[(i[0], i[1]) for i in cnsh],
                           data={'sh_comm_neigh': [i[2] for i in cnsh]})
    ra = list(nx.resource_allocation_index(G))
    ra_df = pd.DataFrame(index=[(i[0], i[1]) for i in ra],
                         data={'reso_alloc': [i[2] for i in ra]})
    rash = list(nx.ra_index_soundarajan_hopcroft(G))
    rash_df = pd.DataFrame(index=[(i[0], i[1]) for i in rash],
                           data={'sh_reso_alloc': [i[2] for i in rash]})
    jc = [i for i in nx.jaccard_coefficient(G)]
    jc_df = pd.DataFrame(index=[(i[0], i[1]) for i in jc],
                         data={'jacc_coeff': [i[2] for i in jc]})

    for df in [pa_df, cn_df, cnsh_df, ra_df, rash_df, jc_df]:
        future_connections = future_connections.merge(df, how='left',
                                                      left_index=True,
                                                      right_index=True)

    keep = future_connections[~future_connections['Future Connection'].isnull()]
    hold = future_connections[future_connections['Future Connection'].isnull()]
    X_keep = keep.drop('Future Connection', axis=1)
    y_keep = keep['Future Connection']
    X_hold = hold.drop('Future Connection', axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X_keep, y_keep,
                                                        random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    # Check ROC AUC performance.
    roc_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])

    probs = clf.predict_proba(X_hold)[:, 1]
    answer = pd.Series(index=X_hold.index, data=probs)
    return answer
for edge in pedges:
    cntr += 1
    print("\r {}/{}".format(cntr, lenedg), end="")
    positive_predictions_proba_jcc.append(
        list(nx.jaccard_coefficient(G, [edge]))[0][2])
    positive_predictions_proba_ra.append(
        list(nx.resource_allocation_index(G, [edge]))[0][2])
    positive_predictions_proba_aa.append(
        list(nx.adamic_adar_index(G, [edge]))[0][2])
    positive_predictions_proba_pa.append(
        list(nx.preferential_attachment(G, [edge]))[0][2])
    positive_predictions_proba_cnsh.append(
        list(nx.cn_soundarajan_hopcroft(G, [edge]))[0][2])  # needs community information
    positive_predictions_proba_rash.append(
        list(nx.ra_index_soundarajan_hopcroft(G, [edge]))[0][2])  # needs community information
    positive_predictions_proba_wic.append(
        list(nx.within_inter_cluster(G, [edge]))[0][2])  # needs community information
    positive_predictions_proba_slp_DegCent.append(
        list(SLP_prediction(G, [edge], centrality="DegCent"))[0][2])
    positive_predictions_proba_slp_EigenCent.append(
        list(SLP_prediction(G, [edge], centrality="EigenCent"))[0][2])
    positive_predictions_proba_slp_ClosenessCent.append(
        list(SLP_prediction(G, [edge], centrality="ClosenessCent"))[0][2])
    positive_predictions_proba_slp_BetweenCent.append(
        list(SLP_prediction(G, [edge], centrality="BetweenCent"))[0][2])
    positive_predictions_proba_slp_PageRank.append(
        list(SLP_prediction(G, [edge], centrality="PageRank"))[0][2])
    positive_predictions_proba_slpc_DegCent.append(
        list(SLPC_prediction(
def set_edge_weight(self, edge_weight_method='weight'):
    if edge_weight_method == 'weight':
        return
    # Centrality-based methods
    elif edge_weight_method == 'edge_betweenness_centrality':
        print("computing edge_betweenness_centrality..")
        C = nx.edge_betweenness_centrality(self.G, weight='weight')
        print("done!")
    elif edge_weight_method == 'edge_betweenness_centrality_subset':
        print("computing edge_betweenness_centrality_subset..")
        C = nx.edge_current_flow_betweenness_centrality(self.G, weight='weight')
        print('done')
    elif edge_weight_method == 'edge_current_flow_betweenness_centrality_subset':
        print("computing edge_current_flow_betweenness_centrality_subset..")
        C = nx.edge_current_flow_betweenness_centrality_subset(
            self.G, weight='weight')
        print('done')
    elif edge_weight_method == 'edge_load_centrality':
        print("computing edge_load_centrality..")
        C = nx.edge_load_centrality(self.G)
        print('done!')
    # Link-prediction-based methods
    elif edge_weight_method == 'adamic_adar_index':
        print("computing adamic_adar_index ..")
        preds = nx.adamic_adar_index(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    elif edge_weight_method == 'ra_index_soundarajan_hopcroft':
        print("computing ra_index_soundarajan_hopcroft ..")
        preds = nx.ra_index_soundarajan_hopcroft(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    elif edge_weight_method == 'preferential_attachment':
        print("computing preferential_attachment ..")
        preds = nx.preferential_attachment(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    # elif edge_weight_method == 'cn_soundarajan_hopcroft':
    #     print("computing cn_soundarajan_hopcroft ..")
    #     preds = nx.cn_soundarajan_hopcroft(self.G, self.G.edges())
    #     C = {}
    #     for u, v, p in preds:
    #         C[(u, v)] = p
    elif edge_weight_method == 'within_inter_cluster':
        print("computing within_inter_cluster ..")
        preds = nx.within_inter_cluster(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    elif edge_weight_method == 'resource_allocation_index':
        print("computing resource allocation index ..")
        preds = nx.resource_allocation_index(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    elif edge_weight_method == 'jaccard_coefficient':
        print("computing jaccard_coefficient..")
        preds = nx.jaccard_coefficient(self.G, self.G.edges())
        C = {}
        for u, v, p in preds:
            C[(u, v)] = p
    print('done!')
    for u, v, d in self.G.edges(data=True):
        if edge_weight_method is None:
            d['weight'] = 1
        else:
            d['weight'] = C[(u, v)]
    return 1
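# Usage note (assumption): the ra_index_soundarajan_hopcroft and
# within_inter_cluster branches above only work when every node of the graph
# already carries a 'community' attribute. A minimal preparation sketch,
# mirroring the python-louvain call used in the drugbank example earlier in
# this collection (prepare_communities is an illustrative helper name):
import community  # python-louvain
import networkx as nx

def prepare_communities(G):
    # Assign each node its Louvain community so the Soundarajan-Hopcroft
    # weighting methods can be selected safely.
    partition = community.best_partition(G)
    nx.set_node_attributes(G, values=partition, name='community')
    return G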
def compute_variable(self, variable_name, train: bool, load=True, path_to_file=None, save=True): assert variable_name in self.handled_variables, "Variable %s is not handled. Handled variables are : %s" % ( variable_name, str(self.handled_variables)) if load and train: if path_to_file is None and os.path.isfile( "variables/%s.npy" % variable_name): print("Loading STANDARD %s file!" % variable_name) result = np.load("variables/%s.npy" % variable_name) return result[:self.nb_training_samples] elif path_to_file is not None and os.path.isfile(path_to_file): print("Loading CUSTOM %s file!" % variable_name) result = np.load(path_to_file) return result[:self.nb_training_samples] print("Did not find saved %s in `variables` folder." % variable_name) if load and not train: if path_to_file is None and os.path.isfile( "variables/TEST_%s.npy" % variable_name): print("Loading STANDARD TEST_%s file!" % variable_name) result = np.load("variables/TEST_%s.npy" % variable_name) return result[:self.nb_training_samples] elif path_to_file is not None and os.path.isfile(path_to_file): print("Loading CUSTOM %s file!" % variable_name) result = np.load(path_to_file) return result[:self.nb_training_samples] print("Did not find saved TEST_%s in `variables` folder." % variable_name) print("Starting computation of %s..." % variable_name) t1 = time() gd = self.graph_structure.graph_dicts # "graph_dictionaries if train: nb_of_samples = self.nb_training_samples else: nb_of_samples = self.nb_testing_samples result = np.zeros(shape=nb_of_samples) for i in range(nb_of_samples): if train: t = self.train_array[i] else: t = self.test_array[i] if variable_name == "publication_2": result[i] = np.log( len( set(self.node_information.loc[t[0], "publication_2"]) & set(self.node_information.loc[t[1], "publication_2"])) + 1) elif variable_name == "adam_coeff": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = \ next(nx.algorithms.link_prediction.adamic_adar_index(self.graph_structure.g, [(t[0], t[1])]))[2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = \ next(nx.algorithms.link_prediction.adamic_adar_index(self.graph_structure.g, [(t[0], t[1])]))[2] else: result[i] = \ next(nx.algorithms.link_prediction.adamic_adar_index(self.graph_structure.g, [(t[0], t[1])]))[2] elif variable_name == "overlapping_words_in_title": result[i] = compute_intersection( self.node_information.loc[t[0], "title"], self.node_information.loc[t[1], "title"], self.stemmer, self.stpwds) elif variable_name == "number_of_common_authors": result[i] = nbr_common_authors( self.node_information.loc[t[0], "author"], self.node_information.loc[t[1], "author"]) elif variable_name == "difference_of_years": result[i] = abs(self.node_information.loc[t[0], 'year'] - self.node_information.loc[t[1], 'year']) elif variable_name == "affinity_between_authors": result[i] = compute_affinity_between_authors( self.node_information.loc[t[0], 'author'], self.node_information.loc[t[1], 'author'], self.authors_dict) elif variable_name == "identical_journal": result[i] = np.int(self.node_information.loc[t[0], 'journal'] == self.node_information.loc[t[1], 'journal']) elif variable_name == "l2_distance": result[i] = np.linalg.norm( self.node_information.loc[t[0], 'wv'] - self.node_information.loc[t[1], 'wv']) elif variable_name == "cosine_distance_tfid": v1 = self.node_information.loc[t[0], "wv_tfid"] v2 = self.node_information.loc[t[1], "wv_tfid"] try: b1 = np.isnan(v1) except TypeError: b1 = False try: b2 = np.isnan(v2) except TypeError: b2 = 
False if not b1 and not b2: result[i] = cosine_similarity(v1, v2) else: result[i] = 0 elif variable_name == "l2_distance_between_titles": dst = np.linalg.norm( self.node_information.loc[t[0], 'title_wv'] - self.node_information.loc[t[1], 'title_wv']) if np.isnan(dst): result[i] = 0 else: result[i] = dst # elif variable_name == "cosine_distance_between_titles": # result[i] = cosine_distances( # np.nan_to_num(self.node_information.loc[t[0], 'title_wv']).reshape(-1, 1) - (self.node_information.loc[t[1], 'title_wv']).reshape(-1, 1) # )[0][0] elif variable_name == "common_neighbors": result[i] = len( sorted( nx.common_neighbors(self.graph_structure.g, t[0], t[1]))) elif variable_name == "clustering_coeff": result[i] = gd["clustering_coeff"][ t[0]] * gd["clustering_coeff"][t[1]] elif variable_name == "betweenness": result[i] = gd["betweenness"][t[0]] * gd["betweenness"][t[1]] elif variable_name == "closeness": result[i] = gd["closeness"][t[0]] * gd["closeness"][t[1]] elif variable_name == "degree": result[i] = gd["degree"][t[0]] * gd["degree"][t[1]] elif variable_name == "eigenvector": result[i] = gd["eigenvector"][t[0]] * gd["eigenvector"][t[1]] elif variable_name == "jaccard_coeff": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = next( nx.jaccard_coefficient(self.graph_structure.g, [(t[0], t[1])]))[2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = next( nx.jaccard_coefficient(self.graph_structure.g, [(t[0], t[1])]))[2] else: result[i] = next( nx.jaccard_coefficient(self.graph_structure.g, [(t[0], t[1])]))[2] elif variable_name == "shortest_path": if train: if t[2] == 1: assert self.graph_structure.g.has_edge( t[0], t[1] ), "There's a problem with the structure of the graph for id %i and %i" % ( t[0], t[1]) self.graph_structure.g.remove_edge(t[0], t[1]) try: result[ i] = 1 / nx.algorithms.shortest_paths.generic.shortest_path_length( self.graph_structure.g, t[0], t[1]) except nx.NetworkXNoPath: result[i] = 0 self.graph_structure.g.add_edge(t[0], t[1]) else: try: result[ i] = 1 / nx.algorithms.shortest_paths.generic.shortest_path_length( self.graph_structure.g, t[0], t[1]) except nx.NetworkXNoPath: result[i] = 0 else: try: result[ i] = 1 / nx.algorithms.shortest_paths.generic.shortest_path_length( self.graph_structure.g, t[0], t[1]) except nx.NetworkXNoPath: result[i] = 0 elif variable_name == "pagerank": result[i] = gd["pagerank"][t[0]] * gd["pagerank"][t[1]] elif variable_name == "community": if self.graph_structure.partition[ t[0]] == self.graph_structure.partition[t[1]]: result[i] = 1 else: result[i] = 0 elif variable_name == "lp_resource_allocation_index": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = sorted( nx.resource_allocation_index( self.graph_structure.g, [(t[0], t[1])]))[0][2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = sorted( nx.resource_allocation_index( self.graph_structure.g, [(t[0], t[1])]))[0][2] else: result[i] = sorted( nx.resource_allocation_index(self.graph_structure.g, [(t[0], t[1])]))[0][2] elif variable_name == "lp_preferential_attachment": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = sorted( nx.preferential_attachment(self.graph_structure.g, [(t[0], t[1])]))[0][2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = sorted( nx.preferential_attachment(self.graph_structure.g, [(t[0], t[1])]))[0][2] else: result[i] = sorted( nx.preferential_attachment(self.graph_structure.g, [(t[0], t[1])]))[0][2] elif variable_name == 
"lp_cn_soundarajan": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = sorted( nx.cn_soundarajan_hopcroft(self.graph_structure.g, [(t[0], t[1])]))[0][2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = sorted( nx.cn_soundarajan_hopcroft(self.graph_structure.g, [(t[0], t[1])]))[0][2] else: result[i] = sorted( nx.cn_soundarajan_hopcroft(self.graph_structure.g, [(t[0], t[1])]))[0][2] elif variable_name == "lp_ra_index_soundarajan": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = sorted( nx.ra_index_soundarajan_hopcroft( self.graph_structure.g, [(t[0], t[1])]))[0][2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = sorted( nx.ra_index_soundarajan_hopcroft( self.graph_structure.g, [(t[0], t[1])]))[0][2] else: result[i] = sorted( nx.ra_index_soundarajan_hopcroft( self.graph_structure.g, [(t[0], t[1])]))[0][2] elif variable_name == "lp_within_inter_cluster": if train: if t[2] == 1: self.graph_structure.g.remove_edge(t[0], t[1]) result[i] = sorted( nx.within_inter_cluster(self.graph_structure.g, [(t[0], t[1])]))[0][2] self.graph_structure.g.add_edge(t[0], t[1]) else: result[i] = sorted( nx.within_inter_cluster(self.graph_structure.g, [(t[0], t[1])]))[0][2] else: result[i] = sorted( nx.within_inter_cluster(self.graph_structure.g, [(t[0], t[1])]))[0][2] print("Did %s column in %5.1fs" % (variable_name, time() - t1)) if save and train: print("Saved variable %s in `variables` directory." % variable_name) np.save("variables/" + variable_name, result) if save and not train: np.save("variables/TEST_" + variable_name, result) print("Saved variable TEST_%s in `variables` directory." % variable_name) if np.isnan(result).shape[0] >= 1: print("Careful, you have nan values !") result[np.isnan(result)] = 0 return result
def get_community_resource_allocation(G):
    cra = list(nx.ra_index_soundarajan_hopcroft(G))
    cra.sort(key=operator.itemgetter(2), reverse=True)
    return cra
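# Hedged usage sketch for get_community_resource_allocation(): the function
# assumes every node already has a 'community' attribute (the default attribute
# name read by nx.ra_index_soundarajan_hopcroft). Example on the karate club
# graph, mapping its 'club' labels onto that attribute:
import networkx as nx

K = nx.karate_club_graph()
labels = {n: 0 if d['club'] == 'Mr. Hi' else 1 for n, d in K.nodes(data=True)}
nx.set_node_attributes(K, labels, 'community')
print(get_community_resource_allocation(K)[:5])  # five highest-scoring non-edges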
import networkx as nx
import operator

G = nx.from_edgelist([('A', 'C'), ('A', 'E'), ('A', 'D'), ('B', 'D'),
                      ('C', 'G'), ('D', 'G'), ('D', 'H'), ('D', 'E'),
                      ('E', 'H'), ('H', 'F')])

res_alloc = list(nx.resource_allocation_index(G))
print(sorted(res_alloc, key=operator.itemgetter(2), reverse=True))

pref_attach = list(nx.preferential_attachment(G))
print(sorted(pref_attach, key=operator.itemgetter(2), reverse=True))

G.nodes['A']['community'] = 0
G.nodes['B']['community'] = 0
G.nodes['C']['community'] = 0
G.nodes['D']['community'] = 0
G.nodes['G']['community'] = 0
G.nodes['F']['community'] = 1
G.nodes['E']['community'] = 1
G.nodes['H']['community'] = 1

community_common_neigh = list(nx.cn_soundarajan_hopcroft(G))
print(sorted(community_common_neigh, key=operator.itemgetter(2), reverse=True))

community_res_alloc = list(nx.ra_index_soundarajan_hopcroft(G))
print(sorted(community_res_alloc, key=operator.itemgetter(2), reverse=True))
def new_connections_predictions():
    import operator
    # Import preprocessing, selection and metrics
    from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score, GridSearchCV
    from sklearn.metrics import roc_auc_score
    from sklearn.dummy import DummyClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC, LinearSVC

    # Your Code Here
    df_fc_test_mask = pd.isnull(future_connections.loc[:, 'Future Connection'])
    df = pd.DataFrame()

    # Measure 1: Common Neighbors (intersection)
    # The number of common neighbors of nodes X and Y
    # future_connections['common_neigh']
    L = [(e[0], e[1], len(list(nx.common_neighbors(G, e[0], e[1]))))
         for e in nx.non_edges(G)]
    df['pair'] = [(x, y) for x, y, z in L]
    df['common_nb'] = [z for x, y, z in L]
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    # Measure 2: Jaccard Coefficient (intersection over union)
    # Number of common neighbors normalized by the total number of neighbors
    # common_neighbors / total_neighbors
    # future_connections['jaccard']
    df['jaccard'] = pd.Series([z for x, y, z in nx.jaccard_coefficient(G)])
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    # Measure 3: Resource Allocation Index
    # Fraction of a "resource" that a node can send to another through their common neighbors
    # sum(1 / degree_common_neighbor)
    df['resource'] = pd.Series([z for x, y, z in nx.resource_allocation_index(G)])
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    # Measure 4: Adamic-Adar Index
    # Similar to the resource allocation index, but with a log in the denominator
    # sum(1 / log(degree_common_neighbor))
    future_connections['adamic_adar'] = pd.Series([z for x, y, z in nx.adamic_adar_index(G)])
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    # Measure 5: Preferential Attachment
    # In the preferential attachment model, nodes with high degree get more neighbors
    # degree_source * degree_target
    future_connections['pref_att'] = pd.Series([z for x, y, z in nx.preferential_attachment(G)])
    # print(L)

    # Measure 6: Community Common Neighbors
    # Number of common neighbors with a bonus of 1 for each neighbor in the same community
    # f(u) = 1 if same community else 0
    # sum(f(u) * degree)
    for node, dept in nx.get_node_attributes(G, 'Department').items():
        G.nodes[node]['community'] = dept
    future_connections['com_common_nb'] = pd.Series([z for x, y, z in nx.cn_soundarajan_hopcroft(G)])
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    # Measure 7: Community Resource Allocation
    # Similar to the resource allocation index, but only considering nodes in the same community
    # f(u) = 1 if same community else 0
    # sum(f(u) / degree)
    future_connections['com_resource'] = pd.Series([z for x, y, z in nx.ra_index_soundarajan_hopcroft(G)])
    # L.sort(key=operator.itemgetter(2), reverse=True)
    # print(L)

    print(df.head())

    # df_fc_train = future_connections.loc[~df_fc_test_mask, :]
    # df_fc_test = future_connections.loc[df_fc_test_mask, :]
    # y_train = df_fc_train.loc[:, 'Future Connection']
    # y_test = df_fc_test.loc[:, 'Future Connection']
    # X_train = df_fc_train.index
    # X_test = df_fc_test.index

    # def auc_scores(model, *args, k=5, threshold=0.50):
    #     """CV scores"""
    #     X, y = args
    #     predictions = cross_val_predict(model, X, y, cv=k, n_jobs=-1)
    #     print('AUC - Test predict {:.2%}'.format(roc_auc_score(y, predictions)))

    # classifiers = [
    #     # GaussianNB(),
    #     # DecisionTreeClassifier(random_state=0),
    #     # DecisionTreeClassifier(max_depth=3, random_state=0),
    #     # DecisionTreeClassifier(max_depth=4, random_state=0),
    #     # DecisionTreeClassifier(max_depth=5, random_state=0),
    #     # DecisionTreeClassifier(max_depth=6, random_state=0),
    #     GradientBoostingClassifier(random_state=0),
    #     # GradientBoostingClassifier(learning_rate=0.08, random_state=0),
    #     # GradientBoostingClassifier(learning_rate=0.12, random_state=0),
    #     # GradientBoostingClassifier(learning_rate=0.1, max_depth=3, random_state=0),
    #     # GradientBoostingClassifier(learning_rate=0.1, max_depth=4, random_state=0),
    #     # RandomForestClassifier(n_estimators=100, random_state=0),
    #     # AdaBoostClassifier(learning_rate=0.1, n_estimators=100, random_state=0),
    #     # KNeighborsClassifier(),
    #     # KNeighborsClassifier(n_neighbors=4),
    #     # LinearSVC(random_state=0)
    # ]

    # for model in classifiers:
    #     # print('-' * 80)
    #     # print(model)
    #     # Training scores
    #     # clf_train = model.fit(X_train, y_train)
    #     # pred_train = clf_train.predict(X_train)
    #     # print('AUC - Train pred {:.2%}'.format(roc_auc_score(y_train, pred_train)))
    #     # CV scores
    #     clf = model.fit(X_train, y_train)
    #     # auc_scores(clf, X_train, y_train)
    #     # Predict
    #     predicted = clf.predict(X_test)
    #     pred_series = pd.Series(predicted)

    # assert type(pred_series) == pd.Series, 'wtf: ' + str(type(pred_series))
    return pred_series
G.nodes[2]['community'] = 0
G.nodes[3]['community'] = 0
G.nodes[4]['community'] = 1
G.nodes[5]['community'] = 1
G.nodes[6]['community'] = 1
G.nodes[7]['community'] = 1
G.nodes[8]['community'] = 1
L = list(nx.cn_soundarajan_hopcroft(G))
L.sort(key=operator.itemgetter(2), reverse=True); L

# Measure 7: Community Resource Allocation
# Similar to the resource allocation index, but only considering nodes in the same community
# f(u) = 1 if same community else 0
# sum(f(u) / degree)
L = list(nx.ra_index_soundarajan_hopcroft(G))
L.sort(key=operator.itemgetter(2), reverse=True); L

# Summary
# • Link prediction problem: given a network, predict which edges will be formed in the future.
# • 5 basic measures:
#   – Number of Common Neighbors
#   – Jaccard Coefficient
#   – Resource Allocation Index
#   – Adamic-Adar Index
#   – Preferential Attachment Score
# • 2 measures that require community information:
#   – Common Neighbor Soundarajan-Hopcroft Score
#   – Resource Allocation Soundarajan-Hopcroft Score

# ---------------------------------------------> Plot <--------------------------------------------
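# A compact sketch tying the summary together: all seven measures for a single
# candidate pair on the small example graph used earlier in this collection
# (the pair ('A', 'G') is a non-edge; 'community' labels are set as before).
import networkx as nx

G2 = nx.Graph([('A', 'C'), ('A', 'E'), ('A', 'D'), ('B', 'D'), ('C', 'G'),
               ('D', 'G'), ('D', 'H'), ('D', 'E'), ('E', 'H'), ('H', 'F')])
nx.set_node_attributes(G2, {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'G': 0,
                            'E': 1, 'F': 1, 'H': 1}, 'community')
pair = [('A', 'G')]
print(len(list(nx.common_neighbors(G2, 'A', 'G'))))         # common neighbors
print(next(nx.jaccard_coefficient(G2, pair))[2])            # Jaccard coefficient
print(next(nx.resource_allocation_index(G2, pair))[2])      # resource allocation
print(next(nx.adamic_adar_index(G2, pair))[2])              # Adamic-Adar
print(next(nx.preferential_attachment(G2, pair))[2])        # preferential attachment
print(next(nx.cn_soundarajan_hopcroft(G2, pair))[2])        # community common neighbors
print(next(nx.ra_index_soundarajan_hopcroft(G2, pair))[2])  # community resource allocation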
def ra_index_soundarajan_hopcroft(uG, ni, nj, rand_node):
    a, b = nx.ra_index_soundarajan_hopcroft(uG, [(ni, nj), (ni, rand_node)])
    return a[2], b[2]
nx.draw_networkx_labels(G, pos,
                        labels={u: u for t in candidate_edges for u in t},
                        font_size=13, font_weight='bold', font_color='yellow')
plt.axis('off')
plt.tight_layout()
plt.show()

# Create a data frame to store the various link-prediction scores.
df = pd.DataFrame(index=candidate_edges)

# Add generic and community-aware edge features for potential machine learning classification.
df['pref-att'] = list(
    map(operator.itemgetter(2), nx.preferential_attachment(G, candidate_edges)))
df['jaccard-c'] = list(
    map(operator.itemgetter(2), nx.jaccard_coefficient(G, candidate_edges)))
df['aa-idx'] = list(
    map(operator.itemgetter(2), nx.adamic_adar_index(G, candidate_edges)))
df['ccn'] = list(
    map(operator.itemgetter(2), nx.cn_soundarajan_hopcroft(G, candidate_edges, 'club')))
df['cra'] = list(
    map(operator.itemgetter(2), nx.ra_index_soundarajan_hopcroft(G, candidate_edges, 'club')))
print(df)
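# Small follow-up sketch: once the feature frame is filled, the candidate pairs
# can be ranked by any of the scores, e.g. the community-aware resource
# allocation column computed above.
print(df.sort_values('cra', ascending=False).head())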
def get_features(L, flag): X = [[] for i in range(len(L))] #=====================Social features(user-to-user graph)====================== #g0.adamic adar score if flag['g0'] is True: print("get feature g0") preds = nx.adamic_adar_index(G, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #g1.jaccard coefficient if flag['g1'] is True: print("get feature g1") preds = nx.jaccard_coefficient(G, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #g2.resource_allocation if flag['g2'] is True: print("get feature g2") preds = nx.resource_allocation_index(G, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #g3.preferentail_attachment if flag['g3'] is True: print("get feature g3") preds = nx.preferential_attachment(G, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #g4.shortest path length if flag['g4'] is True: print("get feature g4") cnt = 0 for (u, v) in L: if G.has_edge(u, v): G.remove_edge(u, v) if nx.has_path(G, u, v): X[cnt].append( nx.shortest_path_length(G, source=u, target=v) / 50000) else: X[cnt].append(1) G.add_edge(u, v) else: if nx.has_path(G, u, v): X[cnt].append( nx.shortest_path_length(G, source=u, target=v) / 50000) else: X[cnt].append(1) cnt += 1 #g5.common neighbors if flag['g5'] is True: print("get feature g5") cnt = 0 for (u, v) in L: if G.has_edge(u, v): G.remove_edge(u, v) T = [w for w in nx.common_neighbors(G, u, v)] G.add_edge(u, v) else: T = [w for w in nx.common_neighbors(G, u, v)] X[cnt].append(len(T)) cnt += 1 #g6.Approximate katz for social graph if flag['g6'] is True: print("get feature g6") cnt = 0 for (u, v) in L: p = 0 if G.has_edge(u, v): G.remove_edge(u, v) for x in G.neighbors(u): for y in G.neighbors(v): if x == y or G.has_edge(x, y): p += 1 G.add_edge(u, v) else: for x in G.neighbors(u): for y in G.neighbors(v): if x == y or G.has_edge(x, y): p += 1 X[cnt].append(p) cnt += 1 if flag['g7'] is True: print("get feature g7") cnt = 0 with open("best_part_G.txt", "r") as f: for line in f: v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.cn_soundarajan_hopcroft(G, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['g8'] is True: print("get feature g8") cnt = 0 with open("best_part_G.txt", "r") as f: for line in f: if line == "": continue v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.ra_index_soundarajan_hopcroft(G, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['g9'] is True: print("get feature g9") cnt = 0 with open("best_part_G.txt", "r") as f: for line in f: v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.within_inter_cluster(G, L, delta=0.5) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['g10'] is True: print("get feature g10") cnt = 0 with open("dendo_G.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.cn_soundarajan_hopcroft(G, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.cn_soundarajan_hopcroft(G, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds if flag['g11'] is True: print("get feature g11") cnt = 0 with open("dendo_G.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in 
f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.ra_index_soundarajan_hopcroft(G, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.ra_index_soundarajan_hopcroft(G, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds if flag['g12'] is True: print("get feature g12") cnt = 0 with open("dendo_G.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.within_inter_cluster(G, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = line.split() c = int(c) G.node[v]['community'] = c iters = nx.within_inter_cluster(G, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds #=========================checkin features========================================= #c0.follower number if flag['c0'] is True: print("get feature c0") cnt = 0 for (u, v) in L: X[cnt].append(U[u]['follow_cnt'] * U[v]['follow_cnt']) # fu*fv cnt += 1 #c1.same time same location if flag['c1'] is True: print("get feature c1") cnt = 0 for (u, v) in L: p = calculate_CCC(G, u, v) X[cnt].append(p) cnt += 1 #c2.same time same distinct spot if flag['c2'] is True: print("get deature c2") cnt = 0 for (u, v) in L: p = 0 dis_same_spot = [] for k in C[u]: if k[1] not in dis_same_spot and k in C[v]: dis_same_spot.append(k[1]) p += 1 X[cnt].append(p) cnt += 1 #c3.same distinct spot (not necessarily same time) if flag['c3'] is True: cnt = 0 print("get feature c3") for (u, v) in L: p = 0 dis_same_spot = [] for k in C[u]: if k[1] not in dis_same_spot: for m in C[v]: if k[1] == m[1]: dis_same_spot.append(k[1]) p += 1 break X[cnt].append(p) cnt += 1 #c4.min Entropy if flag['c4'] is True: print("get feature c4") cnt = 0 for (u, v) in L: p = 0 E_list = [] for k in C[u]: if k in C[v]: spot = k[1] if spot in S and S[spot]['entropy'] > 0: E_list.append(S[spot]['entropy']) if len(E_list) > 0: p = min(E_list) X[cnt].append(p) cnt += 1 #c5. 
distance of mean_LL if flag['c5'] is True: cnt = 0 print("get feature c5") for (u, v) in L: dist = np.sqrt((U[u]['mean_LL'][0] - U[v]['mean_LL'][0])**2 + (U[u]['mean_LL'][1] - U[v]['mean_LL'][1])**2) X[cnt].append(dist) cnt += 1 #c6.weighted same location if flag['c6'] is True: print("get feature c6") cnt = 0 for (u, v) in L: p = 0 for k in C[u]: if k in C[v]: spot = k[1] #if spot in S and S[spot]['entropy'] > 0: #p += 1/S[spot]['entropy'] if spot in S: dist = np.sqrt( (S[spot]['LL'][0] - U[u]['mean_LL'][0])**2 + (S[spot]['LL'][1] - U[u]['mean_LL'][1])**2) p += dist dist = np.sqrt( (S[spot]['LL'][0] - U[v]['mean_LL'][0])**2 + (S[spot]['LL'][1] - U[v]['mean_LL'][1])**2) p += dist X[cnt].append(p) cnt += 1 #c7.PP if flag['c7'] is True: print("get feature c7") cnt = 0 for (u, v) in L: p = len(C[u]) * len(C[v]) X[cnt].append(p) cnt += 1 #c8.Total Common Friend Closeness (TCFC) if flag['c8'] is True: print("get feature c8") cnt = 0 for (u, v) in L: p = 0 if G.has_edge(u, v): G.remove_edge(u, v) for w in nx.common_neighbors(G, u, v): T1 = [x for x in nx.common_neighbors(G, u, w)] T2 = [x for x in nx.common_neighbors(G, v, w)] p += len(T1) * len(T2) G.add_edge(u, v) else: for w in nx.common_neighbors(G, u, v): T1 = [x for x in nx.common_neighbors(G, u, w)] T2 = [x for x in nx.common_neighbors(G, v, w)] p += len(T1) * len(T2) X[cnt].append(p) cnt += 1 #c9.Total Common friend Checkin Count (TCFCC) if flag['c9'] is True: print("get feature c9") cnt = 0 for (u, v) in L: p = 0 if G.has_edge(u, v): G.remove_edge(u, v) for w in nx.common_neighbors(G, u, v): p += calculate_CCC(G, u, w) * calculate_CCC(G, v, w) G.add_edge(u, v) else: for w in nx.common_neighbors(G, u, v): p += calculate_CCC(G, u, w) * calculate_CCC(G, v, w) X[cnt].append(p) cnt += 1 #c10. Common Category Checkin Counts Product (CCCP) if flag['c10'] is True: print("get feature c10") cnt = 0 for (u, v) in L: p = 0 for cat in U[u]['cate']: if cat in U[v]['cate']: p += U[u]['cate'][cat] * U[v]['cate'][cat] X[cnt].append(p) cnt += 1 #c11. 
Common Category Checkin Counts Product Ratio(CCCPR) if flag['c11'] is True: print("get feature c11") cnt = 0 for (u, v) in L: p = 0 u_cate_total = sum(U[u]['cate'][cat]**2 for cat in U[u]['cate']) v_cate_total = sum(U[v]['cate'][cat]**2 for cat in U[v]['cate']) for cat in U[u]['cate']: if cat in U[v]['cate']: p += (U[u]['cate'][cat] * U[v]['cate'][cat] / np.sqrt(u_cate_total * v_cate_total)) X[cnt].append(p) cnt += 1 #c12.trip route length all if flag['c12'] is True: print("get feature c12") cnt = 0 for (u, v) in L: tripDayLen1 = list() tripDayLen2 = list() tripDay = "starting" tripLen = 0.0 lastSpot = [0.0, 0.0] for k in C[u]: if not (lastSpot[0] == 0.0 and lastSpot[1] == 0.0): if k[1] in S: tripLen += np.sqrt((lastSpot[0] - S[k[1]]['LL'][0])**2 + (lastSpot[1] - S[k[1]]['LL'][1])**2) lastSpot[0] = S[k[1]]['LL'][0] lastSpot[1] = S[k[1]]['LL'][1] else: if k[1] in S: lastSpot[0] = S[k[1]]['LL'][0] lastSpot[1] = S[k[1]]['LL'][1] tripDay = "starting" tripLen2 = 0.0 lastSpot = [0.0, 0.0] for k in C[v]: if not (lastSpot[0] == 0.0 and lastSpot[1] == 0.0): if k[1] in S: tripLen2 += np.sqrt( (lastSpot[0] - S[k[1]]['LL'][0])**2 + (lastSpot[1] - S[k[1]]['LL'][1])**2) lastSpot[0] = S[k[1]]['LL'][0] lastSpot[1] = S[k[1]]['LL'][1] else: if k[1] in S: lastSpot[0] = S[k[1]]['LL'][0] lastSpot[1] = S[k[1]]['LL'][1] X[cnt].append(tripLen + tripLen2) cnt += 1 #=========================Heter Graph features===================================== #h0.Approximate katz for bipartite graph if flag['h0'] is True: print("get feature h0") cnt = 0 for (u, v) in L: p = 0 for x in B.neighbors(u): for y in B.neighbors(v): if x == y or B.has_edge(x, y): p += 1 X[cnt].append(p) cnt += 1 #h1.Approximate katz on HB if flag['h1'] is True: print("get feature h1") cnt = 0 for (u, v) in L: p = 0 if HB.has_edge(u, v): HB.remove_edge(u, v) for x in HB.neighbors(u): for y in HB.neighbors(v): if x == y or HB.has_edge(x, y): p += 1 HB.add_edge(u, v) else: for x in HB.neighbors(u): for y in HB.neighbors(v): if x == y or HB.has_edge(x, y): p += 1 X[cnt].append(p) cnt += 1 #h2.Approximate katz on H if flag['h2'] is True: print("get feature h2") cnt = 0 for (u, v) in L: p = 0 if H.has_edge(u, v): H.remove_edge(u, v) for x in H.neighbors(u): for y in H.neighbors(v): if x == y or H.has_edge(x, y): p += 1 H.add_edge(u, v) else: for x in H.neighbors(u): for y in H.neighbors(v): if x == y or H.has_edge(x, y): p += 1 X[cnt].append(p) cnt += 1 #h3.shortest path length on B if flag['h3'] is True: print("get feature h3") cnt = 0 for (u, v) in L: if nx.has_path(B, u, v): X[cnt].append( nx.shortest_path_length(B, source=u, target=v) / 50000) else: X[cnt].append(1) cnt += 1 #h4.clustering coefiicient on H if flag['h4'] is True: print("get feature h4") cnt = 0 for (u, v) in L: if H.has_edge(u, v): H.remove_edge(u, v) p = nx.clustering(H, u) * nx.clustering(H, v) H.add_edge(u, v) else: p = nx.clustering(H, u) * nx.clustering(H, v) X[cnt].append(p) cnt += 1 #h5. number of (user's loc friends)'s loc friends if flag['h5'] is True: print("get feature h5") cnt = 0 for (u, v) in L: counter1 = 0 for neighbor in H.neighbors(u): if not neighbor.isnumeric(): for neighbor2 in H.neighbors(neighbor): if not neighbor.isnumeric(): counter1 += 1 counter2 = 0 for neighbor in H.neighbors(v): if not neighbor.isnumeric(): for neighbor2 in H.neighbors(neighbor): if not neighbor.isnumeric(): counter2 += 1 #print(str(counter1)+" "+str(counter2)+"\n") X[cnt].append(counter1 * counter2) cnt += 1 #h6. 
location friends' degree sum if flag['h6'] is True: print("get feature h6") cnt = 0 for (u, v) in L: counter1 = 0 for locationNeighbor in H.neighbors(u): if not locationNeighbor.isnumeric(): #print(str(locationNeighbor)+"\n") if locationNeighbor in LG: counter1 += LG.degree(locationNeighbor) counter2 = 0 for locationNeighbor in H.neighbors(v): if not locationNeighbor.isnumeric(): if locationNeighbor in LG: counter2 += LG.degree(locationNeighbor) X[cnt].append(counter1 * counter2) cnt += 1 #h7. Approximate katz for social graph if flag['h7'] is True: print("get feature h7") cnt = 0 for (u, v) in L: counter = 0 for node in H.neighbors(u): if not node.isnumeric(): for node2 in H.neighbors(v): if not node2.isnumeric(): if node == node2 or H.has_edge(node, node2): counter += 1 X[cnt].append(counter) cnt += 1 #h8. adamic adar score on H if flag['h8'] is True: print("get feature h8") preds = nx.adamic_adar_index(H, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #h9. resource_allocation on H if flag['h9'] is True: print("get feature h9") preds = nx.resource_allocation_index(H, L) cnt = 0 for (u, v, p) in preds: X[cnt].append(p) cnt += 1 #h10. shortest path length on H if flag['h10'] is True: print("get feature h10") cnt = 0 for (u, v) in L: if H.has_edge(u, v): H.remove_edge(u, v) if nx.has_path(H, u, v): X[cnt].append( nx.shortest_path_length(H, source=u, target=v) / 50000) else: X[cnt].append(1) H.add_edge(u, v) else: if nx.has_path(H, u, v): X[cnt].append( nx.shortest_path_length(H, source=u, target=v) / 50000) else: X[cnt].append(1) cnt += 1 #h11. common neighbors on H if flag['h11'] is True: print("get feature h11") cnt = 0 for (u, v) in L: if H.has_edge(u, v): H.remove_edge(u, v) T = [w for w in nx.common_neighbors(H, u, v)] H.add_edge(u, v) else: T = [w for w in nx.common_neighbors(H, u, v)] X[cnt].append(len(T)) cnt += 1 #h12.Approximate katz for social graph if flag['h12'] is True: print("get feature h12") cnt = 0 for (u, v) in L: p = 0 if H.has_edge(u, v): H.remove_edge(u, v) for x in H.neighbors(u): for y in H.neighbors(v): if x == y or H.has_edge(x, y): p += 1 H.add_edge(u, v) else: for x in H.neighbors(u): for y in H.neighbors(v): if x == y or H.has_edge(x, y): p += 1 X[cnt].append(p) cnt += 1 if flag['h13'] is True: print("get feature h13") cnt = 0 with open("best_part_HB.txt", "r") as f: for line in f: v, c = line.split() c = int(c) HB.node[v]['community'] = c iters = nx.cn_soundarajan_hopcroft(HB, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['h14'] is True: print("get feature h14") cnt = 0 with open("best_part_HB.txt", "r") as f: for line in f: if line == "": continue v, c = line.split() c = int(c) HB.node[v]['community'] = c iters = nx.ra_index_soundarajan_hopcroft(HB, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['h15'] is True: print("get feature h15") cnt = 0 with open("best_part_HB.txt", "r") as f: for line in f: v, c = line.split() c = int(c) HB.node[v]['community'] = c iters = nx.within_inter_cluster(HB, L, delta=0.5) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(preds[(u, v)]) cnt += 1 if flag['h16'] is True: print("get feature h16") cnt = 0 with open("dendo_HB.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.cn_soundarajan_hopcroft(HB, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = 
line.split() c = int(c) HB.node[v]['community'] = c iters = nx.cn_soundarajan_hopcroft(HB, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds if flag['h17'] is True: print("get feature h17") cnt = 0 with open("dendo_HB.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.ra_index_soundarajan_hopcroft(HB, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = line.split() c = int(c) HB.node[v]['community'] = c iters = nx.ra_index_soundarajan_hopcroft(HB, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds if flag['h18'] is True: print("get feature h18") cnt = 0 with open("dendo_HB.txt", "r") as f: line = f.readline() p_dict = {(u, v): 0.0 for (u, v) in L} for line in f: if 'level' in line: l = int(line.split()[1]) if l != 0: iters = nx.within_inter_cluster(HB, L) for (u, v, p) in iters: p_dict[(u, v)] += p else: v, c = line.split() c = int(c) HB.node[v]['community'] = c iters = nx.within_inter_cluster(HB, L) preds = {(u, v): p for (u, v, p) in iters} for (u, v) in L: X[cnt].append(p_dict[(u, v)] + preds[(u, v)]) cnt += 1 del p_dict del preds return X
# Resource Allocation (sum of the fractions of a "resource" the end node receives from the middle nodes, based on their degrees)
ra = list(nx.resource_allocation_index(g))

# Adamic-Adar Index (Resource Allocation with log of degrees)
aa = list(nx.adamic_adar_index(g))

# Preferential Attachment (product of the nodes' degrees)
pa = list(nx.preferential_attachment(g))

# Community Common Neighbors (with a bonus for neighbors in the same community)
g.nodes[0]['community'] = 0
g.nodes[1]['community'] = 1
g.nodes[2]['community'] = 0
g.nodes[3]['community'] = 1
g.nodes[4]['community'] = 1
g.nodes[5]['community'] = 0
g.nodes[6]['community'] = 1
g.nodes[7]['community'] = 1
g.nodes[8]['community'] = 0
g.nodes[9]['community'] = 0
ccn = list(nx.cn_soundarajan_hopcroft(g))

# Community Resource Allocation (only considers nodes in the same community)
g.nodes[0]['community'] = 0
g.nodes[1]['community'] = 1
g.nodes[2]['community'] = 0
g.nodes[3]['community'] = 1
g.nodes[4]['community'] = 1
g.nodes[5]['community'] = 0
g.nodes[6]['community'] = 1
g.nodes[7]['community'] = 1
g.nodes[8]['community'] = 0
g.nodes[9]['community'] = 0
cra = list(nx.ra_index_soundarajan_hopcroft(g))
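# Related community-aware measure used in other snippets of this collection:
# within_inter_cluster reads the same 'community' attribute set above and
# compares within-community common neighbors to the remaining ones (delta
# guards against division by zero). Shown here as a hedged extension of the
# example, reusing the graph g from above.
wic = list(nx.within_inter_cluster(g, delta=0.5))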