Example #1
def main():
    # Benchmark classic centrality measures against simulated SIR spreading
    # influence, using Kendall's tau between the two rankings.
    test_networks = ["karate_club", "facebook"]
    for net in test_networks:
        net_file = data_utils.get_data_path(net)
        g = graph_utils.load_basic_network(net_file)
        # Cached SIR scores live next to the network file.
        sir_file = os.path.splitext(net_file)[0] + '-sir.txt'
        sir = {}
        if os.path.exists(sir_file):
            with open(sir_file, 'r') as f:
                for line in f:
                    node_id, score = line.split()
                    sir[int(node_id)] = float(score)
        else:
            print("SIR Simulation start.")
            sir = sir_ranking(g, gamma=1.0, num_epoch=100)
            print("SIR Simulation end.")
        centralities = [
            nx.degree_centrality, nx.closeness_centrality,
            nx.eigenvector_centrality, nx.pagerank, Local_gravity_model
        ]
        for c in centralities:
            if c.__name__ == 'pagerank':
                res = c(g, alpha=0.95)
            elif c.__name__ == 'Local_gravity_model':
                res = c(g, depth=2)
            else:
                res = c(g)
            tau, p = kendallTau(res, sir)
            print("%s\t%s\t%f" % (net, c.__name__, tau))
Example #2
def main():
    for dataset_name in data_utils.data_file:
        net_file = data_utils.get_data_path(dataset_name)

        nx_adj = graph_utils.load_networkx_format(net_file)
        g = nx.from_scipy_sparse_matrix(nx_adj)
        train, test, val, train_neg, test_neg, val_neg = graph_utils.train_test_split(
            nx_adj, pos_neg_ratio=0.5)
        logger.info("[%s] train 1#%d 0#%d" %
                    (dataset_name, len(train), len(train_neg)))
        logger.info("[%s] test 1#%d 0#%d" %
                    (dataset_name, len(test), len(test_neg)))
        logger.info("[%s] valid 1#%d 0#%d" %
                    (dataset_name, len(val), len(val_neg)))
        # Compute basic link-prediction indices from g_train
        g_train = nx.from_edgelist(train)
        train_nodes = g_train.nodes
        candidate_edges = []
        # Keep only test pairs whose endpoints both appear in the training
        # graph, since the neighborhood-based indices are undefined otherwise.
        for u, v in list(test) + list(test_neg):
            if u in train_nodes and v in train_nodes:
                candidate_edges.append((u, v))

        # Run Algos

        lp_baselines = {
            "Adamic-Adar": nx.adamic_adar_index,
            "Resouce Allocation": nx.resource_allocation_index,
            "Jaccard": nx.jaccard_coefficient,
            "Preferential Attachment": nx.preferential_attachment
        }
        print("#============================")
        print("Method\tAUC\tAP")
        for baseline in lp_baselines:
            # Fresh score matrix per baseline so scores from the previous
            # method do not leak into this evaluation.
            aa_matrix = np.zeros(nx_adj.shape)
            for u, v, p in lp_baselines[baseline](g_train, candidate_edges):
                aa_matrix[u][v] = p
                aa_matrix[v][u] = p  # keep the score matrix symmetric
            # Calculate ROC AUC and Average Precision
            roc, ap = get_roc_score(test, test_neg, aa_matrix)
            print("%s\t%.6f\t%.6f" % (baseline, roc, ap))
            logger.info("[%s]\t%s\t%.6f\t%.6f" %
                        (dataset_name, baseline, roc, ap))
        print("#============================")
Example #3
def explore_lyb():
    net_file = data_utils.get_data_path("lyb")
    g = graph_utils.load_basic_network(net_file)
    # Train a 32-dimensional node2vec embedding (return parameter p=0.5, in-out parameter q=2).
    n2v_emb = node2vec.node2vec_emb(g, p=0.5, q=2, out_dim=32, num_walks=20)
    n2v_emb.learn_embedding()

    # out_file = net_file.split('.')[0]+'-n2v_emb.txt'
    # n2v_emb.output_embedding(out_file)

    node_labels = graph_utils.load_node_labels(data_utils.get_node_path("lyb"))
    # Find the nodes most similar to node '23' in embedding space
    # (gensim keys are strings; gensim >= 4 exposes this as model.wv.most_similar).
    sus_best = n2v_emb.model.most_similar(positive=['23'])
    for node_id, similarity in sus_best:
        print(node_labels[int(node_id)], similarity)
    # greedy_modularity_communities is assumed imported from networkx.algorithms.community.
    com = list(greedy_modularity_communities(g))
    node_community = {}
    for i, c in enumerate(com):
        for node in c:
            node_community[node_labels[node]] = i
    print(node_community)
Example #4
network_stats(g, 'BA-Network')

# WS-Network
k = 20
p = 0.01
g = nx.watts_strogatz_graph(n, k, p)
g = g.to_undirected()
draw_degree_dist(g, k, title='WS-Network Distribution')
network_stats(g, 'WS-Network')

# powerlaw_cluster network
m = 10
p = 1
g = nx.powerlaw_cluster_graph(n, m, p)
g = g.to_undirected()
draw_degree_dist(g, m, title='PC-Network Distribution')
network_stats(g, 'PC-Network')

# Facebook Network
import os, sys
project_root_path = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(project_root_path)
import common.graph_utils as graph_utils
import common.data_utils as data_utils

net_file = data_utils.get_data_path("facebook")
g = graph_utils.load_basic_network(net_file)
g = g.to_undirected()
# Note: m is reused from the powerlaw-cluster block above.
draw_degree_dist(g, m, title='Facebook-Network Distribution')
network_stats(g, 'Facebook-Network')
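draw_degree_dist and network_stats are project helpers that are not shown here; a minimal sketch of the plotting side, assuming the second argument is only used to mark the generator parameter on the plot (the real signature may differ):

import matplotlib.pyplot as plt
from collections import Counter

def draw_degree_dist(g, k_marker, title=''):
    # Log-log scatter of the empirical degree distribution.
    counts = Counter(d for _, d in g.degree())
    degrees = sorted(k for k in counts if k > 0)
    plt.loglog(degrees, [counts[k] for k in degrees], 'o')
    plt.axvline(k_marker, linestyle='--')  # mark the generator parameter
    plt.xlabel('degree k')
    plt.ylabel('node count')
    plt.title(title)
    plt.show()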
Example #5
        print('%d ego graphs saved to adjacency_matrix.npy.' % (len(self.ego_graphs)))
        np.save(os.path.join(file_dir, "influence_feature.npy"), self.influence_features)
        print('influence_feature.npy saved.')
        np.save(os.path.join(file_dir, "label.npy"), self.graph_labels)
        print('label.npy saved.')
        np.save(os.path.join(file_dir, "vertex_id.npy"), self.ego_virtices)
        print('vertex_id.npy saved.')
        np.save(os.path.join(file_dir, "vertex_feature.npy"), self.graph_node_features)
        print('vertex_feature.npy saved.')
        np.save(os.path.join(file_dir, "embedding.npy"), self.embedding)
        print('embedding.npy saved.')

    def load(self, file_dir):
        # np.load may need allow_pickle=True if the ego graphs were saved as an object array.
        self.ego_graphs = np.load(os.path.join(file_dir, "adjacency_matrix.npy"))
        self.influence_features = np.load(os.path.join(file_dir, "influence_feature.npy")).astype(np.float32)
        self.graph_labels = np.load(os.path.join(file_dir, "label.npy"))
        self.ego_virtices = np.load(os.path.join(file_dir, "vertex_id.npy"))
        self.graph_node_features = torch.FloatTensor(np.load(os.path.join(file_dir, "vertex_feature.npy")))
        self.embedding = torch.FloatTensor(np.load(os.path.join(file_dir, "embedding.npy")))
        print("%s dataset loaded." % (file_dir))

if __name__ == '__main__':
    network_name = 'facebook'
    net_file = data_utils.get_data_path(network_name)
    g = graph_utils.load_basic_network(net_file)
    dataset = deepinf_dataset(g, sir_file=os.path.splitext(net_file)[0] + '-sir.txt')
    target_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               network_name + "_preprocess")
    dataset.make()
    dataset.save(target_path)
    # dataset.load(target_path)
Example #6
def main():
    net_file = data_utils.get_data_path("twitter")
    # load_networkx_format is assumed imported from common.graph_utils.
    adj = load_networkx_format(net_file)
Example #7
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(
        description=
        'PyTorch graph convolutional neural net for whole-graph classification'
    )
    parser.add_argument('--dataset',
                        type=str,
                        default="IMDBBINARY",
                        help='name of dataset (default: IMDBBINARY)')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument(
        '--iters_per_epoch',
        type=int,
        default=50,
        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of the fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help=
        'number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.'
    )
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=128,
                        help='number of hidden units (default: 128)')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument(
        '--neighbor_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average", "max"],
        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument(
        '--learn_eps',
        action="store_true",
        help=
        'Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.'
    )
    parser.add_argument(
        '--degree_as_tag',
        action="store_true",
        help=
        'let the input node features be the degree of nodes (heuristics for unlabeled graph)'
    )
    parser.add_argument('--filename', type=str, default="", help='output file')
    args = parser.parse_args()

    # set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_graphs(data_utils.get_data_path(args.dataset),
                                      args.degree_as_tag)

    # 10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_graph_data(graphs, args.seed,
                                                    args.fold_idx)

    model = GraphCNN(args.num_layers, args.num_mlp_layers,
                     train_graphs[0].node_features.shape[1], args.hidden_dim,
                     num_classes, args.final_dropout, args.learn_eps,
                     args.graph_pooling_type, args.neighbor_pooling_type,
                     device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    for epoch in range(1, args.epochs + 1):
        print("$epoch", epoch)
        avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
        # Step the LR scheduler after the optimizer has stepped (required in PyTorch >= 1.1);
        # stepping it before training would skip the first scheduled learning rate.
        scheduler.step()
        acc_train, acc_test = test(args, model, device, train_graphs,
                                   test_graphs, epoch)

        if args.filename != "":
            # Append so one line per epoch is kept ('w' would leave only the last epoch).
            with open(args.filename, 'a') as f:
                f.write("%f %f %f\n" % (avg_loss, acc_train, acc_test))
        print()
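For reference, a typical run of this script (the file name main.py is hypothetical) would be: python main.py --dataset IMDBBINARY --epochs 50 --hidden_dim 128 --learn_eps --filename result.txt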
Example #8
        # Breadth-first expansion out to `depth` hops, recording each node's hop distance.
        cur_level = list(neighbors(node))
        visited = set(cur_level)
        while depth_now <= depth and len(cur_level) > 0:
            next_level = set()
            for target in cur_level:
                if target not in candidates:
                    candidates[target] = depth_now
                for child in neighbors(target):
                    if child not in visited:
                        visited.add(child)
                        next_level.add(child)
            cur_level = next_level
            depth_now += 1
        gravity = 0
        gravity = 0
        for target, distance in candidates.items():
            if target != node and distance <= depth:
                # Gravity-style contribution: degree product over squared distance.
                gravity += degrees[node] * degrees[target] / (distance ** 2)
        lgm_results[node] = gravity
    return lgm_results
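In formula form, the score computed above is a depth-truncated gravity sum, with k_i the degree of node i, d_{ij} the shortest-path distance, and R the truncation depth (depth=2 in the call below):

    LG(i) = \sum_{j \neq i,\; d_{ij} \leq R} \frac{k_i k_j}{d_{ij}^2}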


if __name__ == '__main__':
    # g = nx.karate_club_graph()
    net_file = data_utils.get_data_path("lyb")
    g = graph_utils.load_basic_network(net_file)
    lgm = Local_gravity_model(g)
    print("Local Gravity Model:")
    print(sorted(lgm.items(), key=lambda v: v[1], reverse=True))
Example #9
def main():
    net_file = data_utils.get_data_path("lyb")
    g = graph_utils.load_basic_network(net_file)
    # Train a 64-dimensional node2vec embedding (return parameter p=0.5, in-out parameter q=2).
    n2v_emb = node2vec.node2vec_emb(g, p=0.5, q=2, out_dim=64, num_walks=20)
    n2v_emb.learn_embedding()
Example #10
        avg_node_inf = node_influence / num_epoch if num_epoch > 0 else 0.0
        sir_score[node] = avg_node_inf
    return sir_score


def opt_beta(g):
    # Heuristic for the SIR epidemic threshold: k1s accumulates first-neighbor
    # counts, and k2s the edges of each node's depth-2 BFS tree as a rough
    # proxy for the second moment of the degree distribution.
    k1s = 0
    k2s = 0
    for node in g.nodes():
        k1s += len(list(g.neighbors(node)))
        k2s += len(list(nx.bfs_tree(g, source=node, depth_limit=2).edges()))
    if k2s > k1s:
        beta = k1s / (k2s - k1s)
    else:
        beta = 0.1
    return beta
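The ratio k1s / (k2s - k1s) approximates the classic SIR epidemic threshold for uncorrelated networks,

    \beta_c = \frac{\langle k \rangle}{\langle k^2 \rangle - \langle k \rangle},

since k1s / N estimates the mean degree <k> and, in a locally tree-like graph, k2s / N roughly estimates <k^2>. The main block below runs the SIR simulation with its infection rate set to this threshold.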


if __name__ == '__main__':
    # g = nx.karate_club_graph()
    # sir_score = sir_ranking(g, num_epoch=1000)
    # print(sorted(sir_score.items(), key=lambda v: v[1], reverse=True))
    net_file = data_utils.get_data_path("BlogCatalog")
    g = graph_utils.load_basic_network(net_file)
    st = time.time()
    sir = sir_ranking(g, beta=opt_beta(g), gamma=1.0, num_epoch=100)
    out_file = os.path.splitext(net_file)[0] + '-sir.txt'
    with open(out_file, 'w') as f:
        for node, score in sir.items():
            f.write('%s\t%.6f\n' % (node, score))
    print('time used:', time.time() - st)