コード例 #1
0
ファイル: bigg.py プロジェクト: xiangsheng1325/GraphGenerator
def infer_bigg(test_graphs, config, model=None):
    """Generate graphs with a (trained) BiGG model and report per-graph inference time.

    :param test_graphs: list of nx.Graph used to size the model and to fit the
        empirical node-count distribution.
    :param config: experiment configuration (seed, device, model/test sections).
    :param model: optional pre-trained RecurTreeGen; when None a fresh model is
        built and the test graphs are inserted into TreeLib.
    :return: list of generated nx.Graph objects.
    """
    # Seed every RNG source so sampling is reproducible.
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    set_device(config)
    setup_treelib(config)
    max_num_nodes = max(len(gg.nodes) for gg in test_graphs)
    config.model.max_num_nodes = max_num_nodes
    if model is None:
        model = RecurTreeGen(config).to(config.device)
        for g in test_graphs:
            TreeLib.InsertGraph(g)
    test_model_path = os.path.join(config.test.test_model_dir,
                                   config.test.test_model_name)
    # BUGFIX: check the snapshot *file* itself, not its directory —
    # os.path.isfile() on a directory is always False, so the saved
    # snapshot was never actually loaded.
    if config.test.load_snapshot and os.path.isfile(test_model_path):
        print('loading from', test_model_path)
        model.load_state_dict(torch.load(test_model_path))

    # Empirical distribution of graph sizes in the test set.
    num_node_dist = get_node_dist(test_graphs)
    gen_graphs = []
    infering_time = {
        'time_all': 0.,
        'epochs': 0,
    }
    with torch.no_grad():
        for _ in tqdm(range(config.test.num_test_gen)):
            # Sample a target node count from the empirical distribution.
            num_nodes = np.argmax(np.random.multinomial(1, num_node_dist))
            start_time = time.time()
            _, pred_edges, _ = model(num_nodes, display=config.test.display)
            # The model is expected to emit edges with src > dst
            # (lower-triangular order).
            for e in pred_edges:
                assert e[0] > e[1]
            pred_g = nx.Graph()
            pred_g.add_edges_from(pred_edges)
            end_time = time.time()
            infering_time['time_all'] += end_time - start_time
            infering_time['epochs'] += 1
            gen_graphs.append(pred_g)
    # Guard against division by zero when config.test.num_test_gen == 0.
    infer_time = (infering_time['time_all'] / infering_time['epochs']
                  if infering_time['epochs'] else 0.)
    print("\nTime consumption of infering one graph by BiGG is: {:.6f}".format(
        infer_time))
    return gen_graphs
コード例 #2
0
def bigg_test(args, config):
    """Smoke-test BiGG training on a single synthetic graph.

    Builds one small Barabasi-Albert graph and runs two optimizer steps to
    verify that the model, the TreeLib bindings and the optimizer all wire
    together.
    """
    # Make the run deterministic across every RNG source.
    for seeder in (random.seed, torch.manual_seed, np.random.seed):
        seeder(config.seed)
    set_device(config)
    setup_treelib(config)

    train_graphs = [nx.barabasi_albert_graph(10, 2)]
    TreeLib.InsertGraph(train_graphs[0])
    max_num_nodes = max(len(gg.nodes) for gg in train_graphs)
    config.model.max_num_nodes = max_num_nodes

    model = RecurTreeGen(config).to(config.device)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.train.lr,
                           weight_decay=1e-4)
    # Two quick gradient steps are enough for a smoke test.
    for step in range(2):
        optimizer.zero_grad()
        ll, _ = model.forward_train([0])
        loss = -ll / max_num_nodes
        print('iter', step, 'loss', loss.item())
        loss.backward()
        optimizer.step()
コード例 #3
0
ファイル: bigg.py プロジェクト: xiangsheng1325/GraphGenerator
def train_bigg(train_graphs, config):
    """Train a BiGG (RecurTreeGen) model on the given graphs.

    :param train_graphs: list of nx.Graph training graphs.
    :param config: experiment configuration (seed, device, model/train
        sections, exp_dir/exp_name for snapshots).
    :return: the trained model.
    """
    # Seed every RNG source so training is reproducible.
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    set_device(config)
    setup_treelib(config)
    for g in train_graphs:
        TreeLib.InsertGraph(g)
    max_num_nodes = max(len(gg.nodes) for gg in train_graphs)
    config.model.max_num_nodes = max_num_nodes

    model = RecurTreeGen(config).to(config.device)
    # BUGFIX: test the snapshot *file*, not its directory —
    # os.path.isfile() on a directory is always False, so resuming from a
    # checkpoint never actually happened.
    resume_model_path = os.path.join(config.train.resume_model_dir,
                                     config.train.resume_model_name)
    if config.train.resume and os.path.isfile(resume_model_path):
        print('loading from', resume_model_path)
        model.load_state_dict(torch.load(resume_model_path))

    optimizer = optim.Adam(model.parameters(),
                           lr=config.train.lr,
                           weight_decay=1e-4)
    indices = list(range(len(train_graphs)))
    if config.train.resume_epoch is None:
        config.train.resume_epoch = 0
    training_time = {
        'time_all': 0.,
        'epochs': 0,
    }
    for epoch in range(config.train.resume_epoch, config.train.max_epochs):
        pbar = tqdm(range(config.train.snapshot_epoch))

        optimizer.zero_grad()
        for idx in pbar:
            # Sample a random mini-batch of graph indices.
            random.shuffle(indices)
            batch_indices = indices[:config.train.batch_size]

            num_nodes = sum(len(train_graphs[i]) for i in batch_indices)
            if config.model.blksize < 0 or num_nodes <= config.model.blksize:
                # Whole batch fits into a single forward/backward pass.
                start_time = time.time()
                ll, _ = model.forward_train(batch_indices)
                loss = -ll / num_nodes
                loss.backward()
                end_time = time.time()
                training_time['time_all'] += end_time - start_time
                training_time['epochs'] += 1
                loss = loss.item()
            else:
                # Batch too large for one pass: fall back to the sqrt(n)
                # checkpointed forward/backward, one graph at a time.
                ll = 0.0
                for i in batch_indices:
                    n = len(train_graphs[i])
                    cur_ll, _ = sqrtn_forward_backward(
                        model,
                        graph_ids=[i],
                        list_node_starts=[0],
                        num_nodes=n,
                        blksize=config.model.blksize,
                        loss_scale=1.0 / n)
                    ll += cur_ll
                loss = -ll / num_nodes
            # Apply accumulated gradients every accum_grad iterations.
            if (idx + 1) % config.train.accum_grad == 0:
                if config.train.grad_clip > 0:
                    torch.nn.utils.clip_grad_norm_(
                        model.parameters(), max_norm=config.train.grad_clip)
                optimizer.step()
                optimizer.zero_grad()
            pbar.set_description(
                'epoch %.2f, loss: %.4f' %
                (epoch + (idx + 1) / config.train.snapshot_epoch, loss))
        if config.train.save_snapshot:
            torch.save(
                model.state_dict(),
                os.path.join(config.exp_dir, config.exp_name,
                             'epoch-%d.ckpt' % (epoch + 1)))
    # Guard against division by zero: the sqrtn branch above never updates
    # the timing counters, so 'epochs' may legitimately still be 0 here.
    train_time = (training_time['time_all'] / training_time['epochs']
                  if training_time['epochs'] else 0.)
    print("Time consumption of one epoch of training BiGG is: {:.6f}".format(
        train_time))
    return model
コード例 #4
0
    # Dispatch on the requested pipeline phase.  (Fragment: the enclosing
    # function and any further elif branches are outside this view.)
    if args.phase == 'preprocessing':
        from GraphGenerator.preprocessing import utils
        tmp_path = args.input
        print("# Load edgelist...")
        # Parse the input edge-list file into a single graph object.
        graph = utils.edgelist_to_graph(tmp_path)
        graphlist = [graph]
        print("# Save graphlist...")
        # Output name defaults to "<input>.graphs" when not given.
        if args.output is None:
            output_name = "{}.graphs".format(args.input)
        else:
            output_name = args.output
        dataio.save_data(graphlist, name=output_name)

    elif args.phase == 'train':
        # NOTE(review): get_config(args.config) runs *before* the
        # args.config-is-None default a few lines below, so that default
        # never affects this call — confirm this ordering is intended.
        config = get_config(args.config)
        set_device(config)
        from GraphGenerator.train import train_base as train
        print("Start loading data...")
        input_data = dataio.load_data(args.input)
        if args.config is None:
            args.config = "config/{}.yaml".format(args.generator)
        # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)
        print("Start (training and) inferencing graph...")
        output_data = []
        if isinstance(input_data, list):
            # Train and sample per input graph; flatten list-valued results.
            for graph in input_data:
                tmp_data = train.train_and_inference(graph,
                                                     args.generator,
                                                     config=config)
                if isinstance(tmp_data, list):
                    output_data.extend(tmp_data)
コード例 #5
0
def train_netgan(input_data, config):
    """Train a NetGAN model on one graph and sample new graphs from it.

    :param input_data: nx.Graph to fit.
    :param config: experiment configuration (model/train/test sections, seed).
    :return: list of generated graphs (one per config.test.num_gen).
    """
    set_device(config)
    emb_size = config.model.embedding_dim
    l_rate = config.train.lr
    # Drop self-loops, symmetrize and binarize the adjacency matrix.
    _A_obs = nx.adjacency_matrix(input_data)
    _A_obs = _A_obs - sp.csr_matrix(np.diag(_A_obs.diagonal()))
    _A_obs = _A_obs + _A_obs.T
    _A_obs[_A_obs > 1] = 1
    # Restrict to the largest connected component.
    lcc = largest_connected_components(_A_obs)
    _A_obs = _A_obs[lcc, :][:, lcc]
    _N = _A_obs.shape[0]
    val_share = config.train.val_share
    test_share = config.train.test_share
    seed = config.seed
    train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(
        _A_obs,
        val_share,
        test_share,
        seed,
        undirected=True,
        connected=True,
        asserts=True)
    # Rebuild a (symmetric) training adjacency from the kept edges.
    train_graph = sp.coo_matrix(
        (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:,
                                                                 1]))).tocsr()
    assert (train_graph.toarray() == train_graph.toarray().T).all()
    rw_len = config.model.rw_len
    batch_size = config.train.batch_size

    walker = RandomWalker(train_graph, rw_len, p=1, q=1, batch_size=batch_size)

    # Prime the walk generator once before handing it to NetGAN.
    walker.walk().__next__()
    netgan = NetGAN(_N,
                    rw_len,
                    walk_generator=walker.walk,
                    gpu_id=0,
                    use_gumbel=True,
                    disc_iters=3,
                    W_down_discriminator_size=emb_size,
                    W_down_generator_size=emb_size,
                    l2_penalty_generator=1e-7,
                    l2_penalty_discriminator=5e-5,
                    batch_size=batch_size,
                    generator_layers=[40],
                    discriminator_layers=[30],
                    temp_start=5,
                    learning_rate=l_rate)
    stopping_criterion = config.train.stopping_criterion

    assert stopping_criterion in [
        "val", "eo"
    ], "Please set the desired stopping criterion."

    if stopping_criterion == "val":  # use val criterion for early stopping
        stopping = None
    elif stopping_criterion == "eo":  # use eo criterion for early stopping
        stopping = 0.5  # set the target edge overlap here
    else:
        stopping = None
    eval_iter = config.train.eval_iter
    display_iter = config.train.display_iter

    log_dict = netgan.train(A_orig=_A_obs,
                            val_ones=val_ones,
                            val_zeros=val_zeros,
                            stopping=stopping,
                            eval_every=eval_iter,
                            plot_every=display_iter,
                            max_patience=20,
                            max_iters=200000)

    sample_many = netgan.generate_discrete(10000, reuse=True)

    samples = []

    # Draw discrete random-walk samples; print coarse progress every 1000.
    for _ in range(config.test.sample_num):
        if (_ + 1) % 1000 == 0:
            print(_ + 1)
        samples.append(sample_many.eval({netgan.tau: 0.5}))

    rws = np.array(samples).reshape([-1, rw_len])
    args_all = [(rws, _N) for _ in range(config.test.num_gen)]
    # BUGFIX: use the pool as a context manager so worker processes are
    # reliably terminated instead of leaking when scoring finishes or raises.
    with mp.Pool(processes=5) as pool:
        results = [
            pool.apply_async(score_to_graph, args=args) for args in args_all
        ]
        graphs = [p.get() for p in results]
    return graphs
コード例 #6
0
def train_and_inference(input_data, generator, config=None, repeat=1):
    """
    train model using input graph, and infer new graphs
    :param input_data: input graph(s), whose type is networkx.Graph or list of nx.Graph
    :param generator: name of graph generator
    :param config: configuration of graph generator
    :param repeat: number of new graphs
    :return: generated graphs
    """
    # NOTE(review): dispatch below builds call targets via eval() on strings
    # derived from `generator`; this is safe only while `generator` values
    # stay within the whitelists each branch checks — never pass untrusted
    # input here.
    # graphs = []
    if generator in ['e-r', 'w-s', 'b-a', 'E-R', 'W-S', 'B-A']:
        # Classic random-graph models: Erdos-Renyi, Watts-Strogatz,
        # Barabasi-Albert.  "e-r" -> module "er", function "e_r", etc.
        import GraphGenerator.models.er as er
        import GraphGenerator.models.ws as ws
        import GraphGenerator.models.ba as ba
        tmp_name = generator.lower()
        model_name = "{}.{}".format(tmp_name.replace('-', ''), tmp_name.replace('-', '_'))
        graphs = eval(model_name)(input_data, config)
    elif generator in ['rtg', 'RTG', 'bter', 'BTER']:
        # RTG / BTER: module and function share the generator name.
        import GraphGenerator.models.rtg as rtg
        import GraphGenerator.models.bter as bter
        model_name = "{}.{}".format(generator, generator)
        graphs = eval(model_name)(input_data, config)
    elif generator in ['sbm', 'dcsbm']:
        # Stochastic block models share one module; variant passed as arg.
        import GraphGenerator.models.sbm as sbm
        graphs = sbm.generate(input_data, generator, repeat)
    elif generator in ['rmat', 'kronecker']:
        import GraphGenerator.models.kronecker as kronecker
        import GraphGenerator.models.rmat as rmat
        graphs = eval(generator).generate(input_data, config)
    elif generator in ['vgae', 'graphite', 'sbmgnn']:
        # Autoencoder-style deep generators: build sparse adjacency plus an
        # identity feature matrix, train, then sample.
        set_device(config)
        sp_adj = nx.adjacency_matrix(input_data).astype(np.float32)
        # print("Shape!", sp_adj.shape)
        # Identity features: one-hot per node, as a sparse tensor on device.
        feature = coo_to_csp(sp.diags(np.array([1. for i in range(sp_adj.shape[0])],
                                                dtype=np.float32)).tocoo()).to(config.device)
        if generator == 'vgae':
            import GraphGenerator.models.vgae as vgae
            # Variational vs plain autoencoder is a config switch.
            if config.model.variational:
                model_name = "{}.{}".format(generator, "VGAE")
            else:
                model_name = "{}.{}".format(generator, "GAE")
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.embedding_dim,
                                     config.model.hidden_dim,
                                     act=F.relu,
                                     layers=config.model.num_GNN_layers).to(config.device)
        elif generator == 'graphite':
            import GraphGenerator.models.graphite as graphite
            if config.model.variational:
                model_name = "{}.{}".format(generator, "GraphiteVAE")
            else:
                model_name = "{}.{}".format(generator, "GraphiteAE")
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.hidden_dim,
                                     config.model.embedding_dim,
                                     config.model.decoding_dim,
                                     act=F.relu).to(config.device)
        elif generator == 'sbmgnn':
            import GraphGenerator.models.sbmgnn as sbmgnn
            model_name = "{}.{}".format(generator, 'SBMGNN')
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.hidden,
                                     config=config).to(config.device)
        else:
            # Unreachable given the outer whitelist; kept as a hard stop.
            # model = None
            sys.exit(1)
        optimizer = optim.Adam(model.parameters(), lr=config.train.lr)
        model = train_autoencoder_base(sp_adj, feature, config, model, optimizer)
        tmp_memory = get_peak_gpu_memory(device=config.device)
        print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024))
        flush_cached_gpu_memory()
        graphs = infer_autoencoder(sp_adj, feature, config, model, repeat=repeat)
    elif generator in ['graphrnn', 'gran', 'bigg']:
        # Sequence/recursive deep generators: train_<name> then infer_<name>,
        # resolved by eval() from the generator string.
        import GraphGenerator.train.train_graphrnn as graphrnn
        import GraphGenerator.models.bigg as bigg
        import GraphGenerator.models.gran as gran
        if isinstance(input_data, nx.Graph):
            input_data = [input_data]
        trained_model = eval("{}.train_{}".format(generator, generator))(input_data, config)
        tmp_memory = get_peak_gpu_memory(device=config.device)
        print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024))
        flush_cached_gpu_memory()
        graphs = eval("{}.infer_{}".format(generator, generator))(input_data, config, trained_model)
    else:
        print("Wrong generator name! Process exit..")
        sys.exit(1)
    return graphs