def infer_bigg(test_graphs, config, model=None):
    """Sample ``config.test.num_test_gen`` graphs from a (trained) BiGG model.

    :param test_graphs: list of nx.Graph; used to size the model and to build
        the empirical node-count distribution that sampling draws from.
    :param config: experiment configuration (seeds, device, test options).
    :param model: optional pre-trained RecurTreeGen; when None a fresh model
        is built and, if configured, loaded from a snapshot.
    :return: list of generated nx.Graph objects.
    """
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    set_device(config)
    setup_treelib(config)
    max_num_nodes = max(len(gg.nodes) for gg in test_graphs)
    config.model.max_num_nodes = max_num_nodes
    if model is None:
        model = RecurTreeGen(config).to(config.device)
    for g in test_graphs:
        TreeLib.InsertGraph(g)
    test_model_path = os.path.join(config.test.test_model_dir,
                                   config.test.test_model_name)
    # BUG FIX: the guard previously tested os.path.isfile on the snapshot
    # *directory* (config.test.test_model_dir) rather than the checkpoint
    # file itself, so torch.load could be skipped or fail unexpectedly.
    if config.test.load_snapshot and os.path.isfile(test_model_path):
        print('loading from', test_model_path)
        model.load_state_dict(torch.load(test_model_path))
    # get num nodes dist
    num_node_dist = get_node_dist(test_graphs)
    gen_graphs = []
    infering_time = {
        'time_all': 0.,
        'epochs': 0,
    }
    with torch.no_grad():
        for _ in tqdm(range(config.test.num_test_gen)):
            # draw a node count from the empirical test-graph distribution
            num_nodes = np.argmax(np.random.multinomial(1, num_node_dist))
            start_time = time.time()
            _, pred_edges, _ = model(num_nodes, display=config.test.display)
            for e in pred_edges:
                # BiGG emits edges with src index > dst index
                assert e[0] > e[1]
            pred_g = nx.Graph()
            pred_g.add_edges_from(pred_edges)
            end_time = time.time()
            infering_time['time_all'] += end_time - start_time
            infering_time['epochs'] += 1
            gen_graphs.append(pred_g)
    # guard against division by zero when num_test_gen == 0
    infer_time = infering_time['time_all'] / max(infering_time['epochs'], 1)
    print("\nTime consumption of infering one graph by BiGG is: {:.6f}".format(
        infer_time))
    return gen_graphs
def bigg_test(args, config):
    """Quick smoke test: fit BiGG on one tiny Barabasi-Albert graph.

    Performs two Adam steps on a single 10-node B-A graph and prints the
    per-step loss, verifying the model/TreeLib pipeline works end to end.
    """
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    set_device(config)
    setup_treelib(config)
    # a single tiny synthetic graph is enough for a smoke test
    toy_graphs = [nx.barabasi_albert_graph(10, 2)]
    TreeLib.InsertGraph(toy_graphs[0])
    largest = max(len(g.nodes) for g in toy_graphs)
    config.model.max_num_nodes = largest
    model = RecurTreeGen(config).to(config.device)
    opt = optim.Adam(model.parameters(),
                     lr=config.train.lr,
                     weight_decay=1e-4)
    for step in range(2):
        opt.zero_grad()
        ll, _ = model.forward_train([0])
        loss = -ll / largest
        print('iter', step, 'loss', loss.item())
        loss.backward()
        opt.step()
def train_bigg(train_graphs, config):
    """Train a BiGG (RecurTreeGen) model on the given graphs.

    :param train_graphs: list of nx.Graph training graphs.
    :param config: experiment configuration (seeds, device, train options).
    :return: the trained RecurTreeGen model.
    """
    # print("### Type:", type(train_graphs))
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    set_device(config)
    setup_treelib(config)
    for g in train_graphs:
        TreeLib.InsertGraph(g)
    max_num_nodes = max(len(gg.nodes) for gg in train_graphs)
    config.model.max_num_nodes = max_num_nodes
    model = RecurTreeGen(config).to(config.device)
    if config.train.resume:
        resume_model_path = os.path.join(config.train.resume_model_dir,
                                         config.train.resume_model_name)
        # BUG FIX: the guard previously tested os.path.isfile on the resume
        # *directory* (resume_model_dir), which is never a file, so resuming
        # silently never happened. Check the checkpoint file itself.
        if os.path.isfile(resume_model_path):
            print('loading from', resume_model_path)
            model.load_state_dict(torch.load(resume_model_path))
    optimizer = optim.Adam(model.parameters(),
                           lr=config.train.lr,
                           weight_decay=1e-4)
    indices = list(range(len(train_graphs)))
    if config.train.resume_epoch is None:
        config.train.resume_epoch = 0
    training_time = {
        'time_all': 0.,
        'epochs': 0,
    }
    for epoch in range(config.train.resume_epoch, config.train.max_epochs):
        pbar = tqdm(range(config.train.snapshot_epoch))
        optimizer.zero_grad()
        for idx in pbar:
            # sample a random batch of graph indices
            random.shuffle(indices)
            batch_indices = indices[:config.train.batch_size]
            num_nodes = sum(len(train_graphs[i]) for i in batch_indices)
            if config.model.blksize < 0 or num_nodes <= config.model.blksize:
                start_time = time.time()
                ll, _ = model.forward_train(batch_indices)
                loss = -ll / num_nodes
                loss.backward()
                end_time = time.time()
                training_time['time_all'] += end_time - start_time
                training_time['epochs'] += 1
                loss = loss.item()
            else:
                # batch too large for one pass: per-graph checkpointed
                # forward/backward (sqrtn_forward_backward runs its own
                # backward pass internally; no loss.backward() here).
                ll = 0.0
                for i in batch_indices:
                    n = len(train_graphs[i])
                    cur_ll, _ = sqrtn_forward_backward(
                        model,
                        graph_ids=[i],
                        list_node_starts=[0],
                        num_nodes=n,
                        blksize=config.model.blksize,
                        loss_scale=1.0 / n)
                    ll += cur_ll
                loss = -ll / num_nodes
            # gradient accumulation: step only every accum_grad iterations
            if (idx + 1) % config.train.accum_grad == 0:
                if config.train.grad_clip > 0:
                    torch.nn.utils.clip_grad_norm_(
                        model.parameters(), max_norm=config.train.grad_clip)
                optimizer.step()
                optimizer.zero_grad()
            pbar.set_description(
                'epoch %.2f, loss: %.4f' %
                (epoch + (idx + 1) / config.train.snapshot_epoch, loss))
        if config.train.save_snapshot:
            torch.save(
                model.state_dict(),
                os.path.join(config.exp_dir, config.exp_name,
                             'epoch-%d.ckpt' % (epoch + 1)))
    # guard against division by zero: the sqrtn branch never increments
    # training_time['epochs'], and max_epochs may equal resume_epoch
    train_time = training_time['time_all'] / max(training_time['epochs'], 1)
    print("Time consumption of one epoch of training BiGG is: {:.6f}".format(
        train_time))
    return model
# Dispatch on the requested CLI phase (further branches may follow below).
if args.phase == 'preprocessing':
    # Convert a raw edge-list file into a pickled list of graphs.
    from GraphGenerator.preprocessing import utils
    tmp_path = args.input
    print("# Load edgelist...")
    graph = utils.edgelist_to_graph(tmp_path)
    graphlist = [graph]
    print("# Save graphlist...")
    if args.output is None:
        # default output name derived from the input path
        output_name = "{}.graphs".format(args.input)
    else:
        output_name = args.output
    dataio.save_data(graphlist, name=output_name)
elif args.phase == 'train':
    # NOTE(review): the config is loaded *before* the
    # "config/{generator}.yaml" fallback below is assigned to args.config,
    # so the fallback never influences get_config — confirm intended order.
    config = get_config(args.config)
    set_device(config)
    from GraphGenerator.train import train_base as train
    print("Start loading data...")
    input_data = dataio.load_data(args.input)
    if args.config is None:
        args.config = "config/{}.yaml".format(args.generator)
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)
    print("Start (training and) inferencing graph...")
    output_data = []
    # train/infer once per input graph, flattening generated graph lists
    if isinstance(input_data, list):
        for graph in input_data:
            tmp_data = train.train_and_inference(graph, args.generator,
                                                 config=config)
            if isinstance(tmp_data, list):
                output_data.extend(tmp_data)
def train_netgan(input_data, config):
    """Train NetGAN on a single graph and sample new graphs from it.

    :param input_data: nx.Graph to fit.
    :param config: experiment configuration (model/train/test options).
    :return: list of generated graphs (config.test.num_gen of them).
    """
    set_device(config)
    emb_size = config.model.embedding_dim
    l_rate = config.train.lr
    # drop self-loops, symmetrize, binarize, then keep only the largest
    # connected component of the adjacency matrix
    _A_obs = nx.adjacency_matrix(input_data)
    _A_obs = _A_obs - sp.csr_matrix(np.diag(_A_obs.diagonal()))
    _A_obs = _A_obs + _A_obs.T
    _A_obs[_A_obs > 1] = 1
    lcc = largest_connected_components(_A_obs)
    _A_obs = _A_obs[lcc, :][:, lcc]
    _N = _A_obs.shape[0]
    val_share = config.train.val_share
    test_share = config.train.test_share
    seed = config.seed
    train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(
        _A_obs, val_share, test_share, seed,
        undirected=True, connected=True, asserts=True)
    train_graph = sp.coo_matrix(
        (np.ones(len(train_ones)),
         (train_ones[:, 0], train_ones[:, 1]))).tocsr()
    assert (train_graph.toarray() == train_graph.toarray().T).all()
    rw_len = config.model.rw_len
    batch_size = config.train.batch_size
    walker = RandomWalker(train_graph, rw_len, p=1, q=1, batch_size=batch_size)
    # prime the walk generator once before handing it to NetGAN
    walker.walk().__next__()
    netgan = NetGAN(_N, rw_len, walk_generator=walker.walk,
                    gpu_id=0, use_gumbel=True, disc_iters=3,
                    W_down_discriminator_size=emb_size,
                    W_down_generator_size=emb_size,
                    l2_penalty_generator=1e-7,
                    l2_penalty_discriminator=5e-5,
                    batch_size=batch_size,
                    generator_layers=[40],
                    discriminator_layers=[30],
                    temp_start=5,
                    learning_rate=l_rate)
    stopping_criterion = config.train.stopping_criterion
    assert stopping_criterion in [
        "val", "eo"
    ], "Please set the desired stopping criterion."
    if stopping_criterion == "val":  # use val criterion for early stopping
        stopping = None
    elif stopping_criterion == "eo":  # use eo criterion for early stopping
        stopping = 0.5  # set the target edge overlap here
    else:
        stopping = None
    eval_iter = config.train.eval_iter
    display_iter = config.train.display_iter
    log_dict = netgan.train(A_orig=_A_obs, val_ones=val_ones,
                            val_zeros=val_zeros, stopping=stopping,
                            eval_every=eval_iter, plot_every=display_iter,
                            max_patience=20, max_iters=200000)
    sample_many = netgan.generate_discrete(10000, reuse=True)
    samples = []
    for step in range(config.test.sample_num):
        if (step + 1) % 1000 == 0:
            print(step + 1)
        samples.append(sample_many.eval({netgan.tau: 0.5}))
    rws = np.array(samples).reshape([-1, rw_len])
    # BUG FIX: the worker pool was never closed/joined, leaking worker
    # processes. The context manager tears the pool down once every
    # async result has been collected.
    with mp.Pool(processes=5) as pool:
        args_all = [(rws, _N) for _ in range(config.test.num_gen)]
        results = [
            pool.apply_async(score_to_graph, args=args) for args in args_all
        ]
        graphs = [p.get() for p in results]
    return graphs
def train_and_inference(input_data, generator, config=None, repeat=1):
    """
    Train the chosen graph generator on the input graph(s) and infer new graphs.

    :param input_data: input graph(s), whose type is networkx.Graph or list of nx.Graph
    :param generator: name of graph generator (e.g. 'e-r', 'sbm', 'vgae', 'bigg')
    :param config: configuration of graph generator
    :param repeat: number of new graphs
    :return: generated graphs
    """
    # graphs = []
    # NOTE(review): dispatch below relies on eval() over strings built from
    # the `generator` argument; values come from the CLI rather than
    # untrusted input, but a dict of callables would be safer than eval.
    if generator in ['e-r', 'w-s', 'b-a', 'E-R', 'W-S', 'B-A']:
        # classic random-graph baselines (Erdos-Renyi, Watts-Strogatz,
        # Barabasi-Albert): e.g. 'e-r' resolves to er.e_r(input_data, config)
        import GraphGenerator.models.er as er
        import GraphGenerator.models.ws as ws
        import GraphGenerator.models.ba as ba
        tmp_name = generator.lower()
        model_name = "{}.{}".format(tmp_name.replace('-', ''),
                                    tmp_name.replace('-', '_'))
        graphs = eval(model_name)(input_data, config)
    elif generator in ['rtg', 'RTG', 'bter', 'BTER']:
        # RTG / BTER: resolves to e.g. rtg.rtg(input_data, config)
        import GraphGenerator.models.rtg as rtg
        import GraphGenerator.models.bter as bter
        model_name = "{}.{}".format(generator, generator)
        graphs = eval(model_name)(input_data, config)
    elif generator in ['sbm', 'dcsbm']:
        # (degree-corrected) stochastic block model
        import GraphGenerator.models.sbm as sbm
        graphs = sbm.generate(input_data, generator, repeat)
    elif generator in ['rmat', 'kronecker']:
        import GraphGenerator.models.kronecker as kronecker
        import GraphGenerator.models.rmat as rmat
        graphs = eval(generator).generate(input_data, config)
    elif generator in ['vgae', 'graphite', 'sbmgnn']:
        # GNN autoencoder family: build the model, train, then sample
        set_device(config)
        sp_adj = nx.adjacency_matrix(input_data).astype(np.float32)
        # print("Shape!", sp_adj.shape)
        # identity matrix as node features (one-hot feature per node)
        feature = coo_to_csp(sp.diags(np.array([1.
                                                for i in range(sp_adj.shape[0])],
                                               dtype=np.float32)).tocoo()).to(config.device)
        if generator == 'vgae':
            import GraphGenerator.models.vgae as vgae
            # variational flag selects VGAE vs plain GAE
            if config.model.variational:
                model_name = "{}.{}".format(generator, "VGAE")
            else:
                model_name = "{}.{}".format(generator, "GAE")
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.embedding_dim,
                                     config.model.hidden_dim,
                                     act=F.relu,
                                     layers=config.model.num_GNN_layers).to(config.device)
        elif generator == 'graphite':
            import GraphGenerator.models.graphite as graphite
            if config.model.variational:
                model_name = "{}.{}".format(generator, "GraphiteVAE")
            else:
                model_name = "{}.{}".format(generator, "GraphiteAE")
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.hidden_dim,
                                     config.model.embedding_dim,
                                     config.model.decoding_dim,
                                     act=F.relu).to(config.device)
        elif generator == 'sbmgnn':
            import GraphGenerator.models.sbmgnn as sbmgnn
            model_name = "{}.{}".format(generator, 'SBMGNN')
            model = eval(model_name)(config.model.num_nodes,
                                     config.model.hidden,
                                     config=config).to(config.device)
        else:
            # unreachable given the enclosing elif; kept as a guard
            # model = None
            sys.exit(1)
        optimizer = optim.Adam(model.parameters(), lr=config.train.lr)
        model = train_autoencoder_base(sp_adj, feature, config, model, optimizer)
        tmp_memory = get_peak_gpu_memory(device=config.device)
        print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024))
        flush_cached_gpu_memory()
        graphs = infer_autoencoder(sp_adj, feature, config, model, repeat=repeat)
    elif generator in ['graphrnn', 'gran', 'bigg']:
        # deep autoregressive generators: train_<name>() then infer_<name>()
        import GraphGenerator.train.train_graphrnn as graphrnn
        import GraphGenerator.models.bigg as bigg
        import GraphGenerator.models.gran as gran
        if isinstance(input_data, nx.Graph):
            input_data = [input_data]
        trained_model = eval("{}.train_{}".format(generator, generator))(input_data, config)
        tmp_memory = get_peak_gpu_memory(device=config.device)
        print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024))
        flush_cached_gpu_memory()
        graphs = eval("{}.infer_{}".format(generator, generator))(input_data, config, trained_model)
    else:
        print("Wrong generator name! Process exit..")
        sys.exit(1)
    return graphs