def walk_from_graph(self):
    """Yield batches of random walks sampled from ``self.graph``, forever.

    Start nodes come from ``self.graph.node_batch_iter`` when
    ``self.train_files`` is None; otherwise they are the integer node ids
    read one per line from the files in ``self.train_files``. Both sources
    are cycled endlessly, so this generator never finishes on its own.

    Yields:
        The result of ``deepwalk_sample`` for one batch of start nodes.
    """

    def node_generator():
        """Yield batches of start nodes, cycling over the node source."""
        if self.train_files is None:
            while True:
                for nodes in self.graph.node_batch_iter(self.batch_size):
                    yield nodes
        else:
            nodes = []
            # The loop below never exits, so a partially filled batch is
            # simply carried over into the next pass over the files; no
            # trailing flush is needed (the old one was unreachable).
            while True:
                for filename in self.train_files:
                    with io.open(filename) as inf:
                        for line in inf:
                            nodes.append(int(line.strip('\n\t')))
                            if len(nodes) == self.batch_size:
                                yield nodes
                                nodes = []

    # The sampling mode does not change between batches: decide it once.
    use_alias = ("alias" in self.graph.node_feat and
                 "events" in self.graph.node_feat)
    if use_alias:
        log.info("Deepwalk using alias sample")

    for nodes in node_generator():
        if use_alias:
            walks = deepwalk_sample(self.graph, nodes, self.walk_len,
                                    "alias", "events")
        else:
            walks = deepwalk_sample(self.graph, nodes, self.walk_len)
        yield walks
def main(args):
    """Train a SkipGram model with a decaying learning rate and save it.

    Reads the dataset named by ``args.dataset``, trains for ``args.epoch``
    epochs, logs the loss returned by ``train`` after every epoch and finally
    writes the model state dict to ``model.pdparams``.
    """
    if not args.use_cuda:
        paddle.set_device("cpu")

    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    skip_gram = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(skip_gram)

    # Total number of optimizer steps, used as the decay horizon.
    train_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    lr_schedule = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=lr_schedule, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    walk_collate = BatchRandWalk(graph, args.walk_len, args.win_size,
                                 args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=walk_collate)

    for epoch in tqdm.tqdm(range(args.epoch)):
        epoch_loss = train(model, data_loader, optim)
        log.info("Runing epoch:%s\t train_loss:%.6f", epoch, epoch_loss)

    paddle.save(model.state_dict(), "model.pdparams")
def mlp(self, features, name):
    """Run ``features`` through two fc + normalisation + relu stages.

    The first stage widens the last dimension to twice its size, the second
    maps it back. The normalisation is layer norm when
    ``self.args.norm_type == "layer_norm"`` and batch norm otherwise.

    Args:
        features: input variable; only ``shape[-1]`` is read here.
        name: prefix used to build the parameter names of each stage.

    Returns:
        The output of the second stage.
    """
    width = features.shape[-1]
    out = features
    for stage, size in enumerate((width * 2, width)):
        out = L.fc(out,
                   size=size,
                   name="%s_fc_%s" % (name, stage),
                   act=None)
        if self.args.norm_type != "layer_norm":
            log.info("using batch_norm")
            out = L.batch_norm(out)
        else:
            log.info("norm_type is %s" % self.args.norm_type)
            scale_attr = F.ParamAttr(
                name="norm_scale_%s_%s" % (name, stage),
                initializer=F.initializer.Constant(1.0))
            shift_attr = F.ParamAttr(
                name="norm_bias_%s_%s" % (name, stage),
                initializer=F.initializer.Constant(0.0))
            out = L.layer_norm(
                out,
                begin_norm_axis=1,
                param_attr=scale_attr,
                bias_attr=shift_attr)
        # NOTE(review): the source lost its indentation; graph_norm is
        # assumed to apply after either normalisation branch - confirm.
        out = pgl.layers.graph_norm(self.graph_wrapper, out)
        out = L.relu(out)
    return out
def load_link_prediction_train_data(config, str2id, term_file, terms,
                                    item_distribution):
    """Convert a tab-separated training file into id arrays on disk.

    Every column of every line is truncated to ``config.max_seqlen``
    characters and mapped to an integer id through ``str2id``. A term seen
    for the first time is added to ``str2id``, written to ``term_file`` as
    ``<column index>\\t<untruncated column>`` and gets a ``0`` appended to
    ``item_distribution``. Columns 0 and 1 form the (src, dst) pair; the
    remaining columns are kept as negative samples.

    Writes ``train_data.npy`` (int64 pairs) and, when at least one line was
    read, ``neg_samples.npy`` into ``config.graph_work_path``.

    Args:
        config: provides train_data, encoding, max_seqlen, graph_work_path.
        str2id: dict updated in place.
        term_file: writable text file object.
        terms: not used by this function.
        item_distribution: list extended in place.
    """
    pairs = []
    neg_samples = []
    with io.open(config.train_data, encoding=config.encoding) as fin:
        for line_no, line in enumerate(fin):
            if line_no % 100000 == 0:
                log.info("%s readed %s lines" % (config.train_data, line_no))

            slots = []
            columns = line.strip("\n").split("\t")
            for col_idx, col in enumerate(columns):
                key = col[:config.max_seqlen]
                if key not in str2id:
                    str2id[key] = len(str2id)
                    term_file.write(str(col_idx) + "\t" + col + "\n")
                    item_distribution.append(0)
                slots.append(str2id[key])

            src, dst = slots[0], slots[1]
            neg_samples.append(slots[2:])
            pairs.append((src, dst))

    pair_array = np.array(pairs, dtype="int64")
    np.save(os.path.join(config.graph_work_path, "train_data.npy"),
            pair_array)

    if len(neg_samples) != 0:
        neg_path = os.path.join(config.graph_work_path, "neg_samples.npy")
        np.save(neg_path, np.array(neg_samples))
def split_10_cv(dataset, args):
    """Run 10-fold cross validation and return the test accuracies.

    The dataset is shuffled once, then split into 10 contiguous folds. For
    each fold the remaining samples are divided again: the first ninth is
    used for validation and the rest for training. ``main`` is called once
    per fold.

    Returns:
        (mean_acc, test_acc): the mean of the per-fold results and the list
        of per-fold results as returned by ``main``.
    """
    dataset.shuffle()
    # KFold only needs something of the right length to produce indices.
    placeholder = np.array([0] * len(dataset))
    folds = KFold(n_splits=10, shuffle=False)

    test_acc = []
    fold_iter = folds.split(placeholder, placeholder)
    for fold_no, (train_index, test_index) in enumerate(fold_iter, 1):
        train_val_dataset = Subset(dataset, train_index)
        test_dataset = Subset(dataset, test_index)

        positions = list(range(0, len(train_val_dataset)))
        num_val = int(len(train_val_dataset) / 9)
        val_dataset = Subset(train_val_dataset, positions[:num_val])
        train_dataset = Subset(train_val_dataset, positions[num_val:])

        log.info("######%d fold of 10-fold cross validation######" % fold_no)
        test_acc.append(main(args, train_dataset, val_dataset, test_dataset))

    return sum(test_acc) / len(test_acc), test_acc
def test(model, data_loader, log_per_step=1000, threshold=0.3):
    """Evaluate ``model`` on ``data_loader`` and log loss and F1 scores.

    Args:
        model: callable layer; put into eval mode before the loop.
        data_loader: iterable of ``(node, labels)`` batches.
        log_per_step: accepted for interface compatibility; not used.
        threshold: forwarded to ``topk_f1_score``.

    Returns:
        (test_loss, macro_f1, micro_f1), where ``test_loss`` is the
        sample-weighted mean of the per-batch BCE-with-logits loss.
    """
    model.eval()
    total_loss = 0.
    total_sample = 0
    bce_loss = paddle.nn.BCEWithLogitsLoss()
    test_probs_vals, test_labels_vals, test_topk_vals = [], [], []

    # Evaluation never calls backward(), so skip building autograd graphs.
    with paddle.no_grad():
        for node, labels in data_loader:
            num_samples = len(node)
            node = paddle.to_tensor(node)
            labels = paddle.to_tensor(labels)

            logits = model(node)
            probs = paddle.nn.functional.sigmoid(logits)
            loss = bce_loss(logits, labels)
            # Sum of the labels along the last axis, passed to
            # topk_f1_score together with the probabilities.
            topk = labels.sum(-1)

            test_probs_vals.append(probs.numpy())
            test_labels_vals.append(labels.numpy())
            test_topk_vals.append(topk.numpy())

            total_loss += loss.numpy()[0] * num_samples
            total_sample += num_samples

    test_probs_array = np.concatenate(test_probs_vals)
    test_labels_array = np.concatenate(test_labels_vals)
    test_topk_array = np.concatenate(test_topk_vals)

    test_macro_f1 = topk_f1_score(test_labels_array, test_probs_array,
                                  test_topk_array, "macro", threshold)
    test_micro_f1 = topk_f1_score(test_labels_array, test_probs_array,
                                  test_topk_array, "micro", threshold)
    test_loss_val = total_loss / total_sample

    log.info("\t\tTest Loss: %f " % test_loss_val + "Test Macro F1: %f " %
             test_macro_f1 + "Test Micro F1: %f " % test_micro_f1)
    return test_loss_val, test_macro_f1, test_micro_f1
def multi_m2v_node_generate(self):
    """Yield node batches, rotating over the configured start node types.

    ``self.first_node_type`` is a ';'-separated list of node types. Batches
    are drawn from one per-type iterator after another, round-robin, until
    the iterator whose turn it is runs out; generation then stops.

    Yields:
        (nodes, idx): a batch from ``self.graph.node_batch_iter`` and the
        position of its node type within ``self.first_node_type``.
    """
    start_types = self.first_node_type.split(';')
    num_start_types = len(start_types)

    # One batch iterator per node type present in the graph.
    node_generators = {
        node_type: self.graph.node_batch_iter(
            self.batch_size, n_type=node_type)
        for node_type in np.unique(self.graph.node_types).tolist()
    }

    turn = 0
    while True:
        idx = turn % num_start_types
        n_type = start_types[idx]
        try:
            nodes = next(node_generators[n_type])
        except StopIteration:
            log.info("node type of %s iteration finished in one epoch" %
                     (n_type))
            node_generators[n_type] = \
                self.graph.node_batch_iter(self.batch_size, n_type=n_type)
            return
        yield (nodes, idx)
        turn += 1
def train_prog(exe, program, loss, node2vec_pyreader, args, train_steps):
    """Run the training loop for ``train_steps`` steps, saving periodically.

    Parameters are saved under ``args.save_path/<step>`` whenever the step
    counter is a multiple of ``args.steps_per_save`` and once more when
    ``train_steps`` is reached. The profiler is started before the loop and
    stopped at every save point.
    """
    node2vec_pyreader.start()
    profiler.start_profiler("All")

    step = 0
    while True:
        try:
            tic = time.time()
            loss_val = exe.run(program, fetch_list=[loss])
            log.info("step %s: loss %.5f speed: %.5f s/step" %
                     (step, np.mean(loss_val), time.time() - tic))
            step += 1
        except F.core.EOFException:
            # The reader ran dry: reset it and keep looping.
            node2vec_pyreader.reset()

        save_now = step % args.steps_per_save == 0 or step == train_steps
        if save_now:
            profiler.stop_profiler("total", "/tmp/profile")
            save_root = args.save_path
            model_path = os.path.join(save_root, str(step))
            if not os.path.exists(save_root):
                os.makedirs(save_root)
            F.io.save_params(exe, dirname=model_path, main_program=program)

        if step == train_steps:
            break
def run_predict(py_reader,
                exe,
                program,
                model_dict,
                log_per_step=1,
                args=None):
    """Run inference and write one ``<key>\\t<feature>`` line per sample.

    Output goes to ``<args.output_path>/part-<PADDLE_TRAINER_ID>``. When
    ``args.input_type == "text"`` the fetched source index is translated
    back to its line in ``<args.graph_work_path>/terms.txt``; otherwise the
    raw index is written.

    Args:
        py_reader: callable returning an iterable of feed dicts.
        exe: executor used to run ``program``.
        program: the inference program.
        model_dict: object whose ``outputs`` is the fetch list; five fetched
            values are expected, of which the first and the fourth are used.
        log_per_step: log progress every this many batches.
        args: provides graph_work_path, encoding, output_path, input_type.
    """
    terms_path = os.path.join(args.graph_work_path, "terms.txt")
    with io.open(terms_path, encoding=args.encoding) as terms_file:
        id2str = terms_file.readlines()

    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))

    if not os.path.exists(args.output_path):
        try:
            os.mkdir(args.output_path)
        except OSError:
            # Several trainers may reach this point at once; losing the
            # race to create the directory is not an error.
            if not os.path.isdir(args.output_path):
                raise

    out_path = "%s/part-%s" % (args.output_path, trainer_id)
    batch = 0
    # The context manager closes the output even if inference fails midway.
    with io.open(out_path, "w", encoding="utf8") as fout:
        for batch_feed_dict in py_reader():
            batch += 1
            batch_usr_feat, _, _, batch_src_real_index, _ = exe.run(
                program,
                feed=batch_feed_dict,
                fetch_list=model_dict.outputs)

            if batch % log_per_step == 0:
                log.info("Predict %s finished" % batch)

            for ufs, sri in zip(batch_usr_feat, batch_src_real_index):
                if args.input_type == "text":
                    sri = id2str[int(sri)].strip("\n")
                fout.write("{}\t{}\n".format(sri, tostr(ufs)))
def train(train_exe, exe, program, loss, node2vec_pyreader, args,
          train_steps):
    """Run ``train_steps`` training steps on ``train_exe``.

    Only the trainer whose ``PADDLE_TRAINER_ID`` is 0 saves parameters, under
    ``args.output_path/<step>``, whenever the step counter is a multiple of
    ``args.steps_per_save`` and once more at the final step.
    """
    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))

    step = 0
    while True:
        try:
            tic = time.time()
            loss_val, = train_exe.run(fetch_list=[loss])
            log.info("step %s: loss %.5f speed: %.5f s/step" %
                     (step, np.mean(loss_val), time.time() - tic))
            step += 1
        except F.core.EOFException:
            # The reader ran dry: reset it and keep looping.
            node2vec_pyreader.reset()

        at_save_point = (step % args.steps_per_save == 0 or
                         step == train_steps)
        if at_save_point and trainer_id == 0:
            save_root = args.output_path
            model_path = os.path.join(save_root, str(step))
            if not os.path.exists(save_root):
                os.makedirs(save_root)
            F.io.save_params(exe, model_path, program)

        if step == train_steps:
            break
def dump_node_feat(config):
    """Tokenize every term and save the ids to ``node_feat/term_ids.npy``.

    Terms are the last tab-separated field of each line of
    ``<config.graph_work_path>/terms.txt``. When ``config.ernie_name``
    contains "tiny" the terms are tokenized one by one in this process with
    ``ErnieTinyTokenizer``; otherwise ``ErnieTokenizer`` is used in a
    multiprocessing pool. The result is stored as a ``uint16`` array.
    """
    log.info("Dump node feat starting...")

    terms_path = os.path.join(config.graph_work_path, "terms.txt")
    with io.open(terms_path, encoding=config.encoding) as terms_file:
        id2str = [line.strip("\n").split("\t")[-1] for line in terms_file]

    if "tiny" in config.ernie_name:
        tokenizer = ErnieTinyTokenizer.from_pretrained(config.ernie_name)
        # Bind the fixed arguments once rather than once per term.
        to_ids = partial(
            term2id, tokenizer=tokenizer, max_seqlen=config.max_seqlen)
        term_ids = [to_ids(s) for s in id2str]
    else:
        tokenizer = ErnieTokenizer.from_pretrained(config.ernie_name)
        to_ids = partial(
            term2id, tokenizer=tokenizer, max_seqlen=config.max_seqlen)
        pool = multiprocessing.Pool()
        try:
            term_ids = pool.map(to_ids, id2str)
        finally:
            # Always reap the workers, even when tokenization fails.
            pool.terminate()

    node_feat_path = os.path.join(config.graph_work_path, "node_feat")
    if not os.path.exists(node_feat_path):
        os.makedirs(node_feat_path)
    # NOTE(review): uint16 assumes every token id is below 65536 - confirm
    # against the tokenizer vocabulary size.
    np.save(
        os.path.join(node_feat_path, "term_ids.npy"),
        np.array(term_ids, np.uint16))
    log.info("Dump node feat done.")
def train(dataloader, model, feature, criterion, optim, log_per_step=100):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch provides a graph, the indices used to gather input rows from
    ``feature``, the indices of the predictions to score, and their labels.

    Returns:
        (mean_loss, mean_acc): loss and top-1 accuracy averaged over all
        scored samples.
    """
    model.train()

    loss_sum = 0.
    acc_sum = 0.
    seen = 0
    for batch, (g, sample_index, index, label) in enumerate(dataloader, 1):
        num_samples = len(index)

        g.tensor()
        sample_index = paddle.to_tensor(sample_index)
        index = paddle.to_tensor(index)
        label = paddle.to_tensor(label)

        feat = paddle.gather(feature, sample_index)
        pred = paddle.gather(model(g, feat), index)

        loss = criterion(pred, label)
        loss.backward()
        acc = paddle.metric.accuracy(input=pred, label=label, k=1)
        optim.step()
        optim.clear_grad()

        loss_sum += loss.numpy() * num_samples
        acc_sum += acc.numpy() * num_samples
        seen += num_samples

        if batch % log_per_step == 0:
            log.info("Batch %s %s-Loss %s %s-Acc %s" %
                     (batch, "train", loss.numpy(), "train", acc.numpy()))

    return loss_sum / seen, acc_sum / seen
def save_model(output_path, model, steps, opt, lr_scheduler, max_ckpt=2):
    """Save a checkpoint on rank 0 and prune old checkpoints.

    The model state dict is written to
    ``<output_path>/model_<steps>/ckpt.pdparams``. Afterwards only the
    ``max_ckpt`` checkpoint directories with the highest step number are
    kept; older ones are removed. Entries matching ``model_*`` whose suffix
    is not an integer (for example a hand-made ``model_best``) are left
    untouched instead of aborting the rotation.

    Args:
        output_path: directory that holds the ``model_<step>`` directories.
        model: object providing ``state_dict()``.
        steps: integer step number used in the directory name.
        opt: optimizer; currently not persisted.
        lr_scheduler: learning-rate scheduler; currently not persisted.
        max_ckpt: number of most recent checkpoints to keep.
    """
    if paddle.distributed.get_rank() != 0:
        return

    output_dir = os.path.join(output_path, "model_%d" % steps)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    paddle.save(model.state_dict(), os.path.join(output_dir, "ckpt.pdparams"))
    # NOTE: the global step, optimizer state and lr scheduler state are
    # deliberately not saved here.
    log.info("save model %s" % output_dir)

    def version(path):
        """Return the step number encoded in ``path``, or None."""
        try:
            return int(path.split("_")[-1])
        except ValueError:
            return None

    ckpt_paths = [
        path for path in glob.glob(os.path.join(output_path, "model_*"))
        if version(path) is not None
    ]
    if len(ckpt_paths) > max_ckpt:
        stale_paths = sorted(ckpt_paths, key=version, reverse=True)[max_ckpt:]
        for ckpt_dir in stale_paths:
            if os.path.exists(ckpt_dir):
                shutil.rmtree(ckpt_dir)
def run_epoch(batch_iter,
              exe,
              program,
              prefix,
              model_loss,
              model_acc,
              epoch,
              log_per_step=100):
    """Run ``program`` over every batch of one epoch and log the averages.

    Loss and accuracy are weighted by the number of entries in each feed
    dict's ``"node_index"``. ``prefix`` only labels the log lines.
    """
    loss_sum = 0.
    acc_sum = 0.
    sample_count = 0
    batch = 0

    start = time.time()
    for batch, batch_feed_dict in enumerate(batch_iter(), 1):
        batch_loss, batch_acc = exe.run(
            program,
            fetch_list=[model_loss, model_acc],
            feed=batch_feed_dict)

        if batch % log_per_step == 0:
            log.info("Batch %s %s-Loss %s %s-Acc %s" %
                     (batch, prefix, batch_loss, prefix, batch_acc))

        num_samples = len(batch_feed_dict["node_index"])
        loss_sum += batch_loss * num_samples
        acc_sum += batch_acc * num_samples
        sample_count += num_samples
    end = time.time()

    log.info(
        "%s Epoch %s Loss %.5lf Acc %.5lf Speed(per batch) %.5lf sec" %
        (prefix, epoch, loss_sum / sample_count, acc_sum / sample_count,
         (end - start) / batch))
def main(config):
    """Build the model, its training data reader and run the learner."""
    # Model selected by the configuration.
    model = Model.factory(config)

    # Training edges.
    data = TrainData(config.graph_path)

    # Reader that turns the training edges into feeds for the model.
    reader_options = dict(
        graph_wrappers=model.graph_wrappers,
        batch_size=config.batch_size,
        data=data,
        samples=config.samples,
        num_workers=config.sample_workers,
        feed_name_list=[var.name for var in model.feed_list],
        use_pyreader=config.use_pyreader,
        phase="train",
        graph_data_path=config.graph_path,
        shuffle=True)
    train_iter = GraphGenerator(**reader_options)
    log.info("build graph reader done.")

    learner = Learner.factory(config.learner_type)
    learner.build(model, train_iter, config)
    learner.start()
    learner.stop()
def test_gen_speed(gen_func):
    """Log how long each of the first 101 items of ``gen_func()`` takes.

    Args:
        gen_func: callable returning an iterable; iteration stops after the
            item with index 100.
    """
    last_tick = time.time()
    for idx, _ in enumerate(gen_func()):
        elapsed = time.time() - last_tick
        log.info("iter %s: %s s" % (idx, elapsed))
        last_tick = time.time()
        if idx == 100:
            return
def main(args):
    """Build the Deepwalk program, attach the data reader and train on GPU.

    The learning rate is scaled by the number of CUDA places and the number
    of training steps is divided by it. Parameters are optionally
    warm-started from ``args.warm_start_from_dir`` before training.
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    num_devices = len(F.cuda_places())

    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    total_samples = args.num_nodes * args.epoch
    train_steps = int(total_samples / args.batch_size / num_devices)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    exe = F.Executor(F.CUDAPlace(0))
    exe.run(F.default_startup_program())

    graph = build_graph(args.num_nodes, args.edge_path)
    pyreader.decorate_tensor_provider(build_gen_func(args, graph))
    pyreader.start()

    main_program = F.default_main_program()
    if args.warm_start_from_dir is not None:
        F.io.load_params(exe, args.warm_start_from_dir, main_program)

    train_exe = get_parallel_exe(main_program, loss)
    train(train_exe, exe, main_program, loss, pyreader, args, train_steps)
def __init__(self, config):
    """Create the sub-layers of the GNN with a virtual node.

    Builds the atom encoder, the zero-initialised virtual node embedding,
    ``config.num_layers`` convolution/batch-norm pairs,
    ``config.num_layers - 1`` virtual-node MLPs and a sum graph pooling.
    """
    super(GNNVirt, self).__init__()
    log.info("gnn_type is %s" % self.__class__.__name__)

    self.config = config
    emb_dim = self.config.emb_dim
    num_layers = self.config.num_layers

    encoder_cls = getattr(ME, self.config.atom_enc_type, ME.AtomEncoder)
    self.atom_encoder = encoder_cls(emb_dim)

    self.virtualnode_embedding = self.create_parameter(
        shape=[1, emb_dim],
        dtype='float32',
        default_initializer=nn.initializer.Constant(value=0.0))

    self.convs = paddle.nn.LayerList()
    self.batch_norms = paddle.nn.LayerList()
    self.mlp_virtualnode_list = paddle.nn.LayerList()

    conv_cls = getattr(L, self.config.layer_type)
    for _ in range(num_layers):
        self.convs.append(conv_cls(self.config))
        self.batch_norms.append(L.batch_norm_1d(emb_dim))

    # One MLP per layer except the last.
    for _ in range(num_layers - 1):
        virtualnode_mlp = nn.Sequential(
            L.Linear(emb_dim, emb_dim),
            L.batch_norm_1d(emb_dim),
            nn.Swish(),
            L.Linear(emb_dim, emb_dim),
            L.batch_norm_1d(emb_dim),
            nn.Swish())
        self.mlp_virtualnode_list.append(virtualnode_mlp)

    self.pool = gnn.GraphPool(pool_type="sum")
def train_prog(exe, program, model, pyreader, args):
    """Train for ``args.num_epoch`` passes over ``pyreader()``.

    Progress is logged every ``args.log_per_step`` batches (counted across
    epochs). Whenever the per-epoch step index is a multiple of
    ``args.steps_per_save``, trainer 0 saves persistables under
    ``args.save_path/<step>``. Any exception raised while handling a batch
    is logged and the loop continues with the next batch.
    """
    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
    start = time.time()
    batch = 0

    for _ in range(args.num_epoch):
        for step, batch_feed_dict in enumerate(pyreader()):
            try:
                cpu_time = time.time()
                batch += 1
                batch_loss, batch_acc = exe.run(
                    program,
                    feed=batch_feed_dict,
                    fetch_list=[model.loss, model.acc])
                end = time.time()

                if batch % args.log_per_step == 0:
                    log.info(
                        "Batch %s Loss %s Acc %s \t Speed(per batch) %.5lf/%.5lf sec"
                        % (batch, np.mean(batch_loss), np.mean(batch_acc),
                           (end - start) / batch, (end - cpu_time)))

                if step % args.steps_per_save == 0:
                    save_path = args.save_path
                    if trainer_id == 0:
                        fleet.save_persistables(
                            exe, os.path.join(save_path, "%s" % step))
            except Exception as e:
                log.info("Pyreader train error")
                log.exception(e)
def main(args):
    """Train a SkipGram model on random walks over ``args.dataset``.

    Uses Adam with a constant learning rate and weight decay, and logs the
    loss returned by ``train`` after each of the ``args.epoch`` epochs.
    """
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    skip_gram = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(skip_gram)

    optim = Adam(
        learning_rate=args.learning_rate,
        parameters=model.parameters(),
        weight_decay=args.weight_decay)

    train_ds = ShardedDataset(graph.nodes)
    walk_collate = BatchRandWalk(graph, args.walk_len, args.win_size,
                                 args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=walk_collate)

    for epoch in tqdm.tqdm(range(args.epoch)):
        epoch_loss = train(model, data_loader, optim)
        log.info("Runing epoch:%s\t train_loss:%.6f", epoch, epoch_loss)
def infer(config, output_path):
    """Load a trained model and write its test-split predictions to disk.

    The predictions (``pred_dict['y_pred']`` cast to float32) are stored in
    ``<config.output_dir>/<config.task_name>/test_pred.npz``.

    Args:
        config: provides model_type, infer_from, dataset_type,
            valid_batch_size, output_dir and task_name.
        output_path: not used by this function.
    """
    model_cls = getattr(M, config.model_type)
    model = model_cls(config)
    log.info("infer model from %s" % config.infer_from)
    model.set_state_dict(paddle.load(config.infer_from))

    log.info("loading data")
    dataset_cls = getattr(DS, config.dataset_type)
    ds = dataset_cls(config)
    split_idx = ds.get_idx_split()
    test_ds = DS.Subset(ds, split_idx['test'], mode='test')
    log.info("Test exapmles: %s" % len(test_ds))

    test_loader = Dataloader(
        test_ds,
        batch_size=config.valid_batch_size,
        shuffle=False,
        num_workers=1,
        collate_fn=DS.CollateFn(config))

    # Constructed as in the original flow; not consulted below.
    evaluator = PCQM4MEvaluator()

    log.info("testing ...")
    pred_dict = evaluate(model, test_loader)

    result_dir = os.path.join(config.output_dir, config.task_name)
    make_dir(result_dir)
    result_file = os.path.join(result_dir, "test_pred.npz")

    log.info("saving test result to %s" % result_file)
    np.savez_compressed(result_file, pred_dict['y_pred'].astype(np.float32))
def dump_node_feat(args):
    """Tokenize every entry of ``id2str.npy`` and save ``term_ids.npy``.

    Both files live in ``args.outpath``. Tokenization is done by ``term2id``
    with a ``FullTokenizer`` built from ``args.vocab_file``, spread over a
    multiprocessing pool.
    """
    log.info("Dump node feat starting...")
    id2str = np.load(os.path.join(args.outpath, "id2str.npy"), mmap_mode="r")

    pool = multiprocessing.Pool()
    try:
        tokenizer = FullTokenizer(args.vocab_file)
        to_ids = partial(
            term2id, tokenizer=tokenizer, max_seqlen=args.max_seqlen)
        term_ids = pool.map(to_ids, id2str)
    finally:
        # Reap the workers even when tokenization fails, so no child
        # processes are left behind.
        pool.terminate()

    np.save(os.path.join(args.outpath, "term_ids.npy"), np.array(term_ids))
    log.info("Dump node feat done.")
def load_edges(self):
    """Load the edge files of every edge type into the client.

    For each entry of ``self.etype2files`` the files found by
    ``helper.get_files`` are joined with ';' and loaded with the last flag
    set to False. When ``self.symmetry`` is set, the same files are loaded
    again under the inverse edge type with the flag set to True.
    """
    for etype, file_or_dir in self.etype2files.items():
        filepath = ";".join(list(helper.get_files(file_or_dir)))
        log.info("load edges of type %s from %s" % (etype, filepath))
        self._client.load_edge_file(etype, filepath, False)

        if not self.symmetry:
            continue
        inverse_etype = helper.get_inverse_etype(etype)
        self._client.load_edge_file(inverse_etype, filepath, True)
def run(dataset,
        feature,
        exe,
        program,
        loss,
        acc,
        phase="train",
        log_per_step=5,
        cpu_num=1):
    """Run ``program`` over ``dataset``, feeding ``cpu_num`` batches at once.

    Feed dicts are collected until ``cpu_num`` of them are available and are
    then executed together (a single dict is passed directly when only one
    was collected). Feed dicts left over at the end of the dataset are not
    executed.

    Returns:
        (mean_loss, mean_acc), weighted by the length of each batch's
        ``index`` array.
    """
    batch = 0
    step = 0
    total_loss = 0.
    total_acc = 0.
    total_sample = 0
    pending = []

    for g, sample_index, index, label in dataset:
        feed_dict = {
            "num_nodes": np.array(
                [g.num_nodes], dtype="int32"),
            "edges": g.edges.astype("int32"),
            "sample_index": sample_index.astype("int32"),
            "index": index.astype("int32"),
            "label": label.astype("int64").reshape(-1),
            "feature": feature[sample_index].astype("float32")
        }
        if len(pending) < cpu_num:
            pending.append(feed_dict)
        batch += 1

        # NOTE(review): the source lost its indentation; everything below is
        # assumed to run only once a full group has been collected.
        if len(pending) != cpu_num:
            continue

        batch_index = [fd["index"] for fd in pending]
        feed = pending[0] if len(pending) == 1 else pending
        batch_loss, batch_acc = exe.run(program,
                                        feed=feed,
                                        fetch_list=[loss.name, acc.name])
        step += 1
        pending = []

        if step % log_per_step == 0:
            log.info("Batch %s %s-Loss %s %s-Acc %s" %
                     (batch, phase, np.mean(batch_loss), phase,
                      np.mean(batch_acc)))

        for n, scored in enumerate(batch_index):
            total_acc += batch_acc[n] * len(scored)
            total_loss += batch_loss[n] * len(scored)
            total_sample += len(scored)

    return total_loss / total_sample, total_acc / total_sample
def random_split(dataset_size, split_ratio=0.9, seed=0, shuffle=True):
    """Split ``range(dataset_size)`` into train and validation indices.

    Args:
        dataset_size: number of samples.
        split_ratio: fraction of the samples that goes to the first part.
        seed: seed applied to NumPy's global random state.
        shuffle: when True (default) the indices are shuffled before
            splitting; when False they keep their natural order.

    Returns:
        (train_idx, valid_idx): two lists of integer indices.
    """
    # Seeding the global state is kept so callers see the same RNG state
    # afterwards as before this parameter was honoured.
    np.random.seed(seed)
    indices = list(range(dataset_size))
    if shuffle:
        np.random.shuffle(indices)

    split = int(split_ratio * dataset_size)
    train_idx, valid_idx = indices[:split], indices[split:]
    log.info("train_set : test_set == %d : %d" %
             (len(train_idx), len(valid_idx)))
    return train_idx, valid_idx
def test(args):
    """Log the average time per item of the data generator.

    The generator is built from ``args`` and consumed until it is
    exhausted; after every 10 items the mean seconds per item over that
    window is logged.
    """
    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    window_start = time.time()
    window = 10
    for idx, _ in enumerate(gen_func()):
        if idx % window != window - 1:
            continue
        log.info("%s" % (1.0 * (time.time() - window_start) / window))
        window_start = time.time()
def __init__(self, emb_dim):
    """Create one Xavier-uniform embedding table per bond feature.

    Args:
        emb_dim: output size of every embedding table; the input sizes come
            from ``full_bond_feature_dims``.
    """
    super(BondEncoder, self).__init__()
    log.info("bond encoder type is %s" % self.__class__.__name__)

    self.bond_embedding_list = nn.LayerList()
    for feature_dim in full_bond_feature_dims:
        initializer = nn.initializer.XavierUniform()
        self.bond_embedding_list.append(
            paddle.nn.Embedding(
                feature_dim, emb_dim, weight_attr=initializer))
def dump_id2str_map(args):
    """Save the lines of ``terms.txt`` as the array ``id2str.npy``.

    Both files live in ``args.outpath``; ``terms.txt`` is decoded with
    ``args.encoding`` and each line has its newline characters stripped.
    """
    log.info("Dump id2str map starting...")
    terms_path = os.path.join(args.outpath, "terms.txt")
    # Use a context manager so the handle is closed deterministically.
    with open(terms_path, "r", encoding=args.encoding) as terms_file:
        id2str = np.array([line.strip("\n") for line in terms_file])
    np.save(os.path.join(args.outpath, "id2str.npy"), id2str)
    log.info("Dump id2str map done.")
def __init__(self, config):
    """Create the sub-layers of the junction-aware GNN with a virtual node.

    Copies the relevant settings from ``config`` onto the instance and
    builds the atom encoder, a 6000-entry junction embedding, the
    zero-initialised virtual node embedding, one convolution / junction GIN
    convolution / batch norm triple per layer, one virtual-node MLP per
    layer except the last, and a sum graph pooling.

    Raises:
        ValueError: if ``config.num_layers`` is smaller than 2.
    """
    super(JuncGNNVirt, self).__init__()
    log.info("gnn_type is %s" % self.__class__.__name__)

    self.config = config
    self.num_layers = config.num_layers
    self.drop_ratio = config.drop_ratio
    self.JK = config.JK
    self.residual = config.residual
    self.emb_dim = config.emb_dim
    self.gnn_type = config.gnn_type
    self.layer_type = config.layer_type

    if self.num_layers < 2:
        raise ValueError("Number of GNN layers must be greater than 1.")

    emb_dim = self.emb_dim

    encoder_cls = getattr(ME, self.config.atom_enc_type, ME.AtomEncoder)
    self.atom_encoder = encoder_cls(emb_dim)
    self.junc_embed = paddle.nn.Embedding(6000, emb_dim)

    # The virtual node embedding starts at zero.
    self.virtualnode_embedding = self.create_parameter(
        shape=[1, emb_dim],
        dtype='float32',
        default_initializer=nn.initializer.Constant(value=0.0))

    # GNN layers
    self.convs = nn.LayerList()
    # Batch norms applied to node embeddings
    self.batch_norms = nn.LayerList()
    # MLPs that transform the virtual node at every layer
    self.mlp_virtualnode_list = nn.LayerList()
    self.junc_convs = nn.LayerList()

    conv_cls = getattr(L, self.layer_type)
    for _ in range(self.num_layers):
        self.convs.append(conv_cls(self.config))
        self.junc_convs.append(gnn.GINConv(emb_dim, emb_dim))
        self.batch_norms.append(L.batch_norm_1d(emb_dim))

    for _ in range(self.num_layers - 1):
        virtualnode_mlp = nn.Sequential(
            L.Linear(emb_dim, emb_dim),
            L.batch_norm_1d(emb_dim),
            nn.Swish(),
            L.Linear(emb_dim, emb_dim),
            L.batch_norm_1d(emb_dim),
            nn.Swish())
        self.mlp_virtualnode_list.append(virtualnode_mlp)

    self.pool = gnn.GraphPool(pool_type="sum")
def main(args, config):
    """Train and evaluate a node classifier ``args.runs`` times.

    For every run a fresh model and optimizer are created and trained for
    ``args.epoch`` epochs. The test accuracy of the epoch with the lowest
    validation loss is reported for the run; the mean and standard
    deviation over all runs are logged at the end.
    """
    dataset = load(args.dataset, args.feature_pre_normalize)
    graph = dataset.graph

    train_index, train_label = dataset.train_index, dataset.train_label
    val_index, val_label = dataset.val_index, dataset.val_label
    test_index, test_label = dataset.test_index, dataset.test_label

    GraphModel = getattr(model, config.model_name)
    criterion = paddle.nn.loss.CrossEntropyLoss()

    best_test = []
    for run in range(args.runs):
        val_accs, val_losses = [], []
        test_accs, test_losses = [], []

        gnn_model = GraphModel(
            input_size=graph.node_feat["words"].shape[1],
            num_class=dataset.num_classes,
            **config)
        optim = Adam(
            learning_rate=config.learning_rate,
            parameters=gnn_model.parameters(),
            weight_decay=config.weight_decay)

        for epoch in tqdm.tqdm(range(args.epoch)):
            train_loss, train_acc = train(train_index, train_label,
                                          gnn_model, graph, criterion, optim)

            val_loss, val_acc = eval(val_index, val_label, gnn_model, graph,
                                     criterion)
            val_accs.append(val_acc.numpy())
            val_losses.append(val_loss.numpy())

            test_loss, test_acc = eval(test_index, test_label, gnn_model,
                                       graph, criterion)
            test_accs.append(test_acc.numpy())
            test_losses.append(test_loss.numpy())

        # Model selection: the epoch with the lowest validation loss.
        selected_acc = test_accs[np.argmin(val_losses)]
        log.info("Runs %s: Model: %s Best Test Accuracy: %f" %
                 (run, config.model_name, selected_acc))
        best_test.append(selected_acc)

    log.info("Dataset: %s Best Test Accuracy: %f ( stddev: %f )" %
             (args.dataset, np.mean(best_test), np.std(best_test)))