def predict(config):
    model = getattr(models, config.model.name).GNNModel(
        **dict(config.model.items()))
    _create_if_not_exist(config.output_path)
    load_model(config.output_path, model)
    model.eval()

    pred_temp = []
    dataset = MAG240M(config.data_dir, seed=123)
    evaluator = MAG240MEvaluator()
    dataset.prepare_data()

    # Widen the neighbor fan-out at inference time: 160 neighbors per layer
    # instead of the training-time config.samples.
    test_iter = DataGenerator(
        dataset=dataset,
        samples=[160] * len(config.samples),
        batch_size=16,
        num_workers=config.num_workers,
        data_type="test")

    for batch in test_iter.generator():
        graph_list, x, y = batch
        x = paddle.to_tensor(x, dtype='float32')
        y = paddle.to_tensor(y, dtype='int64')  # labels are unused at test time
        graph_list = [(item[0].tensor(), paddle.to_tensor(item[2]))
                      for item in graph_list]
        out = model(graph_list, x)
        pred_temp.append(out.numpy())

    pred_temp = np.concatenate(pred_temp, axis=0)
    y_pred = pred_temp.argmax(axis=-1)
    res = {'y_pred': y_pred}
    evaluator.save_test_submission(res, 'results')
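# A minimal sketch of the config object predict() expects. The field names are
# taken from the attribute accesses above; the concrete values and the AttrDict
# helper are assumptions, not part of the original pipeline.
class AttrDict(dict):
    """Dict that also exposes its keys as attributes (config.model.name)."""
    __getattr__ = dict.__getitem__


config = AttrDict(
    model=AttrDict(name='r_unimp', hidden_size=1024),  # hypothetical GNNModel kwargs
    output_path='./outputs',
    data_dir='./dataset',
    samples=[25, 15],  # per-layer neighbor fan-out used during training
    num_workers=4,
)
predict(config)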
def test(args, dataset, g, feats, paper_offset):
    print('Loading masks and labels...')
    valid_idx = torch.LongTensor(dataset.get_idx_split('valid')) + paper_offset
    test_idx = torch.LongTensor(dataset.get_idx_split('test')) + paper_offset
    label = dataset.paper_label

    print('Initializing data loader...')
    sampler = dgl.dataloading.MultiLayerNeighborSampler([160, 160])
    valid_collator = ExternalNodeCollator(
        g, valid_idx, sampler, paper_offset, feats, label)
    valid_dataloader = torch.utils.data.DataLoader(
        valid_collator.dataset,
        batch_size=16,
        shuffle=False,
        drop_last=False,
        collate_fn=valid_collator.collate,
        num_workers=2)
    test_collator = ExternalNodeCollator(
        g, test_idx, sampler, paper_offset, feats, label)
    test_dataloader = torch.utils.data.DataLoader(
        test_collator.dataset,
        batch_size=16,
        shuffle=False,
        drop_last=False,
        collate_fn=test_collator.collate,
        num_workers=4)

    print('Loading model...')
    model = RGAT(
        dataset.num_paper_features,
        dataset.num_classes,
        1024, 5, 2, 4, 0.5, 'paper').cuda()
    model.load_state_dict(torch.load(args.model_path))

    model.eval()
    correct = total = 0
    for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(valid_dataloader)):
        with torch.no_grad():
            mfgs = [mfg.to('cuda') for mfg in mfgs]
            x = mfgs[0].srcdata['x']
            y = mfgs[-1].dstdata['y']
            y_hat = model(mfgs, x)
            correct += (y_hat.argmax(1) == y).sum().item()
            total += y_hat.shape[0]
    acc = correct / total
    print('Validation accuracy:', acc)

    evaluator = MAG240MEvaluator()
    y_preds = []
    for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(test_dataloader)):
        with torch.no_grad():
            mfgs = [mfg.to('cuda') for mfg in mfgs]
            x = mfgs[0].srcdata['x']
            y = mfgs[-1].dstdata['y']
            y_hat = model(mfgs, x)
            y_preds.append(y_hat.argmax(1).cpu())
    evaluator.save_test_submission({'y_pred': torch.cat(y_preds)}, args.submission_path)
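# A hedged sketch of a command-line entry point for test(). The flag names, the
# file layout (graph.dgl, full.npy), the float16 feature dtype, and the
# authors/institutions-first node ordering implied by paper_offset are
# assumptions inferred from the function above, not confirmed by it.
import argparse

import dgl
import numpy as np
import torch
from ogb.lsc import MAG240MDataset

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--rootdir', default='.')
    parser.add_argument('--graph-path', default='graph.dgl')
    parser.add_argument('--full-feature-path', default='full.npy')
    parser.add_argument('--model-path', default='model.pt')
    parser.add_argument('--submission-path', default='results')
    args = parser.parse_args()

    dataset = MAG240MDataset(root=args.rootdir)
    (g,), _ = dgl.load_graphs(args.graph_path)
    g = g.formats(['csc'])  # neighbor sampling reads in-edges
    num_nodes = (dataset.num_papers + dataset.num_authors
                 + dataset.num_institutions)
    feats = np.memmap(args.full_feature_path, mode='r', dtype='float16',
                      shape=(num_nodes, dataset.num_paper_features))
    paper_offset = dataset.num_authors + dataset.num_institutions
    test(args, dataset, g, feats, paper_offset)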
def infer(config, do_eval=False):
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    dataset = MAG240M(config)
    evaluator = MAG240MEvaluator()
    dataset.prepare_data()

    model = getattr(models, config.model.name).GNNModel(
        **dict(config.model.items()))
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    loss_func = F.cross_entropy
    _create_if_not_exist(config.output_path)
    load_model(config.output_path, model)

    if paddle.distributed.get_rank() == 0:
        # Buffer for per-paper logits: 121,751,666 papers x 153 classes.
        # Creating the memmap allocates the file on disk; evaluate() is
        # expected to fill it, and get_result() reads it back.
        file = 'model_result_temp'
        pseudo_label = np.memmap(file, dtype=np.float32, mode='w+',
                                 shape=(121751666, 153))

    if do_eval:
        valid_iter = DataGenerator(
            dataset=dataset,
            samples=[200] * len(config.samples),
            batch_size=64,
            num_workers=config.num_workers,
            data_type="eval")
        r = evaluate(valid_iter, model, loss_func, config, evaluator, dataset)
        log.info("finish eval")

    test_iter = DataGenerator(
        dataset=dataset,
        samples=[200] * len(config.samples),
        batch_size=64,
        num_workers=config.num_workers,
        data_type="test")
    r = evaluate(test_iter, model, loss_func, config, evaluator, dataset)
    log.info("finish test")
def get_result(config, eval_all=False):
    dataset = MAG240MDataset(config.data_dir)
    evaluator = MAG240MEvaluator()

    # Per-paper logits written during infer(); same file name, same shape.
    file = 'model_result_temp'
    pseudo_label = np.memmap(file, dtype=np.float32, mode='r',
                             shape=(121751666, 153))
    wf = open("ck_result.txt", 'a', encoding='utf-8')
    label = dataset.all_paper_label

    if eval_all:
        valid_idx = dataset.get_idx_split('valid')
        pred = pseudo_label[valid_idx]
        save_path = os.path.join(config.valid_path, "all_eval_result")
        np.save(save_path, pred)
        y_pred = pred.argmax(1)
        y_true = label[valid_idx]
        valid_acc = evaluator.eval({'y_true': y_true, 'y_pred': y_pred})['acc']
        print("all eval result")
        print(f"valid_acc: {valid_acc}")
        wf.write("all eval result\n")
        wf.write(f"valid_acc: {valid_acc}\n")
    else:
        # Evaluate on one cross-validation split and dump the test logits.
        valid_path = os.path.join(config.valid_path, config.valid_name)
        valid_idx = np.load(valid_path)
        test_idx = dataset.get_idx_split('test')
        pred = pseudo_label[valid_idx]
        y_pred = pred.argmax(1)
        y_true = label[valid_idx]
        valid_acc = evaluator.eval({'y_true': y_true, 'y_pred': y_pred})['acc']
        print(f"eval cv {config.valid_name} result")
        print(f"valid_acc: {valid_acc}")
        wf.write(f"eval cv {config.valid_name} result\n")
        wf.write(f"valid_acc: {valid_acc}\n")
        save_path_test = os.path.join(config.valid_path, config.test_name)
        pred_test = pseudo_label[test_idx]
        print(pred_test.shape)
        np.save(save_path_test, pred_test)
    wf.close()
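# A hedged sketch of turning the test logits that get_result() saves into a
# submission file. The '.npy' suffix matches what np.save appends above (when
# config.test_name lacks it already); make_submission itself is a hypothetical
# helper, written by analogy with the evaluator calls in predict() and test().
import os

import numpy as np
from ogb.lsc import MAG240MEvaluator


def make_submission(config):
    pred_test = np.load(
        os.path.join(config.valid_path, config.test_name) + '.npy')
    evaluator = MAG240MEvaluator()
    evaluator.save_test_submission({'y_pred': pred_test.argmax(1)}, 'results')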
trainer.fit(model, datamodule=datamodule)

if args.evaluate:
    # Pick the latest lightning_logs version and its first checkpoint.
    dirs = glob.glob(f'logs/{args.model}/lightning_logs/*')
    version = max([int(x.split(os.sep)[-1].split('_')[-1]) for x in dirs])
    logdir = f'logs/{args.model}/lightning_logs/version_{version}'
    print(f'Evaluating saved model in {logdir}...')
    ckpt = glob.glob(f'{logdir}/checkpoints/*')[0]

    trainer = Trainer(gpus=args.device, resume_from_checkpoint=ckpt)
    model = RGNN.load_from_checkpoint(
        checkpoint_path=ckpt, hparams_file=f'{logdir}/hparams.yaml')

    datamodule.batch_size = 16
    datamodule.sizes = [160] * len(args.sizes)  # (Almost) no sampling...

    trainer.test(model=model, datamodule=datamodule)

    evaluator = MAG240MEvaluator()
    loader = datamodule.hidden_test_dataloader()

    model.eval()
    # Move the reloaded model onto the same GPU the batches go to; without
    # this the manual loop below would mix CPU parameters with CUDA inputs.
    model.to(int(args.device))
    y_preds = []
    for batch in tqdm(loader):
        batch = batch.to(int(args.device))
        with torch.no_grad():
            out = model(batch.x, batch.adjs_t).argmax(dim=-1).cpu()
            y_preds.append(out)
    res = {'y_pred': torch.cat(y_preds, dim=0)}
    evaluator.save_test_submission(res, f'results/{args.model}')
def train(config, do_eval=False):
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    dataset = MAG240M(config.data_dir, seed=123)
    evaluator = MAG240MEvaluator()
    dataset.prepare_data()

    train_iter = DataGenerator(
        dataset=dataset,
        samples=config.samples,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        data_type="train")
    valid_iter = DataGenerator(
        dataset=dataset,
        samples=config.samples,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        data_type="eval")

    model = getattr(models, config.model.name).GNNModel(
        **dict(config.model.items()))
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    loss_func = F.cross_entropy
    opt, lr_scheduler = optim.get_optimizer(
        parameters=model.parameters(),
        learning_rate=config.lr,
        max_steps=config.max_steps,
        weight_decay=config.weight_decay,
        warmup_proportion=config.warmup_proportion,
        clip=config.clip,
        use_lr_decay=config.use_lr_decay)

    _create_if_not_exist(config.output_path)
    load_model(config.output_path, model)
    swriter = SummaryWriter(os.path.join(config.output_path, 'log'))

    if do_eval and paddle.distributed.get_rank() == 0:
        # Evaluation-only mode: widen the fan-out and shrink the batch size.
        valid_iter = DataGenerator(
            dataset=dataset,
            samples=[160] * len(config.samples),
            batch_size=16,
            num_workers=config.num_workers,
            data_type="eval")
        r = evaluate(valid_iter, model, loss_func, config, evaluator, dataset)
        log.info(dict(r))
    else:
        best_valid_acc = -1
        for e_id in range(config.epochs):
            loss_temp = []
            for batch in tqdm.tqdm(train_iter.generator()):
                loss = train_step(model, loss_func, batch, dataset)
                log.info(loss.numpy()[0])
                loss.backward()
                opt.step()
                opt.clear_gradients()
                loss_temp.append(loss.numpy()[0])
                if lr_scheduler is not None:
                    lr_scheduler.step()

            loss = np.mean(loss_temp)
            log.info("Epoch %s Train Loss: %s" % (e_id, loss))
            swriter.add_scalar('loss', loss, e_id)

            if e_id >= config.eval_step and e_id % config.eval_per_steps == 0 \
                    and paddle.distributed.get_rank() == 0:
                r = evaluate(valid_iter, model, loss_func, config, evaluator,
                             dataset)
                log.info(dict(r))
                for key, value in r.items():
                    swriter.add_scalar('eval/' + key, value, e_id)
                # Checkpoint whenever validation accuracy matches or beats
                # the best seen so far.
                if r['acc'] >= best_valid_acc:
                    best_valid_acc = r['acc']
                    save_model(config.output_path, model, e_id, opt,
                               lr_scheduler)

    swriter.close()
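# A hedged sketch of a driver for the Paddle functions above (train / infer /
# get_result). The flag names, the YAML config layout, and the to_attrdict
# helper are assumptions; AttrDict is the helper from the sketch after
# predict().
import argparse

import yaml


def to_attrdict(obj):
    # Recursively wrap nested dicts so config.model.name-style access works.
    if isinstance(obj, dict):
        return AttrDict({k: to_attrdict(v) for k, v in obj.items()})
    return obj


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--conf', default='config.yaml')
    parser.add_argument('--mode', choices=['train', 'infer', 'result'],
                        default='train')
    parser.add_argument('--do_eval', action='store_true')
    args = parser.parse_args()

    with open(args.conf) as f:
        config = to_attrdict(yaml.safe_load(f))

    if args.mode == 'train':
        train(config, do_eval=args.do_eval)
    elif args.mode == 'infer':
        infer(config, do_eval=args.do_eval)
    else:
        get_result(config, eval_all=args.do_eval)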