def save_test_submission(self, input_dict, dir_path):
    """Write the MAG240M test predictions to a compressed ``.npz`` archive.

    Args:
        input_dict: Mapping that must hold ``'y_pred'``, an array or
            ``torch.Tensor`` whose shape is exactly ``(146818, )``.
        dir_path: Directory handed to ``makedirs`` and then used as the
            parent of the ``y_pred_mag240m`` archive.

    Raises:
        AssertionError: If ``'y_pred'`` is absent or has a different shape.
    """
    assert 'y_pred' in input_dict
    predictions = input_dict['y_pred']
    assert predictions.shape == (146818, )

    # Tensors are brought to host memory so NumPy can serialise them.
    as_array = (predictions.cpu().numpy()
                if isinstance(predictions, torch.Tensor) else predictions)
    as_array = as_array.astype(np.short)

    makedirs(dir_path)
    archive_path = osp.join(dir_path, 'y_pred_mag240m')
    # np.savez_compressed appends the ``.npz`` suffix itself.
    np.savez_compressed(archive_path, y_pred=as_array)
def save_test_submission(self, input_dict, dir_path):
    """Write the WikiKG90M top-10 tail predictions to a compressed archive.

    Args:
        input_dict: Mapping with an ``'h,r->t'`` entry that itself holds
            ``'t_pred_top10'``: an array or ``torch.Tensor`` of shape
            ``(1359303, 10)`` whose values all lie in ``[0, 1001)``.
        dir_path: Directory handed to ``makedirs`` and then used as the
            parent of the ``t_pred_wikikg90m`` archive.

    Raises:
        AssertionError: If a key is missing, the shape differs, or any value
            falls outside ``[0, 1001)``.
    """
    assert 'h,r->t' in input_dict
    task_output = input_dict['h,r->t']
    assert 't_pred_top10' in task_output

    ranked_tails = task_output['t_pred_top10']
    # Shape first, then the value range; evaluation stops at the first failure.
    assert ranked_tails.shape == (1359303, 10)
    assert (ranked_tails >= 0).all()
    assert (ranked_tails < 1001).all()

    if isinstance(ranked_tails, torch.Tensor):
        ranked_tails = ranked_tails.cpu().numpy()
    ranked_tails = ranked_tails.astype(np.int16)

    makedirs(dir_path)
    archive_path = osp.join(dir_path, 't_pred_wikikg90m')
    np.savez_compressed(archive_path, t_pred_top10=ranked_tails)
def save_test_submission(self, input_dict, dir_path):
    """Validate and write the WikiKG90M top-10 tail predictions.

    Args:
        input_dict: Mapping with an ``'h,r->t'`` entry that itself holds
            ``'t_pred_top10'``: an array or ``torch.Tensor`` of shape
            ``(1359303, 10)`` whose values all lie in ``[0, 1001)`` and whose
            rows contain no repeated value.
        dir_path: Directory handed to ``makedirs`` and then used as the
            parent of the ``t_pred_wikikg90m`` archive.

    Raises:
        AssertionError: If a key is missing, the shape differs, a value is
            out of range, or any row contains a duplicated prediction.
    """
    assert 'h,r->t' in input_dict
    assert 't_pred_top10' in input_dict['h,r->t']

    t_pred_top10 = input_dict['h,r->t']['t_pred_top10']
    assert t_pred_top10.shape == (1359303, 10) and (
        0 <= t_pred_top10).all() and (t_pred_top10 < 1001).all()

    if isinstance(t_pred_top10, torch.Tensor):
        t_pred_top10 = t_pred_top10.cpu().numpy()

    # Once each row is sorted, a repeated value sits next to its twin, so a
    # single vectorised neighbour comparison replaces a Python-level loop of
    # one pd.unique call per row (1359303 rows).
    sorted_rows = np.sort(t_pred_top10, axis=1)
    has_duplicates = (sorted_rows[:, 1:] == sorted_rows[:, :-1]).any()
    assert not has_duplicates, \
        'Found duplicated tail prediction for some triplets!'

    t_pred_top10 = t_pred_top10.astype(np.int16)

    makedirs(dir_path)
    filename = osp.join(dir_path, 't_pred_wikikg90m')
    np.savez_compressed(filename, t_pred_top10=t_pred_top10)
def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
    """Write MAG240M predictions for one test split to a compressed archive.

    Args:
        input_dict: Mapping that must hold ``'y_pred'``, an array or
            ``torch.Tensor`` whose length matches the chosen split.
        dir_path: Directory handed to ``makedirs`` and then used as the
            parent of the output archive.
        mode: One of ``'test-whole'`` (146818 entries), ``'test-dev'``
            (88092 entries) or ``'test-challenge'`` (58726 entries).

    Raises:
        AssertionError: If ``'y_pred'`` is missing, ``mode`` is not one of
            the three splits, or the prediction shape does not match it.
    """
    # split name -> (expected number of predictions, archive base name)
    split_specs = {
        'test-whole': (146818, 'y_pred_mag240m'),
        'test-dev': (88092, 'y_pred_mag240m_test-dev'),
        'test-challenge': (58726, 'y_pred_mag240m_test-challenge'),
    }

    assert 'y_pred' in input_dict
    assert mode in tuple(split_specs)

    predictions = input_dict['y_pred']
    expected_len, base_name = split_specs[mode]
    assert predictions.shape == (expected_len, )
    archive_path = osp.join(dir_path, base_name)

    makedirs(dir_path)

    if isinstance(predictions, torch.Tensor):
        predictions = predictions.cpu().numpy()
    np.savez_compressed(archive_path, y_pred=predictions.astype(np.short))
def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
    """Validate and write WikiKG90M-v2 top-10 tail predictions for one split.

    The predictions are converted to a NumPy array and shape-checked before
    the duplicate check runs, so the check always operates on a host-side
    2-D array rather than on raw tensor rows or malformed input.

    Args:
        input_dict: Mapping with an ``'h,r->t'`` entry that itself holds
            ``'t_pred_top10'``: an array or ``torch.Tensor`` with ten columns
            and one row per test triplet; rows must not repeat a value.
        dir_path: Directory handed to ``makedirs`` and then used as the
            parent of the output archive.
        mode: ``'test-dev'`` (15000 rows) or ``'test-challenge'``
            (10000 rows).

    Raises:
        AssertionError: If a key is missing, ``mode`` is unknown, the shape
            does not match the split, or a row contains a duplicate.
    """
    assert 'h,r->t' in input_dict
    assert 't_pred_top10' in input_dict['h,r->t']
    assert mode in ['test-dev', 'test-challenge']

    t_pred_top10 = input_dict['h,r->t']['t_pred_top10']
    if isinstance(t_pred_top10, torch.Tensor):
        t_pred_top10 = t_pred_top10.cpu().numpy()

    if mode == 'test-dev':
        assert t_pred_top10.shape == (15000, 10)
        filename = osp.join(dir_path, 't_pred_wikikg90m-v2_test-dev')
    elif mode == 'test-challenge':
        assert t_pred_top10.shape == (10000, 10)
        filename = osp.join(dir_path, 't_pred_wikikg90m-v2_test-challenge')

    # Once each row is sorted, a repeated value sits next to its twin, so a
    # single vectorised neighbour comparison covers every row at once.
    sorted_rows = np.sort(t_pred_top10, axis=1)
    has_duplicates = (sorted_rows[:, 1:] == sorted_rows[:, :-1]).any()
    assert not has_duplicates, \
        'Found duplicated tail prediction for some triplets!'

    makedirs(dir_path)

    t_pred_top10 = t_pred_top10.astype(np.int32)
    np.savez_compressed(filename, t_pred_top10=t_pred_top10)
# Script fragment: loads train/validation features and labels onto `device`,
# builds the MLP and its optimizer, and opens the epoch loop.
# NOTE(review): `t`, `dataset`, `train_idx`, `valid_idx`, `device`, `args`,
# `MLP` and `makedirs` are all defined outside this fragment.

# Gather the feature rows selected by `train_idx` and move them to `device`
# as float tensors.
print("Reading training node features...", end=" ", flush=True)
x_train = dataset.paper_feat[train_idx]
x_train = torch.from_numpy(x_train).to(torch.float).to(device)
# Elapsed time is measured against `t`, which must have been set earlier.
print(f"Done! [{time.perf_counter() - t:.2f}s]")

# Restart the timer so the validation read is timed on its own.
t = time.perf_counter()
print("Reading validation node features...", end=" ", flush=True)
x_valid = dataset.paper_feat[valid_idx]
x_valid = torch.from_numpy(x_valid).to(torch.float).to(device)
print(f"Done! [{time.perf_counter() - t:.2f}s]")

# Labels are cast to torch.long while being moved to `device`.
y_train = torch.from_numpy(dataset.paper_label[train_idx])
y_train = y_train.to(device, torch.long)
y_valid = torch.from_numpy(dataset.paper_label[valid_idx])
y_valid = y_valid.to(device, torch.long)

# Ensure the results directory exists before training starts.
makedirs("results/cs")

# NOTE(review): the positional arguments presumably map to input size,
# hidden size, output size, layer count, dropout, batch-norm flag and
# relu-last flag -- confirm against the MLP definition.
model = MLP( dataset.num_paper_features, args.hidden_channels, dataset.num_classes, args.num_layers, args.dropout, not args.no_batch_norm, args.relu_last, ).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

# Report the total number of elements across all model parameters.
num_params = sum([p.numel() for p in model.parameters()])
print(f"#Params: {num_params}")

# Starting value for the best validation accuracy.
best_valid_acc = 0
# Epochs are numbered from 1 to args.epochs inclusive; the loop body lies
# outside this fragment.
for epoch in range(1, args.epochs + 1):
# Script fragment: loads train/validation features and labels onto `device`,
# builds the MLP and its optimizer, then trains while checkpointing the
# weights with the best validation accuracy.
# NOTE(review): `t`, `dataset`, `train_idx`, `valid_idx`, `device`, `args`,
# `MLP`, `makedirs`, `train`, `test` and `evaluator` are all defined outside
# this fragment.

# Gather the feature rows selected by `train_idx` and move them to `device`
# as float tensors.
print('Reading training node features...', end=' ', flush=True)
x_train = dataset.paper_feat[train_idx]
x_train = torch.from_numpy(x_train).to(torch.float).to(device)
# Elapsed time is measured against `t`, which must have been set earlier.
print(f'Done! [{time.perf_counter() - t:.2f}s]')

# Restart the timer so the validation read is timed on its own.
t = time.perf_counter()
print('Reading validation node features...', end=' ', flush=True)
x_valid = dataset.paper_feat[valid_idx]
x_valid = torch.from_numpy(x_valid).to(torch.float).to(device)
print(f'Done! [{time.perf_counter() - t:.2f}s]')

# Labels are cast to torch.long while being moved to `device`.
y_train = torch.from_numpy(dataset.paper_label[train_idx])
y_train = y_train.to(device, torch.long)
y_valid = torch.from_numpy(dataset.paper_label[valid_idx])
y_valid = y_valid.to(device, torch.long)

# Ensure the checkpoint directory exists before training starts.
makedirs('results/cs')

# NOTE(review): the positional arguments presumably map to input size,
# hidden size, output size, layer count, dropout, batch-norm flag and
# relu-last flag -- confirm against the MLP definition.
model = MLP(dataset.num_paper_features, args.hidden_channels, dataset.num_classes, args.num_layers, args.dropout, not args.no_batch_norm, args.relu_last).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

# Report the total number of elements across all model parameters.
num_params = sum([p.numel() for p in model.parameters()])
print(f'#Params: {num_params}')

# Starting value for the best validation accuracy.
best_valid_acc = 0
# Epochs are numbered from 1 to args.epochs inclusive.
for epoch in range(1, args.epochs + 1):
    # `loss` is not read again within this fragment; it may be used by code
    # that follows outside the visible range.
    loss = train(model, x_train, y_train, args.batch_size, optimizer)
    train_acc = test(model, x_train, y_train, evaluator)
    valid_acc = test(model, x_valid, y_valid, evaluator)
    # NOTE(review): the original indentation was lost; the checkpoint save is
    # assumed to belong to the `if` body so that only improved weights are
    # written -- confirm against the full script.
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), 'results/cs/model.pt')