Beispiel #1
0
    def save_test_submission(self, input_dict, dir_path):
        """Store the predictions under ``input_dict['y_pred']`` as a compressed archive.

        Args:
            input_dict: Mapping that must contain the key ``'y_pred'``. Its
                value must expose ``.shape`` equal to ``(146818,)`` and may be
                a ``torch.Tensor`` or an object with ``astype`` (e.g. a NumPy
                array).
            dir_path: Directory that receives the archive; it is passed to
                ``makedirs`` before writing.

        Raises:
            AssertionError: If ``'y_pred'`` is missing or has another shape.
        """
        assert 'y_pred' in input_dict
        predictions = input_dict['y_pred']
        assert predictions.shape == (146818, )

        # Tensors are moved to host memory and turned into NumPy arrays so
        # that the cast and the save below work on a single array type.
        on_host = (predictions.cpu().numpy()
                   if isinstance(predictions, torch.Tensor) else predictions)
        as_short = on_host.astype(np.short)

        makedirs(dir_path)
        # NumPy appends the ``.npz`` suffix to the target name on its own.
        target = osp.join(dir_path, 'y_pred_mag240m')
        np.savez_compressed(target, y_pred=as_short)
Beispiel #2
0
    def save_test_submission(self, input_dict, dir_path):
        """Validate top-10 tail predictions and save them as a compressed archive.

        Args:
            input_dict: Mapping with a ``'h,r->t'`` entry that itself holds
                ``'t_pred_top10'``. That value must have shape
                ``(1359303, 10)`` with every element in ``[0, 1001)``; it may
                be a ``torch.Tensor`` or an object with ``astype``.
            dir_path: Directory that receives the archive; it is passed to
                ``makedirs`` before writing.

        Raises:
            AssertionError: If a key is missing, the shape differs, or a value
                falls outside ``[0, 1001)``.
        """
        assert 'h,r->t' in input_dict
        task_preds = input_dict['h,r->t']
        assert 't_pred_top10' in task_preds

        top10 = task_preds['t_pred_top10']

        # Checked in this order so the element-wise comparisons are only
        # evaluated once the shape is known to be right.
        assert top10.shape == (1359303, 10)
        assert (0 <= top10).all()
        assert (top10 < 1001).all()

        if isinstance(top10, torch.Tensor):
            top10 = top10.cpu().numpy()
        # Values were verified to be below 1001, so they fit into int16.
        compact = top10.astype(np.int16)

        makedirs(dir_path)
        np.savez_compressed(osp.join(dir_path, 't_pred_wikikg90m'),
                            t_pred_top10=compact)
Beispiel #3
0
    def save_test_submission(self, input_dict, dir_path):
        """Validate top-10 tail predictions and save them as a compressed archive.

        Args:
            input_dict: Mapping with a ``'h,r->t'`` entry that itself holds
                ``'t_pred_top10'``. That value must have shape
                ``(1359303, 10)`` with every element in ``[0, 1001)`` and no
                repeated value inside a row; it may be a ``torch.Tensor`` or
                a NumPy array.
            dir_path: Directory that receives the archive; it is passed to
                ``makedirs`` before writing.

        Raises:
            AssertionError: If a key is missing, the shape differs, a value
                falls outside ``[0, 1001)``, or some row contains a
                duplicated prediction.
        """
        assert 'h,r->t' in input_dict
        assert 't_pred_top10' in input_dict['h,r->t']

        t_pred_top10 = input_dict['h,r->t']['t_pred_top10']

        assert t_pred_top10.shape == (1359303, 10) and (
            0 <= t_pred_top10).all() and (t_pred_top10 < 1001).all()

        if isinstance(t_pred_top10, torch.Tensor):
            t_pred_top10 = t_pred_top10.cpu().numpy()

        # Duplicate detection for all rows at once: after sorting each row,
        # equal values sit next to each other, so a single neighbour
        # comparison replaces one Python-level uniqueness call per row.
        sorted_rows = np.sort(t_pred_top10, axis=1)
        has_duplicates = (sorted_rows[:, 1:] == sorted_rows[:, :-1]).any()
        assert not has_duplicates, \
            'Found duplicated tail prediction for some triplets!'

        # Values were verified to be below 1001, so they fit into int16.
        t_pred_top10 = t_pred_top10.astype(np.int16)

        makedirs(dir_path)
        filename = osp.join(dir_path, 't_pred_wikikg90m')
        np.savez_compressed(filename, t_pred_top10=t_pred_top10)
Beispiel #4
0
    def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
        """Store ``input_dict['y_pred']`` as a compressed archive for one test split.

        Args:
            input_dict: Mapping that must contain the key ``'y_pred'``; the
                value may be a ``torch.Tensor`` or an object with ``astype``.
            dir_path: Directory that receives the archive; it is passed to
                ``makedirs`` before writing.
            mode: One of ``'test-whole'``, ``'test-dev'`` or
                ``'test-challenge'``. It selects both the required prediction
                shape and the archive name.

        Raises:
            AssertionError: If ``'y_pred'`` is missing, ``mode`` is not one of
                the accepted values, or the shape does not match the mode.
        """
        assert 'y_pred' in input_dict
        assert mode in ['test-whole', 'test-dev', 'test-challenge']

        # Required shape and archive stem for every accepted mode.
        per_mode = {
            'test-whole': ((146818, ), 'y_pred_mag240m'),
            'test-dev': ((88092, ), 'y_pred_mag240m_test-dev'),
            'test-challenge': ((58726, ), 'y_pred_mag240m_test-challenge'),
        }
        required_shape, stem = per_mode[mode]

        predictions = input_dict['y_pred']
        assert predictions.shape == required_shape
        target = osp.join(dir_path, stem)

        makedirs(dir_path)

        # Tensors are moved to host memory and turned into NumPy arrays first.
        if isinstance(predictions, torch.Tensor):
            predictions = predictions.cpu().numpy()

        np.savez_compressed(target, y_pred=predictions.astype(np.short))
Beispiel #5
0
    def save_test_submission(self, input_dict: Dict, dir_path: str, mode: str):
        """Validate top-10 tail predictions for one test split and save them.

        Args:
            input_dict: Mapping with a ``'h,r->t'`` entry that itself holds
                ``'t_pred_top10'``; the value may be a ``torch.Tensor`` or a
                NumPy array and must not repeat a value inside a row.
            dir_path: Directory that receives the archive; it is passed to
                ``makedirs`` before writing.
            mode: ``'test-dev'`` (expects shape ``(15000, 10)``) or
                ``'test-challenge'`` (expects shape ``(10000, 10)``). It also
                selects the archive name.

        Raises:
            AssertionError: If a key is missing, ``mode`` is not accepted, the
                shape does not match the mode, or some row contains a
                duplicated prediction.
        """
        assert 'h,r->t' in input_dict
        assert 't_pred_top10' in input_dict['h,r->t']
        assert mode in ['test-dev', 'test-challenge']

        t_pred_top10 = input_dict['h,r->t']['t_pred_top10']

        # Convert before any row-wise validation so the checks below always
        # operate on a host-side NumPy array, never on tensor rows.
        if isinstance(t_pred_top10, torch.Tensor):
            t_pred_top10 = t_pred_top10.cpu().numpy()

        if mode == 'test-dev':
            assert t_pred_top10.shape == (15000, 10)
            filename = osp.join(dir_path, 't_pred_wikikg90m-v2_test-dev')
        elif mode == 'test-challenge':
            assert t_pred_top10.shape == (10000, 10)
            filename = osp.join(dir_path, 't_pred_wikikg90m-v2_test-challenge')

        # The shape is now known to be 2-D. After sorting each row, equal
        # values sit next to each other, so one neighbour comparison finds
        # duplicates in all rows at once.
        sorted_rows = np.sort(t_pred_top10, axis=1)
        has_duplicates = (sorted_rows[:, 1:] == sorted_rows[:, :-1]).any()
        assert not has_duplicates, \
            'Found duplicated tail prediction for some triplets!'

        makedirs(dir_path)

        t_pred_top10 = t_pred_top10.astype(np.int32)

        np.savez_compressed(filename, t_pred_top10=t_pred_top10)
Beispiel #6
0
    print("Reading training node features...", end=" ", flush=True)
    x_train = dataset.paper_feat[train_idx]
    x_train = torch.from_numpy(x_train).to(torch.float).to(device)
    print(f"Done! [{time.perf_counter() - t:.2f}s]")
    t = time.perf_counter()
    print("Reading validation node features...", end=" ", flush=True)
    x_valid = dataset.paper_feat[valid_idx]
    x_valid = torch.from_numpy(x_valid).to(torch.float).to(device)
    print(f"Done! [{time.perf_counter() - t:.2f}s]")

    y_train = torch.from_numpy(dataset.paper_label[train_idx])
    y_train = y_train.to(device, torch.long)
    y_valid = torch.from_numpy(dataset.paper_label[valid_idx])
    y_valid = y_valid.to(device, torch.long)

    makedirs("results/cs")
    model = MLP(
        dataset.num_paper_features,
        args.hidden_channels,
        dataset.num_classes,
        args.num_layers,
        args.dropout,
        not args.no_batch_norm,
        args.relu_last,
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    num_params = sum([p.numel() for p in model.parameters()])
    print(f"#Params: {num_params}")

    best_valid_acc = 0
    for epoch in range(1, args.epochs + 1):
    print('Reading training node features...', end=' ', flush=True)
    x_train = dataset.paper_feat[train_idx]
    x_train = torch.from_numpy(x_train).to(torch.float).to(device)
    print(f'Done! [{time.perf_counter() - t:.2f}s]')
    t = time.perf_counter()
    print('Reading validation node features...', end=' ', flush=True)
    x_valid = dataset.paper_feat[valid_idx]
    x_valid = torch.from_numpy(x_valid).to(torch.float).to(device)
    print(f'Done! [{time.perf_counter() - t:.2f}s]')

    y_train = torch.from_numpy(dataset.paper_label[train_idx])
    y_train = y_train.to(device, torch.long)
    y_valid = torch.from_numpy(dataset.paper_label[valid_idx])
    y_valid = y_valid.to(device, torch.long)

    makedirs('results/cs')
    model = MLP(dataset.num_paper_features, args.hidden_channels,
                dataset.num_classes, args.num_layers, args.dropout,
                not args.no_batch_norm, args.relu_last).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    num_params = sum([p.numel() for p in model.parameters()])
    print(f'#Params: {num_params}')

    best_valid_acc = 0
    for epoch in range(1, args.epochs + 1):
        loss = train(model, x_train, y_train, args.batch_size, optimizer)
        train_acc = test(model, x_train, y_train, evaluator)
        valid_acc = test(model, x_valid, y_valid, evaluator)
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            torch.save(model.state_dict(), 'results/cs/model.pt')