Example #1
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = Planetoid(root='data',
                        name='Cora',
                        transform=T.NormalizeFeatures())
    data = dataset[0]
    ground_truth_edge_index = data.edge_index.to(device)
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    data = train_test_split_edges(data)
    data = data.to(device)

    model = Net(dataset.num_features, 64).to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

    best_val_auc = test_auc = 0
    for epoch in range(1, 101):
        loss = train(data, model, optimizer)
        val_auc, tmp_test_auc = test(data, model)
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            test_auc = tmp_test_auc
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
              f'Test: {test_auc:.4f}')

    z = model.encode(data.x, data.train_pos_edge_index)
    final_edge_index = model.decode_all(z)
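
A side note on the API used throughout these examples: newer PyTorch Geometric releases deprecate train_test_split_edges in favor of the RandomLinkSplit transform. A minimal sketch of the replacement, assuming a recent PyG version:

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='data', name='Cora', transform=T.NormalizeFeatures())
# RandomLinkSplit returns three Data objects carrying edge_label and
# edge_label_index for train, validation and test
transform = T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True)
train_data, val_data, test_data = transform(dataset[0])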
Example #2
    def create_train_test_split_edges(self, stage=None):
        def get_link_labels(pos_edge_index, neg_edge_index):
            link_labels = torch.zeros(
                pos_edge_index.size(1) + neg_edge_index.size(1)).float()
            link_labels[:pos_edge_index.size(1)] = 1.
            return link_labels

        def prepare_data(data, stage):
            x, pos_edge_index = data.x, getattr(data,
                                                f"{stage}_pos_edge_index")

            _edge_index, _ = remove_self_loops(pos_edge_index)
            pos_edge_index_with_self_loops, _ = add_self_loops(
                _edge_index, num_nodes=x.size(0))

            neg_edge_index = negative_sampling(
                edge_index=pos_edge_index_with_self_loops,
                num_nodes=x.size(0),
                num_neg_samples=pos_edge_index.size(1))

            link_labels = get_link_labels(pos_edge_index, neg_edge_index)
            Data = namedtuple(
                "Data",
                ["x", "pos_edge_index", "neg_edge_index", "link_labels"])
            return [Data(x, pos_edge_index, neg_edge_index, link_labels)]

        if not hasattr(self, "_loaded_dataset"):
            self.data.train_mask = self.data.val_mask = self.data.test_mask = self.data.y = None
            self._loaded_dataset = train_test_split_edges(self.data)

        data = prepare_data(self._loaded_dataset, stage)
        return THDataloader(data)
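
For context, the labels built by get_link_labels above are ordered positives-first. A typical training step (assumed here, not shown in the original snippet) scores both edge sets and feeds the concatenated logits to a binary cross-entropy loss:

import torch
import torch.nn.functional as F

logits = torch.randn(6)  # scores for 3 positive + 3 negative edges
labels = torch.cat([torch.ones(3), torch.zeros(3)])  # matches get_link_labels
loss = F.binary_cross_entropy_with_logits(logits, labels)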
Example #3
def test_gae():
    model = GAE(encoder=lambda x: x)
    model.reset_parameters()

    x = torch.Tensor([[1, -1], [1, 2], [2, 1]])
    z = model.encode(x)
    assert z.tolist() == x.tolist()

    adj = model.decoder.forward_all(z)
    assert adj.tolist() == torch.sigmoid(
        torch.Tensor([[+2, -1, +1], [-1, +5, +4], [+1, +4, +5]])).tolist()

    edge_index = torch.tensor([[0, 1], [1, 2]])
    value = model.decode(z, edge_index)
    assert value.tolist() == torch.sigmoid(torch.Tensor([-1, 4])).tolist()

    edge_index = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    data = Data(edge_index=edge_index)
    data.num_nodes = edge_index.max().item() + 1
    data = train_test_split_edges(data, val_ratio=0.2, test_ratio=0.3)

    z = torch.randn(11, 16)
    loss = model.recon_loss(z, data.train_pos_edge_index)
    assert loss.item() > 0

    auc, ap = model.test(z, data.val_pos_edge_index, data.val_neg_edge_index)
    assert auc >= 0 and auc <= 1 and ap >= 0 and ap <= 1
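
The asserted values follow from GAE's default InnerProductDecoder, which scores an edge (i, j) as sigmoid(z_i · z_j); a quick standalone check of the fixture:

import torch

z = torch.Tensor([[1, -1], [1, 2], [2, 1]])
print(z @ z.t())                 # [[2, -1, 1], [-1, 5, 4], [1, 4, 5]]
print(torch.sigmoid(z @ z.t()))  # equals model.decoder.forward_all(z) above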
Example #4
    def train_val_test_split(self):
        x, edge_index = self.data.x, self.data.edge_index

        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        data = deepcopy(self.data)
        data.edge_index = edge_index

        # train_pos_edge_index=[2, E * 0.85] (undirected)
        # val_neg/pos_edge_index=[2, E/2 * 0.05] (not undirected)
        # test_neg/pos_edge_index: [2, E/2 * 0.1] (not undirected)
        data = train_test_split_edges(data, *self.train_val_test_ratio[1:])
        data.__delattr__("train_neg_adj_mask")

        test_edge_index = torch.cat([
            to_undirected(data.test_pos_edge_index),
            to_undirected(data.test_neg_edge_index)
        ], dim=1)

        if data.val_pos_edge_index.size(1) > 0:
            val_edge_index = torch.cat([
                to_undirected(data.val_pos_edge_index),
                to_undirected(data.val_neg_edge_index)
            ], dim=1)
        else:
            val_edge_index = test_edge_index

        return data.train_pos_edge_index, val_edge_index, test_edge_index
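
to_undirected, used when assembling the val/test edges above, mirrors every edge so that (i, j) implies (j, i); a tiny self-contained illustration:

import torch
from torch_geometric.utils import to_undirected

edge_index = torch.tensor([[0, 1], [1, 2]])
print(to_undirected(edge_index))  # tensor([[0, 1, 1, 2], [1, 0, 2, 1]])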
Example #5
    def __init__(self, feature):
        root = raw_path = 'data/{}'.format(feature)
        dataset = ScisciDataset(root=root,
                                raw_path=raw_path,
                                transform=T.NormalizeFeatures())
        data = train_test_split_edges(dataset[0])
        self.feature = feature
        self.root = root
        self.dataset = dataset
        self.data = data
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.patience = 100
        self.num_epochs = 400
Example #6
def test_train_test_split_edges():
    edge_index = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    data = Data(edge_index=edge_index)
    data.num_nodes = edge_index.max().item() + 1
    data = train_test_split_edges(data, val_ratio=0.2, test_ratio=0.3)

    assert data.val_pos_edge_index.size() == (2, 2)
    assert data.val_neg_edge_index.size() == (2, 2)
    assert data.test_pos_edge_index.size() == (2, 3)
    assert data.test_neg_edge_index.size() == (2, 3)
    assert data.train_pos_edge_index.size() == (2, 10)
    assert data.train_neg_adj_mask.size() == (11, 11)
    assert data.train_neg_adj_mask.sum().item() == (11**2 - 11) / 2 - 4 - 6 - 5
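
One way to read the final assertion (an observation, not part of the original test): train_neg_adj_mask marks the upper-triangular node pairs still available as training negatives, i.e. the 55 pairs of 11 nodes minus the 10 positive edges and the 2 + 3 negatives already sampled for validation and test:

num_pairs = (11 ** 2 - 11) // 2  # 55 undirected node pairs
assert num_pairs - 10 - 2 - 3 == (11 ** 2 - 11) / 2 - 4 - 6 - 5  # both equal 40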
Example #7
def construct_sparse(loader):
    """Construct sparse matrices in the graphs from the loader"""
    graphs = []
    for graph in loader:
        graph.edge_id = dict(
            zip((graph.num_nodes * graph.edge_index[0] +
                 graph.edge_index[1]).numpy().squeeze(),
                graph.edge_attr.squeeze().numpy()))
        graphs.append(
            data.make_sparse(
                train_test_split_edges(graph, val_ratio=0, test_ratio=0)))
    return graphs
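
The edge_id mapping above linearizes each (row, col) pair into the single integer row * num_nodes + col, i.e. the edge's position in a flattened dense adjacency matrix; a small illustration:

import torch

num_nodes = 4
edge_index = torch.tensor([[0, 2], [1, 3]])
flat = num_nodes * edge_index[0] + edge_index[1]
print(flat.tolist())  # [1, 11]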
Example #8
def get_biograkn_data(edge_csv_file_path,
                      nodes_labs_file_path,
                      task='node',
                      features="ones",
                      val_ratio=0.3,
                      test_ratio=0.3):
    '''
    Returns a PyTorch Geometric 'Data' object built from the provided file paths.
    Args:
    edge_csv_file_path: file path to a csv file where nodes and hyper-relations are columns and traversed graph paths are rows
    nodes_labs_file_path: file path to a two-column csv file with node ids and their labels
    task: 'node' builds node-classification masks, 'link' builds a link-prediction edge split
    features: 'ones' for constant node features, 'labels' for one-hot encoded label features
    val_ratio: percentage ratio for validation data mask
    test_ratio: percentage ratio for test data mask
    '''

    edge_index_df = pd.read_csv(edge_csv_file_path)
    nodes_labels = pd.read_csv(nodes_labs_file_path)
    edge_index = torch.tensor(
        [edge_index_df["source"], edge_index_df["target"]], dtype=torch.long)
    y = torch.tensor(nodes_labels["label"], dtype=torch.long)
    num_nodes = len(y)
    if features == "ones":
        x = torch.ones(num_nodes, 10)
    elif features == 'labels':
        encoder = OneHotEncoder(categories='auto')
        features = nodes_labels["label"].values.reshape(-1, 1)
        features = encoder.fit_transform(features)
        x = torch.tensor(features.toarray(), dtype=torch.float32)

    if task == 'node':
        dataset_masks = create_masks(nodes_labels["node"], val_ratio,
                                     test_ratio)
        num_classes = torch.unique(y).size(0)
        data = Data(y=y,
                    x=x,
                    edge_index=edge_index,
                    num_nodes=num_nodes,
                    test_mask=dataset_masks["test"],
                    train_mask=dataset_masks["train"],
                    val_mask=dataset_masks["validation"],
                    num_classes=num_classes)
    elif task == 'link':
        data = Data(y=y, x=x, edge_index=edge_index, num_nodes=num_nodes)
        data = train_test_split_edges(data)
    else:
        raise RuntimeError('Unknown task.')
    return data
Example #9
def do_edge_split(dataset):
    data = dataset[0]
    data = train_test_split_edges(data)

    edge_index, _ = add_self_loops(data.train_pos_edge_index)
    data.train_neg_edge_index = negative_sampling(
        edge_index, num_nodes=data.num_nodes,
        num_neg_samples=data.train_pos_edge_index.size(1))

    split_edge = {'train': {}, 'valid': {}, 'test': {}}
    split_edge['train']['edge'] = data.train_pos_edge_index.t()
    split_edge['train']['edge_neg'] = data.train_neg_edge_index.t()
    split_edge['valid']['edge'] = data.val_pos_edge_index.t()
    split_edge['valid']['edge_neg'] = data.val_neg_edge_index.t()
    split_edge['test']['edge'] = data.test_pos_edge_index.t()
    split_edge['test']['edge_neg'] = data.test_neg_edge_index.t()
    return split_edge
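
add_self_loops is applied before negative_sampling so that (i, i) pairs count as existing edges and are never drawn as negatives; a minimal sketch of that call chain:

import torch
from torch_geometric.utils import add_self_loops, negative_sampling

pos = torch.tensor([[0, 1], [1, 2]])
pos_with_loops, _ = add_self_loops(pos, num_nodes=3)
neg = negative_sampling(pos_with_loops, num_nodes=3, num_neg_samples=2)
assert bool((neg[0] != neg[1]).all())  # no self-loops among the negatives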
Example #10
def do_edge_split(dataset, fast_split=False, val_ratio=0.05, test_ratio=0.1):
    data = dataset[0]
    random.seed(234)
    torch.manual_seed(234)

    if not fast_split:
        data = train_test_split_edges(data, val_ratio, test_ratio)
        edge_index, _ = add_self_loops(data.train_pos_edge_index)
        data.train_neg_edge_index = negative_sampling(
            edge_index,
            num_nodes=data.num_nodes,
            num_neg_samples=data.train_pos_edge_index.size(1))
    else:
        num_nodes = data.num_nodes
        row, col = data.edge_index
        # Return upper triangular portion.
        mask = row < col
        row, col = row[mask], col[mask]
        n_v = int(math.floor(val_ratio * row.size(0)))
        n_t = int(math.floor(test_ratio * row.size(0)))
        # Positive edges.
        perm = torch.randperm(row.size(0))
        row, col = row[perm], col[perm]
        r, c = row[:n_v], col[:n_v]
        data.val_pos_edge_index = torch.stack([r, c], dim=0)
        r, c = row[n_v:n_v + n_t], col[n_v:n_v + n_t]
        data.test_pos_edge_index = torch.stack([r, c], dim=0)
        r, c = row[n_v + n_t:], col[n_v + n_t:]
        data.train_pos_edge_index = torch.stack([r, c], dim=0)
        # Negative edges (cannot guarantee (i,j) and (j,i) won't both appear)
        neg_edge_index = negative_sampling(data.edge_index,
                                           num_nodes=num_nodes,
                                           num_neg_samples=row.size(0))
        data.val_neg_edge_index = neg_edge_index[:, :n_v]
        data.test_neg_edge_index = neg_edge_index[:, n_v:n_v + n_t]
        data.train_neg_edge_index = neg_edge_index[:, n_v + n_t:]

    split_edge = {'train': {}, 'valid': {}, 'test': {}}
    split_edge['train']['edge'] = data.train_pos_edge_index.t()
    split_edge['train']['edge_neg'] = data.train_neg_edge_index.t()
    split_edge['valid']['edge'] = data.val_pos_edge_index.t()
    split_edge['valid']['edge_neg'] = data.val_neg_edge_index.t()
    split_edge['test']['edge'] = data.test_pos_edge_index.t()
    split_edge['test']['edge_neg'] = data.test_neg_edge_index.t()
    return split_edge
Example #11
    def train_test_split(self, x, edge_index, ratio):
        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        data = deepcopy(self.data)
        data.edge_index = edge_index
        data.x = x

        data = train_test_split_edges(data, 0, ratio)
        data.__delattr__("train_neg_adj_mask")

        test_edge_index = torch.cat([
            to_undirected(data.test_pos_edge_index),
            to_undirected(data.test_neg_edge_index)
        ], dim=1)

        return data.train_pos_edge_index, test_edge_index
Example #12
def test_lp_trainer():

    dataset = build_dataset_from_name("cora")
    dataset = to_pyg_dataset(dataset)
    data = dataset[0]
    data = train_test_split_edges(data, 0.1, 0.1)
    dataset = [data]

    lp_trainer = LinkPredictionTrainer(model='gcn', init=False)

    lp_trainer.num_features = data.x.size(1)
    lp_trainer.initialize()
    print(lp_trainer.encoder.encoder)
    print(lp_trainer.decoder.decoder)

    lp_trainer.train(dataset, True)
    result = lp_trainer.evaluate(dataset, "test", "auc")
    print(result)
Example #13
def main(training_method, dataset_name, dataset_dir, gpu_id, seed):
    torch.manual_seed(seed)
    device = get_device(gpu_id)

    dataset = get_data(dataset_name, dataset_dir)
    features_dimension = dataset.num_features
    data = dataset[0].to(device)
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    data = train_test_split_edges(data)
    x, train_pos_edge_index = data.x, data.train_pos_edge_index

    model, optimizer = get_model_and_optimizer(training_method, dataset_name, features_dimension, device)

    max_epoch = 201 if dataset_name == 'citeseer' else 401
    for epoch in range(1, max_epoch):
        train(model, optimizer, x, train_pos_edge_index)
        auc, ap = test(model, x, train_pos_edge_index, data.test_pos_edge_index, data.test_neg_edge_index)
        print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
Example #14
    def _train_test_split_edges(data, random_seed=12345):

        import pytorch_lightning as pl
        from torch_geometric.utils import train_test_split_edges
        import random
        import numpy as np
        import torch

        rand_state = random.getstate()
        np_state = np.random.get_state()
        torch_state = torch.random.get_rng_state()

        pl.seed_everything(random_seed)
        data = train_test_split_edges(data)

        random.setstate(rand_state)
        np.random.set_state(np_state)
        torch.random.set_rng_state(torch_state)

        return data
Example #15
    def process(self):
        random.seed(12345)
        torch.manual_seed(12345)

        data = train_test_split_edges(self.data)

        edge_index, _ = add_self_loops(data.train_pos_edge_index)
        data.train_neg_edge_index = negative_sampling(
            edge_index,
            num_nodes=data.num_nodes,
            num_neg_samples=data.train_pos_edge_index.size(1))

        self.__max_z__ = 0

        # Collect a list of subgraphs for training, validation and test.
        train_pos_list = self.extract_enclosing_subgraphs(
            data.train_pos_edge_index, data.train_pos_edge_index, 1)
        train_neg_list = self.extract_enclosing_subgraphs(
            data.train_neg_edge_index, data.train_pos_edge_index, 0)

        val_pos_list = self.extract_enclosing_subgraphs(
            data.val_pos_edge_index, data.train_pos_edge_index, 1)
        val_neg_list = self.extract_enclosing_subgraphs(
            data.val_neg_edge_index, data.train_pos_edge_index, 0)

        test_pos_list = self.extract_enclosing_subgraphs(
            data.test_pos_edge_index, data.train_pos_edge_index, 1)
        test_neg_list = self.extract_enclosing_subgraphs(
            data.test_neg_edge_index, data.train_pos_edge_index, 0)

        # Convert labels to one-hot features.
        for data in chain(train_pos_list, train_neg_list, val_pos_list,
                          val_neg_list, test_pos_list, test_neg_list):
            data.x = F.one_hot(data.z, self.__max_z__ + 1).to(torch.float)

        torch.save(self.collate(train_pos_list + train_neg_list),
                   self.processed_paths[0])
        torch.save(self.collate(val_pos_list + val_neg_list),
                   self.processed_paths[1])
        torch.save(self.collate(test_pos_list + test_neg_list),
                   self.processed_paths[2])
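
F.one_hot, used above to turn the integer structural labels z into node features, simply expands each label into a one-hot row; a small self-contained example:

import torch
import torch.nn.functional as F

z = torch.tensor([0, 2, 1])
print(F.one_hot(z, num_classes=3).to(torch.float))
# tensor([[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]])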
Example #16
def test_train_test_split_edges():
    edge_index = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    edge_attr = torch.arange(edge_index.size(1))
    data = Data(edge_index=edge_index, edge_attr=edge_attr)
    data.num_nodes = edge_index.max().item() + 1

    with pytest.warns(UserWarning, match='deprecated'):
        data = train_test_split_edges(data, val_ratio=0.2, test_ratio=0.3)

    assert len(data) == 10
    assert data.val_pos_edge_index.size() == (2, 2)
    assert data.val_neg_edge_index.size() == (2, 2)
    assert data.test_pos_edge_index.size() == (2, 3)
    assert data.test_neg_edge_index.size() == (2, 3)
    assert data.train_pos_edge_index.size() == (2, 10)
    assert data.train_neg_adj_mask.size() == (11, 11)
    assert data.train_neg_adj_mask.sum().item() == (11**2 - 11) / 2 - 4 - 6 - 5
    assert data.train_pos_edge_attr.size() == (10, )
    assert data.val_pos_edge_attr.size() == (2, )
    assert data.test_pos_edge_attr.size() == (3, )
Example #17
    def __init__(self, model_type='GCN', text_encoding='bert'):
        """
            Class for training N times and computing the results
            model_type: string. Model definition. Options {"GCN","SAGE", "GIN", "GAT", "AGNN","GraphUNet"} default GCN
            text_encoding: text representation. Options {"bert","tfidf","d2v"} default BERT
        """

        root = 'data/{}'.format(text_encoding)
        raw_path = '../../data/torch/{}/'.format(text_encoding)
        dataset = ScisciDataset(root=root,
                                raw_path=raw_path,
                                transform=T.NormalizeFeatures())
        data = train_test_split_edges(dataset[0])
        self.model_type = model_type
        self.text_encoding = text_encoding
        self.root = root
        self.dataset = dataset
        self.data = data
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.patience = 100
        self.num_epochs = 450
Example #18
def run(file, data_name, model_name,lr):
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64*1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')

    parser.add_argument('--dataset', type=str, default='Citeseer')

    args = parser.parse_args()
    if data_name is not None and model_name is not None and lr is not None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data',args.dataset),name=args.dataset,transform=T.ToSparseTensor())
    num_training = int(dataset.__len__()*0.8)
    num_val = int(dataset.__len__()*0.1)
    num_test = dataset.__len__() - (num_training+num_val)

    data = dataset[0]
    print('data:',vars(data))
    adj_t = data.adj_t.to(device)
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    split_edge = utils.train_test_split_edges(data)
    split_edge.edge_index = edge_index

    print(data)
    print(edge_index.shape)

    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd': model_name = args.model[:-3]
    

    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    if decoder_enable == '-nd':
        model.decoder = NeuralDecoder( args.hidden_channels,  
            args.hidden_channels, 1, args.num_layers, args.dropout)

    evaluator = Evaluator(name='ogbl-ddi')

    model = model.to(device)

    loggers = {
        'metrics': Logger(args.runs, args)
    }

    for run in range(args.runs):
        torch.manual_seed(run)
        model.reset_parameters()

        if args.model in ['arga','arga-nd','arvga','arvga-nd']:
            args.lr=0.005
        optimizer = torch.optim.Adam(
                list(model.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge,
                         optimizer, args.batch_size)

        result = test(model, data.x, data, split_edge, evaluator, args.batch_size)
        loggers['metrics'].add_result(run, result)

    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()
        file.write(args.model+'\t'+'\t'.join(toWrite)+'\n')
        file.flush()
        os.fsync(file)
Example #19
def data_processor(data, undirected=True, val_ratio=0.05, test_ratio=0.1):
    '''
    Preprocess graph data for PyTorch Geometric.

    Parameters:
        data (torch_geometric.data.Data): graph data.

    Returns:
        all_pos_edge_index (torch.Tensor[2, num_pos_edges]): all links before the train/test split.
        train_pos_edge_adj_t (torch.SparseTensor[2, num_pos_edges]): links of the training data.
        y_true (numpy.ndarray[num_nodes, num_nodes].flatten()): flattened adjacency matrix of all links.
        y_train (numpy.ndarray[num_nodes, num_nodes].flatten()): flattened adjacency matrix of the training links.
        mask (numpy.ndarray[num_nodes, num_nodes].flatten()): flattened adjacency matrix that is False for the links sampled as validation/test pos_edges and neg_edges, and True everywhere else.
    '''

    # Keep the edge tensor before applying train_test_split
    all_pos_edge_index = data.edge_index

    data.train_mask = data.val_mask = data.test_mask = data.y = None
    data = train_test_split_edges(data,
                                  val_ratio=val_ratio,
                                  test_ratio=test_ratio)

    if (val_ratio + test_ratio == 1
            and data.train_pos_edge_index.size(1) > 0):
        data.test_pos_edge_index = torch.cat(
            [data.test_pos_edge_index, data.train_pos_edge_index], dim=-1)
        data.train_pos_edge_index = torch.LongTensor([[], []])

    print('train test split has been done.')
    print(data)
    print('')

    # Build the SparseTensor of edges to pass to GCN2Conv
    # Reference: https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/transforms/to_sparse_tensor.html#ToSparseTensor
    # Note: only the edges extracted into train_pos_edge_index are converted,
    # not the whole edge_index
    (row, col), N, E = (data.train_pos_edge_index, data.num_nodes,
                        data.train_pos_edge_index.size(1))
    perm = (col * N + row).argsort()
    row, col = row[perm], col[perm]

    value = None
    for key in ['edge_weight', 'edge_attr', 'edge_type']:
        if data[key] is not None:
            value = data[key][perm]
            break

    for key, item in data:
        if item.size(0) == E:
            data[key] = item[perm]

    train_pos_edge_adj_t = SparseTensor(row=col,
                                        col=row,
                                        value=value,
                                        sparse_sizes=(N, N),
                                        is_sorted=True)

    print('train_pos_edge_adj_t is completed.\n')

    # 1. All edges
    edge = pd.DataFrame(all_pos_edge_index.cpu().numpy().T,
                        columns=['source', 'target'])
    G = nx.from_pandas_edgelist(edge, create_using=nx.Graph())

    # Build the adjacency matrix
    df_adj = pd.DataFrame(
        np.zeros([len(G.nodes()), len(G.nodes())]),
        index=G.nodes(),
        columns=G.nodes()).sort_index(axis=0).sort_index(axis=1)
    for i, j in G.edges():
        df_adj.loc[i, j] = 1

    y_true = torch.tensor(df_adj.to_numpy().flatten(), dtype=torch.float)

    print('y_true is completed.\n')

    # 2. Edges used for training
    edge = pd.DataFrame(data.train_pos_edge_index.cpu().numpy().T,
                        columns=['source', 'target'])
    G_train = nx.from_pandas_edgelist(edge, create_using=nx.Graph())

    # Build the adjacency matrix
    df_adj_train = pd.DataFrame(
        np.zeros([len(G.nodes()), len(G.nodes())]),
        index=G.nodes(),
        columns=G.nodes()).sort_index(axis=0).sort_index(axis=1)
    for i, j in G_train.edges():
        df_adj_train.loc[i, j] = 1

    y_train = torch.tensor(df_adj_train.to_numpy().flatten(),
                           dtype=torch.float)

    print('y_train is completed.\n')

    # The zero entries of the adjacency matrix include the positive and
    # negative edges used for validation and test. Build a mask that excludes
    # these edges from the loss computation.
    val_test_edge = torch.cat([
        data.test_neg_edge_index, data.test_pos_edge_index,
        data.val_neg_edge_index, data.val_pos_edge_index
    ], dim=-1)
    mask = torch.ones([data.x.size(0), data.x.size(0)], dtype=torch.float)

    for i in range(val_test_edge.size(1)):
        mask[val_test_edge[0, i], val_test_edge[1, i]] = 0
        mask[val_test_edge[1, i], val_test_edge[0, i]] = 0

    mask = mask.flatten()

    return all_pos_edge_index, train_pos_edge_adj_t, y_true, y_train, mask
Example #20
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset = None
if args.dataset.lower() == 'cora':
    dataset = Planetoid(root='tmp', name='Cora')
    print("use dataset: Cora")
elif args.dataset.lower() == 'citeseer':
    dataset = Planetoid(root='tmp', name='CiteSeer')
    print("use dataset: CiteSeer")
elif args.dataset.lower() == 'pubmed':
    dataset = Planetoid(root='tmp', name='PubMed')
    print("use dataset: PubMed")
data = dataset[0]

enhanced_data = train_test_split_edges(data.clone(),
                                       val_ratio=0.1,
                                       test_ratio=0.2)

train_data = Data(x=enhanced_data.x,
                  edge_index=enhanced_data['train_pos_edge_index']).to(DEVICE)
target_data = data.to(DEVICE)

if args.model == 'VGAE':
    model = VGAE(encoder=VEncoder(data['x'].shape[1])).to(DEVICE)
else:
    model = GAE(encoder=Encoder(data['x'].shape[1])).to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.learning_rate,
                             weight_decay=5e-4)
Example #21
        device = args.device = "cpu"
    else:
        device = args.device = f"cuda:{args.device}"

    dataset = Planetoid(osp.expanduser('~/.cache-autogl'),
                        args.dataset,
                        transform=T.NormalizeFeatures())

    res = []
    begin_time = time.time()
    for seed in tqdm(range(1234, 1234 + args.repeat)):
        setup_seed(seed)
        data = dataset[0].to(device)
        # use train_test_split_edges to create neg and positive edges
        data.train_mask = data.val_mask = data.test_mask = data.y = None
        data = train_test_split_edges(data).to(device)

        model_hp, decoder_hp = get_encoder_decoder_hp(args.model)

        trainer = LinkPredictionTrainer(
            model=args.model,
            num_features=data.x.size(1),
            lr=1e-2,
            max_epoch=100,
            early_stopping_round=101,
            weight_decay=0.0,
            device=args.device,
            feval=[Auc],
            loss="binary_cross_entropy_with_logits",
            init=False).duplicate_from_hyper_parameter(
                {
Example #22
def galTrainer(model, data: torch_geometric.data.Data):
    """
        trains the model according to the required epochs/patience

        Parameters
        ----------
        model: Model
        data: torch_geometric.data.Data

        Returns
        -------
        model: Model
        model_log: str
        test_acc: torch.Tensor
    """

    # according to best results reported in GAL paper
    if model.dataset_name == "CITESEER":
        lambda_param = 0.75
        use_ws_loss = False
    elif model.dataset_name == "CORA":  # default param - not reported in the paper
        lambda_param = 0.05
        use_ws_loss = True
    elif model.dataset_name == "PUBMED":
        lambda_param = 0.5
        use_ws_loss = False
    else:
        lambda_param = 0.05
        use_ws_loss = True

    # Train/validation/test
    data = train_test_split_edges(data)
    optimizer, optimizer_attack, optimizer_fine_tune = create_gal_optimizer(model=model, lambda_reg=lambda_param)

    train_epochs = 250
    fine_tune_epochs = 800

    switch = True
    for epoch in range(1, train_epochs + 1):

        train_acc = train(model=model, optimizer=optimizer, optimizer_attack=optimizer_attack,
                         data=data, switch=switch, use_ws_loss=use_ws_loss)
        switch = not switch
        # start of changes XXXXX
        log_template = 'Regular Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
        val_acc, test_acc = test(model, data)
        print(log_template.format(epoch, train_acc, val_acc, test_acc), flush=True)

    print(flush=True)
    # end of changes XXXXX

    best_val_acc = test_acc = 0
    for epoch in range(1, fine_tune_epochs + 1):
        train_attr(model=model, optimizer_attr=optimizer_fine_tune, data=data)
        train_acc, val_acc, tmp_test_acc = test_attr(model=model, data=data)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
        log = 'Finetune Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
        print(log.format(epoch, train_acc, val_acc, tmp_test_acc))
    print(flush=True)
    model_log = 'Basic Model - Train: {:.4f}, Val: {:.4f}, Test: {:.4f}' \
        .format(train_acc, best_val_acc, test_acc)
    return model, model_log, test_acc
Example #23
def main(config_path):
    # settings
    conf = load_config(config_path)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    base_dir = conf['base_dir']
    strengths_path = base_dir + conf['strengths_path']

    # load data
    df = pd.read_csv(strengths_path)

    num_dict = make_dict(df)
    fm = make_feature_matrix(df, num_dict, **conf.feature_matrix)

    G = make_graph(df, num_dict, **conf.graph)
    preprocess_G = G.copy()
    for (u, v, d) in G.edges(data=True):
        if d["weight"] <= 6:    # 重みが6以下のエッジは削除する
            preprocess_G.remove_edge(u, v)
    G = preprocess_G

    # Without the mapping table below we cannot convert between
    # torch geometric node ids and employee names
    mapping = {num: num_dict['syain']['num_str'][i]
               for num, i in enumerate(G.nodes)}
    # Execution pauses here for a moment while the plot is displayed
    plot_graph(G, mapping, conf.save_pos)
    mapping_swap = {v: k for k, v in mapping.items()}
    data = from_networkx(G)
    # Feature matrix
    data.x = fm
    # Normalization
    transform = T.NormalizeFeatures()
    data = transform(data)

    # GAE
    model = kwargs[conf.gae.model](
        Encoder(data.x.shape[1], conf.gae.dim, model=conf.gae.model)).to(device)
    data.train_mask = data.val_mask = data.test_mask = data.y = None

    # Here the GAE is run on the entire dataset
    data = train_test_split_edges(data)
    x, train_pos_edge_index = data.x.to(
        device), data.train_pos_edge_index.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    def train():
        """train"""
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, train_pos_edge_index)
        loss = model.recon_loss(z, train_pos_edge_index)
        if conf.gae.model in ['VGAE']:
            loss = loss + (1 / data.num_nodes) * model.kl_loss()
        loss.backward()
        optimizer.step()
        return loss.item()

    def test(pos_edge_index, neg_edge_index):
        """test"""
        model.eval()
        with torch.no_grad():
            z = model.encode(x, train_pos_edge_index)
        return model.test(z, pos_edge_index, neg_edge_index)

    # Run training
    print("start training")
    for epoch in range(1, conf.num_iter + 1):
        loss = train()
        auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
        if epoch % (conf.num_iter//10) == 0:
            print(
                f'Epoch: {epoch:02d}, Loss: {loss:.4f}, AUC: {auc:.4f}, AP: {ap:.4f}')

    @torch.no_grad()
    def plot_points():
        """学習済みモデルで学習データの分散表現をTSNEで2次元圧縮し、可視化する"""
        model.eval()
        res = model.encode(x, train_pos_edge_index)
        z = TSNE(n_components=2).fit_transform(res.cpu().numpy())

        plt.figure(figsize=(8, 8))
        plt.scatter(z[:, 0], z[:, 1], s=20)
        for num, [x_pos, y_pos] in enumerate(z):
            label = mapping[num]
            plt.annotate(
                label,
                (x_pos, y_pos),
                size=10,
                ha='center',
                fontproperties=font_prop
            )
        plt.axis('off')
        plt.show()
        return res.cpu().numpy()
    # 2D compression and visualization
    res = plot_points()

    if conf.save_res:
        res_vec_save_path = base_dir + conf['res_vec_path']
        df_res = pd.DataFrame(res)
        df_res.to_csv(res_vec_save_path)

    if conf.save_res:
        res_cos_save_path = base_dir + conf['res_cos_path']
        df_res = pd.DataFrame(cos_sim_matrix(res))
        df_res.to_csv(res_cos_save_path)
Example #24
def perturb_edges(data,
                  name,
                  remove_pct,
                  add_pct,
                  hidden_channels=16,
                  epochs=400):
    if remove_pct == 0 and add_pct == 0:
        return
    try:
        cached = pickle.load(
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'rb'))
        print(f'Use cached edge augmentation for dataset {name}')

        if data.setting == 'inductive':
            data.train_edge_index = cached
        else:
            data.edge_index = cached
        return
    except FileNotFoundError:
        try:
            A_pred, adj_orig = pickle.load(
                open(f'{ROOT}/cache/edge/{name}.pt', 'rb'))
            A = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)
            data.edge_index, _ = from_scipy_sparse_matrix(A)
            pickle.dump(
                data.edge_index,
                open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt',
                     'wb'))
            return
        except FileNotFoundError:
            print(
                f'cache/edge/{name}_{remove_pct}_{add_pct}.pt not found! Regenerating it now'
            )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if data.setting == 'inductive':
        train_data = Data(x=data.train_x,
                          ori_x=data.ori_x,
                          edge_index=data.train_edge_index,
                          y=data.train_y)
    else:
        train_data = deepcopy(data)

    edge_index = deepcopy(train_data.edge_index)
    train_data = train_test_split_edges(train_data,
                                        val_ratio=0.1,
                                        test_ratio=0)
    num_features = train_data.ori_x.shape[1]
    model = GAE(GCNEncoder(num_features, hidden_channels))
    model = model.to(device)
    x = train_data.ori_x.to(device)
    train_pos_edge_index = train_data.train_pos_edge_index.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    best_val_auc = 0
    best_z = None
    for epoch in range(1, epochs + 1):
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, train_pos_edge_index)
        loss = model.recon_loss(z, train_pos_edge_index)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            z = model.encode(x, train_pos_edge_index)

        auc, ap = model.test(z, train_data.val_pos_edge_index,
                             train_data.val_neg_edge_index)
        print('Val | Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(
            epoch, auc, ap))
        if auc > best_val_auc:
            best_val_auc = auc
            best_z = deepcopy(z)

    A_pred = torch.sigmoid(torch.mm(best_z, best_z.T)).cpu().numpy()

    adj_orig = to_scipy_sparse_matrix(edge_index).asformat('csr')
    adj_pred = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)

    if data.setting == 'inductive':
        data.train_edge_index, _ = from_scipy_sparse_matrix(adj_pred)
    else:
        data.edge_index, _ = from_scipy_sparse_matrix(adj_pred)

    pickle.dump((A_pred, adj_orig), open(f'{ROOT}/cache/edge/{name}.pt', 'wb'))

    if data.setting == 'inductive':
        pickle.dump(
            data.train_edge_index,
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'wb'))
    else:
        pickle.dump(
            data.edge_index,
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'wb'))
Example #25
from sklearn.metrics.cluster import (v_measure_score, homogeneity_score,
                                     completeness_score)
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, ARGVA
from torch_geometric.utils import train_test_split_edges

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, T.NormalizeFeatures())
data = dataset.get(0)

data.train_mask = data.val_mask = data.test_mask = None
data = train_test_split_edges(data)


class Encoder(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(Encoder, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv_mu = GCNConv(hidden_channels, out_channels, cached=True)
        self.conv_logvar = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        return self.conv_mu(x, edge_index), self.conv_logvar(x, edge_index)


class Discriminator(torch.nn.Module):
Example #26
from model import DeepVGAE
from config.config import parse_args

torch.manual_seed(12345)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

args = parse_args()

model = DeepVGAE(args).to(device)
optimizer = Adam(model.parameters(), lr=args.lr)

os.makedirs("datasets", exist_ok=True)
dataset = Planetoid("datasets", args.dataset, transform=T.NormalizeFeatures())
data = dataset[0].to(device)
all_edge_index = data.edge_index
data = train_test_split_edges(data, 0.05, 0.1)

for epoch in range(args.epoch):
    model.train()
    optimizer.zero_grad()
    loss = model.loss(data.x, data.train_pos_edge_index, all_edge_index)
    loss.backward()
    optimizer.step()
    if epoch % 2 == 0:
        model.eval()
        roc_auc, ap = model.single_test(data.x, data.train_pos_edge_index,
                                        data.test_pos_edge_index,
                                        data.test_neg_edge_index)
        print("Epoch {} - Loss: {} ROC_AUC: {} Precision: {}".format(
            epoch,
            loss.cpu().item(), roc_auc, ap))
Example #27
def split_edges(dataset, train_ratio, val_ratio):
    datas = [data for data in dataset]
    for i in range(len(datas)):
        datas[i] = train_test_split_edges(datas[i], val_ratio,
                                          1 - train_ratio - val_ratio)
    dataset.data, dataset.slices = dataset.collate(datas)
Example #28
def get_data_model(custom_dataset,
					task,
					random_seed,
					sparse_matrix,
					feature_matrix,
					initialize_spectral,
					encoder_base,
					n_hidden,
					n_layers,
					discriminator_layers,
					ae_type,
					bias,
					attention_heads,
					decoder_type,
					use_mincut,
					K,
					Niter,
					val_ratio,
					test_ratio,
					interpret=False,
					prediction_column=-1
					):
	assert custom_dataset in ['lawyer', 'physician', 'none']
	assert task in ['link_prediction', 'generation', 'clustering', 'embedding', 'classification', 'regression']
	print("Random Seed:",random_seed)
	torch.manual_seed(random_seed)
	np.random.seed(random_seed)
	random.seed(random_seed)

	if custom_dataset != 'none':
		sparse_matrix=DATA[custom_dataset]['A']
		feature_matrix=DATA[custom_dataset]['X']

	if isinstance(sparse_matrix,str) and os.path.exists(sparse_matrix) and sparse_matrix.split('.')[-1] in ['npz','csv']:
		if sparse_matrix.endswith('.csv'):
			sparse_matrix=sps.csr_matrix(pd.read_csv(sparse_matrix).values)
		else:
			sparse_matrix=sps.load_npz(sparse_matrix)
	elif not sps.issparse(sparse_matrix):
		sparse_matrix=sps.csr_matrix(sparse_matrix)

	# print(sparse_matrix.shape)

	if isinstance(feature_matrix,str) and os.path.exists(feature_matrix) and feature_matrix.split('.')[-1] in ['npy','csv']:
		if feature_matrix.endswith('.csv'):
			X=pd.read_csv(feature_matrix).values.astype(float)
		else:
			X=np.load(feature_matrix,allow_pickle=True).astype(float)
	elif isinstance(feature_matrix,type(None)):
		if initialize_spectral:
			from sklearn.manifold import SpectralEmbedding
			X=SpectralEmbedding(n_components=3,affinity="precomputed",random_state=42).fit_transform(sparse_matrix)
		else:
			X=np.ones(sparse_matrix.shape[0],dtype=float)[:,np.newaxis]
	else:
		X=feature_matrix

	y=None
	idx_df=None
	label_encoder=None
	n_classes=-1
	if task in ['classification','regression']:
		X=pd.DataFrame(X)
		# print(X)
		assert prediction_column>=0 #in X.columns
		prediction_column=X.columns.values[prediction_column]
		y=X.pop(prediction_column).values.flatten()
		X=X.values
		# print(X,y)
		idx_df=pd.DataFrame(dict(idx=np.arange(len(y)),y=y))
		idx_df_train,idx_df_test=train_test_split(idx_df,test_size=test_ratio,stratify=idx_df['y'] if task=='classification' else None, random_state=random_seed)
		idx_df_train,idx_df_val=train_test_split(idx_df_train,test_size=val_ratio,stratify=idx_df_train['y'] if task=='classification' else None, random_state=random_seed)
		idx_df_train['set']='train'
		idx_df_val['set']='val'
		idx_df_test['set']='test'
		idx_df=pd.concat([idx_df_train,idx_df_val,idx_df_test])
		if task=='classification':
			n_classes=idx_df['y'].nunique()
			label_encoder=LabelEncoder()
			y=torch.tensor(label_encoder.fit_transform(y)).long()
		else:
			n_classes=1
			y=torch.FloatTensor(y)

	X=torch.FloatTensor(X)

	n_input = X.shape[1]

	edge_index,edge_attr=from_scipy_sparse_matrix(sparse_matrix)

	G=Data(X,edge_index,edge_attr,y=y,idx_df=idx_df)

	G.num_nodes = X.shape[0]

	model=get_model(encoder_base,
					n_input,
					n_hidden,
					n_layers,
					discriminator_layers,
					ae_type,
					bias,
					attention_heads,
					decoder_type,
					use_mincut,
					K,
					Niter,
					interpret,
					n_classes)

	if task == 'link_prediction':
		G=train_test_split_edges(G, val_ratio=val_ratio, test_ratio=test_ratio)

	if torch.cuda.is_available():
		model=model.cuda()
	return G,model,X,edge_index,edge_attr
Example #29
def run(file, data_name, model_name,lr):
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64*1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')

    parser.add_argument('--dataset', type=str, default='Citeseer')

    args = parser.parse_args()
    if data_name is not None and model_name is not None and lr is not None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data',args.dataset),name=args.dataset,transform=T.ToSparseTensor())
    num_training = int(dataset.__len__()*0.8)
    num_val = int(dataset.__len__()*0.1)
    num_test = dataset.__len__() - (num_training+num_val)

    data = dataset[0]
    adj_t = data.adj_t.to(device)
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    num_nodes = data.x.shape[0]
    num_edges = data.edge_index.shape[1]
    print(data)
    # nx_data = to_networkx(data, to_undirected=True)
    # print('graph density='+str(2*num_edges/(num_nodes*(num_nodes-1))))
    # print('clustering coefficient='+str(nx.average_clustering(nx_data)))


    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd': model_name = args.model[:-3]
    
    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    if decoder_enable == '-nd':
        model.decoder = NeuralDecoder( args.hidden_channels,  
            args.hidden_channels, 1, args.num_layers, args.dropout)
    
    evaluator = Evaluator(name='ogbl-ddi')

    model = model.to(device)

    loggers = {}
    K_list = ['20','50','100']
    for k in K_list:
        loggers['Hits@'+k] = Logger(args.runs, args)

    for run in range(args.runs):
        torch.manual_seed(run)
        split_edge = utils.train_test_split_edges(data)
        # print(split_edge.train_pos_edge_index.shape)
        # print(split_edge.val_pos_edge_index.shape)


        # exit()
        split_edge.edge_index = edge_index

        # emb.weight.data = features
        model.reset_parameters()

        if args.model in ['arga','arga-nd','arvga','arvga-nd']:
            args.lr=0.005
        optimizer = torch.optim.Adam(
                list(model.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge,
                         optimizer, args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(model, data.x, adj_t, split_edge,
                               evaluator, args.batch_size)
                for key, result in results.items():
                    loggers[key].add_result(run, result)
            

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits, test_auc, test_ap, val_auc, val_ap = result
                        print(key)
                        print(f'Run: {run + 1:02d}, '
                              f'Epoch: {epoch:02d}, '
                              f'Loss: {loss:.4f}, '
                              f'auc: {100 * test_auc:.2f}%, '
                              f'ap: {100 * test_ap:.2f}%, '
                              f'Train: {100 * train_hits:.2f}%, '
                              f'Valid: {100 * valid_hits:.2f}%, '
                              f'Test: {100 * test_hits:.2f}%', )
                    print('---')



        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()

        file.write(str(args.lr)+' ' +key + ' ' +args.model+"'"+str(toWrite)+'\n')
        file.flush()
Example #30
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if int(args.double_precision):
        torch.set_default_dtype(torch.float64)
    if int(args.cuda) >= 0:
        torch.cuda.manual_seed(args.seed)
    args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu'
    args.patience = args.epochs if not args.patience else int(args.patience)
    logging.getLogger().setLevel(logging.INFO)
    if args.save:
        if not args.save_dir:
            dt = datetime.datetime.now()
            date = f"{dt.year}_{dt.month}_{dt.day}"
            models_dir = os.path.join(os.environ['LOG_DIR'], args.task, date)
            save_dir = get_dir_name(models_dir)
        else:
            save_dir = args.save_dir
        logging.basicConfig(level=logging.INFO,
                            handlers=[
                                logging.FileHandler(
                                    os.path.join(save_dir, 'log.txt')),
                                logging.StreamHandler()
                            ])

    logging.info(f'Using: {args.device}')
    logging.info("Using seed {}.".format(args.seed))
    from torch_geometric.datasets import Planetoid
    import torch_geometric.transforms as T
    from torch_geometric.utils import train_test_split_edges

    dataset = Planetoid("data/", args.dataset, transform=T.NormalizeFeatures())
    data_pyg = dataset[0]
    all_edge_index = data_pyg.edge_index
    data_pyg = train_test_split_edges(data_pyg, 0.05, 0.1)

    reserve_mark = 0

    if args.task == 'nc':
        reserve_mark = 0
    else:
        args.task = 'nc'
        reserve_mark = 1
    # Load data
    data = load_data(args, os.path.join('data/', args.dataset))
    args.n_nodes, args.feat_dim = data['features'].shape
    if args.task == 'nc':
        # Model = ADVNCModel
        args.n_classes = int(data['labels'].max() + 1)
        logging.info(f'Num classes: {args.n_classes}')
    else:
        args.nb_false_edges = len(data['train_edges_false'])
        args.nb_edges = len(data['train_edges'])
        if args.task == 'lp':
            print(' ')
            # Model = ADVLPModel
        else:
            Model = RECModel
            # No validation for reconstruction task
            args.eval_freq = args.epochs + 1

    #transfer loading
    if reserve_mark == 1:
        args.task = 'lp'
        # reset reserve mark
        reserve_mark = 0

    if args.task == 'lp':
        reserve_mark = 0
    else:
        args.task = 'lp'
        reserve_mark = 1

    data1 = load_data(args, os.path.join('data/', args.dataset))
    args.n_nodes, args.feat_dim = data1['features'].shape
    if args.task == 'nc':
        # Model = ADVNCModel
        args.n_classes = int(data1['labels'].max() + 1)
        logging.info(f'Num classes: {args.n_classes}')
    else:
        print('*****')
        args.nb_false_edges = len(data1['train_edges_false'])
        args.nb_edges = len(data1['train_edges'])
        if args.task == 'lp':
            print(' ')
            # Model = ADVLPModel
        else:
            Model = RECModel
            # No validation for reconstruction task
            args.eval_freq = args.epochs + 1

    if reserve_mark == 1:
        args.task = 'nc'

    # if args.task == 'nc':
    #     Model = ADVNCModel
    # else:
    #     Model = ADVLPModel

    print(data_pyg.x)
    print(data['features'])
    print((data_pyg.x == data['features']).all())