# Ejemplo n.º 1
def run_sage(dataset_name, aggr_name):
    """Train and evaluate a supervised GraphSAGE model.

    Args:
        dataset_name: key into ``datasets`` — "ppi" or "reddit".
        aggr_name: key into ``aggrs`` — "gcn", "pool" or "lstm".

    Side effects: writes per-epoch stats and test results under
    ``<experiments_dir>/<model_name>/<dataset_name>/<aggr_name>/<run_id>``.
    """
    epochs = 10

    datasets = {
        "ppi": PPIDataset,
        "reddit": RedditDataset,
    }

    # Aggregator factories; the inline numbers are previously observed scores.
    aggrs = {
        # 0.930
        "gcn": lambda input_size, output_size: MeanAggregator(
            input_size, output_size),
        # 0.948
        "pool": lambda input_size, output_size: MaxPoolAggregator(
            input_size, output_size, model_size=ModelSize.SMALL),
        # 0.954
        "lstm": lambda input_size, output_size: LstmAggregator(
            input_size, output_size, model_size=ModelSize.SMALL),
    }

    # Results directory is derived from this script's name and location.
    model_name = os.path.splitext(os.path.basename(__file__))[0]
    experiments_dir = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(
        experiments_dir, model_name, dataset_name, aggr_name)

    # Timestamped run id keeps repeated runs side by side.
    run_id = datetime.now().strftime("%Y%m%dT%H%M%S")
    run_dir = os.path.join(model_dir, run_id)
    os.makedirs(run_dir, exist_ok=True)  # race-free: no exists() pre-check

    model_file = os.path.join(run_dir, "best_model.pt")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = datasets[dataset_name](
        experiments_dir,
        SubSampleNeighborhoodSize(128)
    ).load()
    # NOTE(review): unlike the other experiments in this file, the dataset is
    # not moved to `device` here — confirm the model handles device placement.

    model = GraphSageRedditSupervisedModel(
        dataset.features_per_node,
        dataset.number_of_classes,
        aggregator_factory=aggrs[aggr_name]
    )
    model.to(device)

    # The same dataset object carries the train/validation/test splits.
    train_data = dataset
    validation_data = dataset
    test_data = dataset
    train_stats = model.fit(
        epochs,
        train_data,
        validation_data,
        # Keep the checkpoint with the lowest validation loss.
        SaveModelOnBestMetric(model_file, lambda x: x.validation_loss)
    )

    write_train_epochs_stats(run_dir, train_stats)

    # Evaluate the best saved checkpoint, not the last epoch's weights.
    results = model.test(test_data, model_file)
    write_test_results(model_dir, run_id, results)
# Ejemplo n.º 2
def run_gat_transductive(dataset_name, lr):
    """Train and evaluate a transductive GAT model on a citation dataset.

    Args:
        dataset_name: key into ``datasets`` — "cora", "pubmed" or "citeseer".
        lr: learning rate passed to ``GATTransductiveModel``.

    Side effects: writes per-epoch stats and test results under
    ``<experiments_dir>/<model_name>/<dataset_name>/<run_id>``.
    """
    # Effectively "train until early stopping fires" (patience below).
    epochs = 100000
    patience = 100

    # Inline numbers are previously observed accuracies per dataset.
    datasets = {
        "cora": CoraDataset,  # 83.0 ± 0.7%
        "pubmed": PubmedDataset,  # 79.0 ± 0.3%
        "citeseer": CiteseerDataset  # 72.5 ± 0.7%
    }

    # Results directory is derived from this script's name and location.
    model_name = os.path.splitext(os.path.basename(__file__))[0]
    experiments_dir = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(experiments_dir, model_name, dataset_name)

    # Timestamped run id keeps repeated runs side by side.
    run_id = datetime.now().strftime("%Y%m%dT%H%M%S")
    run_dir = os.path.join(model_dir, run_id)
    os.makedirs(run_dir, exist_ok=True)  # race-free: no exists() pre-check

    model_file = os.path.join(run_dir, "best_model.pt")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = datasets[dataset_name](experiments_dir, NormalizeFeatures(),
                                     AddSelfLoop()).load()
    dataset = dataset.to(device)

    model = GATTransductiveModel(dataset.features_per_node,
                                 dataset.number_of_classes, lr)
    model.to(device)

    # Transductive setting: all splits share the full graph and features,
    # differing only in which mask selects the labelled nodes.
    train_data = GraphData(dataset.name,
                           dataset.features_vectors,
                           dataset.labels[dataset.train_mask],
                           dataset.adj_coo_matrix,
                           train_mask=dataset.train_mask)
    validation_data = GraphData(dataset.name,
                                dataset.features_vectors,
                                dataset.labels[dataset.validation_mask],
                                dataset.adj_coo_matrix,
                                validation_mask=dataset.validation_mask)
    test_data = GraphData(dataset.name,
                          dataset.features_vectors,
                          dataset.labels[dataset.test_mask],
                          dataset.adj_coo_matrix,
                          test_mask=dataset.test_mask)

    # Stop early on stagnant validation metrics; checkpoint the best model.
    train_stats = model.fit(
        epochs, train_data, validation_data,
        EarlyStopping(patience, lambda x: x.validation_loss,
                      lambda x: x.validation_accuracy),
        SaveModelOnBestMetric(model_file, lambda x: x.validation_loss,
                              lambda x: x.validation_accuracy))
    write_train_epochs_stats(run_dir, train_stats)

    # Evaluate the best saved checkpoint, not the last epoch's weights.
    results = model.test(test_data, model_file)
    write_test_results(model_dir, run_id, results)
# Ejemplo n.º 3
def run_sage(aggregator_name):
    """Train and evaluate an unsupervised GraphSAGE model on PPI.

    Args:
        aggregator_name: key into ``aggrs`` — "gcn", "pool" or "lstm".

    Side effects: writes per-epoch stats and test results under
    ``<experiments_dir>/<model_name>/ppi/<aggregator_name>/<run_id>``.
    """
    epochs = 5
    dataset_name = "ppi"

    datasets = {"ppi": PPIDataset}

    # Aggregator factories; the inline numbers are previously observed scores.
    aggrs = {
        # 0.465
        "gcn":
        lambda input_size, output_size: MeanAggregator(
            input_size, output_size),
        # 0.502
        "pool":
        lambda input_size, output_size: MaxPoolAggregator(
            input_size, output_size, model_size=ModelSize.SMALL),
        # 0.482
        "lstm":
        lambda input_size, output_size: LstmAggregator(
            input_size, output_size, model_size=ModelSize.SMALL)
    }

    # Results directory is derived from this script's name and location.
    model_name = os.path.splitext(os.path.basename(__file__))[0]
    experiments_dir = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(experiments_dir, model_name, dataset_name,
                             aggregator_name)

    # Timestamped run id keeps repeated runs side by side.
    run_id = datetime.now().strftime("%Y%m%dT%H%M%S")
    run_dir = os.path.join(model_dir, run_id)
    os.makedirs(run_dir, exist_ok=True)  # race-free: no exists() pre-check

    model_file = os.path.join(run_dir, "best_model.pt")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = datasets[dataset_name](experiments_dir,
                                     SubSampleNeighborhoodSize(128),
                                     CalcPositivePairs(50, 5)).load()
    dataset = dataset.to(device)

    model = GraphSagePPIUnsupervisedModel(dataset.features_per_node, 256,
                                          aggrs[aggregator_name], 1e-5)
    model.to(device)

    train_stats = model.fit(
        epochs, dataset.train, dataset.validation,
        # Keep the checkpoint with the lowest validation loss.
        SaveModelOnBestMetric(model_file, lambda x: x.validation_loss))

    write_train_epochs_stats(run_dir, train_stats)

    # Evaluate the best saved checkpoint, not the last epoch's weights.
    results = model.test(dataset.test, model_file)
    write_test_results(model_dir, run_id, results)
# Ejemplo n.º 4
def run_gat_inductive():
    """Train and evaluate an inductive GAT model on the PPI dataset.

    Side effects: writes per-epoch stats and test results under
    ``<experiments_dir>/<model_name>/ppi/<run_id>``.
    """
    # Effectively "train until early stopping fires" (patience below).
    epochs = 100000
    patience = 100
    dataset_name = "ppi"
    lr = 0.005

    datasets = {
        "ppi": PPIDataset  # 0.973 ± 0.002 micro F1 (previously observed)
    }

    # Results directory is derived from this script's name and location.
    model_name = os.path.splitext(os.path.basename(__file__))[0]
    experiments_dir = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(experiments_dir, model_name, dataset_name)

    # Timestamped run id keeps repeated runs side by side.
    run_id = datetime.now().strftime("%Y%m%dT%H%M%S")
    run_dir = os.path.join(model_dir, run_id)
    os.makedirs(run_dir, exist_ok=True)  # race-free: no exists() pre-check

    model_file = os.path.join(run_dir, "best_model.pt")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = datasets[dataset_name](experiments_dir, NormalizeFeatures(),
                                     AddSelfLoop()).load()
    dataset = dataset.to(device)

    model = GATInductiveModel(dataset.features_per_node,
                              dataset.number_of_classes, lr)
    model.to(device)

    # Stop early on stagnant validation metrics; checkpoint the best model.
    train_stats = model.fit(
        epochs, dataset.train, dataset.validation,
        EarlyStopping(patience, lambda x: x.validation_loss,
                      lambda x: x.validation_F1),
        SaveModelOnBestMetric(model_file, lambda x: x.validation_loss,
                              lambda x: x.validation_F1))

    write_train_epochs_stats(run_dir, train_stats)

    # Evaluate the best saved checkpoint, not the last epoch's weights.
    results = model.test(dataset.test, model_file)
    write_test_results(model_dir, run_id, results)