Ejemplo n.º 1
0
def test_distance_to_clusters():
    """Train with the distance2clusters auxiliary task; accuracy must be a valid fraction."""
    args = get_default_args()
    args.auxiliary_task = "distance2clusters"
    args.alpha = 3
    task = build_task(
        args,
        dataset=build_dataset(args),
        model=build_model(args),
    )
    score = task.train()["Acc"]
    assert 0 <= score <= 1
Ejemplo n.º 2
0
def test_pairwise_distance():
    """Train with the pairwise-distance auxiliary task; accuracy must be a valid fraction."""
    args = get_default_args()
    args.auxiliary_task = "pairwise-distance"
    args.alpha = 35
    task = build_task(
        args,
        dataset=build_dataset(args),
        model=build_model(args),
    )
    score = task.train()["Acc"]
    assert 0 <= score <= 1
Ejemplo n.º 3
0
def test_pairwise_attr_sim():
    """Train with the pairwise-attr-sim auxiliary task; accuracy must be a valid fraction."""
    args = get_default_args()
    args.auxiliary_task = "pairwise-attr-sim"
    args.alpha = 100
    task = build_task(
        args,
        dataset=build_dataset(args),
        model=build_model(args),
    )
    score = task.train()["Acc"]
    assert 0 <= score <= 1
Ejemplo n.º 4
0
def test_edgemask():
    """Train with the edgemask auxiliary task; accuracy must be a valid fraction."""
    args = get_default_args()
    args.auxiliary_task = "edgemask"
    args.alpha = 1
    task = build_task(
        args,
        dataset=build_dataset(args),
        model=build_model(args),
    )
    score = task.train()["Acc"]
    assert 0 <= score <= 1
Ejemplo n.º 5
0
    def __init__(self, args, dataset=None, model=None):
        """Set up data loaders, model, optimizer and LR schedule for graph classification.

        Args:
            args: argument namespace; mutated in place with dataset-derived
                fields (max_graph_size, num_features, num_classes, use_unsup).
            dataset: optional prebuilt dataset; built from args when None.
            model: optional prebuilt model; built from args when None.
        """
        super(GraphClassification, self).__init__(args)
        dataset = build_dataset(args) if dataset is None else dataset

        # Expose dataset statistics on args so build_model(args) below can
        # size the model correctly.
        args.max_graph_size = max([ds.num_nodes for ds in dataset])
        args.num_features = dataset.num_features
        args.num_classes = dataset.num_classes
        args.use_unsup = False

        self.args = args
        self.kfold = args.kfold
        # Number of cross-validation folds is fixed here, not read from args.
        self.folds = 10

        # Fall back to CPU when CUDA is unavailable or explicitly disabled.
        self.device = "cpu" if not torch.cuda.is_available(
        ) or args.cpu else args.device_id[0]

        # OGB graph datasets ship their own loaders; for all other datasets
        # the model decides how to split the generated data.
        if args.dataset.startswith("ogbg"):
            self.data = dataset.data
            self.train_loader, self.val_loader, self.test_loader = dataset.get_loader(
                args)
            model = build_model(args) if model is None else model
        else:
            self.data = self.generate_data(dataset, args)
            model = build_model(args) if model is None else model
            (
                self.train_loader,
                self.val_loader,
                self.test_loader,
            ) = model.split_dataset(self.data, args)

        self.model = model.to(self.device)

        self.set_loss_fn(dataset)
        self.set_evaluator(dataset)

        self.patience = args.patience
        self.max_epoch = args.max_epoch

        # Adam with weight decay; the LR is halved every 50 epochs.
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.lr,
                                          weight_decay=args.weight_decay)
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=50, gamma=0.5)
Ejemplo n.º 6
0
def test_hope_ppi():
    """Smoke-test HOPE on PPI link prediction; ROC AUC must lie in [0, 1]."""
    args = get_default_args()
    args.task = 'link_prediction'
    args.dataset = 'ppi'
    args.model = 'hope'
    dataset = build_dataset(args)
    args.beta = 0.001
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
Ejemplo n.º 7
0
 def __init__(self, args, dataset=None, model=None):
     """Initialize multiplex node classification: data, labels, model, device.

     Args:
         args: argument namespace (reads hidden_size and cpu).
         dataset: optional prebuilt dataset; built from args when None.
         model: optional prebuilt model; built from args when None.
     """
     super(MultiplexNodeClassification, self).__init__(args)
     dataset = build_dataset(args) if dataset is None else dataset
     self.data = dataset[0]
     # Ground-truth label matrix used for evaluation.
     self.label_matrix = self.data.y
     self.num_nodes, self.num_classes = dataset.num_nodes, dataset.num_classes
     self.hidden_size = args.hidden_size
     self.model = build_model(args) if model is None else model
     self.args = args
     # NOTE(review): assumes CUDA is available whenever args.cpu is False —
     # no torch.cuda.is_available() guard here, unlike sibling tasks.
     self.device = torch.device('cpu' if args.cpu else 'cuda')
     self.model = self.model.to(self.device)
Ejemplo n.º 8
0
    def __init__(
        self,
        args,
        dataset=None,
        model: Optional[SupervisedHomogeneousNodeClassificationModel] = None,
    ):
        """Set up the node-classification task: data, model, trainer/optimizer.

        Args:
            args: argument namespace; mutated in place with dataset-derived
                fields (num_features, num_classes, num_nodes).
            dataset: optional prebuilt dataset; built from args when None.
            model: optional prebuilt model; built from args when None.
        """
        super(NodeClassification, self).__init__(args)

        self.args = args
        self.model_name = args.model

        # Fall back to CPU when CUDA is unavailable or explicitly disabled.
        self.device = "cpu" if not torch.cuda.is_available(
        ) or args.cpu else args.device_id[0]
        dataset = build_dataset(args) if dataset is None else dataset

        self.dataset = dataset
        self.data = dataset[0]
        # Expose dataset statistics on args so build_model(args) can size itself.
        args.num_features = dataset.num_features
        args.num_classes = dataset.num_classes
        args.num_nodes = dataset.data.x.shape[0]

        self.model: SupervisedHomogeneousNodeClassificationModel = build_model(
            args) if model is None else model
        self.model.set_device(self.device)

        self.set_loss_fn(dataset)
        self.set_evaluator(dataset)

        # A model may supply its own trainer class; instantiate it if present.
        # NOTE(review): get_trainer is called twice — once to test, once to
        # instantiate.
        self.trainer = (self.model.get_trainer(NodeClassification, self.args)(
            self.args) if self.model.get_trainer(NodeClassification, self.args)
                        else None)

        if not self.trainer:
            # No model-provided trainer: either pick one named on args.trainer,
            # or fall back to the plain optimizer-driven training loop below.
            if hasattr(self.args, "trainer") and self.args.trainer is not None:
                if "saint" in self.args.trainer:
                    self.trainer = SAINTTrainer(self.args)
                elif "self_auxiliary_task" in self.args.trainer:
                    # The auxiliary-task trainer needs node embeddings from the model.
                    if not hasattr(self.model, "get_embeddings"):
                        raise ValueError(
                            "Model ({}) must implement get_embeddings method".
                            format(self.model_name))
                    self.trainer = SelfAuxiliaryTaskTrainer(self.args)
            else:
                # Models may override the default Adam optimizer.
                self.optimizer = (torch.optim.Adam(
                    self.model.parameters(),
                    lr=args.lr,
                    weight_decay=args.weight_decay)
                                  if not hasattr(self.model, "get_optimizer")
                                  else self.model.get_optimizer(args))
                # Move all data tensors and the model onto the chosen device.
                self.data.apply(lambda x: x.to(self.device))
                self.model: SupervisedHomogeneousNodeClassificationModel = self.model.to(
                    self.device)
                self.patience = args.patience
                self.max_epoch = args.max_epoch
Ejemplo n.º 9
0
def main_dataset():
    """Run node classification with a user-defined dataset and a built-in GCN model."""
    args = get_task_model_args(task="node_classification", model="gcn")
    # use customized dataset
    dataset = MyNodeClassificationDataset()
    args.num_features, args.num_classes = dataset.num_features, dataset.num_classes
    # use model in cogdl
    model = build_model(args)
    task = build_task(args, dataset, model)
    print(task.train())
Ejemplo n.º 10
0
    def run_n_seed(self, args):
        """Train the task once per seed (N_SEED runs) and collect the results."""
        results = []
        for seed in range(N_SEED):
            # Fix all RNGs before each run for reproducibility.
            set_random_seed(seed)
            task = build_task(args, model=build_model(args), dataset=self.dataset)
            results.append(task.train())
        return results
Ejemplo n.º 11
0
def test_grarep_ppi():
    """Smoke-test GraRep on PPI link prediction; ROC AUC must lie in [0, 1]."""
    args = get_default_args()
    args.task = 'link_prediction'
    args.dataset = 'ppi'
    args.model = 'grarep'
    dataset = build_dataset(args)
    args.step = 1
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
Ejemplo n.º 12
0
def test_edgemask_pt_ft_freeze():
    """Pretrain with edgemask, then finetune with a frozen encoder; accuracy must be a fraction."""
    args = get_default_args()
    args.auxiliary_task = "edgemask"
    args.trainer = "self_auxiliary_task_pretrain"
    args.alpha = 1
    args.freeze = True
    task = build_task(
        args,
        dataset=build_dataset(args),
        model=build_model(args),
    )
    score = task.train()["Acc"]
    assert 0 <= score <= 1
Ejemplo n.º 13
0
def test_grarep_ppi():
    """Smoke-test GraRep on unsupervised PPI node classification."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'ppi'
    args.model = 'grarep'
    dataset = build_dataset(args)
    args.step = 1
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
 def enhance_emb(self, G, embs):
     """Post-process node embeddings with ProNE or ProNE++ spectral smoothing."""
     adj = sp.csr_matrix(nx.adjacency_matrix(G))
     mode = self.args.enhance
     if mode == "prone":
         # Configure and apply the plain ProNE Chebyshev-Gaussian filter.
         self.args.model = 'prone'
         self.args.step, self.args.theta, self.args.mu = 5, 0.5, 0.2
         enhancer = build_model(self.args)
         embs = enhancer._chebyshev_gaussian(adj, embs)
     elif mode == "prone++":
         # ProNE++ searches over several graph filters.
         self.args.model = "prone++"
         self.args.filter_types = ["heat", "ppr", "gaussian", "sc"]
         self.args.max_evals = 100
         self.args.num_workers = 10
         self.args.no_svd = False
         self.args.loss = "infomax"
         self.args.no_search = False
         enhancer = build_model(self.args)
         embs = enhancer(embs, adj)
     else:
         raise ValueError("only supports 'prone' and 'prone++'")
     return embs
Ejemplo n.º 15
0
def test_hope_ppi():
    """Smoke-test HOPE on unsupervised PPI node classification."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'ppi'
    args.model = 'hope'
    dataset = build_dataset(args)
    args.beta = 0.001
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
Ejemplo n.º 16
0
    def __init__(self, args, dataset=None, model=None):
        """Set up data, model and evaluation mode for multiplex link prediction."""
        super(MultiplexLinkPrediction, self).__init__(args)

        if dataset is None:
            dataset = build_dataset(args)
        self.data = dataset[0]
        # Only feature-bearing datasets expose num_features.
        if hasattr(dataset, "num_features"):
            args.num_features = dataset.num_features
        if model is None:
            model = build_model(args)
        self.model = model
        self.eval_type = args.eval_type
Ejemplo n.º 17
0
 def __init__(self, args, dataset=None, model=None):
     """Set up knowledge-graph triple link prediction: dataset, model, logging.

     Args:
         args: argument namespace; mutated in place with nentity/nrelation.
         dataset: optional prebuilt dataset; built from args when None.
         model: optional prebuilt model; built from args when None.
     """
     super(TripleLinkPrediction, self).__init__()
     self.dataset = build_dataset(args) if dataset is None else dataset
     # Expose KG statistics on args so build_model(args) can size embeddings.
     args.nentity = self.dataset.num_entities
     args.nrelation = self.dataset.num_relations
     self.model = build_model(args) if model is None else model
     self.args = args
     set_logger(args)
     # Lazy %-style arguments: formatting is skipped when the level is disabled.
     logging.info('Model: %s', args.model)
     logging.info('#entity: %d', args.nentity)
     logging.info('#relation: %d', args.nrelation)
Ejemplo n.º 18
0
    def __init__(self, args, dataset=None, model=None):
        """Set up multiplex node classification: data, labels, model, device."""
        super(MultiplexNodeClassification, self).__init__(args)
        if dataset is None:
            dataset = build_dataset(args)
        self.data = dataset[0]
        # Ground-truth label matrix used for evaluation.
        self.label_matrix = self.data.y
        self.num_nodes = dataset.num_nodes
        self.num_classes = dataset.num_classes
        self.hidden_size = args.hidden_size
        self.model = build_model(args) if model is None else model
        self.args = args

        # Use the first configured GPU unless CUDA is missing or disabled.
        use_gpu = torch.cuda.is_available() and not args.cpu
        self.device = args.device_id[0] if use_gpu else "cpu"
        self.model = self.model.to(self.device)
def test_gcn_cora():
    """Smoke-test GCN on Cora node classification; accuracy must lie in [0, 1]."""
    args = get_default_args()
    args.task = 'node_classification'
    args.dataset = 'cora'
    args.model = 'gcn'
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['Acc'] <= 1
Ejemplo n.º 20
0
def test_gcn_cora():
    """Smoke-test GCN on Cora node classification; accuracy must lie in [0, 1]."""
    args = get_default_args()
    args.task = "node_classification"
    args.dataset = "cora"
    args.model = "gcn"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret["Acc"] <= 1
Ejemplo n.º 21
0
 def __init__(self, args, dataset=None, model=None):
     """Set up knowledge-graph triple link prediction: dataset, model, device, logging.

     Args:
         args: argument namespace; mutated in place with nentity/nrelation.
         dataset: optional prebuilt dataset; built from args when None.
         model: optional prebuilt model; built from args when None.
     """
     super(TripleLinkPrediction, self).__init__()
     self.dataset = build_dataset(args) if dataset is None else dataset
     # Expose KG statistics on args so build_model(args) can size embeddings.
     args.nentity = self.dataset.num_entities
     args.nrelation = self.dataset.num_relations
     self.model = build_model(args) if model is None else model
     self.args = args
     # NOTE(review): assumes CUDA is usable whenever args.cpu is False.
     self.device = "cpu" if args.cpu else args.device_id[0]
     self.model = self.model.to(self.device)
     set_logger(args)
     # Lazy %-style arguments: formatting is skipped when the level is disabled.
     logging.info("Model: %s", args.model)
     logging.info("#entity: %d", args.nentity)
     logging.info("#relation: %d", args.nrelation)
Ejemplo n.º 22
0
    def __init__(self, args):
        """Set up data, model and training limits for multiplex link prediction."""
        super(MultiplexLinkPrediction, self).__init__(args)

        dataset = build_dataset(args)
        self.data = dataset[0]
        # Only feature-bearing datasets expose num_features.
        if hasattr(dataset, "num_features"):
            args.num_features = dataset.num_features
        self.model = build_model(args)
        self.patience = args.patience
        self.max_epoch = args.max_epoch
        self.eval_type = args.eval_type
Ejemplo n.º 23
0
    def __init__(self, args, dataset=None, model=None):
        """Set up device, data and model for multiplex link prediction."""
        super(MultiplexLinkPrediction, self).__init__(args)

        # Use the first configured GPU unless CUDA is missing or disabled.
        use_gpu = torch.cuda.is_available() and not args.cpu
        self.device = args.device_id[0] if use_gpu else "cpu"
        if dataset is None:
            dataset = build_dataset(args)
        self.data = dataset[0]
        # Only feature-bearing datasets expose num_features.
        if hasattr(dataset, "num_features"):
            args.num_features = dataset.num_features
        if model is None:
            model = build_model(args)
        self.model = model
        self.eval_type = args.eval_type
Ejemplo n.º 24
0
 def _enhance_emb(self, graph, embs):
     """Post-process node embeddings with ProNE or ProNE++ spectral smoothing."""
     adj = nx.to_scipy_sparse_matrix(graph.to_networkx())
     args = ArgClass()
     if self.enhance == "prone":
         # Configure and apply the plain ProNE Chebyshev-Gaussian filter.
         args.model = "prone"
         args.hidden_size = embs.shape[1]
         args.step, args.theta, args.mu = 5, 0.5, 0.2
         enhancer = build_model(args)
         embs = enhancer._chebyshev_gaussian(adj, embs)
     elif self.enhance == "prone++":
         # ProNE++ searches over several graph filters.
         args.model = "prone++"
         args.filter_types = ["heat", "ppr", "gaussian", "sc"]
         args.max_evals = self.max_evals
         args.num_workers = self.num_workers
         args.no_svd = False
         args.loss = "infomax"
         args.no_search = False
         enhancer = build_model(args)
         embs = enhancer(embs, adj)
     else:
         raise ValueError("only supports 'prone' and 'prone++'")
     return embs
def test_mvgrl():
    """Smoke-test MVGRL on Cora unsupervised node classification."""
    args = get_unsupervised_nn_args()
    args.task = "unsupervised_node_classification"
    args.dataset = "cora"
    args.max_epochs = 2
    args.model = "mvgrl"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Acc'] > 0
def test_mixhop_citeseer():
    """Smoke-test MixHop on Citeseer node classification; accuracy must lie in [0, 1]."""
    args = get_default_args()
    args.task = 'node_classification'
    args.dataset = 'citeseer'
    args.model = 'mixhop'
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_layers = 2
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['Acc'] <= 1
Ejemplo n.º 27
0
def test_mixhop_citeseer():
    """Smoke-test MixHop on Citeseer node classification; accuracy must lie in [0, 1]."""
    args = get_default_args()
    args.task = "node_classification"
    args.dataset = "citeseer"
    args.model = "mixhop"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_layers = 2
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret["Acc"] <= 1
Ejemplo n.º 28
0
def test_prone_amazon():
    """Smoke-test ProNE on Amazon multiplex link prediction; AUC must lie in [0, 1]."""
    args = get_default_args()
    args.task = 'multiplex_link_prediction'
    args.dataset = 'amazon'
    args.model = 'prone'
    dataset = build_dataset(args)
    args.step = 5
    args.theta = 0.5
    args.mu = 0.2
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
Ejemplo n.º 29
0
def test_prone_blogcatalog():
    """Smoke-test ProNE on BlogCatalog unsupervised node classification."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'blogcatalog'
    args.model = 'prone'
    dataset = build_dataset(args)
    args.step = 5
    args.theta = 0.5
    args.mu = 0.2
    model = build_model(args)
    # Pass the prebuilt dataset/model so build_task does not silently rebuild
    # them (previously both locals were dead and the work was duplicated).
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
Ejemplo n.º 30
0
    def __init__(
        self,
        args,
        dataset=None,
        model: Optional[SupervisedHomogeneousNodeClassificationModel] = None,
    ):
        """Set up the node-classification task: data, model, trainer/optimizer.

        Args:
            args: argument namespace; mutated in place with dataset-derived
                fields (num_features, num_classes, num_nodes).
            dataset: optional prebuilt dataset; built from args when None.
            model: optional prebuilt model; built from args when None.
        """
        super(NodeClassification, self).__init__(args)

        self.args = args
        self.model_name = args.model

        # Fall back to CPU when CUDA is unavailable or explicitly disabled.
        self.device = "cpu" if not torch.cuda.is_available(
        ) or args.cpu else args.device_id[0]
        dataset = build_dataset(args) if dataset is None else dataset

        # SGC-PN requires its own preprocessing on the citation datasets.
        # NOTE(review): missing_rate=0 is passed here even when
        # args.missing_rate > 0 triggered the branch — confirm intended.
        if args.model == "sgcpn" and args.missing_rate > 0:
            assert args.dataset in ["cora", "citeseer", "pubmed"]
            dataset.data = preprocess_data_sgcpn(dataset.data,
                                                 normalize_feature=True,
                                                 missing_rate=0)
            # adj_slice = torch.tensor(dataset.data.adj.size())
            # adj_slice[0] = 0
            # dataset.slices["adj"] = adj_slice

        self.dataset = dataset
        self.data = dataset[0]
        # Expose dataset statistics on args so build_model(args) can size itself.
        args.num_features = dataset.num_features
        args.num_classes = dataset.num_classes
        args.num_nodes = dataset.data.x.shape[0]

        self.model: SupervisedHomogeneousNodeClassificationModel = build_model(
            args) if model is None else model
        self.model.set_device(self.device)

        # A model may supply its own trainer class; instantiate it if present.
        self.trainer: Optional[
            SupervisedHomogeneousNodeClassificationTrainer] = (
                self.model.get_trainer(NodeClassification, self.args)(
                    self.args) if self.model.get_trainer(
                        NodeClassification, self.args) else None)

        if not self.trainer:
            # Plain training loop: models may override the default Adam optimizer.
            self.optimizer = (torch.optim.Adam(self.model.parameters(),
                                               lr=args.lr,
                                               weight_decay=args.weight_decay)
                              if not hasattr(self.model, "get_optimizer") else
                              self.model.get_optimizer(args))
            # Move all data tensors and the model onto the chosen device.
            self.data.apply(lambda x: x.to(self.device))
            self.model: SupervisedHomogeneousNodeClassificationModel = self.model.to(
                self.device)
            self.patience = args.patience
            self.max_epoch = args.max_epoch