def test_distance_to_clusters():
    """Smoke-test training with the 'distance2clusters' auxiliary task."""
    args = get_default_args()
    args.auxiliary_task = "distance2clusters"
    args.alpha = 3
    task = build_task(args, dataset=build_dataset(args), model=build_model(args))
    result = task.train()
    # Accuracy is a fraction, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def test_pairwise_distance():
    """Smoke-test training with the 'pairwise-distance' auxiliary task."""
    args = get_default_args()
    args.auxiliary_task = "pairwise-distance"
    args.alpha = 35
    task = build_task(args, dataset=build_dataset(args), model=build_model(args))
    result = task.train()
    # Accuracy is a fraction, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def test_pairwise_attr_sim():
    """Smoke-test training with the 'pairwise-attr-sim' auxiliary task."""
    args = get_default_args()
    args.auxiliary_task = "pairwise-attr-sim"
    args.alpha = 100
    task = build_task(args, dataset=build_dataset(args), model=build_model(args))
    result = task.train()
    # Accuracy is a fraction, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def test_edgemask():
    """Smoke-test training with the 'edgemask' auxiliary task."""
    args = get_default_args()
    args.auxiliary_task = "edgemask"
    args.alpha = 1
    task = build_task(args, dataset=build_dataset(args), model=build_model(args))
    result = task.train()
    # Accuracy is a fraction, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def __init__(self, args, dataset=None, model=None):
    """Set up data loaders, model, optimizer and scheduler for graph classification.

    Args:
        args: parsed hyper-parameter namespace; mutated in place with
            dataset-derived fields (max_graph_size, num_features, num_classes).
        dataset: prebuilt dataset, or None to build one from `args`.
        model: prebuilt model, or None to build one from `args`
            (after `args` has been augmented with dataset statistics).
    """
    super(GraphClassification, self).__init__(args)
    dataset = build_dataset(args) if dataset is None else dataset
    # Dataset statistics must be written into `args` BEFORE the model is built,
    # since the model sizes its layers from these fields.
    args.max_graph_size = max([ds.num_nodes for ds in dataset])
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.use_unsup = False
    self.args = args
    self.kfold = args.kfold
    self.folds = 10  # fixed 10-fold cross validation
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    self.device = "cpu" if not torch.cuda.is_available(
    ) or args.cpu else args.device_id[0]
    if args.dataset.startswith("ogbg"):
        # OGB graph benchmarks ship their own train/val/test loaders.
        self.data = dataset.data
        self.train_loader, self.val_loader, self.test_loader = dataset.get_loader(
            args)
        model = build_model(args) if model is None else model
    else:
        # Non-OGB datasets: build data here, then let the model define the split.
        self.data = self.generate_data(dataset, args)
        model = build_model(args) if model is None else model
        (
            self.train_loader,
            self.val_loader,
            self.test_loader,
        ) = model.split_dataset(self.data, args)
    self.model = model.to(self.device)
    self.set_loss_fn(dataset)
    self.set_evaluator(dataset)
    self.patience = args.patience
    self.max_epoch = args.max_epoch
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay)
    # Halve the learning rate every 50 epochs.
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=self.optimizer, step_size=50, gamma=0.5)
def test_hope_ppi():
    """HOPE on PPI link prediction should produce a valid ROC-AUC in [0, 1]."""
    args = get_default_args()
    args.task = 'link_prediction'
    args.dataset = 'ppi'
    args.model = 'hope'
    dataset = build_dataset(args)
    args.beta = 0.001
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
def __init__(self, args, dataset=None, model=None):
    """Prepare data, labels and model for multiplex node classification."""
    super(MultiplexNodeClassification, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    self.label_matrix = self.data.y
    self.num_nodes = dataset.num_nodes
    self.num_classes = dataset.num_classes
    self.hidden_size = args.hidden_size
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    # Select the target device and move the model onto it.
    self.device = torch.device('cpu' if args.cpu else 'cuda')
    self.model = self.model.to(self.device)
def __init__(
    self,
    args,
    dataset=None,
    model: Optional[SupervisedHomogeneousNodeClassificationModel] = None,
):
    """Set up data, model and trainer/optimizer for node classification.

    Args:
        args: hyper-parameter namespace; mutated in place with dataset-derived
            fields (num_features, num_classes, num_nodes).
        dataset: prebuilt dataset, or None to build one from `args`.
        model: prebuilt model, or None to build one from `args`.
    """
    super(NodeClassification, self).__init__(args)
    self.args = args
    self.model_name = args.model
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    self.device = "cpu" if not torch.cuda.is_available(
    ) or args.cpu else args.device_id[0]
    dataset = build_dataset(args) if dataset is None else dataset
    self.dataset = dataset
    self.data = dataset[0]
    # Dataset statistics must be in `args` before the model is constructed.
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_nodes = dataset.data.x.shape[0]
    self.model: SupervisedHomogeneousNodeClassificationModel = build_model(
        args) if model is None else model
    self.model.set_device(self.device)
    self.set_loss_fn(dataset)
    self.set_evaluator(dataset)
    # A model may supply its own trainer class; instantiate it if present.
    self.trainer = (self.model.get_trainer(NodeClassification, self.args)(
        self.args) if self.model.get_trainer(NodeClassification, self.args)
        else None)
    if not self.trainer:
        if hasattr(self.args, "trainer") and self.args.trainer is not None:
            # Trainer chosen by name on the command line.
            if "saint" in self.args.trainer:
                self.trainer = SAINTTrainer(self.args)
            elif "self_auxiliary_task" in self.args.trainer:
                # Auxiliary-task training needs node embeddings from the model.
                if not hasattr(self.model, "get_embeddings"):
                    raise ValueError(
                        "Model ({}) must implement get_embeddings method".
                        format(self.model_name))
                self.trainer = SelfAuxiliaryTaskTrainer(self.args)
        else:
            # No trainer at all: train in-task with a plain optimizer.
            # A model may provide its own optimizer via get_optimizer.
            self.optimizer = (torch.optim.Adam(
                self.model.parameters(),
                lr=args.lr,
                weight_decay=args.weight_decay)
                if not hasattr(self.model, "get_optimizer")
                else self.model.get_optimizer(args))
            self.data.apply(lambda x: x.to(self.device))
            self.model: SupervisedHomogeneousNodeClassificationModel = self.model.to(
                self.device)
    self.patience = args.patience
    self.max_epoch = args.max_epoch
def main_dataset():
    """Train a cogdl GCN on a user-defined node-classification dataset."""
    args = get_task_model_args(task="node_classification", model="gcn")
    # Plug a customized dataset into the standard cogdl pipeline.
    dataset = MyNodeClassificationDataset()
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    # Build the stock model, then run the task end-to-end and show the metrics.
    model = build_model(args)
    task = build_task(args, dataset, model)
    print(task.train())
def run_n_seed(self, args):
    """Train once per random seed and collect the per-seed result dicts."""
    results = []
    for seed in range(N_SEED):
        # Fix the seed before each build so every run is reproducible.
        set_random_seed(seed)
        model = build_model(args)
        task = build_task(args, model=model, dataset=self.dataset)
        results.append(task.train())
    return results
def test_grarep_ppi():
    """GraRep on PPI link prediction should produce a valid ROC-AUC in [0, 1]."""
    args = get_default_args()
    args.task = 'link_prediction'
    args.dataset = 'ppi'
    args.model = 'grarep'
    dataset = build_dataset(args)
    args.step = 1
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
def test_edgemask_pt_ft_freeze():
    """Smoke-test edgemask pre-training with frozen fine-tuning."""
    args = get_default_args()
    args.auxiliary_task = "edgemask"
    args.trainer = "self_auxiliary_task_pretrain"
    args.alpha = 1
    args.freeze = True
    task = build_task(args, dataset=build_dataset(args), model=build_model(args))
    result = task.train()
    # Accuracy is a fraction, so it must lie in [0, 1].
    assert 0 <= result["Acc"] <= 1
def test_grarep_ppi():
    """GraRep embeddings on PPI should reach a positive Micro-F1 at 0.9 ratio."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'ppi'
    args.model = 'grarep'
    dataset = build_dataset(args)
    args.step = 1
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
def enhance_emb(self, G, embs):
    """Refine node embeddings with a ProNE or ProNE++ enhancer.

    Mutates self.args with the enhancer's hyper-parameters, builds the
    enhancer model, and returns the propagated embeddings.
    Raises ValueError for any other value of self.args.enhance.
    """
    adj = sp.csr_matrix(nx.adjacency_matrix(G))
    method = self.args.enhance
    if method == "prone":
        self.args.model = 'prone'
        self.args.step, self.args.theta, self.args.mu = 5, 0.5, 0.2
        enhancer = build_model(self.args)
        return enhancer._chebyshev_gaussian(adj, embs)
    if method == "prone++":
        self.args.model = "prone++"
        self.args.filter_types = ["heat", "ppr", "gaussian", "sc"]
        self.args.max_evals = 100
        self.args.num_workers = 10
        self.args.no_svd = False
        self.args.loss = "infomax"
        self.args.no_search = False
        enhancer = build_model(self.args)
        return enhancer(embs, adj)
    raise ValueError("only supports 'prone' and 'prone++'")
def test_hope_ppi():
    """HOPE embeddings on PPI should reach a positive Micro-F1 at 0.9 ratio."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'ppi'
    args.model = 'hope'
    dataset = build_dataset(args)
    args.beta = 0.001
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
def __init__(self, args, dataset=None, model=None):
    """Prepare data and model for multiplex link prediction."""
    super(MultiplexLinkPrediction, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    # Not every multiplex dataset exposes feature counts.
    if hasattr(dataset, "num_features"):
        args.num_features = dataset.num_features
    if model is None:
        model = build_model(args)
    self.model = model
    self.eval_type = args.eval_type
def __init__(self, args, dataset=None, model=None):
    """Build the KG dataset and embedding model; log entity/relation counts."""
    super(TripleLinkPrediction, self).__init__()
    self.dataset = dataset if dataset is not None else build_dataset(args)
    # Record graph cardinalities on args for the model to consume.
    args.nentity = self.dataset.num_entities
    args.nrelation = self.dataset.num_relations
    self.model = model if model is not None else build_model(args)
    self.args = args
    set_logger(args)
    logging.info('Model: %s' % args.model)
    logging.info('#entity: %d' % args.nentity)
    logging.info('#relation: %d' % args.nrelation)
def __init__(self, args, dataset=None, model=None):
    """Prepare data, labels and model for multiplex node classification."""
    super(MultiplexNodeClassification, self).__init__(args)
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    self.label_matrix = self.data.y
    self.num_nodes = dataset.num_nodes
    self.num_classes = dataset.num_classes
    self.hidden_size = args.hidden_size
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    # Use the configured GPU only when CUDA is available and not disabled.
    if torch.cuda.is_available() and not args.cpu:
        self.device = args.device_id[0]
    else:
        self.device = "cpu"
    self.model = self.model.to(self.device)
def test_gcn_cora():
    """GCN on Cora node classification should yield an accuracy in [0, 1]."""
    args = get_default_args()
    args.task = 'node_classification'
    args.dataset = 'cora'
    args.model = 'gcn'
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['Acc'] <= 1
def test_gcn_cora():
    """GCN on Cora node classification should yield an accuracy in [0, 1]."""
    args = get_default_args()
    args.task = "node_classification"
    args.dataset = "cora"
    args.model = "gcn"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret["Acc"] <= 1
def __init__(self, args, dataset=None, model=None):
    """Build the KG dataset and embedding model, place the model on device,
    and log entity/relation counts."""
    super(TripleLinkPrediction, self).__init__()
    if dataset is None:
        dataset = build_dataset(args)
    self.dataset = dataset
    # Record graph cardinalities on args for the model to consume.
    args.nentity = self.dataset.num_entities
    args.nrelation = self.dataset.num_relations
    if model is None:
        model = build_model(args)
    self.model = model
    self.args = args
    self.device = "cpu" if args.cpu else args.device_id[0]
    self.model = self.model.to(self.device)
    set_logger(args)
    logging.info("Model: %s" % args.model)
    logging.info("#entity: %d" % args.nentity)
    logging.info("#relation: %d" % args.nrelation)
def __init__(self, args):
    """Prepare data and model for multiplex link prediction."""
    super(MultiplexLinkPrediction, self).__init__(args)
    dataset = build_dataset(args)
    self.data = dataset[0]
    # Not every multiplex dataset exposes feature counts.
    if hasattr(dataset, "num_features"):
        args.num_features = dataset.num_features
    self.model = build_model(args)
    self.patience = args.patience
    self.max_epoch = args.max_epoch
    self.eval_type = args.eval_type
def __init__(self, args, dataset=None, model=None):
    """Prepare data, model and device for multiplex link prediction."""
    super(MultiplexLinkPrediction, self).__init__(args)
    # Use the configured GPU only when CUDA is available and not disabled.
    if torch.cuda.is_available() and not args.cpu:
        self.device = args.device_id[0]
    else:
        self.device = "cpu"
    if dataset is None:
        dataset = build_dataset(args)
    self.data = dataset[0]
    # Not every multiplex dataset exposes feature counts.
    if hasattr(dataset, "num_features"):
        args.num_features = dataset.num_features
    if model is None:
        model = build_model(args)
    self.model = model
    self.eval_type = args.eval_type
def _enhance_emb(self, graph, embs):
    """Refine node embeddings with a ProNE or ProNE++ enhancer.

    Builds a fresh argument object for the enhancer model and returns the
    propagated embeddings. Raises ValueError for any other value of
    self.enhance.
    """
    adj = nx.to_scipy_sparse_matrix(graph.to_networkx())
    enhancer_args = ArgClass()
    if self.enhance == "prone":
        enhancer_args.model = "prone"
        enhancer_args.hidden_size = embs.shape[1]
        enhancer_args.step, enhancer_args.theta, enhancer_args.mu = 5, 0.5, 0.2
        return build_model(enhancer_args)._chebyshev_gaussian(adj, embs)
    if self.enhance == "prone++":
        enhancer_args.model = "prone++"
        enhancer_args.filter_types = ["heat", "ppr", "gaussian", "sc"]
        enhancer_args.max_evals = self.max_evals
        enhancer_args.num_workers = self.num_workers
        enhancer_args.no_svd = False
        enhancer_args.loss = "infomax"
        enhancer_args.no_search = False
        return build_model(enhancer_args)(embs, adj)
    raise ValueError("only supports 'prone' and 'prone++'")
def test_mvgrl():
    """MVGRL on Cora unsupervised node classification should score above 0."""
    args = get_unsupervised_nn_args()
    args.task = "unsupervised_node_classification"
    args.dataset = "cora"
    # NOTE(review): sibling code uses `args.max_epoch` (singular); the plural
    # `max_epochs` here may be a silent no-op typo — confirm which attribute
    # the trainer actually reads.
    args.max_epochs = 2
    args.model = "mvgrl"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Acc'] > 0
def test_mixhop_citeseer():
    """MixHop on Citeseer node classification should yield accuracy in [0, 1]."""
    args = get_default_args()
    args.task = 'node_classification'
    args.dataset = 'citeseer'
    args.model = 'mixhop'
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_layers = 2
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['Acc'] <= 1
def test_mixhop_citeseer():
    """MixHop on Citeseer node classification should yield accuracy in [0, 1]."""
    args = get_default_args()
    args.task = "node_classification"
    args.dataset = "citeseer"
    args.model = "mixhop"
    dataset = build_dataset(args)
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_layers = 2
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret["Acc"] <= 1
def test_prone_amazon():
    """ProNE on Amazon multiplex link prediction should yield ROC-AUC in [0, 1]."""
    args = get_default_args()
    args.task = 'multiplex_link_prediction'
    args.dataset = 'amazon'
    args.model = 'prone'
    dataset = build_dataset(args)
    args.step = 5
    args.theta = 0.5
    args.mu = 0.2
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert 0 <= ret['ROC_AUC'] <= 1
def test_prone_blogcatalog():
    """ProNE on BlogCatalog should reach a positive Micro-F1 at 0.9 ratio."""
    args = get_default_args()
    args.task = 'unsupervised_node_classification'
    args.dataset = 'blogcatalog'
    args.model = 'prone'
    dataset = build_dataset(args)
    args.step = 5
    args.theta = 0.5
    args.mu = 0.2
    model = build_model(args)
    # Pass the prebuilt dataset/model through, matching the other tests in this
    # file; previously both were built and then silently discarded, and
    # build_task(args) rebuilt them from scratch.
    task = build_task(args, dataset=dataset, model=model)
    ret = task.train()
    assert ret['Micro-F1 0.9'] > 0
def __init__(
    self,
    args,
    dataset=None,
    model: Optional[SupervisedHomogeneousNodeClassificationModel] = None,
):
    """Set up data, model and trainer/optimizer for node classification.

    Args:
        args: hyper-parameter namespace; mutated in place with dataset-derived
            fields (num_features, num_classes, num_nodes).
        dataset: prebuilt dataset, or None to build one from `args`.
        model: prebuilt model, or None to build one from `args`.
    """
    super(NodeClassification, self).__init__(args)
    self.args = args
    self.model_name = args.model
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    self.device = "cpu" if not torch.cuda.is_available(
    ) or args.cpu else args.device_id[0]
    dataset = build_dataset(args) if dataset is None else dataset
    if args.model == "sgcpn" and args.missing_rate > 0:
        # SGC-PN preprocessing is only defined for the three citation datasets.
        assert args.dataset in ["cora", "citeseer", "pubmed"]
        # NOTE(review): missing_rate is hard-coded to 0 here even though the
        # guard requires args.missing_rate > 0 — confirm this is intentional.
        dataset.data = preprocess_data_sgcpn(dataset.data,
                                             normalize_feature=True,
                                             missing_rate=0)
    self.dataset = dataset
    self.data = dataset[0]
    # Dataset statistics must be in `args` before the model is constructed.
    args.num_features = dataset.num_features
    args.num_classes = dataset.num_classes
    args.num_nodes = dataset.data.x.shape[0]
    self.model: SupervisedHomogeneousNodeClassificationModel = build_model(
        args) if model is None else model
    self.model.set_device(self.device)
    # A model may supply its own trainer class; instantiate it if present.
    self.trainer: Optional[
        SupervisedHomogeneousNodeClassificationTrainer] = (
            self.model.get_trainer(NodeClassification, self.args)(
                self.args) if self.model.get_trainer(
                    NodeClassification, self.args) else None)
    if not self.trainer:
        # No custom trainer: train in-task with a plain optimizer.
        # A model may provide its own optimizer via get_optimizer.
        self.optimizer = (torch.optim.Adam(self.model.parameters(),
                                           lr=args.lr,
                                           weight_decay=args.weight_decay)
                          if not hasattr(self.model, "get_optimizer")
                          else self.model.get_optimizer(args))
        self.data.apply(lambda x: x.to(self.device))
        self.model: SupervisedHomogeneousNodeClassificationModel = self.model.to(
            self.device)
    self.patience = args.patience
    self.max_epoch = args.max_epoch