def __init__(self, app: str, model: str, **kwargs):
    """Build a recommendation pipeline from raw data or a named dataset.

    Parameters
    ----------
    app : str
        Application identifier forwarded to the parent pipeline.
    model : str
        Model name used both for default-argument lookup and model building.
    **kwargs
        Must contain either ``data`` (a raw interaction array) or
        ``dataset`` (a dataset name). Optional keys: ``data_path``
        (where the converted data is cached, default ``"tmp_data.pt"``)
        and ``batch_size`` (default 2048).
    """
    super(RecommendationPipepline, self).__init__(app, model=model, **kwargs)
    if "data" in kwargs:
        data = kwargs["data"]
        # The last 100 rows double as both the validation and test split.
        val_data = test_data = data[-100:, :]
        data = build_recommendation_data("custom", data, val_data, test_data)
        self.data_path = kwargs.get("data_path", "tmp_data.pt")
        torch.save(data, self.data_path)
        self.dataset = NodeDataset(path=self.data_path, scale_feat=False)
    elif "dataset" in kwargs:
        dataset = kwargs.pop("dataset")
        self.dataset = build_dataset_from_name(dataset)
    else:
        print("Please provide recommendation data!")
        exit(0)
    # NOTE(review): a dead `batch_size` assignment (default 128) inside the
    # "data" branch was removed — this line always overwrote it anyway.
    self.batch_size = kwargs.get("batch_size", 2048)
    self.n_items = self.dataset[0].n_params["n_items"]
    # "ali" is used only to obtain a valid default-argument template;
    # the actual data comes from self.dataset above.
    args = get_default_args(task="recommendation", dataset="ali", model=model, **kwargs)
    args.model = args.model[0]
    self.model = build_model(args)
    self.model.eval()
    # Pre-compute user/item embeddings once so later queries are cheap.
    self.user_emb, self.item_emb = self.model.generate()
def get_default_args_kg(dataset, model, dw="gnn_kg_link_prediction_dw", mw="gnn_kg_link_prediction_mw"):
    """Return default arguments for KG link prediction with the shared KG overrides applied."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for option, default in default_dict_kg.items():
        setattr(args, option, default)
    return args
def get_default_args_graph_clf(dataset, model, dw="graph_classification_dw", mw="graph_classification_mw"):
    """Return default arguments for graph classification, overridden by the shared defaults."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name in default_dict:
        setattr(args, name, default_dict[name])
    return args
def __init__(self, app: str, model: str, **kwargs):
    """Configure an embedding-generation pipeline, picking the method family from match.yml.

    Reads ``match.yml`` next to this module to learn which models are shallow
    embedding methods (listed under the "blogcatalog" dataset) and which are
    GNNs (listed under "cora"), then builds default arguments accordingly.

    Parameters
    ----------
    app : str
        Application identifier forwarded to the parent pipeline.
    model : str
        Model name; must appear in one of the two model lists from match.yml.
    **kwargs
        Optional ``data_path`` (default ``"tmp_data.pt"``) and
        ``num_features`` (required for GNN models).
    """
    super(GenerateEmbeddingPipeline, self).__init__(app, model=model, **kwargs)
    match_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "match.yml")
    with open(match_path, "r", encoding="utf8") as f:
        match = yaml.load(f, Loader=yaml.FullLoader)
    objective = match.get("unsupervised_node_classification", None)
    # Fix: initialize both lists so a match.yml missing either dataset entry
    # cannot leave these names unbound (NameError in the checks below).
    emb_models = []
    gnn_models = []
    for pair_dict in objective:
        if "blogcatalog" in pair_dict["dataset"]:
            emb_models = pair_dict["model"]
        elif "cora" in pair_dict["dataset"]:
            gnn_models = pair_dict["model"]
    if model in emb_models:
        self.method_type = "emb"
        args = get_default_args(task="unsupervised_node_classification", dataset="blogcatalog", model=model, **kwargs)
    elif model in gnn_models:
        self.method_type = "gnn"
        args = get_default_args(task="unsupervised_node_classification", dataset="cora", model=model, **kwargs)
    else:
        print("Please choose a model from ", emb_models, "or", gnn_models)
        exit(0)
    self.data_path = kwargs.get("data_path", "tmp_data.pt")
    self.num_features = kwargs.get("num_features", None)
    if self.num_features is not None:
        args.num_features = self.num_features
    elif self.method_type == "gnn":
        # GNN encoders need the input feature dimension up front.
        print("Please provide num_features for gnn model!")
        exit(0)
    args.model = args.model[0]
    self.model = build_model(args)
    self.trainer = self.model.get_trainer(self.model, args)
    if self.trainer is not None:
        self.trainer = self.trainer(args)
def get_default_args_ne(dataset, model, dw="network_embedding_dw", mw="network_embedding_mw"):
    """Return default arguments for network embedding with the shared overrides applied."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for option, default in default_dict.items():
        setattr(args, option, default)
    return args
def get_default_args_for_unsup_nn(dataset, model, dw="node_classification_dw", mw="self_auxiliary_mw"):
    """Return default arguments for self-supervised node classification, plus the shared overrides."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for key in default_dict:
        setattr(args, key, default_dict[key])
    return args
def get_default_args_emb(dataset, model, dw="heterogeneous_embedding_dw", mw="heterogeneous_embedding_mw"):
    """Return default arguments for heterogeneous embedding with the embedding-specific overrides."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for option, default in default_dict_emb.items():
        setattr(args, option, default)
    return args
def test_train():
    """Smoke-test the training entry point on cora with a tiny epoch budget."""
    args = get_default_args(dataset="cora", model="gcn", epochs=10, cpu=True)
    # get_default_args returns list-valued fields; train expects scalars.
    args.dataset, args.model, args.seed = args.dataset[0], args.model[0], args.seed[0]
    outcome = train(args)
    assert "test_acc" in outcome
    assert outcome["test_acc"] > 0
def test_get_default_args():
    """Defaults for a fully specified task/dataset/model triple look sane."""
    defaults = options.get_default_args(task="node_classification", dataset="cora", model="gcn")
    assert defaults.task == "node_classification"
    assert defaults.model == "gcn"
    assert defaults.dataset == "cora"
    assert defaults.hidden_size > 0
    assert defaults.lr > 0
def test_get_default_args():
    """List-valued dataset/model arguments are preserved in order; overrides stick."""
    args = options.get_default_args(dataset=["cora", "citeseer"], model=["gcn", "gat"], hidden_size=128)
    for idx, expected in enumerate(["gcn", "gat"]):
        assert args.model[idx] == expected
    for idx, expected in enumerate(["cora", "citeseer"]):
        assert args.dataset[idx] == expected
    assert args.hidden_size == 128
def get_default_args_generative(dataset, model, dw="node_classification_dw", mw="self_auxiliary_mw", **kwargs):
    """Return default arguments for generative self-supervision.

    Shared defaults are applied first, then caller-supplied kwargs, so
    explicit kwargs win on overlapping keys.
    """
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for overrides in (default_dict, kwargs):
        for name, value in overrides.items():
            setattr(args, name, value)
    return args
def test_set_best_config():
    """set_best_config applies the tuned gat/citeseer hyper-parameters."""
    cfg = get_default_args(task="node_classification", dataset="citeseer", model="gat")
    cfg.model, cfg.dataset = cfg.model[0], cfg.dataset[0]
    cfg = set_best_config(cfg)
    expected = {"lr": 0.005, "max_epoch": 1000, "weight_decay": 0.001}
    for option, value in expected.items():
        assert getattr(cfg, option) == value
def __init__(self, app: str, model: str, **kwargs):
    """Configure an embedding-generation pipeline for a shallow embedding or GNN model.

    The model name selects the method family: shallow embedding methods use
    blogcatalog defaults, GNN encoders use cora defaults. GNN models must be
    given ``num_features``; any model may override ``data_path``.
    """
    super(GenerateEmbeddingPipeline, self).__init__(app, model=model, **kwargs)
    self.kwargs = kwargs
    emb_models = [
        "prone",
        "netmf",
        "netsmf",
        "deepwalk",
        "line",
        "node2vec",
        "hope",
        "sdne",
        "grarep",
        "dngr",
        "spectral",
    ]
    gnn_models = ["dgi", "mvgrl", "grace", "unsup_graphsage"]
    # Resolve method family and its template dataset in one branch.
    if model in emb_models:
        self.method_type, template_dataset = "emb", "blogcatalog"
    elif model in gnn_models:
        self.method_type, template_dataset = "gnn", "cora"
    else:
        print("Please choose a model from ", emb_models, "or", gnn_models)
        exit(0)
    args = get_default_args(dataset=template_dataset, model=model, **kwargs)
    self.data_path = kwargs.get("data_path", "tmp_data.pt")
    self.num_features = kwargs.get("num_features", None)
    if self.num_features is not None:
        args.num_features = self.num_features
    elif self.method_type == "gnn":
        # GNN encoders need the input feature dimension up front.
        print("Please provide num_features for gnn model!")
        exit(0)
    args.model = args.model[0]
    self.args = args
def test_train():
    """Smoke-test node-classification training on cora with a tiny epoch budget."""
    args = get_default_args(task="node_classification", dataset="cora", model="gcn", max_epoch=10, cpu=True)
    # get_default_args returns list-valued fields; train expects scalars.
    args.dataset, args.model, args.seed = args.dataset[0], args.model[0], args.seed[0]
    outcome = train(args)
    assert "Acc" in outcome
    assert outcome["Acc"] > 0
def raw_experiment(task: str, dataset, model, **kwargs):
    """Train every (dataset, model, seed) variant and print a results table.

    Parameters
    ----------
    task : str
        Task name used to build default arguments and filter valid variants.
    dataset, model
        Passed to ``get_default_args`` unless ``kwargs["args"]`` is given,
        in which case those args are used verbatim.
    **kwargs
        Optional ``args`` (pre-built argument namespace) and ``tablefmt``
        (tabulate format for the printed summary, default ``"github"``).

    Returns
    -------
    dict
        Maps (dataset, model) keys to the list of per-seed result dicts.
    """
    if "args" not in kwargs:
        args = get_default_args(task=task, dataset=dataset, model=model, **kwargs)
    else:
        args = kwargs["args"]
    variants = list(gen_variants(dataset=args.dataset, model=args.model, seed=args.seed))
    variants = check_task_dataset_model_match(task, variants)
    results_dict = defaultdict(list)
    results = [train(args) for args in variant_args_generator(args, variants)]
    for variant, result in zip(variants, results):
        # Drop the trailing seed so repeated seeds group under one key.
        results_dict[variant[:-1]].append(result)
    # Idiom fix: dict.get replaces the manual membership test.
    tablefmt = kwargs.get("tablefmt", "github")
    output_results(results_dict, tablefmt)
    return results_dict
def experiment(dataset, model=None, **kwargs):
    """Top-level experiment entry point.

    Normalizes ``dataset``/``model`` to lists, builds (or accepts) an
    argument namespace, handles the deprecated ``max_epoch`` alias, and
    dispatches to hyper-parameter search (``auto_experiment``) when a
    search space is present, otherwise to ``raw_experiment``.

    Parameters
    ----------
    dataset
        A dataset name/object or a list of them.
    model
        A model name/module or a list of them; defaults to "autognn",
        which enables automatic architecture search.
    **kwargs
        Optional ``args`` (pre-built namespace; remaining kwargs are
        applied to it as overrides) plus any default-argument overrides.
    """
    if model is None:
        model = "autognn"
    # Idiom fix: isinstance accepts a tuple of types.
    if isinstance(dataset, (str, Dataset)):
        dataset = [dataset]
    if isinstance(model, (str, nn.Module)):
        model = [model]
    if "args" not in kwargs:
        args = get_default_args(dataset=[str(x) for x in dataset], model=[str(x) for x in model], **kwargs)
    else:
        args = kwargs["args"]
        # Apply remaining keyword overrides onto the supplied namespace.
        for key, value in kwargs.items():
            if key != "args":
                setattr(args, key, value)
    if isinstance(model[0], nn.Module):
        args.model = [x.model_name for x in model]
    print(args)
    # Keep the live objects on args so downstream code can use them directly.
    args.dataset = dataset
    args.model = model
    if args.max_epoch is not None:
        warnings.warn(
            "The max_epoch is deprecated and will be removed in the future, please use epochs instead!"
        )
        args.epochs = args.max_epoch
    # "autognn" alone triggers automatic search with sensible fallbacks.
    if len(model) == 1 and isinstance(model[0], str) and model[0] == "autognn":
        if not hasattr(args, "search_space"):
            args.search_space = default_search_space
        if not hasattr(args, "seed"):
            args.seed = [1, 2]
        if not hasattr(args, "n_trials"):
            args.n_trials = 20
    if hasattr(args, "search_space"):
        return auto_experiment(args)
    return raw_experiment(args)
.format(acc))

# %%
# Experiment API
# --------------
# CogDL provides an easier-to-use API for training: Experiment.
from cogdl import experiment

experiment(model="gcn", dataset="cora")

# Alternatively, you can create each component separately and run the
# process manually using build_dataset and build_model from CogDL.
from cogdl import experiment
from cogdl.datasets import build_dataset
from cogdl.models import build_model
from cogdl.options import get_default_args

args = get_default_args(model="gcn", dataset="cora")
dataset = build_dataset(args)
model = build_model(args)
experiment(model=model, dataset=dataset)

# %%
# How to save a trained model?
# --------------------------
experiment(model="gcn", dataset="cora", checkpoint_path="gcn_cora.pt")

# When training stops, the model is saved in gcn_cora.pt. If you want to
# continue training from a previous checkpoint with different parameters
# (such as learning rate, weight decay, etc.) while keeping the same model
# parameters (such as hidden size, number of layers), you can do the following:
experiment(model="gcn", dataset="cora", checkpoint_path="gcn_cora.pt", resume_training=True)

# %%
def get_default_args_agc(dataset, model, dw=None, mw=None):
    """Return default arguments for attributed graph clustering with the shared overrides applied."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, value in default_dict.items():
        setattr(args, name, value)
    return args
# Example: build default arguments, construct the task, and launch training.
from cogdl.tasks import build_task
from cogdl.options import get_default_args

# Assemble the default argument namespace for GCN on cora.
args = get_default_args(task="node_classification", dataset="cora", model="gcn")
task = build_task(args)
# Run training; `ret` holds the result metrics returned by the task.
ret = task.train()