def main(args):
    """Train and evaluate a heterogeneous node classifier ('hgt' or
    'HeteroRGCN') on the ACM dataset, repeating over `args.repeat` seeds
    and printing mean/std test accuracy.

    Args:
        args: parsed CLI namespace; uses .device, .repeat, .model,
              .epoch, .max_lr and .weight_decay.
    """
    dataset = build_dataset_from_name("hetero-acm-hgt")
    G = dataset[0].to(args.device)
    field = dataset.schema["target_node_type"]
    print(G)
    # NOTE(review): feature dim is hard-coded to 256 rather than read from
    # G.nodes[field].data['feat'] — presumably the HGT input projection
    # size; confirm against the model config.
    num_features = 256
    labels = G.nodes[field].data['label'].to(args.device)
    num_classes = labels.max().item() + 1
    test_mask = G.nodes[field].data['test_mask'].to(args.device)

    accs = []
    for seed in tqdm(range(args.repeat)):
        set_seed(seed)
        if args.model == 'hgt':
            model_hp = {
                # hp from model
                "num_layers": 2,
                "hidden": [256, 256, 256],
                "heads": 4,
                "dropout": 0.2,
                "act": "gelu",
                "use_norm": True,
            }
        elif args.model == 'HeteroRGCN':
            model_hp = {
                # hp from model
                "num_layers": 2,
                "hidden": [256],
                "heads": 4,
                "dropout": 0.2,
                "act": "leaky_relu",
            }
        else:
            # FIX: without this branch an unknown model name would raise a
            # confusing NameError on `model_hp` further down.
            raise ValueError(f"unsupported model: {args.model}")
        trainer = NodeClassificationHetTrainer(
            model=args.model,
            dataset=dataset,
            num_features=num_features,
            num_classes=num_classes,
            device=args.device,
            init=False,
            feval=['acc'],
            loss="cross_entropy",
            optimizer=torch.optim.AdamW,
        ).duplicate_from_hyper_parameter({
            "trainer": {
                "max_epoch": args.epoch,
                "early_stopping_round": args.epoch + 1,
                "lr": args.max_lr,
                "weight_decay": args.weight_decay,
            },
            "encoder": model_hp,
        })
        trainer.train(dataset, False)
        acc = trainer.evaluate(dataset, test_mask, "acc")
        print(acc)
        accs.append(acc)
    print('{:.4f} ~ {:.4f}'.format(np.mean(accs), np.std(accs)))
def main(args):
    """Train and evaluate a HAN node classifier on the ACM dataset,
    averaging test accuracy over `args["repeat"]` random seeds.

    Args:
        args: dict-like config with keys 'device', 'repeat', 'model',
              'num_epochs', 'lr' and 'weight_decay'.
    """
    dataset = build_dataset_from_name("hetero-acm-han")
    field = dataset.schema["target_node_type"]
    g = dataset[0].to(args['device'])

    labels = g.nodes[field].data['label'].to(args['device'])
    # FIX: num_classes was computed twice in the original; once suffices.
    num_classes = labels.max().item() + 1

    train_mask = g.nodes[field].data['train_mask'].to(args['device'])
    val_mask = g.nodes[field].data['val_mask'].to(args['device'])
    test_mask = g.nodes[field].data['test_mask'].to(args['device'])
    num_features = g.nodes[field].data['feat'].shape[1]

    accs = []
    for seed in tqdm(range(args["repeat"])):
        set_seed(seed)
        if args["model"] == 'han':
            model_hp = {
                "num_layers": 2,
                "hidden": [256],  ##
                "heads": [8],  ##
                "dropout": 0.2,
                "act": "gelu",
            }
        else:
            # FIX: an unsupported model previously fell through and raised a
            # NameError on `model_hp`; fail fast with a clear message.
            raise ValueError(f"unsupported model: {args['model']}")
        trainer = NodeClassificationHetTrainer(
            model=args["model"],
            dataset=dataset,
            num_features=num_features,
            num_classes=num_classes,
            device=args["device"],
            init=False,
            feval=['acc'],
            loss="cross_entropy",
            optimizer=torch.optim.AdamW,
        ).duplicate_from_hyper_parameter({
            "trainer": {
                "max_epoch": args["num_epochs"],
                "early_stopping_round": args["num_epochs"] + 1,
                "lr": args["lr"],
                "weight_decay": args["weight_decay"],
            },
            "encoder": model_hp,
        })
        trainer.train(dataset, False)
        acc = trainer.evaluate(dataset, "test", "acc")
        print(acc)
        accs.append(acc)
    print('{:.4f} ~ {:.4f}'.format(np.mean(accs), np.std(accs)))
def test_graph_cross_validation():
    """Exercise the fold-based and ratio-based graph dataset split helpers."""
    dataset = build_dataset_from_name('imdb-b')
    # outer level: 10-fold cross validation
    utils.graph_cross_validation(dataset, 10)
    # activate the second fold
    utils.set_fold(dataset, 1)
    # take the training portion of the active fold ...
    train_split = utils.graph_get_split(dataset, "train", False)
    # ... and split it again 80/20 into train / validation
    utils.graph_random_splits(train_split, 0.8, 0.2)
def main(args):
    """Manually train an AutoHAN model on ACM with early stopping.

    Args:
        args: dict-like config with keys 'device', 'patience', 'lr',
              'weight_decay' and 'num_epochs'.

    Returns:
        Test accuracy of the best (early-stopped) checkpoint.
    """
    dataset = build_dataset_from_name("hetero-acm-han")
    target = dataset.schema["target_node_type"]
    graph = dataset[0].to(args['device'])

    labels = graph.nodes[target].data['label']
    n_classes = labels.max().item() + 1
    labels = labels.to(args['device'])

    mask_train = graph.nodes[target].data['train_mask'].to(args['device'])
    mask_val = graph.nodes[target].data['val_mask'].to(args['device'])
    mask_test = graph.nodes[target].data['test_mask'].to(args['device'])

    model = AutoHAN(
        dataset=dataset,
        num_features=graph.nodes[target].data['feat'].shape[1],
        num_classes=n_classes,
        device=args['device'],
        init=True,
    ).model
    graph = graph.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    for _epoch in range(args['num_epochs']):
        model.train()
        logits = model(graph)
        loss = criterion(logits[mask_train], labels[mask_train])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validate and let the stopper track the best checkpoint
        val_loss, val_acc, _, _ = evaluate(model, graph, labels, mask_val,
                                           criterion)
        if stopper.step(val_loss.data.item(), val_acc, model):
            break

    # restore the best weights before the final test evaluation
    stopper.load_checkpoint(model)
    _, test_acc, _, _ = evaluate(model, graph, labels, mask_test, criterion)
    return test_acc
def test_lp_trainer():
    """Smoke-test the GCN link-prediction trainer on Cora."""
    raw = build_dataset_from_name("cora")
    pyg_dataset = to_pyg_dataset(raw)
    graph = train_test_split_edges(pyg_dataset[0], 0.1, 0.1)
    dataset = [graph]

    trainer = LinkPredictionTrainer(model='gcn', init=False)
    trainer.num_features = graph.x.size(1)
    trainer.initialize()

    print(trainer.encoder.encoder)
    print(trainer.decoder.decoder)

    trainer.train(dataset, True)
    result = trainer.evaluate(dataset, "test", "auc")
    print(result)
def test_node_trainer():
    """Smoke-test the full-batch GCN node-classification trainer on Cora."""
    pyg_dataset = to_pyg_dataset(build_dataset_from_name("cora"))
    graph = pyg_dataset[0]

    trainer = NodeClassificationFullTrainer(
        model='gcn',
        init=False,
        lr=1e-2,
        weight_decay=5e-4,
        max_epoch=200,
        early_stopping_round=200,
    )
    trainer.num_features = graph.x.size(1)
    trainer.num_classes = graph.y.max().item() + 1
    trainer.initialize()

    print(trainer.encoder.encoder)
    print(trainer.decoder.decoder)

    trainer.train(pyg_dataset, True)
    result = trainer.evaluate(pyg_dataset, "test", "acc")
    print("Acc:", result)
# CLI flags and data preparation for the ACM heterogeneous node-classification
# benchmark: builds the hetero-acm-hgt dataset, moves it to the chosen device,
# extracts label/mask/feature tensors for the target node type, then loops
# over `--repeat` seeds.
# NOTE(review): this chunk is whitespace-mangled onto a single line, and the
# body of the trailing `for seed in tqdm(...):` loop lies outside this view —
# left byte-identical rather than reconstructed.
parser.add_argument('--n_epoch', type=int, default=200) parser.add_argument('--n_inp', type=int, default=256) parser.add_argument('--clip', type=int, default=1.0) parser.add_argument('--max_lr', type=float, default=1e-3) parser.add_argument('--repeat', type=int, default=50) parser.add_argument('--device', type=str, default='cuda') parser.add_argument('--model', type=str, choices=['hgt', 'HeteroRGCN'], default='hgt') args = parser.parse_args() torch.manual_seed(0) dataset = build_dataset_from_name("hetero-acm-hgt") G = dataset[0].to(args.device) print(G) target_field = dataset.schema["target_node_type"] labels = G.nodes[target_field].data["label"].to(args.device) train_mask = G.nodes[target_field].data["train_mask"].nonzero().flatten() val_mask = G.nodes[target_field].data["val_mask"].nonzero().flatten() test_mask = G.nodes[target_field].data["test_mask"].nonzero().flatten() num_features = G.nodes[target_field].data["feat"].size(1) num_classes = labels.max().item() + 1 accs = [] for seed in tqdm(range(args.repeat)):
# Seeds all RNGs (python / numpy / torch, incl. CUDA) for reproducibility,
# then builds the requested dataset and converts it to the active backend.
# For imdb-* datasets it begins computing the max node degree (used for a
# one-hot degree feature generator, per the OneHotDegreeGenerator import).
# NOTE(review): chunk is whitespace-mangled and truncated — the DGL-branch
# loop over `_converted_dataset` has no visible end — left byte-identical.
args = parser.parse_args() if torch.cuda.is_available(): torch.cuda.set_device(args.device) seed = args.seed # set random seed random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False print("begin processing dataset", args.dataset, "into", args.folds, "folds.") dataset = build_dataset_from_name(args.dataset) _converted_dataset = convert_dataset(dataset) if args.dataset.startswith("imdb"): from autogl.module.feature import OneHotDegreeGenerator if DependentBackend.is_pyg(): from torch_geometric.utils import degree max_degree = 0 for data in _converted_dataset: deg_max = int( degree(data.edge_index[0], data.num_nodes).max().item()) max_degree = max(max_degree, deg_max) else: max_degree = 0 for data, _ in _converted_dataset: deg_max = data.in_degrees().max().item()
# Tail of a `test_model` helper (re-instantiates a model from an empty HP
# space and recursively tests it) followed by a __main__ driver that picks
# the backend-appropriate dataset converter, builds Cora, and runs an SPOS
# evolutionary search over a single-path node-classification space.
# NOTE(review): chunk is whitespace-mangled and truncated at both ends — the
# enclosing `test_model` definition starts before this view and the
# "evolutionary + graphnas" section continues after it — left byte-identical.
if len(model.hyper_parameter_space) == 0: model_2 = model.from_hyper_parameter({}) if check_children: test_model(model_2, data) if __name__ == "__main__": print("Testing backend: {}".format( "dgl" if DependentBackend.is_dgl() else "pyg")) if DependentBackend.is_dgl(): from autogl.datasets.utils.conversion._to_dgl_dataset import to_dgl_dataset as convert_dataset else: from autogl.datasets.utils.conversion._to_pyg_dataset import to_pyg_dataset as convert_dataset dataset = build_dataset_from_name('cora') dataset = convert_dataset(dataset) data = dataset[0] di = bk_feat(data).shape[1] do = len(np.unique(bk_label(data))) print("evolutionary + singlepath ") space = SinglePathNodeClassificationSpace().cuda() space.instantiate(input_dim=di, output_dim=do) esti = OneShotEstimator() algo = Spos(cycles=200) model = algo.search(space, dataset, esti) test_model(model, data, True) print("evolutionary + graphnas ")
# Driver for reproducing the LADIES node-classification config: parses CLI
# args, seeds all RNGs, loads a YAML config into AutoNodeClassifier, fits
# with a 24h time limit, and reports Micro-F1 / accuracy.
# NOTE(review): chunk is whitespace-mangled and truncated at the start — the
# opening of the `argument_parser.add_argument(` call (and the parser's
# construction) lies before this view — left byte-identical.
default="../configs/nodeclf_ladies_reproduction.yml", help="configuration file to adopt", ) argument_parser.add_argument("--seed", type=int, default=0, help="random seed") argument_parser.add_argument("--device", default=0, type=int, help="GPU device") arguments = argument_parser.parse_args() if torch.cuda.is_available(): torch.cuda.set_device(arguments.device) seed = arguments.seed random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False dataset = build_dataset_from_name(arguments.dataset) configs = yaml.load( open(arguments.configs, "r").read(), Loader=yaml.FullLoader ) autoClassifier = AutoNodeClassifier.from_config(configs) # The running time is likely to exceed 1 hour when CiteSeer or Reddit dataset is adopted autoClassifier.fit(dataset, time_limit=24 * 3600, evaluation_method=[MicroF1]) autoClassifier.get_leaderboard().show() predict_result = autoClassifier.predict_proba() res = autoClassifier.evaluate(metric=[MicroF1, 'acc']) print("Final Micro F1 {:.4f} Acc {:.4f}".format(res[0], res[1]))
# Demo: AutoGL node classification on Cora with HPO and ensembling,
# reporting accuracy on the test mask.
import autogl
from autogl.datasets import build_dataset_from_name

# NOTE(review): hard-coded local data path and GPU index below — adjust for
# your environment.
cora_dataset = build_dataset_from_name('cora', path='/home/qinyj/AGL/')

import torch
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')

from autogl.solver import AutoNodeClassifier
solver = AutoNodeClassifier(
    feature_module='deepgl',
    graph_models=['gcn', 'gat'],
    hpo_module='anneal',
    ensemble_module='voting',
    device=device,
)
solver.fit(cora_dataset, time_limit=3600)
solver.get_leaderboard().show()

from autogl.module.train import Acc

predicted = solver.predict_proba()
ground_truth = cora_dataset.data.y[cora_dataset.data.test_mask].cpu().numpy()
print('Test accuracy: ', Acc.evaluate(predicted, ground_truth))
# Demo: AutoNodeClassifier with a single GIN model on Cora under the DGL
# backend, reporting test-mask accuracy.
import os
os.environ["AUTOGL_BACKEND"] = "dgl"

from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoNodeClassifier
from autogl.module.train import NodeClassificationFullTrainer
from autogl.backend import DependentBackend

# The label field name differs between the two backends.
key = "y" if DependentBackend.is_pyg() else "label"

cora = build_dataset_from_name("cora")
solver = AutoNodeClassifier(
    graph_models=("gin", ),
    default_trainer=NodeClassificationFullTrainer(
        decoder=None,
        init=False,
        max_epoch=200,
        early_stopping_round=201,
        lr=0.01,
        weight_decay=0.0,
    ),
    hpo_module=None,
    device="auto",
)
solver.fit(cora, evaluation_method=["acc"])

prediction = solver.predict(cora)
truth = cora[0].nodes.data[key][cora[0].nodes.data["test_mask"]].cpu().numpy()
print((prediction == truth).astype('float').mean())
# CLI: run AutoHeteroNodeClassifier on the ACM dataset matching the chosen
# model (han/hgt) and print test accuracy.
import os
os.environ["AUTOGL_BACKEND"] = 'dgl'

from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoHeteroNodeClassifier
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, choices=["han", "hgt"])
    parser.add_argument("--max_evals", type=int, default=10)
    args = parser.parse_args()

    dataset = build_dataset_from_name(f"hetero-acm-{args.model}")
    # FIX: max_evals was hard-coded to 10, silently ignoring --max_evals;
    # honor the parsed CLI value (default remains 10).
    solver = AutoHeteroNodeClassifier(graph_models=(args.model, ),
                                      max_evals=args.max_evals)
    solver.fit(dataset)
    acc = solver.evaluate(metric='acc')
    print("acc: ", acc)
# CLI for benchmarking hetero models (han / hgt / HeteroRGCN): maps the model
# name to its ACM dataset variant, fetches its encoder/decoder HP defaults,
# then repeatedly builds an AutoHeteroNodeClassifier with a fixed trainer HP
# space over `--repeat` runs.
# NOTE(review): chunk is whitespace-mangled and truncated — the `fixed(...)`
# call and the repeat-loop body continue past this view — left byte-identical.
parser.add_argument("--model", type=str, choices=["han", "hgt", "HeteroRGCN"], default="hgt") parser.add_argument("--epoch", type=int, default=200) parser.add_argument("--lr", type=float, default=1e-3) parser.add_argument("--weight_decay", type=float, default=1e-2) parser.add_argument("--device", type=str, default="cuda") parser.add_argument("--repeat", type=int, default=10) args = parser.parse_args() dataset = { "han": "hetero-acm-han", "hgt": "hetero-acm-hgt", "HeteroRGCN": "hetero-acm-hgt" } dataset = build_dataset_from_name(dataset[args.model]) model_hp, _ = get_encoder_decoder_hp(args.model) accs = [] process = tqdm(total=args.repeat) for rep in range(args.repeat): solver = AutoHeteroNodeClassifier( graph_models=[args.model], hpo_module="random", ensemble_module=None, max_evals=1, device=args.device, trainer_hp_space=fixed( max_epoch=args.epoch, early_stopping_round=args.epoch + 1,
# Demo: fit AutoGraphClassifier on MUTAG from a YAML config with 10-fold
# cross validation, then report test accuracy.
import sys
sys.path.append('../')

from autogl.datasets import build_dataset_from_name, utils
from autogl.solver import AutoGraphClassifier
from autogl.module import Acc, BaseModel

dataset = build_dataset_from_name('mutag')
utils.graph_random_splits(dataset, train_ratio=0.4, val_ratio=0.4)

autoClassifier = AutoGraphClassifier.from_config(
    '../configs/graph_classification.yaml')

# train
autoClassifier.fit(
    dataset,
    time_limit=3600,
    train_split=0.8,
    val_split=0.1,
    cross_validation=True,
    cv_split=10,
)
autoClassifier.get_leaderboard().show()
print('best single model:\n',
      autoClassifier.get_leaderboard().get_best_model(0))

# test
predict_result = autoClassifier.predict_proba()
ground_truth = dataset.data.y[dataset.test_index].cpu().detach().numpy()
print(Acc.evaluate(predict_result, ground_truth))
# CLI for benchmarking full-batch node-classification trainers (gat / gcn /
# sage / gin) under the DGL backend: parses flags, lowercases the dataset
# name, converts to DGL, extracts feature/label/mask tensors, and loops over
# `--repeat` seeds building a NodeClassificationFullTrainer per run.
# NOTE(review): chunk is whitespace-mangled and truncated — the
# `NodeClassificationFullTrainer(` call is cut off mid-arguments — left
# byte-identical rather than reconstructed.
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser('dgl trainer dataset') parser.add_argument('--device', type=str, default='cuda') parser.add_argument('--dataset', type=str, choices=['Cora', 'CiteSeer', 'PubMed'], default='Cora') parser.add_argument('--repeat', type=int, default=50) parser.add_argument('--model', type=str, choices=['gat', 'gcn', 'sage', 'gin'], default='gat') parser.add_argument('--lr', type=float, default=0.01) parser.add_argument('--weight_decay', type=float, default=0.0) parser.add_argument('--epoch', type=int, default=200) args = parser.parse_args() # seed = 100 dataset = build_dataset_from_name(args.dataset.lower()) dataset = to_dgl_dataset(dataset) data = dataset[0].to(args.device) num_features = data.ndata['feat'].size(1) num_classes = data.ndata['label'].max().item() + 1 label = data.ndata['label'] test_mask = data.ndata['test_mask'] accs = [] model_hp, decoder_hp = get_encoder_decoder_hp(args.model) for seed in tqdm(range(args.repeat)): set_seed(seed) trainer = NodeClassificationFullTrainer(
# Demo: evaluate a GIN-based AutoGraphClassifier on MUTAG with an
# 80/10/10 random split, reporting test accuracy.
from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoGraphClassifier
from autogl.datasets import utils

mutag = build_dataset_from_name("mutag")
utils.graph_random_splits(mutag, 0.8, 0.1)

solver = AutoGraphClassifier(
    graph_models=("gin", ),
    hpo_module=None,
    device="auto",
)
solver.fit(mutag, evaluation_method=["acc"])
result = solver.predict(mutag)

n_correct = sum(
    d.data["y"].item() == r for d, r in zip(mutag.test_split, result)
)
print("Acc:", n_correct / len(result))