import random

import numpy as np
import torch
import dgl


def set_random_seed(seed):
    """Set the random seed for Python, NumPy and PyTorch.

    :param seed: int, the random seed
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    dgl.seed(seed)
def set_seed(seed):
    import numpy as np
    import random
    import torch
    import dgl

    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # multi-gpu
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    dgl.seed(seed)
def setup_seed(seed):
    import numpy as np
    import random
    import torch
    from torch.backends import cudnn
    import dgl

    dgl.seed(seed)
    dgl.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    # torch.cuda.seed_all()
    np.random.seed(seed)
    random.seed(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
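# A minimal sanity check for the helpers above (a sketch, reusing `setup_seed`
# from the previous snippet): re-seeding with the same value should make every
# RNG reproduce the same draws, including DGL's random graph generator, since
# all of the RNGs it may rely on are seeded above.
import random

import numpy as np
import torch
import dgl

setup_seed(42)
a = (random.random(), np.random.rand(), torch.rand(1).item())
g_a = dgl.rand_graph(10, 20)

setup_seed(42)
b = (random.random(), np.random.rand(), torch.rand(1).item())
g_b = dgl.rand_graph(10, 20)

assert a == b
assert torch.equal(g_a.edges()[0], g_b.edges()[0])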
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.nn import GraphConv
import time
import numpy as np
import psutil
from statistics import mean
from dgl.data import RedditDataset
import networkx as nx

###############################################################################
# Fix DGL's random seed so graph operations are reproducible.
dgl.seed(0)
dgl.random.seed(0)
###############################################################################


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Reddit has 602 input features and 41 classes.
        self.layer1 = GraphConv(602, 128)
        self.layer2 = GraphConv(128, 128)
        self.layer3 = GraphConv(128, 128)
        self.fc = nn.Linear(128, 41)

    def forward(self, g, features):
        x = F.relu(self.layer1(g, features))
        x = F.relu(self.layer2(g, x))
        x = F.relu(self.layer3(g, x))
        return self.fc(x)
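# A usage sketch for the Net above (note: RedditDataset downloads several GB
# of data on first use). Reddit's 602 input features and 41 classes match the
# layer sizes hard-coded in the model; self_loop=True avoids zero-in-degree
# nodes, which GraphConv rejects by default.
dataset = RedditDataset(self_loop=True)
g = dataset[0]
model = Net()
with torch.no_grad():
    logits = model(g, g.ndata['feat'])
print(logits.shape)  # (num_nodes, 41)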
import numpy as np
import torch
import dgl


def set_random_seeds(seed):
    if seed != -1:
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        dgl.seed(seed)
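# Usage sketch: this variant treats -1 as "do not seed", which makes it easy
# to toggle reproducibility from a command-line flag.
set_random_seeds(42)  # fixed seed, reproducible runs
set_random_seeds(-1)  # no-op, all RNGs left untouched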
import argparse
import pickle
from functools import partial

import dgl
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from dgl.data.utils import get_download_dir, download, _get_dgl_url, extract_archive
from dgllife.model import GINPredictor
from dgllife.utils import PretrainAtomFeaturizer, PretrainBondFeaturizer, smiles_to_bigraph

# PretrainDataset, collate and train are assumed to be defined elsewhere in this script.


def main():
    parser = argparse.ArgumentParser(description='pretrain_supervised')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any. (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training. (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train. (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate. (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay. (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers. (default: 5)')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions. (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0.2,
                        help='dropout ratio. (default: 0.2)')
    parser.add_argument(
        '--graph_pooling', type=str, default="mean",
        help='graph-level pooling, or readout, for computing graph representations '
             'out of node representations. It can be `sum`, `mean`, `max`, `attention` '
             'or `set2set`. (default: mean) '
             '`sum`: apply sum pooling over the nodes in the graph. '
             '`mean`: apply average pooling over the nodes in the graph. '
             '`max`: apply max pooling over the nodes in the graph. '
             '`attention`: apply Global Attention Pooling over the nodes in the graph. '
             '`set2set`: apply set2set pooling over the nodes in the graph.')
    parser.add_argument(
        '--JK', type=str, default="last",
        help='JK for jumping knowledge, which decides how to combine the node '
             'representations from all layers for the final output. It can be '
             '`concat`, `last`, `max` or `sum`. (default: last) '
             '`concat`: concatenate the output node representations from all GIN layers. '
             '`last`: use the node representations from the last GIN layer. '
             '`max`: apply max pooling to the node representations across all GIN layers. '
             '`sum`: sum the output node representations from all GIN layers.')
    parser.add_argument(
        '--dataset', type=str, default='chembl_filtered',
        help='path of the dataset. For now, classification only. (default: chembl_filtered)')
    parser.add_argument(
        '--input_model_file', type=str, default=None,
        help='filename to read the model from if there is any. (default: no model to load)')
    parser.add_argument(
        '--output_model_file', type=str, default='pretrain_supervised.pth',
        help='filename to output the pre-trained model. (default: pretrain_supervised.pth)')
    parser.add_argument('--seed', type=int, default=0, help="Random seed.")
    parser.add_argument('--num_workers', type=int, default=8,
                        help='number of workers for dataset loading. (default: 8)')
    args = parser.parse_args()
    print(args)

    # set seed
    torch.manual_seed(args.seed)
    dgl.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    device = torch.device("cuda:" + str(args.device)) \
        if torch.cuda.is_available() else torch.device("cpu")

    model = GINPredictor(num_node_emb_list=[119, 4],
                         num_edge_emb_list=[6, 3],
                         num_layers=args.num_layer,
                         emb_dim=args.emb_dim,
                         JK=args.JK,
                         dropout=args.dropout_ratio,
                         readout=args.graph_pooling,
                         n_tasks=1310)
    if args.input_model_file is not None:
        model.gnn.load_state_dict(torch.load(args.input_model_file))
    model.to(device)

    if args.dataset == 'chembl_filtered':
        url = 'dataset/pretrain_gnns.zip'
        data_path = get_download_dir() + '/pretrain_gnns.zip'
        dir_path = get_download_dir() + '/pretrain_gnns'
        download(_get_dgl_url(url), path=data_path, overwrite=False)
        extract_archive(data_path, dir_path)
        with open(dir_path + '/supervised_chembl_rev.pkl', 'rb') as f:
            data = pickle.load(f)
    else:
        with open(args.dataset, 'rb') as f:
            data = pickle.load(f)

    atom_featurizer = PretrainAtomFeaturizer()
    bond_featurizer = PretrainBondFeaturizer()
    dataset = PretrainDataset(data=data,
                              smiles_to_graph=partial(smiles_to_bigraph, add_self_loop=True),
                              node_featurizer=atom_featurizer,
                              edge_featurizer=bond_featurizer,
                              task='supervised')
    train_dataloader = DataLoader(dataset=dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    criterion = nn.BCEWithLogitsLoss(reduction='none')

    train(args, model, train_dataloader, optimizer, criterion, device)

    if not args.output_model_file == "":
        torch.save(model.gnn.state_dict(), args.output_model_file)
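# A conventional entry point for the script above (a sketch; the excerpt does
# not show how main() is invoked).
if __name__ == '__main__':
    main()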
import glob
import os

import numpy as np
import scipy.sparse as sp
import torch
import dgl

from utils import update_linear_schedule
from running_mean_std import RunningMeanStd

# get_args is assumed to come from this project's argument-parsing module.

args = get_args()

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

num_updates = int(args.num_frames) // args.num_steps // args.num_processes

np.random.seed(args.seed)
dgl.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

try:
    os.makedirs(args.log_dir)
except OSError:
    files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)

eval_log_dir = args.log_dir + "_eval"
try:
    os.makedirs(eval_log_dir)
except OSError:
    files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)
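# In multi-process setups like this one, a common follow-up (not shown in the
# excerpt) is to derive a distinct seed per worker so parallel environments do
# not draw identical random streams. A minimal sketch; `worker_seed` is a
# hypothetical helper, not part of the original script.
def worker_seed(base_seed, rank):
    """Derive a reproducible, rank-specific seed for worker `rank`."""
    return base_seed + rank

# e.g. worker 3 of a run with --seed 42 would seed its RNGs with 45:
# np.random.seed(worker_seed(args.seed, 3))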
import argparse
import os

import numpy as np
import torch
import dgl
from torch.utils.tensorboard import SummaryWriter

# str2bool, formatted_time, backup_code, write_dict, hint_line, load_data,
# preprocess, count_parameters and run are assumed to be defined elsewhere
# in this project.


def main():
    global device, n_node_feats, n_classes, epsilon

    argparser = argparse.ArgumentParser("GAT on OGBN-Arxiv",
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.")
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.")
    argparser.add_argument("--n-runs", type=int, help="running times", default=10)
    argparser.add_argument("--n-epochs", type=int, help="number of epochs", default=2000)
    argparser.add_argument("--use-labels", type=str2bool, default=True)
    argparser.add_argument("--n-label-iters", type=int, help="number of label iterations", default=0)
    argparser.add_argument("--no-attn-dst", type=str2bool, default=True)
    argparser.add_argument("--use-norm", type=str2bool, default=True)
    argparser.add_argument("--topo-loss-ratio", type=float, default=1.0)
    argparser.add_argument("--topo-mask-threshold", type=float, default=0.2)
    argparser.add_argument("--n-layers", type=int, help="number of layers", default=3)
    argparser.add_argument("--n-heads", type=int, help="number of heads", default=3)
    argparser.add_argument("--n-hidden", type=int, help="number of hidden units", default=250)
    argparser.add_argument("--dropout", type=float, help="dropout rate", default=0.75)
    argparser.add_argument("--input-drop", type=float, help="input drop rate", default=0.25)
    argparser.add_argument("--attn-drop", type=float, help="attention dropout rate", default=0.0)
    argparser.add_argument("--edge-drop", type=float, help="edge drop rate", default=0.3)
    argparser.add_argument("--lr", type=float, help="learning rate", default=0.002)
    argparser.add_argument("--wd", type=float, help="weight decay", default=0)
    argparser.add_argument("--log-every", type=int, help="log every LOG_EVERY epochs", default=1)
    argparser.add_argument("--plot-curves", help="plot learning curves", action="store_true")
    argparser.add_argument("--version", type=str, required=True)
    args = argparser.parse_args()

    if not args.use_labels and args.n_label_iters > 0:
        raise ValueError("'--use-labels' must be enabled when n_label_iters > 0")

    path = 'log'
    if not os.path.exists(path):
        os.mkdir(path)
    version = f'{args.version}_{formatted_time()}'
    backup_code(path=path, version=version)
    log_file = open(os.path.join(path, version, 'log'), 'w', buffering=1)
    write_dict(args.__dict__, log_file, prefix=hint_line('args'))
    tensorboard_writer = SummaryWriter(log_dir=os.path.join(path, version, 'tensorboard'))

    device = f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    # load data
    graph, labels, train_idx, val_idx, test_idx, evaluator = load_data('ogbn-arxiv')
    print(graph, file=log_file)
    graph = preprocess(graph)
    log_file.write(f"Number of params: {count_parameters(args)}\n")

    graph = graph.to(device)
    labels = labels.to(device)
    train_idx = train_idx.to(device)
    val_idx = val_idx.to(device)
    test_idx = test_idx.to(device)

    # run
    val_accs = []
    test_accs = []

    for n_running in range(1, args.n_runs + 1):
        # seeds run from 0 to 9, following the OGB leaderboard instructions
        seed = n_running - 1
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        dgl.seed(seed)
        dgl.random.seed(seed)
        val_acc, test_acc = run(args, graph, labels, train_idx, val_idx, test_idx,
                                evaluator, n_running, log_file, tensorboard_writer)
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    log_file.write(f"Ran {args.n_runs} times\n")
    log_file.write(f"Val Accs: {val_accs}\n")
    log_file.write(f"Test Accs: {test_accs}\n")
    log_file.write(f"Average val accuracy: {np.mean(val_accs):.4f} ± {np.std(val_accs):.4f}\n")
    log_file.write(f"Average test accuracy: {np.mean(test_accs):.4f} ± {np.std(test_accs):.4f}\n")
    log_file.write(f"Number of params: {count_parameters(args)}\n")
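# str2bool is referenced above via `type=str2bool` but is not a builtin; a
# common implementation (an assumption, not necessarily this project's) is:
def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')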
import dgl
import torch
from omegaconf import DictConfig
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

# filter_warnings, print_config, JsonlDataModule, TreeLSTM2Seq, TypedTreeLSTM2Seq,
# UploadCheckpointCallback and PrintEpochResultCallback are assumed to be
# defined elsewhere in this project.


def train_treelstm(config: DictConfig):
    filter_warnings()
    seed_everything(config.seed)
    dgl.seed(config.seed)
    print_config(config, ["hydra", "log_offline"])

    data_module = JsonlDataModule(config)
    data_module.prepare_data()
    data_module.setup()

    model: LightningModule
    if "max_types" in config and "max_type_parts" in config:
        model = TypedTreeLSTM2Seq(config, data_module.vocabulary)
    else:
        model = TreeLSTM2Seq(config, data_module.vocabulary)

    # define logger
    wandb_logger = WandbLogger(project=f"tree-lstm-{config.dataset}",
                               log_model=False,
                               offline=config.log_offline)
    wandb_logger.watch(model)

    # define model checkpoint callback
    checkpoint_callback = ModelCheckpoint(
        dirpath=wandb_logger.experiment.dir,
        filename="{epoch:02d}-{val_loss:.4f}",
        period=config.save_every_epoch,
        save_top_k=-1,
    )
    upload_checkpoint_callback = UploadCheckpointCallback(wandb_logger.experiment.dir)

    # define early stopping callback
    early_stopping_callback = EarlyStopping(patience=config.patience,
                                            monitor="val_loss",
                                            verbose=True,
                                            mode="min")

    # define callback for printing intermediate result
    print_epoch_result_callback = PrintEpochResultCallback("train", "val")

    # use gpu if it exists
    gpu = 1 if torch.cuda.is_available() else None

    # define learning rate logger
    lr_logger = LearningRateMonitor("step")

    trainer = Trainer(
        max_epochs=config.n_epochs,
        gradient_clip_val=config.clip_norm,
        deterministic=True,
        check_val_every_n_epoch=config.val_every_epoch,
        log_every_n_steps=config.log_every_step,
        logger=wandb_logger,
        gpus=gpu,
        progress_bar_refresh_rate=config.progress_bar_refresh_rate,
        callbacks=[
            lr_logger,
            early_stopping_callback,
            checkpoint_callback,
            upload_checkpoint_callback,
            print_epoch_result_callback,
        ],
        resume_from_checkpoint=config.resume_checkpoint,
    )
    trainer.fit(model=model, datamodule=data_module)
    trainer.test()
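# train_treelstm expects a hydra DictConfig, so a typical entry point looks
# like the sketch below; the config path and name here are assumptions, not
# taken from the original project.
import hydra


@hydra.main(config_path="configs", config_name="train_treelstm")
def main(config: DictConfig):
    train_treelstm(config)


if __name__ == "__main__":
    main()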