Example #1
import random

import numpy as np
import torch
import dgl


def set_random_seed(seed):
    """Set the random seed for Python, NumPy, PyTorch, and DGL.

    :param seed: int, the random seed
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    dgl.seed(seed)
Example #2
def set_seed(seed):
    import numpy as np
    import random
    import torch
    import dgl

    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # multi-gpu
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    dgl.seed(seed)
Example #3
File: utils.py  Project: tao-shen/FedGraph
def setup_seed(seed):
    import numpy as np
    import random
    import torch
    from torch.backends import cudnn
    import dgl
    dgl.seed(seed)
    dgl.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    # torch.cuda.seed_all()
    np.random.seed(seed)
    random.seed(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
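
Taken together, examples #1-#3 call the same handful of seeding functions. A single self-contained helper combining them might look like the sketch below; the name seed_all is illustrative and not taken from any of the listed projects, and every call in the body appears in one of the examples above.

import random

import numpy as np
import torch
import dgl


def seed_all(seed):
    """Illustrative helper: seed every RNG used by the examples above."""
    random.seed(seed)                          # Python's built-in RNG
    np.random.seed(seed)                       # NumPy
    torch.manual_seed(seed)                    # PyTorch (CPU)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)           # current CUDA device
        torch.cuda.manual_seed_all(seed)       # all CUDA devices (multi-GPU)
    torch.backends.cudnn.deterministic = True  # deterministic cuDNN kernels
    torch.backends.cudnn.benchmark = False     # disable cuDNN autotuning
    dgl.seed(seed)                             # DGL's global RNG
    dgl.random.seed(seed)                      # DGL's random module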
Example #4
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.nn import GraphConv
import time
import numpy as np
import psutil
from statistics import mean
from dgl.data import RedditDataset
import networkx as nx

###############################################################################
# Fix DGL's global random state so random graph operations are reproducible.

dgl.seed(0)
dgl.random.seed(0)

###############################################################################

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = GraphConv(602, 128)
        self.layer2 = GraphConv(128, 128)
        self.layer3 = GraphConv(128, 128)
        self.fc = nn.Linear(128, 41)
    
    def forward(self, g, features):
        x = F.relu(self.layer1(g, features))
        x = F.relu(self.layer2(g, x))
        # the snippet is truncated here; the remaining layers presumably follow
        x = F.relu(self.layer3(g, x))
        return self.fc(x)
Example #5
File: utils.py  Project: zhjwy9343/APAN
import numpy as np
import torch
import dgl


def set_random_seeds(seed):
    if seed != -1:  # a seed of -1 means "leave the RNGs unseeded"
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        dgl.seed(seed)
Example #6
def main():
    parser = argparse.ArgumentParser(description='pretrain_supervised')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any. (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training. (default: 32)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train. (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate. (default: 0.001)')
    parser.add_argument('--decay',
                        type=float,
                        default=0,
                        help='weight decay. (default: 0)')
    parser.add_argument(
        '--num_layer',
        type=int,
        default=5,
        help='number of GNN message passing layers. (default: 5).')
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='embedding dimensions. (default: 300)')
    parser.add_argument('--dropout_ratio',
                        type=float,
                        default=0.2,
                        help='dropout ratio. (default: 0.2)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default="mean",
        help='graph-level pooling, or readout, '
        'for computing graph representations out of node representations, '
        'which can be `sum`, `mean`, `max`, `attention` or `set2set`. (default: mean) '
        '`sum`: apply sum pooling over the nodes in the graph. '
        '`mean`: apply average pooling over the nodes in the graph. '
        '`max`: apply max pooling over the nodes in the graph. '
        '`attention`: apply Global Attention Pooling over the nodes in the graph. '
        '`set2set`: apply set2set pooling over the nodes in the graph.')
    parser.add_argument(
        '--JK',
        type=str,
        default="last",
        help='JK for jumping knowledge: '
        'decides how to combine the node representations from all layers for the final output. '
        'It can be `concat`, `last`, `max` or `sum`. (default: last) '
        '`concat`: concatenate the output node representations from all GIN layers. '
        '`last`: use the node representations from the last GIN layer. '
        '`max`: apply max pooling to the node representations across all GIN layers. '
        '`sum`: sum the output node representations from all GIN layers.')
    parser.add_argument(
        '--dataset',
        type=str,
        default='chembl_filtered',
        help=
        'path of the dataset; for now, only classification is supported. (default: chembl_filtered)'
    )
    parser.add_argument(
        '--input_model_file',
        type=str,
        default=None,
        help=
        'filename to read the model if there is any. (default: no model to load.)'
    )
    parser.add_argument(
        '--output_model_file',
        type=str,
        default='pretrain_supervised.pth',
        help=
        'filename to output the pre-trained model. (default: pretrain_supervised.pth)'
    )
    parser.add_argument('--seed', type=int, default=0, help="Random seed.")
    parser.add_argument(
        '--num_workers',
        type=int,
        default=8,
        help='number of workers for dataset loading. (default: 8)')
    args = parser.parse_args()
    print(args)

    # set seed
    torch.manual_seed(args.seed)
    dgl.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    model = GINPredictor(num_node_emb_list=[119, 4],
                         num_edge_emb_list=[6, 3],
                         num_layers=args.num_layer,
                         emb_dim=args.emb_dim,
                         JK=args.JK,
                         dropout=args.dropout_ratio,
                         readout=args.graph_pooling,
                         n_tasks=1310)

    if args.input_model_file is not None:
        model.gnn.load_state_dict(torch.load(args.input_model_file))
    model.to(device)

    if args.dataset == 'chembl_filtered':
        url = 'dataset/pretrain_gnns.zip'
        data_path = get_download_dir() + '/pretrain_gnns.zip'
        dir_path = get_download_dir() + '/pretrain_gnns'
        download(_get_dgl_url(url), path=data_path, overwrite=False)
        extract_archive(data_path, dir_path)
        with open(dir_path + '/supervised_chembl_rev.pkl', 'rb') as f:
            data = pickle.load(f)
    else:
        with open(args.dataset, 'rb') as f:
            data = pickle.load(f)

    atom_featurizer = PretrainAtomFeaturizer()
    bond_featurizer = PretrainBondFeaturizer()
    dataset = PretrainDataset(data=data,
                              smiles_to_graph=partial(smiles_to_bigraph,
                                                      add_self_loop=True),
                              node_featurizer=atom_featurizer,
                              edge_featurizer=bond_featurizer,
                              task='supervised')

    train_dataloader = DataLoader(dataset=dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.decay)
    criterion = nn.BCEWithLogitsLoss(reduction='none')

    train(args, model, train_dataloader, optimizer, criterion, device)

    if args.output_model_file != "":
        torch.save(model.gnn.state_dict(), args.output_model_file)
Example #7
File: main.py  Project: msgerasyov/phi_gcn
import os
import glob

import numpy as np
import scipy.sparse as sp
import torch
import dgl

from utils import update_linear_schedule
from running_mean_std import RunningMeanStd

args = get_args()

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

num_updates = int(args.num_frames) // args.num_steps // args.num_processes

np.random.seed(args.seed)
dgl.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

try:
    os.makedirs(args.log_dir)
except OSError:
    files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)

eval_log_dir = args.log_dir + "_eval"

try:
    os.makedirs(eval_log_dir)
except OSError:
    # completing the truncated snippet, mirroring the log_dir handling above
    files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)
Example #8
def main():
    global device, n_node_feats, n_classes, epsilon

    argparser = argparse.ArgumentParser("GAT on OGBN-Arxiv",
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.")
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.")
    argparser.add_argument("--n-runs", type=int, help="running times", default=10)
    argparser.add_argument("--n-epochs", type=int, help="number of epochs", default=2000)

    argparser.add_argument("--use-labels", type=str2bool, default=True)
    argparser.add_argument("--n-label-iters", type=int, help="number of label iterations", default=0)
    argparser.add_argument("--no-attn-dst", type=str2bool, default=True)
    argparser.add_argument("--use-norm", type=str2bool, default=True)
    argparser.add_argument('--topo-loss-ratio', type=float, default=1.0)
    argparser.add_argument('--topo-mask-threshold', type=float, default=0.2)

    argparser.add_argument("--n-layers", type=int, help="number of layers", default=3)
    argparser.add_argument("--n-heads", type=int, help="number of heads", default=3)
    argparser.add_argument("--n-hidden", type=int, help="number of hidden units", default=250)

    argparser.add_argument("--dropout", type=float, help="dropout rate", default=0.75)
    argparser.add_argument("--input-drop", type=float, help="input drop rate", default=0.25)
    argparser.add_argument("--attn-drop", type=float, help="attention dropout rate", default=0.0)
    argparser.add_argument("--edge-drop", type=float, help="edge drop rate", default=0.3)

    argparser.add_argument("--lr", type=float, help="learning rate", default=0.002)
    argparser.add_argument("--wd", type=float, help="weight decay", default=0)
    argparser.add_argument("--log-every", type=int, help="log every LOG_EVERY epochs", default=1)
    argparser.add_argument("--plot-curves", help="plot learning curves", action="store_true")

    argparser.add_argument("--version", type=str, required=True)
    args = argparser.parse_args()

    if not args.use_labels and args.n_label_iters > 0:
        raise ValueError("'--use-labels' must be enabled when n_label_iters > 0")

    path = 'log'
    if not os.path.exists(path):
        os.mkdir(path)
    version = f'{args.version}_{formatted_time()}'
    backup_code(path=path, version=version)
    log_file = open(os.path.join(path, version, 'log'), 'w', buffering=1)
    write_dict(args.__dict__, log_file, prefix=hint_line('args'))
    tensorboard_writer = SummaryWriter(log_dir=os.path.join(path, version, 'tensorboard'))

    device = f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    # load data
    graph, labels, train_idx, val_idx, test_idx, evaluator = load_data('ogbn-arxiv')
    print(graph, file=log_file)
    graph = preprocess(graph)
    log_file.write(f"Number of params: {count_parameters(args)}\n")

    graph = graph.to(device)
    labels = labels.to(device)
    train_idx = train_idx.to(device)
    val_idx = val_idx.to(device)
    test_idx = test_idx.to(device)

    # run
    val_accs = []
    test_accs = []

    for n_running in range(1, args.n_runs + 1):
        # Use seeds 0-9, following the OGB leaderboard instructions.
        seed = n_running - 1
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        dgl.seed(seed)
        dgl.random.seed(seed)

        val_acc, test_acc = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running,
                                log_file, tensorboard_writer)
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    log_file.write(f"Runned {args.n_runs} times\n")
    log_file.write(f"Val Accs: {val_accs}\n")
    log_file.write(f"Test Accs: {test_accs}\n")
    log_file.write(f"Average val accuracy: {np.mean(val_accs):.4f} ± {np.std(val_accs):.4f}\n")
    log_file.write(f"Average test accuracy: {np.mean(test_accs):.4f} ± {np.std(test_accs):.4f}\n")
    log_file.write(f"Number¢∞ of params: {count_parameters(args)}\n")
Example #9
def train_treelstm(config: DictConfig):
    filter_warnings()
    seed_everything(config.seed)
    dgl.seed(config.seed)

    print_config(config, ["hydra", "log_offline"])

    data_module = JsonlDataModule(config)
    data_module.prepare_data()
    data_module.setup()
    model: LightningModule
    if "max_types" in config and "max_type_parts" in config:
        model = TypedTreeLSTM2Seq(config, data_module.vocabulary)
    else:
        model = TreeLSTM2Seq(config, data_module.vocabulary)

    # define logger
    wandb_logger = WandbLogger(project=f"tree-lstm-{config.dataset}",
                               log_model=False,
                               offline=config.log_offline)
    wandb_logger.watch(model)
    # define model checkpoint callback
    checkpoint_callback = ModelCheckpoint(
        dirpath=wandb_logger.experiment.dir,
        filename="{epoch:02d}-{val_loss:.4f}",
        period=config.save_every_epoch,
        save_top_k=-1,
    )
    upload_checkpoint_callback = UploadCheckpointCallback(
        wandb_logger.experiment.dir)
    # define early stopping callback
    early_stopping_callback = EarlyStopping(patience=config.patience,
                                            monitor="val_loss",
                                            verbose=True,
                                            mode="min")
    # define callback for printing intermediate result
    print_epoch_result_callback = PrintEpochResultCallback("train", "val")
    # use gpu if it exists
    gpu = 1 if torch.cuda.is_available() else None
    # define learning rate logger
    lr_logger = LearningRateMonitor("step")
    trainer = Trainer(
        max_epochs=config.n_epochs,
        gradient_clip_val=config.clip_norm,
        deterministic=True,
        check_val_every_n_epoch=config.val_every_epoch,
        log_every_n_steps=config.log_every_step,
        logger=wandb_logger,
        gpus=gpu,
        progress_bar_refresh_rate=config.progress_bar_refresh_rate,
        callbacks=[
            lr_logger,
            early_stopping_callback,
            checkpoint_callback,
            upload_checkpoint_callback,
            print_epoch_result_callback,
        ],
        resume_from_checkpoint=config.resume_checkpoint,
    )

    trainer.fit(model=model, datamodule=data_module)
    trainer.test()
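
In the longer examples (#6-#8), the seeding calls are issued once, right after argument parsing and before any model, DataLoader, or graph is constructed, so that weight initialization, data shuffling, and graph sampling are all reproducible. An illustrative usage sketch, assuming the seed_all helper outlined after example #3 and an argparse setup like example #6's:

args = parser.parse_args()
seed_all(args.seed)  # seed before building the model and DataLoader
# ... build the model, dataset, and DataLoader, then train as in example #6 ...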