Ejemplo n.º 1
0
def manual_seed(seed):
    paddorch.manual_seed(seed)
Ejemplo n.º 2
0
def main(args):
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 0
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)
    if args.finetune:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()

        skf = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx
                and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)

    elif args.dataset == "dgl":
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.DataLoader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None
    import gcc.models.graph_encoder
    gcc.models.graph_encoder.final_dropout = 0  ##disable dropout
    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size,
                          n_data,
                          args.nce_k,
                          args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast.cuda(args.gpu)

    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion.cuda(args.gpu)
    if args.gpu >= 0:
        model = model.cuda(args.gpu)
        model_ema = model_ema.cuda(args.gpu)

    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer.cuda(args.gpu)
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )

        def clear_bn(m):
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)

    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if True:
        # print("=> loading checkpoint '{}'".format(args.resume))
        # checkpoint = torch.load(args.resume, map_location="cpu")
        import torch as th
        checkpoint = th.load("torch_models/ckpt_epoch_100.pth",
                             map_location=th.device('cpu'))
        torch_input_output_grad = th.load(
            "torch_models/torch_input_output_grad.pt",
            map_location=th.device('cpu'))
        from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
        print("loading.............. model")
        paddle_state_dict = load_pytorch_pretrain_model(
            model, checkpoint["model"])
        model.load_state_dict(paddle_state_dict)
        print("loading.............. contrast")
        paddle_state_dict2 = load_pytorch_pretrain_model(
            contrast, checkpoint["contrast"])
        contrast.load_state_dict(paddle_state_dict2)
        print("loading.............. model_ema")
        paddle_state_dict3 = load_pytorch_pretrain_model(
            model_ema, checkpoint["model_ema"])
        if args.moco:
            model_ema.load_state_dict(paddle_state_dict3)

        print("=> loaded successfully '{}' (epoch {})".format(
            args.resume, checkpoint["epoch"]))
        del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate * 0.1,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
        for _ in range(1):
            graph_q, graph_k = train_dataset[0]
            graph_q2, graph_k2 = train_dataset[1]
            graph_q, graph_k = dgl.batch([graph_q, graph_q2
                                          ]), dgl.batch([graph_k, graph_k2])

            input_output_grad = []
            input_output_grad.append([graph_q, graph_k])
            model.train()
            model_ema.eval()

            feat_q = model(graph_q)
            with torch.no_grad():
                feat_k = model_ema(graph_k)

            out = contrast(feat_q, feat_k)
            loss = criterion(out)
            optimizer.zero_grad()
            loss.backward()
            input_output_grad.append([feat_q, out, loss])
            print("loss:", loss.numpy())
            optimizer.step()
            moment_update(model, model_ema, args.alpha)
        print(
            "max diff feat_q:",
            np.max(
                np.abs(torch_input_output_grad[1][0].detach().numpy() -
                       feat_q.numpy())))
        print(
            "max diff out:",
            np.max(
                np.abs(torch_input_output_grad[1][1].detach().numpy() -
                       out.numpy())))
        print(
            "max diff loss:",
            np.max(
                np.abs(torch_input_output_grad[1][2].detach().numpy() -
                       loss.numpy())))

        name2grad = dict()
        for name, p in dict(model.named_parameters()).items():
            if p.grad is not None:
                name2grad[name] = p.grad
                torch_grad = torch_input_output_grad[2][name].numpy()

                if "linear" in name and "weight" in name:
                    torch_grad = torch_grad.T
                max_grad_diff = np.max(np.abs(p.grad - torch_grad))
                print("max grad diff:", name, max_grad_diff)
        input_output_grad.append(name2grad)
Ejemplo n.º 3
0
    parser.add_argument('--inp_dir',
                        type=str,
                        default='assets/representative/custom/female',
                        help='input directory when aligning faces')
    parser.add_argument('--out_dir',
                        type=str,
                        default='assets/representative/celeba_hq/src/female',
                        help='output directory when aligning faces')

    # face alignment
    parser.add_argument('--wing_path',
                        type=str,
                        default='expr/checkpoints/wing.pdparams')
    parser.add_argument('--lm_path',
                        type=str,
                        default='expr/checkpoints/celeba_lm_mean.npz')

    # step size
    parser.add_argument('--print_every', type=int, default=10)
    parser.add_argument('--sample_every', type=int, default=5000)
    parser.add_argument('--save_every', type=int, default=10000)
    parser.add_argument('--eval_every', type=int, default=50000)

    args = parser.parse_args()

    place = fluid.CUDAPlace(0)
    porch.manual_seed(args.seed)
    with fluid.dygraph.guard(place=place):
        print("compute device:", place)
        main(args)
Ejemplo n.º 4
0
def main(args):
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 1
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    learning_rate = float(args.learning_rate)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)
    if args.finetune:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()

        skf = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx
                and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)

    elif args.dataset == "dgl":
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.graph.Dataloader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None

    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        # model_ema.load_state_dict(model.state_dict()) ##complete copy of model
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size,
                          n_data,
                          args.nce_k,
                          args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast

    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion
    if args.gpu >= 0:
        model = model
        model_ema = model_ema

    import paddle
    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
            grad_clip=paddle.nn.clip.ClipGradByValue(max=1))

        def clear_bn(m):
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)

    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        if args.finetune:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByValue(max=1),
            )
        else:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByNorm(args.clip_norm))
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if args.finetune:  ##if finetune model exists, continue resume that
            if os.path.isdir(args.model_folder + "/current.pth"):
                args.resume = args.model_folder + "/current.pth"
                print("change resume model to finetune model path:",
                      args.resume)
                ##find last end epoch
                import glob
                ckpt_epoches = glob.glob(args.model_folder +
                                         "/ckpt_epoch*.pth")
                if len(ckpt_epoches) > 0:
                    args.start_epoch = sorted([
                        int(
                            os.path.basename(x).replace(".pth", "").replace(
                                "ckpt_epoch_", "")) for x in ckpt_epoches
                    ])[-1] + 1
                    print("starting epoch:", args.start_epoch)
                    args.epochs = args.epochs + args.start_epoch - 1
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume, map_location="cpu")
        # checkpoint = torch.load(args.resume)
        # args.start_epoch = checkpoint["epoch"] + 1
        model.load_state_dict(checkpoint["model"])
        # optimizer.load_state_dict(checkpoint["optimizer"])
        contrast.load_state_dict(checkpoint["contrast"])
        if args.moco:
            model_ema.load_state_dict(checkpoint["model_ema"])

        print("=> loaded successfully '{}' ".format(args.resume))
        if args.finetune:
            if "output_layer" in checkpoint:
                output_layer.load_state_dict(checkpoint["output_layer"])
                print("loaded output layer")
        # del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()

    # tensorboard
    #  logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)
    sw = LogWriter(logdir=args.tb_folder)

    import gc
    gc.enable()
    for epoch in range(args.start_epoch, args.epochs + 1):

        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        try:
            if args.finetune:
                loss, _ = train_finetune(
                    epoch,
                    train_loader,
                    model,
                    output_layer,
                    criterion,
                    optimizer,
                    output_layer_optimizer,
                    sw,
                    args,
                )
            else:

                loss = train_moco(
                    epoch,
                    train_loader,
                    model,
                    model_ema,
                    contrast,
                    criterion,
                    optimizer,
                    sw,
                    args,
                )
        except:
            print("Error in Epoch", epoch)
            continue
        time2 = time.time()
        print("epoch {}, total time {:.2f}".format(epoch, time2 - time1))

        # save model
        if epoch % args.save_freq == 0:
            print("==> Saving...")
            state = {
                "opt": vars(args).copy(),
                "model": model.state_dict(),
                "contrast": contrast.state_dict(),
                "optimizer": optimizer.state_dict()
            }
            if args.moco:
                state["model_ema"] = model_ema.state_dict()
            if args.finetune:
                state['output_layer'] = output_layer.state_dict()
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
            # help release GPU memory
            # del state

        # saving the model
        print("==> Saving...")
        state = {
            "opt": vars(args).copy(),
            "model": model.state_dict(),
            "contrast": contrast.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        if args.moco:
            state["model_ema"] = model_ema.state_dict()
        if args.finetune:
            state['output_layer'] = output_layer.state_dict()
        save_file = os.path.join(args.model_folder, "current.pth")
        torch.save(state, save_file)
        if epoch % args.save_freq == 0:
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
        # help release GPU memory
        # del state
        if args.gpu >= 0:
            torch.cuda.empty_cache()

        if args.finetune:
            valid_loss, valid_f1 = test_finetune(epoch, valid_loader, model,
                                                 output_layer, criterion, sw,
                                                 args)
            print("epoch %d| valid f1: %.3f" % (epoch, valid_f1))

    # del model,model_ema,train_loader
    gc.collect()
    return valid_f1