return FAN(fname_pretrained="./wing.ckpt")



if __name__ == '__main__':
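    # Sanity check: convert the PyTorch FAN ("wing") weights to paddorch and compare both models' outputs on a random input.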
    from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
    import torch as pytorch
    import torchvision

    # place = fluid.CPUPlace()
    place = fluid.CUDAPlace(0)
    np.random.seed(0)
    x = np.random.randn(1, 3, 256, 256).astype("float32")
    with fluid.dygraph.guard(place=place):
        model = FAN()
        model.eval()
        pytorch_model = eval_pytorch_model()
        pytorch_model.eval()
        torch_output = pytorch_model(pytorch.FloatTensor(x))[1][0]
        pytorch_state_dict = pytorch_model.state_dict()
        load_pytorch_pretrain_model(model, pytorch_state_dict)
        torch.save(model.state_dict(), "wing")
        paddle_output = model(torch.Tensor(x))[1][0]

        print("torch mean", torch_output.mean())
        print("paddle mean", torch.mean(paddle_output).numpy())
Example #2
                mu, sigma = torch.mean(pool, 0), torch_cov(pool, rowvar=False)
            else:
                mu, sigma = np.mean(pool.numpy(), axis=0), np.cov(pool.numpy(),
                                                                  rowvar=False)
            if prints:
                print('Covariances calculated, getting FID...')
            if use_torch:
                FID = torch_calculate_frechet_distance(
                    mu, sigma, torch.tensor(data_mu), torch.tensor(data_sigma))
                FID = float(FID.numpy())
            else:
                FID = numpy_calculate_frechet_distance(mu, sigma, data_mu,
                                                       data_sigma)
        # Delete mu, sigma, pool, logits, and labels, just in case
        del mu, sigma, pool, logits, labels
        return IS_mean, IS_std, FID

    return get_inception_metrics


if __name__ == '__main__':
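    # Convert the PyTorch Inception weights to a paddle .pdparams checkpoint (used by the FID/IS metrics above).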
    from paddle import fluid
    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place=place):
        inception_model = inception_v3()
        import torch as pytorch
        torch_state_dict = pytorch.load("inception_model.pth")
        from paddorch.convert_pretrain_model import load_pytorch_pretrain_model

        load_pytorch_pretrain_model(inception_model, torch_state_dict)
        torch.save(inception_model.state_dict(), "inception_model.pdparams")
Example #3
def main(args):
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 0
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)
    if args.finetune:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()

        skf = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=args.seed)
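        # collect the 10 stratified folds and select the one indexed by args.fold_idx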
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx
                and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)

    elif args.dataset == "dgl":
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.DataLoader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None
    import gcc.models.graph_encoder
    gcc.models.graph_encoder.final_dropout = 0  # disable dropout
    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size,
                          n_data,
                          args.nce_k,
                          args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast.cuda(args.gpu)

    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion.cuda(args.gpu)
    if args.gpu >= 0:
        model = model.cuda(args.gpu)
        model_ema = model_ema.cuda(args.gpu)

    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer.cuda(args.gpu)
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )

        def clear_bn(m):
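            # reset BatchNorm running statistics before finetuning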
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)

    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
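    # always load the converted PyTorch checkpoint here so the paddle outputs and gradients can be compared against it below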
    if True:
        # print("=> loading checkpoint '{}'".format(args.resume))
        # checkpoint = torch.load(args.resume, map_location="cpu")
        import torch as th
        checkpoint = th.load("torch_models/ckpt_epoch_100.pth",
                             map_location=th.device('cpu'))
        torch_input_output_grad = th.load(
            "torch_models/torch_input_output_grad.pt",
            map_location=th.device('cpu'))
        from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
        print("loading.............. model")
        paddle_state_dict = load_pytorch_pretrain_model(
            model, checkpoint["model"])
        model.load_state_dict(paddle_state_dict)
        print("loading.............. contrast")
        paddle_state_dict2 = load_pytorch_pretrain_model(
            contrast, checkpoint["contrast"])
        contrast.load_state_dict(paddle_state_dict2)
        print("loading.............. model_ema")
        paddle_state_dict3 = load_pytorch_pretrain_model(
            model_ema, checkpoint["model_ema"])
        if args.moco:
            model_ema.load_state_dict(paddle_state_dict3)

        print("=> loaded successfully '{}' (epoch {})".format(
            args.resume, checkpoint["epoch"]))
        del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate * 0.1,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
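        # one forward/backward pass on two batched graphs, compared against the saved PyTorch reference values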
        for _ in range(1):
            graph_q, graph_k = train_dataset[0]
            graph_q2, graph_k2 = train_dataset[1]
            graph_q, graph_k = dgl.batch([graph_q, graph_q2]), dgl.batch([graph_k, graph_k2])

            input_output_grad = []
            input_output_grad.append([graph_q, graph_k])
            model.train()
            model_ema.eval()

            feat_q = model(graph_q)
            with torch.no_grad():
                feat_k = model_ema(graph_k)

            out = contrast(feat_q, feat_k)
            loss = criterion(out)
            optimizer.zero_grad()
            loss.backward()
            input_output_grad.append([feat_q, out, loss])
            print("loss:", loss.numpy())
            optimizer.step()
            moment_update(model, model_ema, args.alpha)
        print(
            "max diff feat_q:",
            np.max(
                np.abs(torch_input_output_grad[1][0].detach().numpy() -
                       feat_q.numpy())))
        print(
            "max diff out:",
            np.max(
                np.abs(torch_input_output_grad[1][1].detach().numpy() -
                       out.numpy())))
        print(
            "max diff loss:",
            np.max(
                np.abs(torch_input_output_grad[1][2].detach().numpy() -
                       loss.numpy())))

        name2grad = dict()
        for name, p in dict(model.named_parameters()).items():
            if p.grad is not None:
                name2grad[name] = p.grad
                torch_grad = torch_input_output_grad[2][name].numpy()

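                # paddle stores Linear weights as (in, out), transposed relative to PyTorch, so transpose the gradient before comparing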
                if "linear" in name and "weight" in name:
                    torch_grad = torch_grad.T
                max_grad_diff = np.max(np.abs(p.grad - torch_grad))
                print("max grad diff:", name, max_grad_diff)
        input_output_grad.append(name2grad)
Example #4
            if "optim" in torch_fn:
                continue  # skip optimizer file

            if "best3" not in torch_fn:
                continue
            import torch as pytorch
            torch_state_dict = pytorch.load(torch_fn)

            # Next, build the model
            print(torch_fn)
            out_fn = torch_fn.replace(".pth", ".pdparams")
            if os.path.basename(torch_fn).startswith("G"):
                G = model.Generator(**config)
                torch_G = get_pytorch_G_model(config)
                torch_G.load_state_dict(torch_state_dict)
                load_pytorch_pretrain_model(G, torch_state_dict)
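                # compare PyTorch and paddle generator outputs on the same fixed (z, y) input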
                z = np.zeros((1, 128))
                y = np.ones((1, 1))
                import torch as pytorch
                torch_X = torch_G(pytorch.Tensor(z), pytorch.LongTensor(y))
                X = G(
                    paddorch.Tensor(z).astype("float32"),
                    paddorch.Tensor(y).astype("int64"))
                print(torch_X.detach().numpy().mean(),
                      X.detach().numpy().mean())
                print("saved file:", out_fn)
                paddorch.save(G.state_dict(), out_fn)
            elif os.path.basename(torch_fn).startswith("D"):
                D = model.Discriminator(**config)
                load_pytorch_pretrain_model(D, torch_state_dict)
                paddorch.save(D.state_dict(), out_fn)
Example #5
def build_model(args):
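    # Load the PyTorch StarGAN v2 checkpoint, build paddle copies of the networks, and check that their outputs match.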
    import torch as pytorch
    from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
    pytorch_state_dict = pytorch.load("../../../starganv2_paddle/expr/checkpoints/celeba_hq/100000_nets_ema.pt",
                                      map_location=pytorch.device('cpu'))

    generator, mapping_network, style_encoder = eval_pytorch_model(args)
    x = np.ones((1, 3, 256, 256)).astype("float32")
    batch_size = 2000

    batch_size2 = 3
    x = np.random.randn(batch_size2, 3, 256, 256).astype("float32")
    y = np.random.randint(0, 2, batch_size2).astype("int32")
    s = np.random.randn(args.style_dim * batch_size2).astype("float32").reshape(batch_size2, -1)
    z = np.random.randn(16 * batch_size2).astype("float32").reshape(batch_size2, -1)
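    # run the PyTorch networks once to produce reference outputs for the comparisons below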
    generator.load_state_dict(pytorch_state_dict['generator'])
    g_out=generator(pytorch.FloatTensor(x), pytorch.FloatTensor(s))
    mapping_network.load_state_dict(pytorch_state_dict['mapping_network'])
    m_out=mapping_network(pytorch.FloatTensor(z), pytorch.LongTensor(y))
    y_train = np.random.randint(0,2, batch_size).astype("int32")
    z_train = np.random.randn(16*batch_size).astype("float32").reshape(batch_size, -1)
    m_out_train,m_out_train_1,m_out_train_2 = mapping_network.finetune(pytorch.FloatTensor(z_train), pytorch.LongTensor(y_train))
    style_encoder.load_state_dict(pytorch_state_dict['style_encoder'])
    s_out=style_encoder(pytorch.FloatTensor(x), pytorch.LongTensor(y) )

    import paddle.fluid as fluid
    import paddorch as torch
    import paddorch.nn.functional as F
    # place = fluid.CPUPlace()
    place = fluid.CUDAPlace(0)


    with fluid.dygraph.guard(place=place):
        import sys
        sys.path.append("../../../starganv2_paddle")
        import core.model



        generator_ema = core.model.Generator(args.img_size, args.style_dim, w_hpf=args.w_hpf) #copy.deepcopy(generator)
        mapping_network_ema =core.model.MappingNetwork(args.latent_dim, args.style_dim, args.num_domains)  # copy.deepcopy(mapping_network)
        style_encoder_ema =core.model.StyleEncoder(args.img_size, args.style_dim, args.num_domains) # copy.deepcopy(style_encoder)

        nets = Munch(generator=generator,
                     mapping_network=mapping_network,
                     style_encoder=style_encoder,
                     )


        print("load generator")
        load_pytorch_pretrain_model(generator_ema, pytorch_state_dict['generator'])
        print("load mapping_network")
        load_pytorch_pretrain_model(mapping_network_ema, pytorch_state_dict['mapping_network'])
        print("load style_encoder")
        load_pytorch_pretrain_model(style_encoder_ema, pytorch_state_dict['style_encoder'])

        nets_ema = Munch(generator=generator_ema,
                         mapping_network=mapping_network_ema,
                         style_encoder=style_encoder_ema)
        nets_ema_state_dict=dict()

        # nets_ema['mapping_network'].load_state_dict(
        #     torch.load("../../expr/checkpoints/celeba_hq/100000_nets_ema.ckpt/mapping_network.pdparams"))

        nets_ema['mapping_network'].train()
        d_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-4,
                                                    parameter_list=nets_ema['mapping_network'].parameters())
        from tqdm import tqdm
        z_train_p = torch.Tensor(z_train)
        y_train_p = torch.LongTensor(y_train)
        m_out_train_p = torch.Tensor(m_out_train.detach().numpy())
        m_out_train_1p = torch.Tensor(m_out_train_1.detach().numpy())
        m_out_train_2p = torch.Tensor(m_out_train_2.detach().numpy())
        # dummy_net=fluid.dygraph.Linear(16,1)
        # d_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-4,
        #                                             parameter_list=dummy_net.parameters())
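        # fit the paddle mapping_network so its finetune outputs match the PyTorch ones (MSE over the three outputs)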
        for ii in range(10000):
            # predictions=dummy_net(z_train_p )
            # predictions2=torch.varbase_to_tensor(predictions)
            # d_avg_cost = fluid.layers.mse_loss(predictions2, m_out_train_p)
            # fluid.layers.assign(predictions,predictions2)
            out, out1, out2 = nets_ema['mapping_network'].finetune(z_train_p, y_train_p)
            d_avg_cost = (fluid.layers.mse_loss(out, m_out_train_p)
                          + fluid.layers.mse_loss(out1, m_out_train_1p)
                          + fluid.layers.mse_loss(out2, m_out_train_2p))

            d_avg_cost.backward()
            d_optimizer.minimize(d_avg_cost)
            nets_ema['mapping_network'].clear_gradients()
            if ii % 99 == 0:
                print("d_avg_cost", d_avg_cost.numpy())
                nets_ema['mapping_network'].eval()
                m_out_t = nets_ema['mapping_network'](torch.Tensor(z), torch.LongTensor(y)).numpy()
                print(ii, "torch m result:", m_out.detach().numpy().mean(), m_out_t.mean())
                nets_ema['mapping_network'].train()

                y_train = np.random.randint(0, 2, batch_size).astype("int32")
                z_train = np.random.randn(16 * batch_size).astype("float32").reshape(batch_size, -1)
                m_out_train, m_out_train_1, m_out_train_2 = mapping_network.finetune(pytorch.FloatTensor(z_train),
                                                                                     pytorch.LongTensor(y_train))
                z_train_p = torch.Tensor(z_train)
                y_train_p = torch.LongTensor(y_train)
                m_out_train_p = torch.Tensor(m_out_train.detach().numpy())
                if np.abs(m_out.detach().numpy().mean() - m_out_t.mean()) < 1e-5:
                    break

        nets_ema['mapping_network'].eval()
        g_out_t = nets_ema['generator'](torch.Tensor(x), torch.Tensor(s)).numpy()
        m_out_t = nets_ema['mapping_network'](torch.Tensor(z), torch.LongTensor(y)).numpy()
        s_out_t = nets_ema['style_encoder'](torch.Tensor(x), torch.LongTensor(y)).numpy()

        print("torch g result:", g_out.mean().detach().numpy(), g_out_t.mean())
        print("torch s result:", s_out.mean().detach().numpy(), s_out_t.mean())
        print("torch m result:", m_out.mean().detach().numpy(), m_out_t.mean())
        nets_ema_state_dict['generator'] = generator_ema.state_dict()
        nets_ema_state_dict['mapping_network'] = mapping_network_ema.state_dict()
        nets_ema_state_dict['style_encoder'] = style_encoder_ema.state_dict()

        torch.save(nets_ema_state_dict, "../../../starganv2_paddle/expr/checkpoints/celeba_hq/100000_nets_ema.ckpt")
        if args.w_hpf > 0:
            fan = FAN(fname_pretrained="../../../starganv2_paddle/expr/checkpoints/wing.pdparams").eval()
            nets.fan = fan
            nets_ema.fan = fan

        return nets, nets_ema