def save(self, step):
    """Serialize every registered sub-module's state_dict into the
    step-numbered checkpoint file named by ``self.fname_template``."""
    fname = self.fname_template.format(step)
    print('Saving checkpoint into %s...' % fname)
    # One state_dict per registered module, keyed by its registration name.
    outdict = {name: module.state_dict()
               for name, module in self.module_dict.items()}
    porch.save(outdict, fname)
def save(self, dir, step):
    """Write the six UGATIT sub-network state_dicts into ``dir`` as a
    single step-tagged ``<dataset>_params_%07d.pt`` file.

    NOTE(review): ``dir`` shadows the builtin, but the parameter name is part
    of the public signature, so it is kept for keyword-compatibility.
    """
    names = ('genA2B', 'genB2A', 'disGA', 'disGB', 'disLA', 'disLB')
    params = {name: getattr(self, name).state_dict() for name in names}
    target = os.path.join(dir, self.dataset + '_params_%07d.pt' % step)
    torch.save(params, target)
return FAN(fname_pretrained="./wing.ckpt") if __name__ == '__main__': from paddorch.convert_pretrain_model import load_pytorch_pretrain_model import torch as pytorch import torchvision # place = fluid.CPUPlace() place = fluid.CUDAPlace(0) np.random.seed(0) x=np.random.randn(1,3,256,256).astype("float32") with fluid.dygraph.guard(place=place): model=FAN() model.eval() pytorch_model=eval_pytorch_model() pytorch_model.eval() pytorch_model. torch_output = pytorch_model(pytorch.FloatTensor(x).)[1][0] pytorch_model pytorch_state_dict=pytorch_model.state_dict() load_pytorch_pretrain_model(model, pytorch_state_dict) torch.save(model.state_dict(),"wing") paddle_output = model(torch.Tensor(x))[1][0] print("torch mean",torch_output.mean()) print("paddle mean", torch.mean(paddle_output).numpy())
# NOTE(review): this chunk begins mid-call — "16, 64, 2)" closes a constructor
# started outside this view (presumably building mapping_network_ema) — so the
# code is kept byte-identical.  It fits mapping_network_ema against cached
# (z_train, y_train, m_out_train) targets with AdamOptimizer + MSE on random
# minibatches, and re-saves the checkpoint whenever the current minibatch loss
# beats the best seen so far.
# NOTE(review): "best" is judged on the training minibatch loss, not a held-out
# set, and the loop bound (100000000000) is effectively infinite — confirm both
# are intended.  Also, everything after the "#" in this collapsed physical line
# is dead (commented out) — the file appears whitespace-mangled upstream.
16, 64, 2) # copy.deepcopy(mapping_network) out_model_fn = "../expr/checkpoints/celeba_hq/100000_nets_ema.ckpt/mapping_network.pdparams" mapping_network_ema.load_state_dict(porch.load(out_model_fn)) d_optimizer = fluid.optimizer.AdamOptimizer( learning_rate=lr, parameter_list=mapping_network_ema.parameters()) from tqdm import tqdm mapping_network_ema.train() z_train_p = porch.Tensor(z_train) y_train_p = porch.LongTensor(y_train) m_out_train_p = porch.Tensor(m_out_train) best_loss = 100000000 for ii in range(100000000000): st = np.random.randint(0, z_train_p.shape[0] - batch_size) out = mapping_network_ema(z_train_p[st:st + batch_size], y_train_p[st:st + batch_size]) d_avg_cost = fluid.layers.mse_loss( out, m_out_train_p[st:st + batch_size] ) #+fluid.layers.mse_loss(out1,m_out_train_1p)+fluid.layers.mse_loss(out2,m_out_train_2p) d_avg_cost.backward() d_optimizer.minimize(d_avg_cost) mapping_network_ema.clear_gradients() if ii % 99 == 0: print("d_avg_cost", d_avg_cost.numpy()) if best_loss > d_avg_cost.numpy(): best_loss = d_avg_cost.numpy() porch.save(mapping_network_ema.state_dict(), out_model_fn) print("save model file:", out_model_fn)
# NOTE(review): this chunk begins with a dangling "else:" belonging to a
# conditional outside this view, so the code is kept byte-identical.  First
# part: tail of an inception-metrics closure — computes pool statistics
# (mu/cov), evaluates FID either with the torch implementation or the numpy
# one, frees intermediates, and returns (IS_mean, IS_std, FID) plus the
# closure itself.  Second part: a __main__ conversion script that loads a
# PyTorch inception checkpoint into the Paddle inception_v3 model and saves
# the converted weights as "inception_model.pdparams".
# NOTE(review): presumably `torch` here is the paddorch shim (torch.save on a
# Paddle state_dict) while `pytorch` is real PyTorch — verify against the
# file's import block, which is outside this view.
else: mu, sigma = np.mean(pool.numpy(), axis=0), np.cov(pool.numpy(), rowvar=False) if prints: print('Covariances calculated, getting FID...') if use_torch: FID = torch_calculate_frechet_distance( mu, sigma, torch.tensor(data_mu), torch.tensor(data_sigma)) FID = float(FID.numpy()) else: FID = numpy_calculate_frechet_distance(mu, sigma, data_mu, data_sigma) # Delete mu, sigma, pool, logits, and labels, just in case del mu, sigma, pool, logits, labels return IS_mean, IS_std, FID return get_inception_metrics if __name__ == '__main__': from paddle import fluid place = fluid.CUDAPlace(0) with fluid.dygraph.guard(place=place): inception_model = inception_v3() import torch as pytorch torch_state_dict = pytorch.load("inception_model.pth") from paddorch.convert_pretrain_model import load_pytorch_pretrain_model load_pytorch_pretrain_model(inception_model, torch_state_dict) torch.save(inception_model.state_dict(), "inception_model.pdparams")
# UGATIT training loop (Paddle dygraph port).  Per step: pull a real_A/real_B
# batch (re-creating the iterator on StopIteration), update the six
# discriminators with LSGAN-style MSE adversarial + CAM losses, then update
# the two generators with adversarial + cycle (L1) + identity (L1) + CAM (BCE)
# losses; Rho_clipper constrains AdaILN/ILN parameters after the optimizer
# step.  Losses are divided by self.n_gpu, and apply_collective_grads() is
# called when running data-parallel.  Every `print_freq` steps it tiles
# train/test sample grids to PNG; every `save_freq` steps it checkpoints, and
# every 1000 steps it additionally saves "<dataset>_params_latest.pt".
# NOTE(review): the learning-rate decay mutates the private
# `_learning_rate` attribute of the optimizers directly, and resume parses the
# step number out of the sorted checkpoint filename — both look intentional
# but fragile; left byte-identical because the statement flow spans the
# mangled physical-line boundaries below (several statements are split across
# them, so no inline comments can be inserted safely).
def train(self): if not self.is_parallel: writer = LogWriter(logdir=self.result_dir + "/log/") self.genA2B.train(), self.genB2A.train(), self.disGA.train( ), self.disGB.train(), self.disLA.train(), self.disLB.train() start_iter = 1 if self.resume: print(self.result_dir, self.dataset, os.path.join(self.result_dir, self.dataset, 'model', '*.pt')) model_list = glob( os.path.join(self.result_dir, self.dataset, 'model', '*.pt')) print("resuming, model_list", model_list) if not len(model_list) == 0: model_list.sort() start_iter = int(model_list[-1].split('_')[-1].split('.')[0]) print("resuming, start_iter", start_iter) self.load(os.path.join(self.result_dir, self.dataset, 'model'), start_iter) print(" [*] Load SUCCESS") if self.decay_flag and start_iter > (self.iteration // 2): self.G_optim._learning_rate -= (self.lr / (self.iteration // 2)) * ( start_iter - self.iteration // 2) self.D_optim._learning_rate -= (self.lr / (self.iteration // 2)) * ( start_iter - self.iteration // 2) # training loop print('training start !') start_time = time.time() for step in range(start_iter, self.iteration + 1): if self.decay_flag and step > (self.iteration // 2): self.G_optim._learning_rate -= (self.lr / (self.iteration // 2)) self.D_optim._learning_rate -= (self.lr / (self.iteration // 2)) try: real_A, _ = trainA_iter.next() except: trainA_iter = iter(self.trainA_loader) real_A, _ = trainA_iter.next() try: real_B, _ = trainB_iter.next() except: trainB_iter = iter(self.trainB_loader) real_B, _ = trainB_iter.next() real_A = real_A[0] real_B = real_B[0] ##some handling needed using paddle dataloader # Update D if hasattr(self.D_optim, "_optimizer"): # support meta optimizer self.D_optim._optimizer.clear_gradients() else: self.D_optim.clear_gradients() fake_A2B, _, _ = self.genA2B(real_A) fake_B2A, _, _ = self.genB2A(real_B) real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A) real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A) real_GB_logit, real_GB_cam_logit, _ = 
self.disGB(real_B) real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B) fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A) fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A) fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B) fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B) D_ad_loss_GA = self.MSE_loss( real_GA_logit, torch.ones_like(real_GA_logit).to( self.device)) + self.MSE_loss( fake_GA_logit, torch.zeros_like(fake_GA_logit).to(self.device)) D_ad_cam_loss_GA = self.MSE_loss( real_GA_cam_logit, torch.ones_like(real_GA_cam_logit).to( self.device)) + self.MSE_loss( fake_GA_cam_logit, torch.zeros_like(fake_GA_cam_logit).to(self.device)) D_ad_loss_LA = self.MSE_loss( real_LA_logit, torch.ones_like(real_LA_logit).to( self.device)) + self.MSE_loss( fake_LA_logit, torch.zeros_like(fake_LA_logit).to(self.device)) D_ad_cam_loss_LA = self.MSE_loss( real_LA_cam_logit, torch.ones_like(real_LA_cam_logit).to( self.device)) + self.MSE_loss( fake_LA_cam_logit, torch.zeros_like(fake_LA_cam_logit).to(self.device)) D_ad_loss_GB = self.MSE_loss( real_GB_logit, torch.ones_like(real_GB_logit).to( self.device)) + self.MSE_loss( fake_GB_logit, torch.zeros_like(fake_GB_logit).to(self.device)) D_ad_cam_loss_GB = self.MSE_loss( real_GB_cam_logit, torch.ones_like(real_GB_cam_logit).to( self.device)) + self.MSE_loss( fake_GB_cam_logit, torch.zeros_like(fake_GB_cam_logit).to(self.device)) D_ad_loss_LB = self.MSE_loss( real_LB_logit, torch.ones_like(real_LB_logit).to( self.device)) + self.MSE_loss( fake_LB_logit, torch.zeros_like(fake_LB_logit).to(self.device)) D_ad_cam_loss_LB = self.MSE_loss( real_LB_cam_logit, torch.ones_like(real_LB_cam_logit).to( self.device)) + self.MSE_loss( fake_LB_cam_logit, torch.zeros_like(fake_LB_cam_logit).to(self.device)) D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA + D_ad_loss_LA + D_ad_cam_loss_LA) / self.n_gpu D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB + D_ad_loss_LB + D_ad_cam_loss_LB) 
/ self.n_gpu Discriminator_loss = D_loss_A + D_loss_B Discriminator_loss.backward() if self.is_parallel: self.disGA.apply_collective_grads() self.disGB.apply_collective_grads() self.disLA.apply_collective_grads() self.disLB.apply_collective_grads() self.genA2B.apply_collective_grads() self.genB2A.apply_collective_grads() self.D_optim.minimize(Discriminator_loss) # Update G if hasattr(self.G_optim, "_optimizer"): # support meta optimizer self.G_optim._optimizer.clear_gradients() else: self.G_optim.clear_gradients() fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A) fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B) fake_A2B2A, _, _ = self.genB2A(fake_A2B) fake_B2A2B, _, _ = self.genA2B(fake_B2A) fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A) fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B) fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A) fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A) fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B) fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B) G_ad_loss_GA = self.MSE_loss( fake_GA_logit, torch.ones_like(fake_GA_logit).to(self.device)) G_ad_cam_loss_GA = self.MSE_loss( fake_GA_cam_logit, torch.ones_like(fake_GA_cam_logit).to(self.device)) G_ad_loss_LA = self.MSE_loss( fake_LA_logit, torch.ones_like(fake_LA_logit).to(self.device)) G_ad_cam_loss_LA = self.MSE_loss( fake_LA_cam_logit, torch.ones_like(fake_LA_cam_logit).to(self.device)) G_ad_loss_GB = self.MSE_loss( fake_GB_logit, torch.ones_like(fake_GB_logit).to(self.device)) G_ad_cam_loss_GB = self.MSE_loss( fake_GB_cam_logit, torch.ones_like(fake_GB_cam_logit).to(self.device)) G_ad_loss_LB = self.MSE_loss( fake_LB_logit, torch.ones_like(fake_LB_logit).to(self.device)) G_ad_cam_loss_LB = self.MSE_loss( fake_LB_cam_logit, torch.ones_like(fake_LB_cam_logit).to(self.device)) G_recon_loss_A = self.L1_loss(fake_A2B2A, real_A) G_recon_loss_B = self.L1_loss(fake_B2A2B, real_B) G_identity_loss_A = self.L1_loss(fake_A2A, real_A) 
G_identity_loss_B = self.L1_loss(fake_B2B, real_B) G_cam_loss_A = self.BCE_loss( fake_B2A_cam_logit, torch.ones_like(fake_B2A_cam_logit).to( self.device)) + self.BCE_loss( fake_A2A_cam_logit, torch.zeros_like(fake_A2A_cam_logit).to(self.device)) G_cam_loss_B = self.BCE_loss( fake_A2B_cam_logit, torch.ones_like(fake_A2B_cam_logit).to( self.device)) + self.BCE_loss( fake_B2B_cam_logit, torch.zeros_like(fake_B2B_cam_logit).to(self.device)) G_loss_A = (self.adv_weight * (G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA) + self.cycle_weight * G_recon_loss_A + self.identity_weight * G_identity_loss_A + self.cam_weight * G_cam_loss_A) / self.n_gpu G_loss_B = (self.adv_weight * (G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB + G_ad_cam_loss_LB) + self.cycle_weight * G_recon_loss_B + self.identity_weight * G_identity_loss_B + self.cam_weight * G_cam_loss_B) / self.n_gpu Generator_loss = G_loss_A + G_loss_B Generator_loss.backward() if self.is_parallel: self.disGA.apply_collective_grads() self.disGB.apply_collective_grads() self.disLA.apply_collective_grads() self.disLB.apply_collective_grads() self.genA2B.apply_collective_grads() self.genB2A.apply_collective_grads() self.G_optim.minimize(Generator_loss) # clip parameter of AdaILN and ILN, applied after optimizer step self.Rho_clipper(self.genA2B) self.Rho_clipper(self.genB2A) if not self.is_parallel: writer.add_scalar(tag="G/G_loss_A", step=step, value=G_loss_A.numpy()) writer.add_scalar(tag="G/G_loss_B", step=step, value=G_loss_B.numpy()) writer.add_scalar(tag="D/D_loss_A", step=step, value=D_loss_A.numpy()) writer.add_scalar(tag="D/D_loss_B", step=step, value=D_loss_B.numpy()) writer.add_scalar(tag="D/Discriminator_loss", step=step, value=Discriminator_loss.numpy()) writer.add_scalar(tag="D/Generator_loss", step=step, value=Generator_loss.numpy()) if step % 10 == 9: writer.add_image("fake_A2B", (porch.Tensor(fake_A2B[0] * 255)).clamp_( 0, 255).numpy().transpose( [1, 2, 0]).astype(np.uint8), step) 
writer.add_image("fake_B2A", (porch.Tensor(fake_B2A[0] * 255)).clamp_( 0, 255).numpy().transpose( [1, 2, 0]).astype(np.uint8), step) writer.add_image("fake_A2B2A", (porch.Tensor(fake_A2B[0] * 255)).clamp_( 0, 255).numpy().transpose( [1, 2, 0]).astype(np.uint8), step) writer.add_image("fake_B2A2B", (porch.Tensor(fake_B2A[0] * 255)).clamp_( 0, 255).numpy().transpose( [1, 2, 0]).astype(np.uint8), step) print("[%5d/%5d] time: %4.4f d_loss: %.8f, g_loss: %.8f" % (step, self.iteration, time.time() - start_time, Discriminator_loss, Generator_loss)) if step % self.print_freq == 0: train_sample_num = 5 test_sample_num = 5 A2B = np.zeros((self.img_size * 7, 0, 3)) B2A = np.zeros((self.img_size * 7, 0, 3)) self.genA2B.eval(), self.genB2A.eval(), self.disGA.eval( ), self.disGB.eval(), self.disLA.eval(), self.disLB.eval() for _ in range(train_sample_num): try: real_A, _ = trainA_iter.next() except: trainA_iter = iter(self.trainA_loader) real_A, _ = trainA_iter.next() try: real_B, _ = trainB_iter.next() except: trainB_iter = iter(self.trainB_loader) real_B, _ = trainB_iter.next() real_A, real_B = real_A[0], real_B[0] fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A) fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B) fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B) fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A) fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A) fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B) A2B = np.concatenate( (A2B, np.concatenate( (RGB2BGR(tensor2numpy(denorm(real_A[0]))), cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))), cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))), cam(tensor2numpy(fake_A2B2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))), 0)), 1) B2A = np.concatenate( (B2A, np.concatenate( (RGB2BGR(tensor2numpy(denorm(real_B[0]))), cam(tensor2numpy(fake_B2B_heatmap[0]), self.img_size), 
RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))), cam(tensor2numpy(fake_B2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))), cam(tensor2numpy(fake_B2A2B_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))), 0)), 1) for _ in range(test_sample_num): try: real_A, _ = testA_iter.next() except: testA_iter = iter(self.testA_loader) real_A, _ = testA_iter.next() try: real_B, _ = testB_iter.next() except: testB_iter = iter(self.testB_loader) real_B, _ = testB_iter.next() real_A, real_B = real_A[0], real_B[0] fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A) fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B) fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B) fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A) fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A) fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B) A2B = np.concatenate( (A2B, np.concatenate( (RGB2BGR(tensor2numpy(denorm(real_A[0]))), cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))), cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))), cam(tensor2numpy(fake_A2B2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))), 0)), 1) B2A = np.concatenate( (B2A, np.concatenate( (RGB2BGR(tensor2numpy(denorm(real_B[0]))), cam(tensor2numpy(fake_B2B_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))), cam(tensor2numpy(fake_B2A_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))), cam(tensor2numpy(fake_B2A2B_heatmap[0]), self.img_size), RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))), 0)), 1) if (not self.is_parallel ) or fluid.dygraph.parallel.Env().local_rank == 0: cv2.imwrite( os.path.join(self.result_dir, self.dataset, 'img', 'A2B_%07d.png' % step), A2B * 255.0) cv2.imwrite( os.path.join(self.result_dir, self.dataset, 'img', 'B2A_%07d.png' % step), B2A * 255.0) self.genA2B.train(), self.genB2A.train(), 
self.disGA.train( ), self.disGB.train(), self.disLA.train(), self.disLB.train() if step % self.save_freq == 0: if (not self.is_parallel ) or fluid.dygraph.parallel.Env().local_rank == 0: self.save( os.path.join(self.result_dir, self.dataset, 'model'), step) if step % 1000 == 0: params = {} params['genA2B'] = self.genA2B.state_dict() params['genB2A'] = self.genB2A.state_dict() params['disGA'] = self.disGA.state_dict() params['disGB'] = self.disGB.state_dict() params['disLA'] = self.disLA.state_dict() params['disLB'] = self.disLB.state_dict() if (not self.is_parallel ) or fluid.dygraph.parallel.Env().local_rank == 0: torch.save( params, os.path.join(self.result_dir, self.dataset + '_params_latest.pt'))
# NOTE(review): this chunk begins mid-script (torch_fn / torch_state_dict /
# config are bound outside this view), so the code is kept byte-identical.
# It converts one BigGAN PyTorch checkpoint to Paddle format: files starting
# with "G" become a Generator (with a forward-pass mean comparison against the
# PyTorch model), files starting with "D" a Discriminator, and anything else
# is treated as a raw state_dict whose activation entries are replaced with
# paddorch ReLU state_dicts before saving via save_dygraph.
# NOTE(review): the final os.system("mv ...") renames the .pdopt artifact over
# the target file with an unquoted shell path — fine for trusted local paths,
# but worth confirming torch_fn never contains spaces.
print(torch_fn) out_fn = torch_fn.replace(".pth", ".pdparams") if os.path.basename(torch_fn).startswith("G"): G = model.Generator(**config) torch_G = get_pytorch_G_model(config) torch_G.load_state_dict(torch_state_dict) load_pytorch_pretrain_model(G, torch_state_dict) z = np.zeros((1, 128)) y = np.ones((1, 1)) import torch as pytorch torch_X = torch_G(pytorch.Tensor(z), pytorch.LongTensor(y)) X = G( paddorch.Tensor(z).astype("float32"), paddorch.Tensor(y).astype("int64")) print(torch_X.detach().numpy().mean(), X.detach().numpy().mean()) print("saved file:", out_fn) paddorch.save(G.state_dict(), out_fn) elif os.path.basename(torch_fn).startswith("D"): D = model.Discriminator(**config) load_pytorch_pretrain_model(D, torch_state_dict) paddorch.save(D.state_dict(), out_fn) print("saved file:", out_fn) else: ##state_dict torch_state_dict['config']['D_activation'] = paddorch.nn.ReLU( ).state_dict() torch_state_dict['config']['G_activation'] = paddorch.nn.ReLU( ).state_dict() fluid.dygraph.save_dygraph(torch_state_dict, out_fn) os.system("mv %s.pdopt %s" % (out_fn, out_fn))
# GCC/MoCo-style graph contrastive pretraining & finetuning entry point.
# Flow: seed everything -> optionally replace `args` with the options stored
# in a resumed checkpoint -> build dataset/dataloader (labeled + 10-fold
# StratifiedKFold split when finetuning on classification sets) -> build twin
# GraphEncoders (model / model_ema), MemoryMoCo contrast buffer and criterion
# -> optimizer (sgd/adam/adagrad, with gradient clipping on the adam paths)
# -> optionally restore model/contrast/output_layer state -> epoch loop that
# trains (train_finetune or train_moco), checkpoints every save_freq epochs
# plus a rolling "current.pth", and finally returns valid_f1 when finetuning.
# NOTE(review): the bare `except:` around the per-epoch training call swallows
# *all* errors (including KeyboardInterrupt) and just continues — deliberate
# best-effort behavior by the look of it, but worth confirming.
# NOTE(review): `torch.utils.data.graph.Dataloader` is not the standard
# PyTorch spelling — presumably a paddorch/DGL shim; verify against the
# imports at the top of the file (outside this view).
# NOTE(review): on resume, `args = pretrain_args` discards most CLI arguments
# in favor of the checkpointed ones (only fold_idx/gpu/finetune/... are copied
# back) — confirm this override is intended.
# Kept byte-identical: the control flow spans the mangled physical-line
# boundaries below (several statements are split across them), so no inline
# comments can be inserted safely.
def main(args): dgl.random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu >= 0: torch.cuda.manual_seed(args.seed) if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location="cpu") pretrain_args = checkpoint["opt"] pretrain_args.fold_idx = args.fold_idx pretrain_args.gpu = args.gpu pretrain_args.finetune = args.finetune pretrain_args.resume = args.resume pretrain_args.cv = args.cv pretrain_args.dataset = args.dataset pretrain_args.epochs = args.epochs pretrain_args.num_workers = args.num_workers if args.dataset in GRAPH_CLASSIFICATION_DSETS: # HACK for speeding up finetuning on graph classification tasks pretrain_args.num_workers = 1 pretrain_args.batch_size = args.batch_size args = pretrain_args else: print("=> no checkpoint found at '{}'".format(args.resume)) args = option_update(args) learning_rate = float(args.learning_rate) print(args) if args.gpu >= 0: assert args.gpu is not None and torch.cuda.is_available() print("Use GPU: {} for training".format(args.gpu)) assert args.positional_embedding_size % 2 == 0 print("setting random seeds") mem = psutil.virtual_memory() print("before construct dataset", mem.used / 1024**3) if args.finetune: if args.dataset in GRAPH_CLASSIFICATION_DSETS: dataset = GraphClassificationDatasetLabeled( dataset=args.dataset, rw_hops=args.rw_hops, subgraph_size=args.subgraph_size, restart_prob=args.restart_prob, positional_embedding_size=args.positional_embedding_size, ) labels = dataset.dataset.data.y.tolist() else: dataset = NodeClassificationDatasetLabeled( dataset=args.dataset, rw_hops=args.rw_hops, subgraph_size=args.subgraph_size, restart_prob=args.restart_prob, positional_embedding_size=args.positional_embedding_size, ) labels = dataset.data.y.argmax(dim=1).tolist() skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=args.seed) idx_list = [] for idx in skf.split(np.zeros(len(labels)), labels): 
idx_list.append(idx) assert (0 <= args.fold_idx and args.fold_idx < 10), "fold_idx must be from 0 to 9." train_idx, test_idx = idx_list[args.fold_idx] train_dataset = torch.utils.data.Subset(dataset, train_idx) valid_dataset = torch.utils.data.Subset(dataset, test_idx) elif args.dataset == "dgl": train_dataset = LoadBalanceGraphDataset( rw_hops=args.rw_hops, restart_prob=args.restart_prob, positional_embedding_size=args.positional_embedding_size, num_workers=args.num_workers, num_samples=args.num_samples, dgl_graphs_file="./data/small.bin", num_copies=args.num_copies, ) else: if args.dataset in GRAPH_CLASSIFICATION_DSETS: train_dataset = GraphClassificationDataset( dataset=args.dataset, rw_hops=args.rw_hops, subgraph_size=args.subgraph_size, restart_prob=args.restart_prob, positional_embedding_size=args.positional_embedding_size, ) else: train_dataset = NodeClassificationDataset( dataset=args.dataset, rw_hops=args.rw_hops, subgraph_size=args.subgraph_size, restart_prob=args.restart_prob, positional_embedding_size=args.positional_embedding_size, ) mem = psutil.virtual_memory() print("before construct dataloader", mem.used / 1024**3) train_loader = torch.utils.data.graph.Dataloader( dataset=train_dataset, batch_size=args.batch_size, collate_fn=labeled_batcher() if args.finetune else batcher(), shuffle=True if args.finetune else False, num_workers=args.num_workers, worker_init_fn=None if args.finetune or args.dataset != "dgl" else worker_init_fn, ) if args.finetune: valid_loader = torch.utils.data.graph.Dataloader( dataset=valid_dataset, batch_size=args.batch_size, collate_fn=labeled_batcher(), num_workers=args.num_workers, ) mem = psutil.virtual_memory() print("before training", mem.used / 1024**3) # create model and optimizer # n_data = train_dataset.total n_data = None model, model_ema = [ GraphEncoder( positional_embedding_size=args.positional_embedding_size, max_node_freq=args.max_node_freq, max_edge_freq=args.max_edge_freq, max_degree=args.max_degree, 
freq_embedding_size=args.freq_embedding_size, degree_embedding_size=args.degree_embedding_size, output_dim=args.hidden_size, node_hidden_dim=args.hidden_size, edge_hidden_dim=args.hidden_size, num_layers=args.num_layer, num_step_set2set=args.set2set_iter, num_layer_set2set=args.set2set_lstm_layer, norm=args.norm, gnn_model=args.model, degree_input=True, ) for _ in range(2) ] # copy weights from `model' to `model_ema' if args.moco: # model_ema.load_state_dict(model.state_dict()) ##complete copy of model moment_update(model, model_ema, 0) # set the contrast memory and criterion contrast = MemoryMoCo(args.hidden_size, n_data, args.nce_k, args.nce_t, use_softmax=True) if args.gpu >= 0: contrast = contrast if args.finetune: criterion = nn.CrossEntropyLoss() else: criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS() if args.gpu >= 0: criterion = criterion if args.gpu >= 0: model = model model_ema = model_ema import paddle if args.finetune: output_layer = nn.Linear(in_features=args.hidden_size, out_features=dataset.num_classes) if args.gpu >= 0: output_layer = output_layer output_layer_optimizer = torch.optim.Adam( output_layer.parameters(), lr=args.learning_rate, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay, grad_clip=paddle.nn.clip.ClipGradByValue(max=1)) def clear_bn(m): classname = m.__class__.__name__ if classname.find("BatchNorm") != -1: m.reset_running_stats() model.apply(clear_bn) if args.optimizer == "sgd": optimizer = torch.optim.SGD( model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay, ) elif args.optimizer == "adam": if args.finetune: optimizer = torch.optim.Adam( model.parameters(), lr=learning_rate, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay, grad_clip=paddle.nn.clip.ClipGradByValue(max=1), ) else: optimizer = torch.optim.Adam( model.parameters(), lr=learning_rate, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay, 
grad_clip=paddle.nn.clip.ClipGradByNorm(args.clip_norm)) elif args.optimizer == "adagrad": optimizer = torch.optim.Adagrad( model.parameters(), lr=args.learning_rate, lr_decay=args.lr_decay_rate, weight_decay=args.weight_decay, ) else: raise NotImplementedError # optionally resume from a checkpoint args.start_epoch = 1 if args.resume: if args.finetune: ##if finetune model exists, continue resume that if os.path.isdir(args.model_folder + "/current.pth"): args.resume = args.model_folder + "/current.pth" print("change resume model to finetune model path:", args.resume) ##find last end epoch import glob ckpt_epoches = glob.glob(args.model_folder + "/ckpt_epoch*.pth") if len(ckpt_epoches) > 0: args.start_epoch = sorted([ int( os.path.basename(x).replace(".pth", "").replace( "ckpt_epoch_", "")) for x in ckpt_epoches ])[-1] + 1 print("starting epoch:", args.start_epoch) args.epochs = args.epochs + args.start_epoch - 1 print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location="cpu") # checkpoint = torch.load(args.resume) # args.start_epoch = checkpoint["epoch"] + 1 model.load_state_dict(checkpoint["model"]) # optimizer.load_state_dict(checkpoint["optimizer"]) contrast.load_state_dict(checkpoint["contrast"]) if args.moco: model_ema.load_state_dict(checkpoint["model_ema"]) print("=> loaded successfully '{}' ".format(args.resume)) if args.finetune: if "output_layer" in checkpoint: output_layer.load_state_dict(checkpoint["output_layer"]) print("loaded output layer") # del checkpoint if args.gpu >= 0: torch.cuda.empty_cache() # tensorboard # logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2) sw = LogWriter(logdir=args.tb_folder) import gc gc.enable() for epoch in range(args.start_epoch, args.epochs + 1): adjust_learning_rate(epoch, args, optimizer) print("==> training...") time1 = time.time() try: if args.finetune: loss, _ = train_finetune( epoch, train_loader, model, output_layer, criterion, optimizer, 
output_layer_optimizer, sw, args, ) else: loss = train_moco( epoch, train_loader, model, model_ema, contrast, criterion, optimizer, sw, args, ) except: print("Error in Epoch", epoch) continue time2 = time.time() print("epoch {}, total time {:.2f}".format(epoch, time2 - time1)) # save model if epoch % args.save_freq == 0: print("==> Saving...") state = { "opt": vars(args).copy(), "model": model.state_dict(), "contrast": contrast.state_dict(), "optimizer": optimizer.state_dict() } if args.moco: state["model_ema"] = model_ema.state_dict() if args.finetune: state['output_layer'] = output_layer.state_dict() save_file = os.path.join( args.model_folder, "ckpt_epoch_{epoch}.pth".format(epoch=epoch)) torch.save(state, save_file) # help release GPU memory # del state # saving the model print("==> Saving...") state = { "opt": vars(args).copy(), "model": model.state_dict(), "contrast": contrast.state_dict(), "optimizer": optimizer.state_dict() } if args.moco: state["model_ema"] = model_ema.state_dict() if args.finetune: state['output_layer'] = output_layer.state_dict() save_file = os.path.join(args.model_folder, "current.pth") torch.save(state, save_file) if epoch % args.save_freq == 0: save_file = os.path.join( args.model_folder, "ckpt_epoch_{epoch}.pth".format(epoch=epoch)) torch.save(state, save_file) # help release GPU memory # del state if args.gpu >= 0: torch.cuda.empty_cache() if args.finetune: valid_loss, valid_f1 = test_finetune(epoch, valid_loader, model, output_layer, criterion, sw, args) print("epoch %d| valid f1: %.3f" % (epoch, valid_f1)) # del model,model_ema,train_loader gc.collect() return valid_f1
# place = fluid.CPUPlace() place = fluid.CUDAPlace(0) with fluid.dygraph.guard(place=place): x = np.random.randn(1, 3, 256, 256) print("paddle:", torch.nn.functional.avg_pool2d(torch.Tensor(x), kernel_size=3, stride=1, padding=1).numpy().mean()) print("torch:", pytorch.nn.functional.avg_pool2d(pytorch.FloatTensor(x), kernel_size=3, stride=1, padding=1).numpy().mean()) # sys.exit() model=InceptionV3() model.eval() pytorch_model=eval_pytorch_model() pytorch_model.eval() pytorch_model. x=np.ones((1,3,256,256)).astype("float32") torch_output=pytorch_model(pytorch.FloatTensor(x).) pytorch_model pytorch_state_dict=pytorch_model.state_dict() load_pytorch_pretrain_model(model, pytorch_state_dict) torch.save(model.state_dict(),"inception_v3_pretrained") paddle_output=model(torch.Tensor(x)) print("torch mean",torch_output.mean()) print("paddle mean", torch.mean(paddle_output).numpy())
# lpips_value += torch.mean(conv1x1((x_fmap - y_fmap) ** 2)) # print("torch alexnet mean", torch.mean(z),lpips_value) return lpips_value return LPIPS() if __name__ == '__main__': from paddorch.convert_pretrain_model import load_pytorch_pretrain_model import torch as pytorch import torchvision pytorch_model=eval_pytorch_model() pytorch_model. place = fluid.CPUPlace() x=np.ones((1,3,256,256)).astype("float32") y = np.zeros((1, 3, 256, 256)).astype("float32") pytorch_model.eval() torch_output=pytorch_model(pytorch.FloatTensor(x).,pytorch.FloatTensor(y).).detach().numpy() pytorch_model with fluid.dygraph.guard(place=place): model=LPIPS() pytorch_state_dict=pytorch_model.state_dict() load_pytorch_pretrain_model(model, pytorch_state_dict) torch.save(model.state_dict(),"LPIPS_pretrained") model.eval() paddle_output=model(torch.Tensor(x),torch.Tensor(y)).numpy() print("paddle output ",paddle_output) print("torch output ", torch_output)