def infer(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load the vocabulary and the preprocessed caption data (Data is unused below)
    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    # Use model.eval() at test time so BatchNorm and Dropout use their trained statistics
    encoder = Encoder(args['embed_size'], args['pooling_kernel']).eval().cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).eval().cuda()

    # Load the trained weights
    encoder.load_state_dict(torch.load(args['encoder_path']))
    decoder.load_state_dict(torch.load(args['decoder_path']))

    # Load the image
    image = load_image(args['val_img_path'], transform,
                       (args['resize'], args['resize']))
    image_tensor = image.cuda()

    # Feed the image through the model to generate a caption
    feature = encoder(image_tensor)
    index = decoder.sample(feature)
    index = index[0].cpu().numpy()

    # Convert indices to words
    words = []
    for ind in index:
        word = vocab.idx2word[ind]
        words.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(words[1:-1])  # drop the special <start> and <end> tokens
    print(sentence)

    image = Image.open(args['val_img_path'])
    plt.imshow(np.asarray(image))
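# Hedged sketch (not from the source): load_image is called above (and again in the
# evaluation script further down) but is not defined in these excerpts. A minimal
# version, assuming it opens the file with PIL, optionally resizes, applies the
# transform, and adds a batch dimension; the repo's helper may differ.
from PIL import Image


def load_image(image_path, transform=None, size=None):
    image = Image.open(image_path).convert('RGB')
    if size is not None:
        image = image.resize(size, Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)  # (C, H, W) -> (1, C, H, W)
    return image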
def main():
    args = set_args()

    encoder = Encoder(out_dim=args.z_dim)
    generator = Generator(z_dim=args.z_dim)
    encoder = load_model(args, encoder, 'encoder')
    generator = load_model(args, generator, 'generator')

    loader = set_loader(args)
    save_one_batch_img(args, loader, generator, encoder)
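# Hedged sketch (assumption, not from the source): save_one_batch_img is not
# defined in this excerpt. A plausible minimal version that encodes one batch,
# regenerates it, and writes a comparison grid; the signature matches the call
# above, but the body and output path are illustrative only.
import torch
import torchvision


def save_one_batch_img(args, loader, generator, encoder):
    generator.eval()
    encoder.eval()
    with torch.no_grad():
        batch = next(iter(loader))
        images = batch[0] if isinstance(batch, (list, tuple)) else batch
        z = encoder(images)       # images -> latent codes
        recon = generator(z)      # latent codes -> reconstructions
        grid = torch.cat([images, recon], dim=0)
        torchvision.utils.save_image(grid, 'one_batch.png', nrow=images.size(0))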
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetG = Decoder(nc, ngf, nz).to(device)
NetD = Discriminator(imageSize, nc, ndf, nz).to(device)
NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)  # note: rebinds the class name to its instance

NetE.apply(weights_init)
NetG.apply(weights_init)
NetD.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))
if opt.netD != '':
    NetD.load_state_dict(torch.load(opt.netD))

optimizer_encorder = optim.RMSprop(params=NetE.parameters(),
                                   lr=lr,
                                   alpha=0.9,
                                   eps=1e-8,
                                   weight_decay=0,
                                   momentum=0,
                                   centered=False)
device = {
    "network": torch.device(0),
    "images": torch.device(1),
    "test": torch.device(2)
}

csv_path = "../VOC2012/"
train_path = csv_path + "train_v1.csv"
test_path = csv_path + "test_v1.csv"

os.makedirs(arg.save_dir, exist_ok=True)
tensorboard = utils.TensorboardLogger("%s/tb" % (arg.save_dir))

E = nn.DataParallel(Encoder(), output_device=device["images"]).to(device["network"])
D = nn.DataParallel(Decoder(), output_device=device["images"]).to(device["network"])
loss = TotalLoss(device, (arg.batch_train, 3, *arg.resl))

optim = opt.Adam(list(E.parameters()) + list(D.parameters()), lr=arg.lr, betas=arg.betas)
scheduler = opt.lr_scheduler.LambdaLR(optim, lr_lambda=lambda epoch: 0.965**epoch)

train_loader = Loader(train_path, arg.batch_train, num_workers=arg.cpus,
def network_train(args):
    # set device
    device = torch.device('cuda' if args.gpu_no >= 0 else 'cpu')

    # get network
    network = AvatarNet(args.layers).to(device)

    # get data set
    data_set = ImageFolder(args.content_dir, args.imsize, args.cropsize, args.cencrop)

    # get loss calculator
    loss_network = Encoder(args.layers).to(device)
    mse_loss = torch.nn.MSELoss(reduction='mean').to(device)
    loss_seq = {'total': [], 'image': [], 'feature': [], 'tv': []}

    # get optimizer
    for param in network.encoder.parameters():
        param.requires_grad = False
    optimizer = torch.optim.Adam(network.decoder.parameters(), lr=args.lr)

    # training
    for iteration in range(args.max_iter):
        data_loader = torch.utils.data.DataLoader(data_set,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)
        input_image = next(iter(data_loader)).to(device)

        output_image = network(input_image, [input_image], train=True)

        # calculate losses
        total_loss = 0

        ## image reconstruction loss
        image_loss = mse_loss(output_image, input_image)
        loss_seq['image'].append(image_loss.item())
        total_loss += image_loss

        ## feature reconstruction loss
        input_features = loss_network(input_image)
        output_features = loss_network(output_image)
        feature_loss = 0
        for output_feature, input_feature in zip(output_features, input_features):
            feature_loss += mse_loss(output_feature, input_feature)
        loss_seq['feature'].append(feature_loss.item())
        total_loss += feature_loss * args.feature_weight

        ## total variation loss
        tv_loss = calc_tv_loss(output_image)
        loss_seq['tv'].append(tv_loss.item())
        total_loss += tv_loss * args.tv_weight

        loss_seq['total'].append(total_loss.item())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # print loss log and save network, loss log and output images
        if (iteration + 1) % args.check_iter == 0:
            imsave(torch.cat([input_image, output_image], dim=0),
                   args.save_path + "training_image.png")
            print("%s: Iteration: [%d/%d]\tImage Loss: %2.4f\tFeature Loss: %2.4f\tTV Loss: %2.4f\tTotal: %2.4f"
                  % (time.ctime(), iteration + 1, args.max_iter,
                     lastest_arverage_value(loss_seq['image']),
                     lastest_arverage_value(loss_seq['feature']),
                     lastest_arverage_value(loss_seq['tv']),
                     lastest_arverage_value(loss_seq['total'])))
            torch.save({
                'iteration': iteration + 1,
                'state_dict': network.state_dict(),
                'loss_seq': loss_seq
            }, args.save_path + 'check_point.pth')

    return network
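# Hedged sketch (assumption, not from the source): calc_tv_loss is used above but
# not shown. A common anisotropic total variation loss penalizes differences
# between neighboring pixels; the repo's version (squared vs. absolute differences,
# weighting, reduction) may differ.
import torch


def calc_tv_loss(x):
    # x: (N, C, H, W)
    tv_h = torch.mean(torch.abs(x[:, :, 1:, :] - x[:, :, :-1, :]))  # vertical neighbors
    tv_w = torch.mean(torch.abs(x[:, :, :, 1:] - x[:, :, :, :-1]))  # horizontal neighbors
    return tv_h + tv_w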
args_dict["test_image_dir"] = args.test_image_dir[0] args_dict["test_mask_dir"] = args.test_mask_dir[0] args_dict["is_train"] = args.train[0] args_dict["is_test"] = args.test[0] args_dict["epoch"] = args.epoch[0] args_dict["step_per_epoch"] = args.step_per_epoch[0] args_dict["batch"] = args.batch[0] args_dict["aug"] = args.aug[0] args_dict["test_mask"] = args.test_mask[0] if args_dict["cuda"]: device = torch.device("cuda") else: device = torch.device("cpu") encoder = Encoder(structure=args_dict["encoder_net"], cuda=args_dict["cuda"]) net = encoder.net.to(device) if args_dict["is_train"]: train_dispatch(train_image_dir=args_dict["train_image_dir"], train_mask_dir=args_dict["train_mask_dir"], val_image_dir=args_dict["val_image_dir"], val_mask_dir=args_dict["val_mask_dir"], epoch=args_dict["epoch"], step_per_epoch=args_dict["step_per_epoch"], net=net, batch=args_dict["batch"], device=device, encoder=args_dict["encoder_net"], aug=args_dict["aug"])
def main(args):
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing: augmentation and normalization for the pretrained ResNet
    transform = transforms.Compose([
        transforms.RandomCrop(args.im_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform,
                             args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Configure the network
    encoder = Encoder(args.embed_size).to(device)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                      args.num_layers).to(device)

    # Loss and optimizer: only the decoder and the encoder's linear/bn layers are trained
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            # mini-batch
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path, 'decoder.ckpt'))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path, 'encoder.ckpt'))
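# Why the targets come from pack_padded_sequence: the packed .data tensor
# concatenates all non-padded time steps across the batch (longest sequence first),
# which is the same flat layout the decoder emits, so CrossEntropyLoss can compare
# them directly. A minimal, self-contained illustration:
import torch
from torch.nn.utils.rnn import pack_padded_sequence

caps = torch.tensor([[1, 2, 3], [4, 5, 0]])  # batch of 2 captions, 0 = padding
lens = [3, 2]
packed = pack_padded_sequence(caps, lens, batch_first=True)
print(packed.data)         # tensor([1, 4, 2, 5, 3]) -- padding dropped
print(packed.batch_sizes)  # tensor([2, 2, 1])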
def look_network(device: str):
    # visualize the positional encoding
    pos_encoding = PositionalEncoding(10000, 512)(torch.zeros(1, 64, 512))
    plt.pcolormesh(pos_encoding[0].numpy(), cmap="RdBu")
    plt.xlabel("Depth")
    plt.xlim((0, 512))
    plt.ylabel("Position")
    plt.colorbar()
    plt.show()

    y = torch.rand(1, 60, 512)
    out = ScaledDotProductAttention()(y, y, y)
    print("Dot Attention Shape", out[0].shape, out[1].shape)

    temp_mha = MultiHeadAttention(features=512, num_heads=8)
    out, attn = temp_mha(q=torch.rand(1, 45, 512), k=y, v=y, mask=None)
    print("Multi Attention Shape", out.shape, attn.shape)

    sample_ffn = FeedForwardNetwork(512, 2048)
    print("Feed Forward Shape", sample_ffn(torch.rand(64, 50, 512)).shape)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 43, 512), None)
    print("Encoder Shape", sample_encoder_layer_output.shape)  # (batch_size, input_seq_len, d_model)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 50, 512), None)
    print("Encoder Shape", sample_encoder_layer_output.shape)  # (batch_size, input_seq_len, d_model)

    sample_encoder = Encoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        input_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 62).type(torch.LongTensor).to(device)
    sample_encoder_output = sample_encoder(temp_input, mask=None)
    print("Encoder Shape", sample_encoder_output.shape)  # (batch_size, input_seq_len, d_model)

    sample_decoder = Decoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        target_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 26).type(torch.LongTensor).to(device)
    output, attn = sample_decoder(
        temp_input,
        enc_output=sample_encoder_output,
        look_ahead_mask=None,
        padding_mask=None,
    )
    print("Decoder Shape", output.shape, attn["decoder_layer2_block2"].shape)
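# Hedged sketch (assumption, not from the source): the PositionalEncoding module
# visualized above is not shown. The standard sinusoidal encoding from
# "Attention Is All You Need" is
#   PE(pos, 2i)   = sin(pos / 10000^(2i/d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))
# The repo's module may wrap this differently (e.g. adding it to the input in
# forward()); this function just builds the table.
import math
import torch


def sinusoidal_positional_encoding(max_len, d_model):
    pos = torch.arange(max_len).unsqueeze(1).float()  # (max_len, 1)
    div = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
    pe = torch.zeros(max_len, d_model)
    pe[:, 0::2] = torch.sin(pos * div)  # even dimensions
    pe[:, 1::2] = torch.cos(pos * div)  # odd dimensions
    return pe.unsqueeze(0)  # (1, max_len, d_model)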
tag_vocab_size = len(tag_i2wDict)

x_train = LoadIndexDataset('./index_dataset/index_train_source_8000.txt', src_i2wDict)
y_train = LoadIndexDataset('./index_dataset/index_train_target_8000.txt', tag_i2wDict)
x_train = x_train[:100]
y_train = y_train[:100]

hidden_dim = 256
BATCH_SIZE = 1
EPOCH_NUM = 10
embed_dim = 50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = Encoder(src_vocab_size, embed_dim, hidden_dim)
decoder = Decoder(tag_vocab_size, embed_dim, hidden_dim)
network = Net(encoder, decoder, device, teacher_forcing_ratio=0.5)

loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.Adam(network.parameters())  # Adam optimizer

for epoch in range(EPOCH_NUM):
    print('*********************************')
    print('epoch: ', epoch + 1, 'of', EPOCH_NUM)
    i = 0
    while i * BATCH_SIZE < len(x_train):
        if (i + 1) * BATCH_SIZE < len(x_train):
            inputs = x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            target = y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        else:
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)  # note: rebinds the class name to its instance
NetG = Decoder(nc, ngf, nz).to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))

optimizer_encorder = optim.RMSprop(params=NetE.parameters(),
                                   lr=lr,
                                   alpha=0.9,
                                   eps=1e-8,
                                   weight_decay=0,
                                   momentum=0,
                                   centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(),
                                  lr=lr,
                                  alpha=0.9,
                                  eps=1e-8,
                                  weight_decay=0,
                                  momentum=0,
                                  centered=False)
def train(args):
    # Preprocess the data to build the vocabulary and caption data
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # Image preprocessing with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'], Data, vocab, transform,
                             args['batch_size'], shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training info
            if i % args['log_step'] == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                      .format(epoch, args['num_epochs'], i, total_step,
                              loss.item(), np.exp(loss.item())))

            # Save checkpoints
            if (i + 1) % args['save_step'] == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args['model_path'],
                                        'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args['model_path'],
                                        'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))

        # Also save a checkpoint at the end of each epoch
        torch.save(decoder.state_dict(),
                   os.path.join(args['model_path'],
                                'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(encoder.state_dict(),
                   os.path.join(args['model_path'],
                                'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
def run():
    # create directories
    save_dir = Path(FLAGS.save_dir)
    if save_dir.exists():
        logging.warning('The directory can be overwritten: {}'.format(FLAGS.save_dir))
    save_dir.mkdir(exist_ok=True, parents=True)

    log_dir = Path(FLAGS.tensorboard)
    if log_dir.exists():
        logging.warning('The directory will be removed: {}'.format(FLAGS.tensorboard))
        rm_path(log_dir)
    log_dir.mkdir(exist_ok=True, parents=True)

    # to handle errors while loading images
    Image.MAX_IMAGE_PIXELS = None
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # image generator
    dataset = ContentStyleLoader(content_root=FLAGS.content_dir,
                                 content_image_shape=(FLAGS.image_size, FLAGS.image_size),
                                 content_crop='random',
                                 content_crop_size=FLAGS.crop_size,
                                 style_root=FLAGS.style_dir,
                                 style_image_shape=(FLAGS.image_size, FLAGS.image_size),
                                 style_crop='random',
                                 style_crop_size=FLAGS.crop_size,
                                 n_per_epoch=FLAGS.dataset_size,
                                 batch_size=FLAGS.batch_size)

    # create model
    encoder = Encoder(input_shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                      pretrained=True,
                      name='encoder')
    # freeze the model
    for l in encoder.layers:
        l.trainable = False
    adain = AdaIN(alpha=1.0, name='adain')
    decoder = Decoder(input_shape=encoder.output_shape[-1][1:], name='decoder')

    # placeholders for inputs
    content_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3), name='content_input')
    style_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3), name='style_input')

    # forwarding
    content_features = encoder(content_input)
    style_features = encoder(style_input)
    normalized_feature = adain([content_features[-1], style_features[-1]])
    generated = decoder(normalized_feature)

    # loss calculation
    generated_features = encoder(generated)
    content_loss = Lambda(calculate_content_loss, name='content_loss')(
        [normalized_feature, generated_features[-1]])
    style_loss = Lambda(calculate_style_loss, name='style_loss')(
        [style_features, generated_features])
    loss = Lambda(lambda x: FLAGS.content_weight * x[0] + FLAGS.style_weight * x[1],
                  name='loss')([content_loss, style_loss])

    # trainer
    trainer = Model(inputs=[content_input, style_input], outputs=[loss])
    optim = optimizers.Adam(learning_rate=FLAGS.learning_rate)
    trainer.compile(optimizer=optim, loss=lambda _, y_pred: y_pred)
    trainer.summary()

    # callbacks
    callbacks = [
        # learning rate scheduler
        LearningRateScheduler(lambda epoch, _: FLAGS.learning_rate / (
            1.0 + FLAGS.learning_rate_decay * FLAGS.dataset_size * epoch)),
        # TensorBoard
        TensorBoard(str(log_dir), write_graph=False, update_freq='batch'),
        # save model
        SubmodelCheckpoint(str(save_dir / 'decoder.epoch-{epoch:d}.h5'),
                           submodel_name='decoder',
                           save_weights_only=True,
                           save_best_only=FLAGS.save_best_only,
                           save_freq=FLAGS.save_every if FLAGS.save_every else 'epoch')
    ]

    # train
    trainer.fit_generator(dataset,
                          epochs=FLAGS.epochs,
                          workers=FLAGS.workers,
                          callbacks=callbacks)
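# Hedged sketch (assumption, not from the source): calculate_content_loss and
# calculate_style_loss are not defined in this excerpt. In the AdaIN paper
# (Huang & Belongie, 2017), the content loss is the MSE between the AdaIN output
# and the deepest generated features, and the style loss matches channel-wise
# means and standard deviations across encoder layers. The repo's versions may
# differ in details such as the epsilon or per-layer weighting.
import tensorflow as tf


def calculate_content_loss(inputs):
    normalized_feature, generated_feature = inputs
    return tf.reduce_mean(tf.square(generated_feature - normalized_feature))


def calculate_style_loss(inputs, epsilon=1e-5):
    style_features, generated_features = inputs
    loss = 0.0
    for s, g in zip(style_features, generated_features):
        # channel-wise statistics over the spatial axes (NHWC layout)
        s_mean, s_var = tf.nn.moments(s, axes=[1, 2])
        g_mean, g_var = tf.nn.moments(g, axes=[1, 2])
        loss += tf.reduce_mean(tf.square(g_mean - s_mean))
        loss += tf.reduce_mean(tf.square(tf.sqrt(g_var + epsilon) - tf.sqrt(s_var + epsilon)))
    return loss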
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
NetG = Decoder(nc, ngf, nz).to(device)
Sampler = Sampler().to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
NetE.load_state_dict(torch.load(opt.netE, map_location=opt.cuda))
NetG.load_state_dict(torch.load(opt.netG, map_location=opt.cuda))
NetE.eval()
NetG.eval()

# 21 attributes
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = Encoder(args.embed_size).eval()
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Load the validation image set
    lis = os.listdir(args.image_dir)
    num = len(lis)
    captions = []
    for i in range(num):
        im_pth = os.path.join(args.image_dir, lis[i])
        image = load_image(im_pth, transform)
        image_tensor = image.to(device)

        # Generate a caption from the image
        feature = encoder(image_tensor)
        sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length)

        # Convert word_ids to words
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.idx2word[word_id]
            if word == '<start>':
                continue
            if word == '<end>':
                break
            sampled_caption.append(word)
        sentence = ' '.join(sampled_caption)

        cap = {}
        id = int(lis[i][14:-4])  # extract image id from the file name
        cap['image_id'] = id
        cap['caption'] = sentence
        captions.append(cap)

    # save results
    with open('captions_res.json', 'w') as f:
        json.dump(captions, f)

    # evaluation with coco-caption evaluation tools
    coco = COCO(args.caption_path)
    cocoRes = coco.loadRes('captions_res.json')
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
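# Follow-up sketch (assumption, not from the source): after evaluate(), the
# coco-caption tools store the computed scores in cocoEval.eval, a metric-name ->
# score dict. Appending these lines at the end of main() would print them.
for metric, score in cocoEval.eval.items():
    print('{}: {:.3f}'.format(metric, score))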
def train(savedir, _list, root, epochs, batch_size, nz):
    # number of image channels
    channel = 1

    # Adam settings for the generator (default: lr=0.001, betas=(0.9, 0.999), weight_decay=0)
    G_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the discriminator
    D_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the encoder
    E_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the code discriminator
    CD_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}

    device = 'cuda'

    # create the output directories (pick a fresh name if savedir already exists)
    if os.path.exists(savedir):
        num = 1
        while 1:
            if os.path.exists('{}({})'.format(savedir, num)):
                num += 1
            else:
                savedir = '{}({})'.format(savedir, num)
                break
    os.makedirs(savedir, exist_ok=True)
    os.makedirs('{}/generating_image'.format(savedir), exist_ok=True)
    os.makedirs('{}/generating_image_rnd'.format(savedir), exist_ok=True)
    os.makedirs('{}/model'.format(savedir), exist_ok=True)
    os.makedirs('{}/loss'.format(savedir), exist_ok=True)

    myloss = MyLoss()

    df = pd.read_csv(_list, usecols=['Path'])
    img_id = df.values.tolist()

    check_img = Image.open('{}/{}'.format(root, img_id[0][0]))
    check_img = check_img.convert('L')
    width, height = check_img.size

    G_model = Generator(nz, width, height, channel)
    D_model = Discriminator(width, height, channel)
    E_model = Encoder(nz, width, height, channel)
    CD_model = CodeDiscriminator(nz)

    G_model = nn.DataParallel(G_model).to(device)
    D_model = nn.DataParallel(D_model).to(device)
    E_model = nn.DataParallel(E_model).to(device)
    CD_model = nn.DataParallel(CD_model).to(device)

    # set up the optimizers
    G_para = torch.optim.Adam(G_model.parameters(), lr=G_opt_para['lr'],
                              betas=G_opt_para['betas'],
                              weight_decay=G_opt_para['weight_decay'])
    D_para = torch.optim.Adam(D_model.parameters(), lr=D_opt_para['lr'],
                              betas=D_opt_para['betas'],
                              weight_decay=D_opt_para['weight_decay'])
    E_para = torch.optim.Adam(E_model.parameters(), lr=E_opt_para['lr'],
                              betas=E_opt_para['betas'],
                              weight_decay=E_opt_para['weight_decay'])
    CD_para = torch.optim.Adam(CD_model.parameters(), lr=CD_opt_para['lr'],
                               betas=CD_opt_para['betas'],
                               weight_decay=CD_opt_para['weight_decay'])

    # lists to record the loss history
    result = {}
    result['G_log_loss'] = []
    result['D_log_loss'] = []
    result['E_log_loss'] = []
    result['CD_log_loss'] = []

    dataset = LoadDataset(df, root, transform=Trans())
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               shuffle=True, drop_last=True)

    output_env('{}/env.txt'.format(savedir), batch_size, nz, G_opt_para,
               D_opt_para, E_opt_para, CD_opt_para, G_model, D_model, E_model,
               CD_model)

    # fixed random noise for testing
    z0 = torch.randn(batch_size, nz, 1, 1)

    for epoch in range(epochs):
        print('########## epoch : {}/{} ##########'.format(epoch + 1, epochs))

        G_log_loss, D_log_loss, E_log_loss, CD_log_loss = [], [], [], []

        for real_img in tqdm(train_loader):
            # sample input noise
            rnd_z = torch.randn(batch_size, nz, 1, 1)

            # move tensors to the GPU
            real_img = real_img.to(device)
            rnd_z = rnd_z.to(device)

            # feed real images to the encoder to obtain latent vectors
            real_z = E_model(real_img)

            # feed latent vectors to the generator to produce images
            fake_img = G_model(real_z)
            rnd_img = G_model(rnd_z)

            # feed latent vectors to the code discriminator to get its predictions
            real_cy = CD_model(real_z)
            rnd_cy = CD_model(rnd_z)

            # feed real and generated images to the discriminator to get its predictions
            real_y = D_model(real_img)
            fake_y = D_model(fake_img)
            rnd_y = D_model(rnd_img)

            # encoder loss
            E_loss = myloss.E_loss(real_img, fake_img, real_cy,
                                   torch.tensor(1.0).expand_as(real_cy).to(device), 1.0)
            E_log_loss.append(E_loss.item())

            # generator loss
            G_loss = myloss.G_loss(real_img, fake_img, fake_y, rnd_y,
                                   torch.tensor(1.0).expand_as(fake_y).to(device), 1.0)
            G_log_loss.append(G_loss.item())

            # code discriminator loss
            CD_loss = myloss.CD_loss(real_cy,
                                     torch.tensor(0.0).expand_as(real_cy).to(device),
                                     rnd_cy,
                                     torch.tensor(1.0).expand_as(rnd_cy).to(device))
            CD_log_loss.append(CD_loss.item())

            # discriminator loss
            D_loss = myloss.D_loss(real_y,
                                   torch.tensor(1.0).expand_as(real_y).to(device),
                                   fake_y, rnd_y,
                                   torch.tensor(0.0).expand_as(fake_y).to(device))
            D_log_loss.append(D_loss.item())

            # update encoder weights
            E_para.zero_grad()
            E_loss.backward(retain_graph=True)
            E_para.step()

            # update generator weights
            G_para.zero_grad()
            G_loss.backward(retain_graph=True)
            G_para.step()

            # update code discriminator weights
            CD_para.zero_grad()
            CD_loss.backward(retain_graph=True)
            CD_para.step()

            # update discriminator weights
            D_para.zero_grad()
            D_loss.backward()
            D_para.step()

        result['G_log_loss'].append(statistics.mean(G_log_loss))
        result['D_log_loss'].append(statistics.mean(D_log_loss))
        result['E_log_loss'].append(statistics.mean(E_log_loss))
        result['CD_log_loss'].append(statistics.mean(CD_log_loss))
        print('G_loss = {} , D_loss = {} , E_loss = {} , CD_loss = {}'.format(
            result['G_log_loss'][-1], result['D_log_loss'][-1],
            result['E_log_loss'][-1], result['CD_log_loss'][-1]))

        # save the loss log
        with open('{}/loss/log.txt'.format(savedir), mode='a') as f:
            f.write('##### Epoch {:03} #####\n'.format(epoch + 1))
            f.write('G: {}, D: {}, E: {}, CD: {}\n'.format(
                result['G_log_loss'][-1], result['D_log_loss'][-1],
                result['E_log_loss'][-1], result['CD_log_loss'][-1]))

        # save the models and output images at the fixed interval
        if (epoch + 1) % 10 == 0:
            # save the models
            torch.save(G_model.module.state_dict(),
                       '{}/model/G_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(D_model.module.state_dict(),
                       '{}/model/D_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(E_model.module.state_dict(),
                       '{}/model/E_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(CD_model.module.state_dict(),
                       '{}/model/CD_model_{}.pth'.format(savedir, epoch + 1))

            G_model.eval()
            # disable gradient tracking to save memory (not needed at test time)
            with torch.no_grad():
                rnd_img_test = G_model(z0)

            # save the generator's output images
            torchvision.utils.save_image(
                fake_img[:batch_size],
                "{}/generating_image/epoch_{:03}.png".format(savedir, epoch + 1))
            torchvision.utils.save_image(
                rnd_img_test[:batch_size],
                "{}/generating_image_rnd/epoch_{:03}.png".format(savedir, epoch + 1))

            G_model.train()

        # plot the losses at the fixed interval
        if (epoch + 1) % 50 == 0:
            x = np.linspace(1, epoch + 1, epoch + 1, dtype='int')
            plot(result['G_log_loss'], result['D_log_loss'],
                 result['E_log_loss'], result['CD_log_loss'], x, savedir)

        # save once more if the final epoch did not fall on a save interval
        if (epoch + 1) % 10 != 0 and epoch + 1 == epochs:
            torch.save(G_model.module.state_dict(),
                       '{}/model/G_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(D_model.module.state_dict(),
                       '{}/model/D_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(E_model.module.state_dict(),
                       '{}/model/E_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(CD_model.module.state_dict(),
                       '{}/model/CD_model_{}.pth'.format(savedir, epoch + 1))

            G_model.eval()
            with torch.no_grad():
                rnd_img_test = G_model(z0)
            torchvision.utils.save_image(
                fake_img[:batch_size],
                "{}/generating_image/epoch_{:03}.png".format(savedir, epoch + 1))
            torchvision.utils.save_image(
                rnd_img_test[:batch_size],
                "{}/generating_image_rnd/epoch_{:03}.png".format(savedir, epoch + 1))

            x = np.linspace(1, epoch + 1, epoch + 1, dtype='int')
            plot(result['G_log_loss'], result['D_log_loss'],
                 result['E_log_loss'], result['CD_log_loss'], x, savedir)
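# Hedged sketch (assumption, not from the source): MyLoss is not defined in this
# excerpt. The call signatures above are consistent with an alpha-GAN-style setup
# (Rosca et al., 2017): L1 reconstruction terms for the encoder and generator plus
# adversarial BCE terms. The repo's exact loss choices and weighting may differ,
# so treat this as an illustrative stand-in only.
import torch.nn as nn


class MyLoss:
    def __init__(self):
        self.bce = nn.BCELoss()
        self.l1 = nn.L1Loss()

    def E_loss(self, real_img, fake_img, real_cy, label, weight):
        # reconstruction + fooling the code discriminator
        return weight * self.l1(fake_img, real_img) + self.bce(real_cy, label)

    def G_loss(self, real_img, fake_img, fake_y, rnd_y, label, weight):
        # reconstruction + fooling the image discriminator on both image types
        return (weight * self.l1(fake_img, real_img)
                + self.bce(fake_y, label) + self.bce(rnd_y, label))

    def CD_loss(self, real_cy, real_label, rnd_cy, rnd_label):
        # separate encoded codes from prior codes
        return self.bce(real_cy, real_label) + self.bce(rnd_cy, rnd_label)

    def D_loss(self, real_y, real_label, fake_y, rnd_y, fake_label):
        # separate real images from both kinds of generated images
        return (self.bce(real_y, real_label)
                + self.bce(fake_y, fake_label) + self.bce(rnd_y, fake_label))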
def __init__(self, opt):
    super(Generator, self).__init__()
    self.encoder1 = Encoder(opt.ngpu, opt, opt.nz)
    self.decoder = Decoder(opt.ngpu, opt)
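# Hedged sketch (assumption, not from the source): the forward pass is not shown
# in this excerpt. Given the two submodules above, a plausible encode-then-decode
# flow; the actual class may also return the latent code or handle opt.ngpu.
def forward(self, x):
    latent = self.encoder1(x)    # image -> latent code (length opt.nz)
    return self.decoder(latent)  # latent code -> generated image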