Example #1
def infer(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # load the vocabulary
    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    # use model.eval() at test time so BatchNorm and Dropout use their trained statistics
    encoder = Encoder(args['embed_size'], args['pooling_kernel']).eval().cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()

    # load the trained parameters
    encoder.load_state_dict(torch.load(args['encoder_path']))
    decoder.load_state_dict(torch.load(args['decoder_path']))

    # load the image
    image = load_image(args['val_img_path'], transform,
                       (args['resize'], args['resize']))
    image_tensor = image.cuda()

    # run the model to produce a caption
    feature = encoder(image_tensor)
    index = decoder.sample(feature)
    index = index[0].cpu().numpy()

    # convert indices to words
    words = []
    for ind in index:
        word = vocab.idx2word[ind]
        words.append(word)
        if word == '<end>':
            break

    sentence = ' '.join(words[1:-1])  # strip the <start> and <end> special tokens
    print(sentence)
    image = Image.open(args['val_img_path'])
    plt.imshow(np.asarray(image))
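The load_image helper called above is not shown in this excerpt; a minimal sketch of such a helper, assuming it opens the file, optionally resizes it, applies the transform, and adds a batch dimension:

from PIL import Image

def load_image(image_path, transform=None, size=None):
    # hypothetical helper matching the call sites above
    image = Image.open(image_path).convert('RGB')
    if size is not None:
        image = image.resize(size, Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)  # add a batch dimension
    return image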
Example #2
def main():
    args = set_args()
    encoder = Encoder(out_dim=args.z_dim)
    generator = Generator(z_dim=args.z_dim)

    encoder = load_model(args, encoder, 'encoder')
    generator = load_model(args, generator, 'generator')

    loader = set_loader(args)
    save_one_batch_img(args, loader, generator, encoder)
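load_model and save_one_batch_img are defined elsewhere; a plausible sketch of the checkpoint-loading helper (the args.model_dir attribute and the file layout are assumptions, only the call signature comes from the snippet):

import os
import torch

def load_model(args, model, name):
    # hypothetical: load '<name>.pth' from an assumed args.model_dir
    state = torch.load(os.path.join(args.model_dir, '{}.pth'.format(name)),
                       map_location='cpu')
    model.load_state_dict(state)
    model.eval()
    return model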
Example #3
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetG = Decoder(nc, ngf, nz).to(device)
NetD = Discriminator(imageSize, nc, ndf, nz).to(device)
NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)
NetD.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))
if opt.netD != '':
    NetD.load_state_dict(torch.load(opt.netD))

optimizer_encorder = optim.RMSprop(params=NetE.parameters(),
                                   lr=lr, alpha=0.9, eps=1e-8, weight_decay=0,
                                   momentum=0, centered=False)

Example #4
    device = {
        "network": torch.device(0),
        "images": torch.device(1),
        "test": torch.device(2)
    }

    csv_path = "../VOC2012/"

    train_path = csv_path + "train_v1.csv"
    test_path = csv_path + "test_v1.csv"

    os.makedirs(arg.save_dir, exist_ok=True)
    tensorboard = utils.TensorboardLogger("%s/tb" % (arg.save_dir))

    E = nn.DataParallel(Encoder(),
                        output_device=device["images"]).to(device["network"])
    D = nn.DataParallel(Decoder(),
                        output_device=device["images"]).to(device["network"])

    loss = TotalLoss(device, (arg.batch_train, 3, *arg.resl))

    optim = opt.Adam(list(E.parameters()) + list(D.parameters()),
                     lr=arg.lr,
                     betas=arg.betas)
    scheduler = opt.lr_scheduler.LambdaLR(optim,
                                          lr_lambda=lambda epoch: 0.965**epoch)

    train_loader = Loader(train_path,
                          arg.batch_train,
                          num_workers=arg.cpus,
Example #5
def network_train(args):
    # set device
    device = torch.device('cuda' if args.gpu_no >= 0 else 'cpu')

    # get network
    network = AvatarNet(args.layers).to(device)

    # get data set
    data_set = ImageFolder(args.content_dir, args.imsize, args.cropsize,
                           args.cencrop)

    # get loss calculator
    loss_network = Encoder(args.layers).to(device)
    mse_loss = torch.nn.MSELoss(reduction='mean').to(device)
    loss_seq = {'total': [], 'image': [], 'feature': [], 'tv': []}

    # get optimizer
    for param in network.encoder.parameters():
        param.requires_grad = False
    optimizer = torch.optim.Adam(network.decoder.parameters(), lr=args.lr)

    # training
    for iteration in range(args.max_iter):
        data_loader = torch.utils.data.DataLoader(data_set,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)
        input_image = next(iter(data_loader)).to(device)

        output_image = network(input_image, [input_image], train=True)

        # calculate losses
        total_loss = 0
        ## image reconstruction loss
        image_loss = mse_loss(output_image, input_image)
        loss_seq['image'].append(image_loss.item())
        total_loss += image_loss

        ## feature reconstruction loss
        input_features = loss_network(input_image)
        output_features = loss_network(output_image)
        feature_loss = 0
        for output_feature, input_feature in zip(output_features,
                                                 input_features):
            feature_loss += mse_loss(output_feature, input_feature)
        loss_seq['feature'].append(feature_loss.item())
        total_loss += feature_loss * args.feature_weight

        ## total variation loss
        tv_loss = calc_tv_loss(output_image)
        loss_seq['tv'].append(tv_loss.item())
        total_loss += tv_loss * args.tv_weight

        loss_seq['total'].append(total_loss.item())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # print loss log and save network, loss log and output images
        if (iteration + 1) % args.check_iter == 0:
            imsave(torch.cat([input_image, output_image], dim=0),
                   args.save_path + "training_image.png")
            print(
                "%s: Iteration: [%d/%d]\tImage Loss: %2.4f\tFeature Loss: %2.4f\tTV Loss: %2.4f\tTotal: %2.4f"
                % (time.ctime(), iteration + 1, args.max_iter,
                   lastest_arverage_value(loss_seq['image']),
                   lastest_arverage_value(loss_seq['feature']),
                   lastest_arverage_value(loss_seq['tv']),
                   lastest_arverage_value(loss_seq['total'])))
            torch.save(
                {
                    'iteration': iteration + 1,
                    'state_dict': network.state_dict(),
                    'loss_seq': loss_seq
                }, args.save_path + 'check_point.pth')

    return network
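calc_tv_loss is referenced above but not defined in this excerpt; a common total-variation formulation that would fit here (a sketch, not necessarily the repository's exact normalization):

import torch

def calc_tv_loss(x):
    # mean absolute difference between neighboring pixels, NCHW layout
    tv_h = torch.mean(torch.abs(x[:, :, 1:, :] - x[:, :, :-1, :]))
    tv_w = torch.mean(torch.abs(x[:, :, :, 1:] - x[:, :, :, :-1]))
    return tv_h + tv_w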
Example #6
    args_dict["test_image_dir"] = args.test_image_dir[0]
    args_dict["test_mask_dir"] = args.test_mask_dir[0]
    args_dict["is_train"] = args.train[0]
    args_dict["is_test"] = args.test[0]
    args_dict["epoch"] = args.epoch[0]
    args_dict["step_per_epoch"] = args.step_per_epoch[0]
    args_dict["batch"] = args.batch[0]
    args_dict["aug"] = args.aug[0]
    args_dict["test_mask"] = args.test_mask[0]

    if args_dict["cuda"]:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    encoder = Encoder(structure=args_dict["encoder_net"],
                      cuda=args_dict["cuda"])
    net = encoder.net.to(device)

    if args_dict["is_train"]:
        train_dispatch(train_image_dir=args_dict["train_image_dir"],
                       train_mask_dir=args_dict["train_mask_dir"],
                       val_image_dir=args_dict["val_image_dir"],
                       val_mask_dir=args_dict["val_mask_dir"],
                       epoch=args_dict["epoch"],
                       step_per_epoch=args_dict["step_per_epoch"],
                       net=net,
                       batch=args_dict["batch"],
                       device=device,
                       encoder=args_dict["encoder_net"],
                       aug=args_dict["aug"])
Example #7
def main(args):
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing: augmentation and normalization for the pretrained ResNet
    transform = transforms.Compose([
        transforms.RandomCrop(args.im_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Configure the network
    encoder = Encoder(args.embed_size).to(device)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                      args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # mini-batch
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path, 'decoder.ckpt'))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path, 'encoder.ckpt'))
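pack_padded_sequence(captions, lengths, batch_first=True)[0] flattens the padded caption batch into a single tensor of valid tokens, which is what the decoder outputs are scored against. A small self-contained illustration:

import torch
from torch.nn.utils.rnn import pack_padded_sequence

# two captions padded to length 4; lengths are sorted in descending order
captions = torch.tensor([[1, 2, 3, 4],
                         [5, 6, 0, 0]])
packed = pack_padded_sequence(captions, [4, 2], batch_first=True)
print(packed.data)  # tensor([1, 5, 2, 6, 3, 4]) -- padding dropped, time-major order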
Example #8
def look_network(device: str):
    pos_encoding = PositionalEncoding(10000, 512)(torch.zeros(1, 64, 512))
    plt.pcolormesh(pos_encoding[0].numpy(), cmap="RdBu")
    plt.xlabel("Depth")
    plt.xlim((0, 512))
    plt.ylabel("Position")
    plt.colorbar()
    plt.show()

    y = torch.rand(1, 60, 512)
    out = ScaledDotProductAttention()(y, y, y)
    print("Dot Attention Shape", out[0].shape, out[1].shape)

    temp_mha = MultiHeadAttention(features=512, num_heads=8)
    out, attn = temp_mha(q=torch.rand(1, 45, 512), k=y, v=y, mask=None)
    print("Multi Attention Shape", out.shape, attn.shape)

    sample_ffn = FeedForwardNetwork(512, 2048)
    print("Feed Forward Shape", sample_ffn(torch.rand(64, 50, 512)).shape)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 43, 512), None)
    print(
        "Encoder Shape", sample_encoder_layer_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 50, 512), None)
    print(
        "Encoder Shape", sample_encoder_layer_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_encoder = Encoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        input_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 62).type(torch.LongTensor).to(device)
    sample_encoder_output = sample_encoder(temp_input, mask=None)
    print(
        "Encoder Shape", sample_encoder_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_decoder = Decoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        target_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 26).type(torch.LongTensor).to(device)
    output, attn = sample_decoder(
        temp_input,
        enc_output=sample_encoder_output,
        look_ahead_mask=None,
        padding_mask=None,
    )
    print("Decoder Shape", output.shape, attn["decoder_layer2_block2"].shape)
Example #9
    tag_vocab_size = len(tag_i2wDict)

    x_train = LoadIndexDataset('./index_dataset/index_train_source_8000.txt',
                               src_i2wDict)
    y_train = LoadIndexDataset('./index_dataset/index_train_target_8000.txt',
                               tag_i2wDict)
    x_train = x_train[:100]
    y_train = y_train[:100]

    hidden_dim = 256
    BATCH_SIZE = 1
    EPOCH_NUM = 10
    embed_dim = 50
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoder = Encoder(src_vocab_size, embed_dim, hidden_dim)
    decoder = Decoder(tag_vocab_size, embed_dim, hidden_dim)
    network = Net(encoder, decoder, device, teacher_forcing_ratio=0.5)

    loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = torch.optim.Adam(network.parameters())  # Adam optimizer

    for epoch in range(EPOCH_NUM):
        print('*********************************')
        print('epoch: ', epoch + 1, 'of', EPOCH_NUM)
        i = 0
        while i * BATCH_SIZE < len(x_train):
            if (i + 1) * BATCH_SIZE < len(x_train):
                inputs = x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                target = y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            else:
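                # assumed continuation (the original excerpt is truncated here):
                # the final, smaller batch simply takes whatever items remain
                inputs = x_train[i * BATCH_SIZE:]
                target = y_train[i * BATCH_SIZE:]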
Example #10
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)
NetG = Decoder(nc, ngf, nz).to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))

optimizer_encorder = optim.RMSprop(params=NetE.parameters(
), lr=lr, alpha=0.9, eps=1e-8, weight_decay=0, momentum=0, centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(),
                                  lr=lr, alpha=0.9, eps=1e-8, weight_decay=0,
                                  momentum=0, centered=False)

Example #11
def train(args):
    # preprocess the data to build the vocabulary and dataset
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # image preprocessing with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)

    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # print training info
            if i % args['log_step'] == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss.item(), np.exp(loss.item())))

            # save the models
            if (i + 1) % args['save_step'] == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        # also save the models at the end of each epoch
        torch.save(
            decoder.state_dict(),
            os.path.join(args['model_path'],
                         'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(
            encoder.state_dict(),
            os.path.join(args['model_path'],
                         'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
Example #12
def run():

    # create directories
    save_dir = Path(FLAGS.save_dir)
    if save_dir.exists():
        logging.warning('The directory can be overwritten: {}'.format(
            FLAGS.save_dir))
    save_dir.mkdir(exist_ok=True, parents=True)
    log_dir = Path(FLAGS.tensorboard)
    if log_dir.exists():
        logging.warning('The directory will be removed: {}'.format(
            FLAGS.tensorboard))
        rm_path(log_dir)
    log_dir.mkdir(exist_ok=True, parents=True)

    # to handle errors while loading images
    Image.MAX_IMAGE_PIXELS = None
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # image generator
    dataset = ContentStyleLoader(content_root=FLAGS.content_dir,
                                 content_image_shape=(FLAGS.image_size,
                                                      FLAGS.image_size),
                                 content_crop='random',
                                 content_crop_size=FLAGS.crop_size,
                                 style_root=FLAGS.style_dir,
                                 style_image_shape=(FLAGS.image_size,
                                                    FLAGS.image_size),
                                 style_crop='random',
                                 style_crop_size=FLAGS.crop_size,
                                 n_per_epoch=FLAGS.dataset_size,
                                 batch_size=FLAGS.batch_size)

    # create model
    encoder = Encoder(input_shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                      pretrained=True,
                      name='encoder')
    # freeze the model
    for l in encoder.layers:
        l.trainable = False
    adain = AdaIN(alpha=1.0, name='adain')
    decoder = Decoder(input_shape=encoder.output_shape[-1][1:], name='decoder')

    # placeholders for inputs
    content_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                          name='content_input')
    style_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                        name='style_input')

    # forwarding
    content_features = encoder(content_input)
    style_features = encoder(style_input)
    normalized_feature = adain([content_features[-1], style_features[-1]])
    generated = decoder(normalized_feature)

    # loss calculation
    generated_features = encoder(generated)
    content_loss = Lambda(calculate_content_loss, name='content_loss')(
        [normalized_feature, generated_features[-1]])
    style_loss = Lambda(calculate_style_loss, name='style_loss')(
        [style_features, generated_features])
    loss = Lambda(
        lambda x: FLAGS.content_weight * x[0] + FLAGS.style_weight * x[1],
        name='loss')([content_loss, style_loss])

    # trainer
    trainer = Model(inputs=[content_input, style_input], outputs=[loss])
    optim = optimizers.Adam(learning_rate=FLAGS.learning_rate)
    trainer.compile(optimizer=optim, loss=lambda _, y_pred: y_pred)
    trainer.summary()

    # callbacks
    callbacks = [
        # learning rate scheduler
        LearningRateScheduler(lambda epoch, _: FLAGS.learning_rate / (
            1.0 + FLAGS.learning_rate_decay * FLAGS.dataset_size * epoch)),
        # Tensor Board
        TensorBoard(str(log_dir), write_graph=False, update_freq='batch'),
        # save model
        SubmodelCheckpoint(
            str(save_dir / 'decoder.epoch-{epoch:d}.h5'),
            submodel_name='decoder',
            save_weights_only=True,
            save_best_only=FLAGS.save_best_only,
            save_freq=FLAGS.save_every if FLAGS.save_every else 'epoch')
    ]

    # train
    trainer.fit_generator(dataset,
                          epochs=FLAGS.epochs,
                          workers=FLAGS.workers,
                          callbacks=callbacks)
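The adain layer above aligns the channel-wise mean and standard deviation of the content features with those of the style features. A minimal sketch of the underlying operation (the Keras layer works channels-last; this sketch assumes PyTorch NCHW tensors for brevity and is not the layer's actual implementation):

import torch

def adain(content_feat, style_feat, eps=1e-5):
    # match content feature statistics to the style's, per sample and channel
    c_mean = content_feat.mean(dim=(2, 3), keepdim=True)
    c_std = content_feat.std(dim=(2, 3), keepdim=True) + eps
    s_mean = style_feat.mean(dim=(2, 3), keepdim=True)
    s_std = style_feat.std(dim=(2, 3), keepdim=True)
    return s_std * (content_feat - c_mean) / c_std + s_mean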
Example #13
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
NetG = Decoder(nc, ngf, nz).to(device)

Sampler = Sampler().to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
NetE.load_state_dict(torch.load(opt.netE, map_location=opt.cuda))
NetG.load_state_dict(torch.load(opt.netG, map_location=opt.cuda))

NetE.eval()
NetG.eval()

# 21 attributes
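The map_location argument to torch.load remaps saved tensors across devices; it accepts a device string, a torch.device, or a mapping function, so opt.cuda here is presumably such a value. For CPU-only loading, for instance, the common pattern is:

state = torch.load(opt.netE, map_location='cpu')  # remap GPU-saved tensors to the CPU
NetE.load_state_dict(state)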
Example #14
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = Encoder(args.embed_size).eval()
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # load validation image set
    lis = os.listdir(args.image_dir)
    num = len(lis)
    captions = []
    for i in range(num):

        im_pth = os.path.join(args.image_dir, lis[i])

        image = load_image(im_pth, transform)
        image_tensor = image.to(device)

        # Generate a caption from the image
        feature = encoder(image_tensor)
        sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length)

        # Convert word_ids to words
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.idx2word[word_id]
            if word == '<start>':
                continue
            if word == '<end>':
                break

            sampled_caption.append(word)

        sentence = ' '.join(sampled_caption)
        cap = {}
        id = int(lis[i][14:-4])  # extract the image id from the filename
        cap['image_id'] = id
        cap['caption'] = sentence
        captions.append(cap)
    # save results
    with open('captions_res.json', 'w') as f:
        json.dump(captions, f)

    # evaluation with coco-caption evaluation tools
    coco = COCO(args.caption_path)
    cocoRes = coco.loadRes('captions_res.json')
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
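After evaluate() runs, the standard coco-caption tools store the scores on the evaluator; assuming that API, the results can be read out like this:

for metric, score in cocoEval.eval.items():
    print('{}: {:.3f}'.format(metric, score))  # e.g. Bleu_4, METEOR, CIDEr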
Example #15
def train(savedir, _list, root, epochs, batch_size, nz):
    # number of image channels
    channel = 1

    # Adam settings for the generator (default: lr=0.001, betas=(0.9, 0.999), weight_decay=0)
    G_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the discriminator
    D_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the encoder
    E_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}
    # Adam settings for the code discriminator
    CD_opt_para = {'lr': 0.0002, 'betas': (0.5, 0.9), 'weight_decay': 0}

    device = 'cuda'

    # create the save directory (append a numeric suffix if it already exists)
    if os.path.exists(savedir):
        num = 1
        while True:
            if os.path.exists('{}({})'.format(savedir, num)):
                num += 1
            else:
                savedir = '{}({})'.format(savedir, num)
                break
    os.makedirs(savedir, exist_ok=True)
    os.makedirs('{}/generating_image'.format(savedir), exist_ok=True)
    os.makedirs('{}/generating_image_rnd'.format(savedir), exist_ok=True)
    os.makedirs('{}/model'.format(savedir), exist_ok=True)
    os.makedirs('{}/loss'.format(savedir), exist_ok=True)

    myloss = MyLoss()

    df = pd.read_csv(_list, usecols=['Path'])
    img_id = df.values.tolist()

    check_img = Image.open('{}/{}'.format(root, img_id[0][0]))
    check_img = check_img.convert('L')
    width, height = check_img.size

    G_model, D_model, E_model, CD_model = Generator(
        nz, width, height,
        channel), Discriminator(width, height, channel), Encoder(
            nz, width, height, channel), CodeDiscriminator(nz)
    G_model, D_model, E_model, CD_model = nn.DataParallel(
        G_model), nn.DataParallel(D_model), nn.DataParallel(
            E_model), nn.DataParallel(CD_model)
    G_model, D_model, E_model, CD_model = G_model.to(device), D_model.to(
        device), E_model.to(device), CD_model.to(device)

    # optimizer setup
    G_para = torch.optim.Adam(G_model.parameters(),
                              lr=G_opt_para['lr'],
                              betas=G_opt_para['betas'],
                              weight_decay=G_opt_para['weight_decay'])
    D_para = torch.optim.Adam(D_model.parameters(),
                              lr=D_opt_para['lr'],
                              betas=D_opt_para['betas'],
                              weight_decay=D_opt_para['weight_decay'])
    E_para = torch.optim.Adam(E_model.parameters(),
                              lr=E_opt_para['lr'],
                              betas=E_opt_para['betas'],
                              weight_decay=E_opt_para['weight_decay'])
    CD_para = torch.optim.Adam(CD_model.parameters(),
                               lr=CD_opt_para['lr'],
                               betas=CD_opt_para['betas'],
                               weight_decay=CD_opt_para['weight_decay'])

    # lists for recording the loss history
    result = {}
    result['G_log_loss'] = []
    result['D_log_loss'] = []
    result['E_log_loss'] = []
    result['CD_log_loss'] = []

    dataset = LoadDataset(df, root, transform=Trans())
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    output_env('{}/env.txt'.format(savedir), batch_size, nz, G_opt_para,
               D_opt_para, E_opt_para, CD_opt_para, G_model, D_model, E_model,
               CD_model)

    # fixed random noise for testing
    z0 = torch.randn(batch_size, nz, 1, 1)

    for epoch in range(epochs):
        print('########## epoch : {}/{} ##########'.format(epoch + 1, epochs))

        G_log_loss, D_log_loss, E_log_loss, CD_log_loss = [], [], [], []

        for real_img in tqdm(train_loader):
            # create the input noise
            rnd_z = torch.randn(batch_size, nz, 1, 1)

            # move tensors to the GPU
            real_img = real_img.to(device)
            rnd_z = rnd_z.to(device)

            # feed real images to the encoder to get their latent vectors
            real_z = E_model(real_img)

            # feed the latent vectors to the generator to produce images
            fake_img = G_model(real_z)
            rnd_img = G_model(rnd_z)

            # feed the latent vectors to the code discriminator for its predictions
            real_cy = CD_model(real_z)
            rnd_cy = CD_model(rnd_z)

            # feed real and generated images to the discriminator for its predictions
            real_y = D_model(real_img)
            fake_y = D_model(fake_img)
            rnd_y = D_model(rnd_img)

            # encoder loss
            E_loss = myloss.E_loss(
                real_img, fake_img, real_cy,
                torch.tensor(1.0).expand_as(real_cy).to(device), 1.0)
            E_log_loss.append(E_loss.item())
            # generator loss
            G_loss = myloss.G_loss(
                real_img, fake_img, fake_y, rnd_y,
                torch.tensor(1.0).expand_as(fake_y).to(device), 1.0)
            G_log_loss.append(G_loss.item())
            # code discriminator loss
            CD_loss = myloss.CD_loss(
                real_cy,
                torch.tensor(0.0).expand_as(real_cy).to(device), rnd_cy,
                torch.tensor(1.0).expand_as(rnd_cy).to(device))
            CD_log_loss.append(CD_loss.item())
            # discriminator loss
            D_loss = myloss.D_loss(
                real_y,
                torch.tensor(1.0).expand_as(real_y).to(device), fake_y, rnd_y,
                torch.tensor(0.0).expand_as(fake_y).to(device))
            D_log_loss.append(D_loss.item())

            # update the encoder weights
            E_para.zero_grad()
            E_loss.backward(retain_graph=True)
            E_para.step()
            # update the generator weights
            G_para.zero_grad()
            G_loss.backward(retain_graph=True)
            G_para.step()
            # update the code discriminator weights
            CD_para.zero_grad()
            CD_loss.backward(retain_graph=True)
            CD_para.step()
            # update the discriminator weights
            D_para.zero_grad()
            D_loss.backward()
            D_para.step()

        result['G_log_loss'].append(statistics.mean(G_log_loss))
        result['D_log_loss'].append(statistics.mean(D_log_loss))
        result['E_log_loss'].append(statistics.mean(E_log_loss))
        result['CD_log_loss'].append(statistics.mean(CD_log_loss))
        print('G_loss = {} , D_loss = {} , E_loss = {} , CD_loss = {}'.format(
            result['G_log_loss'][-1], result['D_log_loss'][-1],
            result['E_log_loss'][-1], result['CD_log_loss'][-1]))

        # append to the loss log
        with open('{}/loss/log.txt'.format(savedir), mode='a') as f:
            f.write('##### Epoch {:03} #####\n'.format(epoch + 1))
            f.write('G: {}, D: {}, E: {}, CD: {}\n'.format(
                result['G_log_loss'][-1], result['D_log_loss'][-1],
                result['E_log_loss'][-1], result['CD_log_loss'][-1]))

        # save the models and output images every fixed interval
        if (epoch + 1) % 10 == 0:
            # save the models
            torch.save(G_model.module.state_dict(),
                       '{}/model/G_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(D_model.module.state_dict(),
                       '{}/model/D_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(E_model.module.state_dict(),
                       '{}/model/E_model_{}.pth'.format(savedir, epoch + 1))
            torch.save(CD_model.module.state_dict(),
                       '{}/model/CD_model_{}.pth'.format(savedir, epoch + 1))

            G_model.eval()

            # skip gradient bookkeeping to save memory (no parameters are updated at test time)
            with torch.no_grad():
                rnd_img_test = G_model(z0)

            # save the generator's output images
            torchvision.utils.save_image(
                fake_img[:batch_size],
                "{}/generating_image/epoch_{:03}.png".format(
                    savedir, epoch + 1))
            torchvision.utils.save_image(
                rnd_img_test[:batch_size],
                "{}/generating_image_rnd/epoch_{:03}.png".format(
                    savedir, epoch + 1))

            G_model.train()

        # save the loss plot every fixed interval
        if (epoch + 1) % 50 == 0:
            x = np.linspace(1, epoch + 1, epoch + 1, dtype='int')
            plot(result['G_log_loss'], result['D_log_loss'],
                 result['E_log_loss'], result['CD_log_loss'], x, savedir)

    # if the last epoch did not land on a save interval, save once more
    if (epoch + 1) % 10 != 0 and epoch + 1 == epochs:
        torch.save(G_model.module.state_dict(),
                   '{}/model/G_model_{}.pth'.format(savedir, epoch + 1))
        torch.save(D_model.module.state_dict(),
                   '{}/model/D_model_{}.pth'.format(savedir, epoch + 1))
        torch.save(E_model.module.state_dict(),
                   '{}/model/E_model_{}.pth'.format(savedir, epoch + 1))
        torch.save(CD_model.module.state_dict(),
                   '{}/model/CD_model_{}.pth'.format(savedir, epoch + 1))

        G_model.eval()

        with torch.no_grad():
            rnd_img_test = G_model(z0)

        torchvision.utils.save_image(
            fake_img[:batch_size],
            "{}/generating_image/epoch_{:03}.png".format(savedir, epoch + 1))
        torchvision.utils.save_image(
            rnd_img_test[:batch_size],
            "{}/generating_image_rnd/epoch_{:03}.png".format(
                savedir, epoch + 1))

        x = np.linspace(1, epoch + 1, epoch + 1, dtype='int')
        plot(result['G_log_loss'], result['D_log_loss'], result['E_log_loss'],
             result['CD_log_loss'], x, savedir)
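E_loss, G_loss, and CD_loss above all backpropagate through the same forward graph (they share real_z and fake_img), which is why every backward call except the last passes retain_graph=True. A minimal illustration of why the flag is needed:

import torch

x = torch.ones(2, requires_grad=True)
y = (x * 3).sum()
y.backward(retain_graph=True)  # keep the graph alive for a second pass
y.backward()  # would raise a RuntimeError without retain_graph above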
Example #16
    def __init__(self, opt):
        super(Generator, self).__init__()
        self.encoder1 = Encoder(opt.ngpu, opt, opt.nz)
        self.decoder = Decoder(opt.ngpu, opt)