Example #1
    def __init__(self, logger, learning_rate, input_dim, z_dim, ae_h_dims, *args, **kwargs):
        self.scope = "AE"

        self.logger = logger
        self.learning_rate = learning_rate

        self.input_dim = input_dim
        self.z_dim = z_dim
        self.enc_layer_dims = [input_dim, *ae_h_dims, z_dim]
        self.dec_layer_dims = [z_dim, *list(reversed(ae_h_dims)), input_dim] #todo : just reverse enc_layer_dims

        self.logger.info("[*] Building AE model")

        with tf.variable_scope(self.scope):
            self.input = tf.placeholder(tf.float32, [None, self.input_dim])

            enc = Encoder(self.enc_layer_dims)
            dec = Decoder(self.dec_layer_dims)

            z_layer = enc.encode(self.input)
            # todo : how to handle output?
            _, _, self.output = dec.decode(z_layer)

            # todo: refactoring get theta method --> get solver?
            enc_theta = enc.get_theta()
            dec_theta = dec.get_theta()
            self.theta = [*enc_theta, *dec_theta]

            #l2_loss = enc.get_l2_loss() +
            self.recon_loss = tf.reduce_mean(tf.square(self.input-self.output))
            self.solver = tf.train.AdamOptimizer(self.learning_rate).minimize(self.recon_loss, var_list=self.theta)
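
A minimal usage sketch for the autoencoder above (not from the original project): it assumes the enclosing class is named AE (matching its variable scope), TF1.x graph mode, and the project's Encoder/Decoder helpers being importable; the 784-dim input and the random x_batch are illustrative placeholders.

import logging
import numpy as np
import tensorflow as tf

logger = logging.getLogger("ae")
model = AE(logger, learning_rate=1e-3, input_dim=784, z_dim=32, ae_h_dims=[256, 64])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.random.rand(16, 784).astype(np.float32)  # hypothetical mini-batch
    _, loss = sess.run([model.solver, model.recon_loss],
                       feed_dict={model.input: x_batch})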
Example #2
def run():
  
  # paths to data
  content_paths = []
  for c in FLAGS.contents:
    p = Path(c)
    if not p.exists():
      raise ValueError('The content image or directory does not exist: {}'.format(p))
    if p.is_dir():
      for f in p.glob('**/*.*'):
        content_paths.append(f)
    else:
      content_paths.append(p)
  style_path = Path(FLAGS.style)
  if not style_path.exists():
    raise ValueError('The style image does not exist: {}'.format(style_path))

  # output directory
  output_dir = Path(FLAGS.output) / style_path.stem
  if output_dir.exists():
    logging.warning('The folder will be deleted: {}'.format(output_dir))
    rm_path(output_dir)
  output_dir.mkdir(exist_ok=True, parents=True)

  # create model
  if not Path(FLAGS.decoder).exists():
    raise ValueError('The decoder model is not found: {}'.format(FLAGS.decoder))
  encoder = Encoder(input_shape=(None, None, 3), pretrained=True)
  content_feature_input = Input(shape=encoder.output_shape[-1][1:])
  style_feature_input = Input(shape=encoder.output_shape[-1][1:])
  adain = AdaIN(alpha=FLAGS.alpha)
  adain = Model(inputs=[content_feature_input, style_feature_input], outputs=[adain([content_feature_input, style_feature_input])])
  decoder = Decoder(input_shape=encoder.output_shape[-1][1:])
  decoder.load_weights(FLAGS.decoder)
  
  # load and encode style image
  style = np.expand_dims(load_image(style_path, image_shape=(FLAGS.style_size, FLAGS.style_size)), axis=0)
  style_feature = encoder.predict(style)[-1]

  for content_path in tqdm(content_paths):
    
    # load and encode content image
    content = load_image(content_path)
    content = np.expand_dims(content, axis=0)
    content_feature = encoder.predict(content)[-1]

    # normalize the feature
    normalized_feature = adain.predict([content_feature, style_feature])

    # generate image
    generated = decoder.predict(normalized_feature)
    
    # save image
    img_path = output_dir / '{}.{}'.format(content_path.stem, FLAGS.ext)
    img = array_to_img(generated[0])
    img.save(img_path)
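
For reference, the AdaIN layer applied above re-normalizes the content features to the channel-wise statistics of the style features. A standalone NumPy sketch of that computation follows; it is a generic reference, not the project's Keras AdaIN layer, which may differ in detail (alpha mirrors the FLAGS.alpha blending factor).

import numpy as np

def adain_reference(content_feat, style_feat, alpha=1.0, eps=1e-5):
    # Feature maps shaped (N, H, W, C); statistics are taken per channel over H and W.
    c_mean = content_feat.mean(axis=(1, 2), keepdims=True)
    c_std = content_feat.std(axis=(1, 2), keepdims=True) + eps
    s_mean = style_feat.mean(axis=(1, 2), keepdims=True)
    s_std = style_feat.std(axis=(1, 2), keepdims=True) + eps
    stylized = s_std * (content_feat - c_mean) / c_std + s_mean
    # alpha blends the stylized features back toward the original content features.
    return alpha * stylized + (1.0 - alpha) * content_feat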
Example #3
    def __init__(self, speaker_emb_reduction=3):
        super(VQVC, self).__init__()
        self.name = 'VQVC'

        self.speaker_emb_reduction = speaker_emb_reduction

        self.encoder = Encoder(mel_channels=args.n_mels, z_dim=args.z_dim)
        self.codebook = VQEmbeddingEMA(args.n_embeddings, args.z_dim)
        self.decoder = Decoder(in_channels=args.z_dim,
                               mel_channels=args.n_mels)
Example #4
 def __init__(self, config, name='model'):
     super(Model, self).__init__(name=name)
     self._normal_invvar = 1 / pow(config['normal_scale'], 2)
     self._normal_const = math.log(2 * math.pi / self._normal_invvar)
     self._seg_overlap = config['seg_overlap']
     with self._enter_variable_scope(check_same_graph=False):
         self._init = Initializer(config)
         self._upd = Updater(config)
         self._dec = Decoder(config)
         self._ln_grad_apc = snt.LayerNorm(axis=[-3, -2, -1],
                                           offset=False,
                                           scale=False,
                                           name='ln_grad_apc')
         self._ln_grad_mask = snt.LayerNorm(axis=[-3, -2, -1],
                                            offset=False,
                                            scale=False,
                                            name='ln_grad_mask')
         self._ln_pixel_ll = snt.LayerNorm(axis=[-3, -2, -1],
                                           offset=False,
                                           scale=False,
                                           name='ln_ll')
         self._ln_pixel_ll_excl = snt.LayerNorm(axis=[-3, -2, -1],
                                                offset=False,
                                                scale=False,
                                                name='ln_ll_exclude')
         self._ln_grad_post_param = snt.LayerNorm(axis=[-1],
                                                  offset=False,
                                                  scale=False,
                                                  name='ln_grad_post_param')
Example #5
 def __init__(self, config):
     super(ModelBase, self).__init__()
     # Hyperparameters
     self.noise_prob = config['noise_prob']
     self.seg_overlap = config['seg_overlap']
     # Neural networks
     self.upd = Updater(config)
     self.dec = Decoder(config)
Example #6
def infer(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # load the vocabulary
    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    # use model.eval() at test time so BatchNorm and Dropout keep their trained values
    encoder = Encoder(args['embed_size'], args['pooling_kernel']).eval().cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()

    # load the trained parameters
    encoder.load_state_dict(torch.load(args['encoder_path']))
    decoder.load_state_dict(torch.load(args['decoder_path']))

    # load the image
    image = load_image(args['val_img_path'], transform,
                       (args['resize'], args['resize']))
    image_tensor = image.cuda()

    # feed the image through the model to generate a caption
    feature = encoder(image_tensor)
    index = decoder.sample(feature)
    index = index[0].cpu().numpy()

    # convert indices to words
    words = []
    for ind in index:
        word = vocab.idx2word[ind]
        words.append(word)
        if word == '<end>':
            break

    sentence = ' '.join(words[1:-1])  # drop the <start> and <end> special tokens
    print(sentence)
    image = Image.open(args['val_img_path'])
    plt.imshow(np.asarray(image))
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = Encoder(args.embed_size).eval()
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length)

    # Convert word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = vocab.idx2word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
Example #8
class VQVC(nn.Module):
    """
		VQVC

		Args:
			mels:	(N, T, C)			

		Returns:
			encode:
				z_enc:		(N, T, z_dim)
				z_quan:		(N, T, z_dim)
				c:	 	(N, T, c_dim)
				indices: 	(N,
			forward:
				z_enc:		(N, T, z_dim)
				z_quan:		(N, T, z_dim)
				c: 		(N, T, c_dim)
				loss:		(1, )
				perplexity	(1, )
	"""
    def __init__(self, speaker_emb_reduction=3):
        super(VQVC, self).__init__()
        self.name = 'VQVC'

        self.speaker_emb_reduction = speaker_emb_reduction

        self.encoder = Encoder(mel_channels=args.n_mels, z_dim=args.z_dim)
        self.codebook = VQEmbeddingEMA(args.n_embeddings, args.z_dim)
        self.decoder = Decoder(in_channels=args.z_dim,
                               mel_channels=args.n_mels)

    def average_through_time(self, x, dim):
        x = torch.mean(x, dim=dim, keepdim=True)
        return x

    def forward(self, mels):

        # encoder
        z_enc = self.encoder(mels)

        # quantization
        z_quan, commitment_loss, perplexity = self.codebook(z_enc)

        # speaker emb
        speaker_emb_ = z_enc - z_quan
        speaker_emb = self.average_through_time(speaker_emb_, dim=1)

        # decoder
        mels_hat = self.decoder(z_quan, speaker_emb)

        return mels_hat, commitment_loss, perplexity

    def evaluate(self, mels):
        # encoder
        z_enc = self.encoder(mels)

        # contents emb
        z_quan, commitment_loss, perplexity = self.codebook(z_enc)

        # speaker emb
        speaker_emb_ = z_enc - z_quan
        speaker_emb = self.average_through_time(speaker_emb_, dim=1)

        # decoder
        mels_hat, mels_code, mels_style = self.decoder.evaluate(
            z_quan, speaker_emb, speaker_emb_)

        return mels_hat, mels_code, mels_style, commitment_loss, perplexity

    def convert(self, src_mel, ref_mel):
        # source z_enc
        z_src_enc = self.encoder(src_mel)

        # source contents
        src_contents, _, _ = self.codebook(z_src_enc)

        # source style emb
        src_style_emb_ = z_src_enc - src_contents

        # ref z_enc
        ref_enc = self.encoder(ref_mel)

        # ref contents
        ref_contents, _, _ = self.codebook(ref_enc)

        # ref speaker emb
        ref_speaker_emb_ = ref_enc - ref_contents
        ref_speaker_emb = self.average_through_time(ref_speaker_emb_, dim=1)

        # decoder to generate mel
        mel_converted, mel_src_code, mel_src_style, mel_ref_code, mel_ref_style = self.decoder.convert(
            src_contents, src_style_emb_, ref_contents, ref_speaker_emb,
            ref_speaker_emb_)

        return mel_converted, mel_src_code, mel_src_style, mel_ref_code, mel_ref_style
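
A hedged inference sketch for the VQVC model above: it assumes the module-level args namespace the class reads (n_mels, z_dim, n_embeddings) is already configured by the project, and uses random placeholder mels shaped (N, T, n_mels) as in the docstring.

import torch

model = VQVC().eval()
src_mel = torch.randn(1, 128, 80)  # hypothetical source utterance, assuming n_mels=80
ref_mel = torch.randn(1, 128, 80)  # hypothetical reference-speaker utterance
with torch.no_grad():
    mel_converted, *_ = model.convert(src_mel, ref_mel)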
Example #9
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetG = Decoder(nc, ngf, nz).to(device)
NetD = Discriminator(imageSize, nc, ndf, nz).to(device)
NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)
NetD.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))
if opt.netD != '':
    NetD.load_state_dict(torch.load(opt.netD))
        "network": torch.device(0),
        "images": torch.device(1),
        "test": torch.device(2)
    }

    csv_path = "../VOC2012/"

    train_path = csv_path + "train_v1.csv"
    test_path = csv_path + "test_v1.csv"

    os.makedirs(arg.save_dir, exist_ok=True)
    tensorboard = utils.TensorboardLogger("%s/tb" % (arg.save_dir))

    E = nn.DataParallel(Encoder(),
                        output_device=device["images"]).to(device["network"])
    D = nn.DataParallel(Decoder(),
                        output_device=device["images"]).to(device["network"])

    loss = TotalLoss(device, (arg.batch_train, 3, *arg.resl))

    optim = opt.Adam(list(E.parameters()) + list(D.parameters()),
                     lr=arg.lr,
                     betas=arg.betas)
    scheduler = opt.lr_scheduler.LambdaLR(optim,
                                          lr_lambda=lambda epoch: 0.965**epoch)

    train_loader = Loader(train_path,
                          arg.batch_train,
                          num_workers=arg.cpus,
                          shuffle=True,
                          drop_last=True)
Example #11
def run():

    # create directories
    save_dir = Path(FLAGS.save_dir)
    if save_dir.exists():
        logging.warning('The directory can be overwritten: {}'.format(
            FLAGS.save_dir))
    save_dir.mkdir(exist_ok=True, parents=True)
    log_dir = Path(FLAGS.tensorboard)
    if log_dir.exists():
        logging.warning('The directory will be removed: {}'.format(
            FLAGS.tensorboard))
        rm_path(log_dir)
    log_dir.mkdir(exist_ok=True, parents=True)

    # to handle errors while loading images
    Image.MAX_IMAGE_PIXELS = None
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # image generator
    dataset = ContentStyleLoader(content_root=FLAGS.content_dir,
                                 content_image_shape=(FLAGS.image_size,
                                                      FLAGS.image_size),
                                 content_crop='random',
                                 content_crop_size=FLAGS.crop_size,
                                 style_root=FLAGS.style_dir,
                                 style_image_shape=(FLAGS.image_size,
                                                    FLAGS.image_size),
                                 style_crop='random',
                                 style_crop_size=FLAGS.crop_size,
                                 n_per_epoch=FLAGS.dataset_size,
                                 batch_size=FLAGS.batch_size)

    # create model
    encoder = Encoder(input_shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                      pretrained=True,
                      name='encoder')
    # freeze the model
    for l in encoder.layers:
        l.trainable = False
    adain = AdaIN(alpha=1.0, name='adain')
    decoder = Decoder(input_shape=encoder.output_shape[-1][1:], name='decoder')

    # place holders for inputs
    content_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                          name='content_input')
    style_input = Input(shape=(FLAGS.crop_size, FLAGS.crop_size, 3),
                        name='style_input')

    # forwarding
    content_features = encoder(content_input)
    style_features = encoder(style_input)
    normalized_feature = adain([content_features[-1], style_features[-1]])
    generated = decoder(normalized_feature)

    # loss calculation
    generated_features = encoder(generated)
    content_loss = Lambda(calculate_content_loss, name='content_loss')(
        [normalized_feature, generated_features[-1]])
    style_loss = Lambda(calculate_style_loss, name='style_loss')(
        [style_features, generated_features])
    loss = Lambda(
        lambda x: FLAGS.content_weight * x[0] + FLAGS.style_weight * x[1],
        name='loss')([content_loss, style_loss])

    # trainer
    trainer = Model(inputs=[content_input, style_input], outputs=[loss])
    optim = optimizers.Adam(learning_rate=FLAGS.learning_rate)
    trainer.compile(optimizer=optim, loss=lambda _, y_pred: y_pred)
    trainer.summary()

    # callbacks
    callbacks = [
        # learning rate scheduler
        LearningRateScheduler(lambda epoch, _: FLAGS.learning_rate / (
            1.0 + FLAGS.learning_rate_decay * FLAGS.dataset_size * epoch)),
        # Tensor Board
        TensorBoard(str(log_dir), write_graph=False, update_freq='batch'),
        # save model
        SubmodelCheckpoint(
            str(save_dir / 'decoder.epoch-{epoch:d}.h5'),
            submodel_name='decoder',
            save_weights_only=True,
            save_best_only=FLAGS.save_best_only,
            save_freq=FLAGS.save_every if FLAGS.save_every else 'epoch')
    ]

    # train
    trainer.fit_generator(dataset,
                          epochs=FLAGS.epochs,
                          workers=FLAGS.workers,
                          callbacks=callbacks)
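
The content and style losses above come from the project's calculate_content_loss and calculate_style_loss helpers, which are not shown here. The following is a hedged reference sketch of what such losses typically compute in AdaIN training (MSE between the AdaIN target and the re-encoded output, plus mean/std mismatches across encoder layers); the actual helpers may differ.

import tensorflow as tf

def content_loss_ref(inputs):
    # inputs = [adain_feature, generated_feature]
    adain_feature, generated_feature = inputs
    return tf.reduce_mean(tf.square(generated_feature - adain_feature))

def style_loss_ref(inputs, eps=1e-5):
    # inputs = [style_features, generated_features], lists of feature maps per encoder layer
    style_features, generated_features = inputs
    loss = 0.0
    for s, g in zip(style_features, generated_features):
        s_mean, s_var = tf.nn.moments(s, axes=[1, 2])
        g_mean, g_var = tf.nn.moments(g, axes=[1, 2])
        loss += tf.reduce_mean(tf.square(g_mean - s_mean))
        loss += tf.reduce_mean(tf.square(tf.sqrt(g_var + eps) - tf.sqrt(s_var + eps)))
    return loss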
Example #12
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = Encoder(args.embed_size).eval()
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # load validation image set
    lis = os.listdir(args.image_dir)
    num = len(lis)
    captions = []
    for i in range(num):

        im_pth = os.path.join(args.image_dir, lis[i])

        image = load_image(im_pth, transform)
        image_tensor = image.to(device)

        # Generate a caption from the image
        feature = encoder(image_tensor)
        sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length)

        # Convert word_ids to words
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.idx2word[word_id]
            if word == '<start>':
                continue
            if word == '<end>':
                break

            sampled_caption.append(word)

        sentence = ' '.join(sampled_caption)
        cap= {}
        id = int(lis[i][14:-4]) #extract image id
        cap['image_id'] = id
        cap['caption'] =  sentence
        captions.append(cap)
    # save results
    with open('captions_res.json', 'w') as f:
        json.dump(captions, f)

    # evaluation with coco-caption evaluation tools
    coco = COCO(args.caption_path)
    cocoRes = coco.loadRes('captions_res.json')
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
Example #13
def main(args):
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, augmentation, normalization for using the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.im_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Configure the network
    encoder = Encoder(args.embed_size).to(device)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                      args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # mini-batch
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path, 'decoder.ckpt'))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path, 'encoder.ckpt'))
def look_network(device: str):
    pos_encoding = PositionalEncoding(10000, 512)(torch.zeros(1, 64, 512))
    plt.pcolormesh(pos_encoding[0].numpy(), cmap="RdBu")
    plt.xlabel("Depth")
    plt.xlim((0, 512))
    plt.ylabel("Position")
    plt.colorbar()
    plt.show()

    y = torch.rand(1, 60, 512)
    out = ScaledDotProductAttention()(y, y, y)
    print("Dot Attention Shape", out[0].shape, out[1].shape)

    temp_mha = MultiHeadAttention(features=512, num_heads=8)
    out, attn = temp_mha(q=torch.rand(1, 45, 512), k=y, v=y, mask=None)
    print("Multi Attention Shape", out.shape, attn.shape)

    sample_ffn = FeedForwardNetwork(512, 2048)
    print("Feed Forward Shape", sample_ffn(torch.rand(64, 50, 512)).shape)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 43, 512), None)
    print(
        "Encoder Shape", sample_encoder_layer_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_encoder_layer = EncoderLayer(512, 8, 2048)
    sample_encoder_layer_output = sample_encoder_layer(torch.rand(64, 50, 512), None)
    print(
        "Encoder Shape", sample_encoder_layer_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_encoder = Encoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        input_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 62).type(torch.LongTensor).to(device)
    sample_encoder_output = sample_encoder(temp_input, mask=None)
    print(
        "Encoder Shape", sample_encoder_output.shape
    )  # (batch_size, input_seq_len, d_model)

    sample_decoder = Decoder(
        num_layers=2,
        features=512,
        num_heads=8,
        fffeatures=2048,
        target_vocab_size=8500,
        maximum_position_encoding=10000,
    ).to(device)
    temp_input = torch.rand(64, 26).type(torch.LongTensor).to(device)
    output, attn = sample_decoder(
        temp_input,
        enc_output=sample_encoder_output,
        look_ahead_mask=None,
        padding_mask=None,
    )
    print("Decoder Shape", output.shape, attn["decoder_layer2_block2"].shape)
Example #15
    x_train = LoadIndexDataset('./index_dataset/index_train_source_8000.txt',
                               src_i2wDict)
    y_train = LoadIndexDataset('./index_dataset/index_train_target_8000.txt',
                               src_i2wDict)
    x_train = x_train[:100]
    y_train = y_train[:100]

    hidden_dim = 256
    BATCH_SIZE = 1
    EPOCH_NUM = 10
    embed_dim = 50
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoder = Encoder(src_vocab_size, embed_dim, hidden_dim)
    decoder = Decoder(tag_vocab_size, embed_dim, hidden_dim)
    network = Net(encoder, decoder, device, teacher_forcing_ratio=0.5)

    loss_fn = nn.CrossEntropyLoss()  # use cross-entropy loss
    optimizer = torch.optim.Adam(network.parameters())  # use the Adam optimizer

    for epoch in range(EPOCH_NUM):
        print('*********************************')
        print('epoch: ', epoch + 1, 'of', EPOCH_NUM)
        i = 0
        while i * BATCH_SIZE < len(x_train):
            if (i + 1) * BATCH_SIZE < len(x_train):
                inputs = x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                target = y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            else:
                inputs = x_train[i * BATCH_SIZE:]
Example #16
ngf = int(opt.ngf)
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
NetG = Decoder(nc, ngf, nz).to(device)

Sampler = Sampler().to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
NetE.load_state_dict(torch.load(opt.netE, map_location=opt.cuda))
NetG.load_state_dict(torch.load(opt.netG, map_location=opt.cuda))

NetE.eval()
NetG.eval()

# 21 attributes
attributes = [
Example #17
File: model.py Project: MS0147/1234
 def __init__(self, opt):
     super(Generator, self).__init__()
     self.encoder1 = Encoder(opt.ngpu, opt, opt.nz)
     self.decoder = Decoder(opt.ngpu, opt)
Example #18
lr = opt.lr
gamma = opt.gamma


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


NetE = Encoder(imageSize, nc, ngf, nz).to(device)
Sampler = Sampler().to(device)
NetG = Decoder(nc, ngf, nz).to(device)

NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))

optimizer_encorder = optim.RMSprop(params=NetE.parameters(
), lr=lr, alpha=0.9, eps=1e-8, weight_decay=0, momentum=0, centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(
), lr=lr, alpha=0.9, eps=1e-8, weight_decay=0, momentum=0, centered=False)
Example #19
    latent_list = []
    for i in range(5):
        latent_list.append(
            UniformLatent(in_dim=1,
                          out_dim=1,
                          low=-1.0,
                          high=1.0,
                          apply_reg=True))
    latent_list.append(
        UniformLatent(in_dim=5, out_dim=5, low=-1.0, high=1.0,
                      apply_reg=False))
    latent = JointLatent(latent_list=latent_list)

    decoder = Decoder(output_width=width,
                      output_height=height,
                      output_depth=depth)
    infoGANDiscriminator = \
        InfoGANDiscriminator(output_length=latent.reg_out_dim)
    crDiscriminator = \
        CrDiscriminator(output_length=latent.num_reg_latent)

    checkpoint_dir = "./test/checkpoint"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    sample_dir = "./test/sample"
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    time_path = "./test/time.txt"
    metric_path = "./test/metric.csv"
    epoch = 40
Example #20
def train(args):
    # preprocess the data to build the vocab and data files
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # image preprocessing with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)

    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # print training progress
            if i % args['log_step'] == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss.item(), np.exp(loss.item())))

            # save model checkpoints
            if (i + 1) % args['save_step'] == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        # also save the models at the end of each epoch
        torch.save(
            decoder.state_dict(),
            os.path.join(args['model_path'],
                         'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(
            encoder.state_dict(),
            os.path.join(args['model_path'],
                         'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
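
A small illustration of the pack_padded_sequence target trick used in the training loops of Examples #13 and #20: packing drops the padding positions and interleaves the remaining tokens time-major, so the targets line up with the decoder's packed outputs. The token ids below are hypothetical.

import torch
from torch.nn.utils.rnn import pack_padded_sequence

captions = torch.tensor([[1, 4, 7, 2, 0],    # e.g. <start> w1 w2 <end> <pad>
                         [1, 5, 2, 0, 0]])
lengths = [4, 3]
targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
# targets -> tensor([1, 1, 4, 5, 7, 2, 2]): the 7 non-padding tokens, time-major.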