예제 #1
0
 def rerun_layers(self, output, update_hidden=True):
     """Re-run the RNN layers from the decomposed layer upward, then decode.

     Args:
         output: Input handed to layer ``self.decomposed_layer_number``; each
             layer's output becomes the next layer's input.
         update_hidden: When true, each layer's new hidden state is written
             back into ``self.hidden``.

     Returns:
         The result of ``self.model.decoder`` applied to the last layer's
         output.
     """
     for layer in range(self.decomposed_layer_number, self.model.nlayers):
         rnn_layer = getattr(self.model, self.model.rnn_module_name(layer))
         output, new_hidden = rnn_layer(output, self.hidden[layer])
         if update_hidden:
             self.hidden[layer] = new_hidden
     # Decode with the wrapped model, consistent with every other access in
     # this method, instead of depending on a module-level ``model`` name.
     return self.model.decoder(output)
예제 #2
0
def train():
    """Train the image-captioning encoder/decoder and checkpoint periodically.

    Builds the training data loader, an ``encoder``/``decoder`` pair and an
    Adam optimizer over ``enc.dense`` plus all decoder parameters, then runs
    ``iteration`` epochs.  Each step draws a fresh set of indices from
    ``dataset.trainIndices`` and swaps them into the loader's sampler before
    pulling one batch.  On every 1000th step (step 0 excluded) both state
    dicts are saved under ``model_storage/``.
    """
    # Function-scope import: ``np.math`` was only an alias of the stdlib
    # module and has been removed from NumPy 2.0, so use ``math`` directly.
    import math

    # Fixed settings for this run.
    batch_size = 32
    is_train = True  # renamed from ``train`` so it no longer shadows this function
    transform_train = transforms.Compose([
        transforms.Resize(256),  # smaller edge of image resized to 256
        transforms.RandomCrop(224),  # get 224x224 crop from random location
        transforms.ToTensor(),  # convert the PIL Image to a tensor
        transforms.Normalize(
            (0.485, 0.456, 0.406),  # normalize image for pre-trained model
            (0.229, 0.224, 0.225))
    ])
    iteration = 3
    vocabulary_threshold = 5
    embed_size = 512
    hidden_size = 512
    hidden_layer = 1
    model_save = "model_storage/"

    # Data loader and models.
    train_dataLoader = get_data_loader(vocabulary_threshold, is_train,
                                       batch_size, transform_train)
    enc = encoder(embed_size, batch_size)
    dec = decoder(len(train_dataLoader.dataset.vocab.word_to_index),
                  embed_size, hidden_layer, hidden_size)

    # Only the encoder's ``dense`` sub-module is optimized together with the
    # whole decoder.
    params = list(enc.dense.parameters()) + list(dec.parameters())
    criteria = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params,
                                 lr=0.001,
                                 betas=(0.9, 0.999),
                                 eps=1e-08)
    steps_per_epoch = int(
        math.ceil(len(train_dataLoader.dataset.caption_len) / batch_size))

    for epoch in range(iteration):
        for step in range(steps_per_epoch):
            # Re-sample which examples make up this batch.
            index = train_dataLoader.dataset.trainIndices(batch_size)
            sampler = torch.utils.data.SubsetRandomSampler(index)
            train_dataLoader.batch_sampler.sampler = sampler
            img, caption = next(iter(train_dataLoader))

            enc.zero_grad()
            dec.zero_grad()
            features = enc(img)
            prediction = dec(features, caption)
            # Flatten (batch, time) so every token is one classification.
            loss = criteria(
                prediction.view(caption.size(0) * caption.size(1), -1),
                caption.view(-1))
            loss.backward()
            optimizer.step()

            # NOTE(review): the second number printed is the epoch count
            # (``iteration``), not the number of steps per epoch.
            stats = "[%d/%d] LOSS: %.4f, PERPLEXITY: %5.4f " % (
                step, iteration, loss.item(), np.exp(loss.item()))
            print("\r " + stats, end="")
            sys.stdout.flush()

            if step % 1000 == 0 and step != 0:
                # Save the weights.  File names carry only the step, so a later
                # epoch overwrites the file written at the same step earlier.
                torch.save({"model_state": enc.state_dict()},
                           model_save + "encoder_" + str(step) + ".pth")
                torch.save({"model_state": dec.state_dict()},
                           model_save + "decoder_" + str(step) + ".pth")
                print("\r" + stats)
예제 #3
0
파일: pointer.py 프로젝트: luohongyin/PILM
def evaluate(data_source, batch_size=10, window=args.window):
    """Evaluate the model with a pointer mixture and return the averaged loss.

    For each position the vocabulary softmax is mixed with a pointer
    distribution built from the previous ``window`` RNN outputs and the
    one-hot encodings of the targets recorded alongside them.

    Args:
        data_source: Evaluation data, walked along dim 0 in steps of
            ``args.bptt``.
        batch_size: Passed to ``model.init_hidden`` and used to scale each
            step's accumulated loss.
        window: Number of most recent positions kept in the pointer history.
            The default is read from ``args.window`` once, when the function
            is defined.

    Returns:
        ``total_loss / len(data_source)``.
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    # Histories carried across bptt segments; both start empty.
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # Progress print: position, data length and exp of the running loss.
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets, _ = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        output = model.decoder(output)
        # Last entry of rnn_outs — presumably the top RNN layer; TODO confirm.
        rnn_out = rnn_outs[-1].squeeze()
        print(output.size())  # debug print
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        # start_idx is where this segment begins inside the concatenated history.
        start_idx = len(next_word_history) if next_word_history is not None else 0
        # NOTE(review): ``t.data[0]`` assumes each target is indexable (legacy
        # PyTorch tensors) — confirm against the PyTorch version in use.
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        # NOTE(review): softmax is called without an explicit ``dim``.
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            # Default to the plain vocabulary distribution.
            p = vocab_loss
            # Mix in the pointer only once more than ``window`` positions exist.
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                # Dot product of the current RNN output with each stored one.
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                # Attention-weighted sum of the stored one-hot next words.
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                # Interpolate pointer and vocabulary distributions.
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        # Keep only the most recent ``window`` entries for the next segment.
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
예제 #4
0
def test():
    """Caption one test image with the newest checkpoint and display it.

    Selects the ``encoder_<step>.pth`` file with the largest step in
    ``model_storage/`` together with the matching ``decoder_<step>.pth``,
    loads both, samples a caption for one image from the test loader, and
    shows the image with the caption overlaid.

    Raises:
        FileNotFoundError: If ``model_storage/`` contains no encoder
            checkpoint (previously this surfaced as an ``AttributeError``
            on ``None``).
    """
    embed_size = 512
    hidden_size = 512
    weights = "model_storage/"
    weight_list = os.listdir(weights)
    index_to_word = readVocab()
    # NOTE(review): this pipeline still uses RandomCrop, so the crop fed to
    # the model differs between runs — confirm that is intended at test time.
    transform_train = transforms.Compose([
        transforms.Resize(256),  # smaller edge of image resized to 256
        transforms.RandomCrop(224),  # get 224x224 crop from random location
        transforms.ToTensor(),  # convert the PIL Image to a tensor
        transforms.Normalize(
            (0.485, 0.456, 0.406),  # normalize image for pre-trained model
            (0.229, 0.224, 0.225))
    ])

    # Pick the encoder checkpoint with the highest step number.  Starting at
    # -1 lets a step-0 checkpoint be selected too.
    selectedWeight = None
    maxVal = -1
    for weight in weight_list:
        if "encoder" not in weight:
            continue
        val = int(weight.split(".")[0].split("_")[1])
        if val > maxVal:
            selectedWeight = weight
            maxVal = val
    if selectedWeight is None:
        raise FileNotFoundError("no encoder checkpoint found in " + weights)

    encoder_weight = selectedWeight
    decoder_weight = selectedWeight.replace("encoder", "decoder")
    enc_weight = torch.load(weights + encoder_weight)
    dec_weight = torch.load(weights + decoder_weight)

    enc = encoder(embed_size, batch_size=1)
    enc.eval()
    enc.load_state_dict(enc_weight["model_state"])
    dec = decoder(len(index_to_word), embed_size, 1, hidden_size)
    dec.eval()
    dec.load_state_dict(dec_weight["model_state"])

    test_loader = get_data_loader(5, False, 1, transform_train)
    img_test, original_img = next(iter(test_loader))
    features = enc(img_test)
    output = dec.sample(features.unsqueeze(1), 20)

    # Indices 0, 1 and 2 are skipped — presumably special tokens; TODO confirm.
    sentence = "".join(index_to_word[val] + "  " for val in output
                       if val != 0 and val != 1 and val != 2)

    plt.imshow(np.uint8(original_img.squeeze(0).numpy()))
    plt.text(100,
             400,
             sentence,
             style='italic',
             bbox={
                 'facecolor': 'red',
                 'alpha': 0.5,
                 'pad': 10
             })
    plt.show()
예제 #5
0
def evaluate(data_source, batch_size=10):
    """Return the length-weighted average criterion loss over ``data_source``.

    The data is walked in ``args.bptt``-sized segments; the hidden state is
    carried from one segment to the next after ``repackage_hidden``.
    """
    # Evaluation mode disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()

    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)

    last_start = data_source.size(0) - 1
    for start in range(0, last_start, args.bptt):
        data, targets, _ = get_batch(data_source, start, args, evaluation=True)
        rnn_output, hidden = model(data, hidden)
        flat_logits = model.decoder(rnn_output).view(-1, ntokens)
        segment_loss = criterion(flat_logits, targets).data
        # Weight each segment by its length so short tail segments count less.
        total_loss += len(data) * segment_loss
        hidden = repackage_hidden(hidden)

    return total_loss[0] / len(data_source)
예제 #6
0
def evaluate(split, verbose=False, n_batches=None):
    """Compute the average per-batch loss of the captioning model on a split.

    Args:
        split: Name of the dataset split; only ``'val'`` is supported.
        verbose: When true, print the average loss.
        n_batches: Optional cap on the number of batches to evaluate; a falsy
            value evaluates the whole loader.

    Returns:
        The accumulated criterion loss divided by the number of batches
        evaluated.

    Raises:
        ValueError: If ``split`` is not ``'val'`` or the loader yields no
            batches.
    """
    # Put both sub-modules in eval mode (as opposed to train mode).
    model.encoder.eval()
    model.decoder.eval()

    # Select the loader up front so an unknown split fails with a clear
    # message instead of an unbound ``loader`` further down.
    if split == 'val':
        loader = val_loader
    else:
        raise ValueError("unsupported split: {!r}".format(split))

    loss = 0
    batches_seen = 0

    with torch.no_grad():
        for batch in loader:
            data, caption, lengths = batch[0], batch[1], batch[2]

            # Move the batch to the GPU if requested.
            if args.cuda:
                data, caption = data.cuda(), caption.cuda()

            # Pack the targets only after the device move so they live on the
            # same device as the decoder output they are compared against.
            targets = pack_padded_sequence(caption, lengths,
                                           batch_first=True)[0]

            # No ``Variable`` wrapping: under ``torch.no_grad`` (PyTorch >= 0.4)
            # tensors are used directly.
            features = model.encoder(data)
            output = model.decoder(features, caption, lengths)

            # Both output and targets are in packed-sequence form.
            loss += criterion(output, targets).data
            batches_seen += 1

            # Stop once exactly ``n_batches`` batches have been evaluated.
            if n_batches and batches_seen >= n_batches:
                break

    if batches_seen == 0:
        raise ValueError("loader for split {!r} produced no batches".format(split))

    # Average over the batches actually evaluated.
    loss /= batches_seen

    if verbose:
        print('\n{} set: Average loss: {:.4f}'.format(split, loss))
    return loss
예제 #7
0
def evaluate(data_source, batch_size=10, test=False):
    """Evaluate the in-distribution loss and the outlier-exposure (OE) loss.

    Each ``args.bptt`` segment of ``data_source`` is paired with the segment
    at the same offset in ``oe_val_dataset``; both are concatenated along
    dim 1 and run through the model with a freshly initialised hidden state.

    Args:
        data_source: In-distribution evaluation data.
        batch_size: Unused here; the hidden-state size comes from
            ``test_batch_size`` or ``eval_batch_size``.
        test: Selects ``test_batch_size`` (true) or ``eval_batch_size``.

    Returns:
        A pair ``(mean loss, mean OE loss)`` averaged over the segments that
        were actually evaluated.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            # ``unsqueeze`` returns a new tensor, so the result must be
            # rebound; the original discarded it and the data stayed 1-D.
            data = data.unsqueeze(-1)
            data_oe = data_oe.unsqueeze(-1)

        # Skip segments whose lengths differ between the two datasets.
        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        # Twice the batch size: in-distribution and OE halves side by side.
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, _, dropped_rnn_hs = model(
            torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        # Split the last entry of dropped_rnn_hs back into the two halves.
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output,
                         targets).data

        # OE loss: mean over the vocabulary of the negative log-softmax, then
        # averaged over all positions.
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe -
                              torch.max(logits_oe, dim=-1, keepdim=True)[0],
                              dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    # NOTE(review): indexing with [0] assumes the accumulated losses are
    # one-element tensors (legacy PyTorch) and that at least one segment was
    # evaluated — confirm against the PyTorch version in use.
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
예제 #8
0
def evaluate(data_source, batch_size=10):
    """Return the length-weighted average loss over ``data_source``.

    The data is walked in ``args.bptt``-sized segments with the hidden state
    carried across segments.  The criterion receives the decoder's weight and
    bias together with the raw model output, so no separate decoder forward
    pass is needed.

    Args:
        data_source: Evaluation data, walked along dim 0.
        batch_size: Batch size used to initialise the hidden state.

    Returns:
        The summed ``len(data) * loss`` over all segments divided by
        ``len(data_source)``, as a Python float.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    # Accumulate as a Python float so the result is well defined even when
    # the loop body never runs.
    total_loss = 0.0
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        segment_loss = criterion(
            model.decoder.weight, model.decoder.bias, output, targets)
        total_loss += len(data) * segment_loss.item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
    def run(self):
        """Build the models and optimizer, then alternate training and validation.

        Reads ``batch_size``, ``learning_rate``, ``momentum``, ``weight_decay``
        and ``epochs`` from ``self.config``.  Unless ``self.args.restart`` is
        set, a checkpoint is loaded before training.  One validation pass runs
        before the first epoch and one after every epoch.
        """
        cfg = self.config
        batch_size = cfg['batch_size']
        learning_rate = cfg['learning_rate']

        # Create both networks on the GPU.
        self.encoder = model.encoder().cuda()
        self.decoder = model.decoder().cuda()

        self.logger.debug('Encoder Architecture')
        summary(self.encoder, (3, 224, 224), batch_size=batch_size)

        self.logger.debug('Decoder Architecture')
        summary(self.decoder, (512, 14, 14), batch_size=batch_size)

        # A single SGD optimizer covers encoder and decoder parameters.
        model_params = [
            *self.encoder.parameters(),
            *self.decoder.parameters(),
        ]
        self.optm = torch.optim.SGD(
            model_params,
            lr=learning_rate,
            momentum=cfg['momentum'],
            weight_decay=cfg['weight_decay'],
        )

        # Resume from a checkpoint unless a restart was requested.
        if not self.args.restart:
            self.load_checkpoint()

        # Global training index derived from the current epoch.
        self.gidx = self.epoch * len(self.dataset_train)

        # Validation pass before any training in this run.
        self.valid = DataObject()
        self.run_valid()

        total_epochs = cfg['epochs']
        for _ in range(self.epoch, total_epochs):
            utils.adjust_learning_rate(learning_rate, self.optm, self.epoch)

            # Fresh per-epoch containers for train and validation data.
            self.train = DataObject()
            self.run_train()

            self.valid = DataObject()
            self.run_valid()

            self.epoch += 1
예제 #10
0
def main(batch_size, train_df, trainLoader, embedding_dim, hidden_size, hidden_layer, index_to_word):
    """Train the captioning encoder/decoder for 10 epochs with checkpoints.

    Every step draws new indices with ``randomSelect``, installs them as the
    loader's sampler and trains on one batch.  On every 100th step (step 0
    excluded) the encoder and decoder state dicts are written to
    ``loss_folder/`` under the current epoch number.
    """
    iteration = 10
    vocab_size = len(index_to_word) + 1

    enc = encoder(embedding_dim, batch_size)
    dec = decoder(vocab_size, embedding_dim, hidden_layer, hidden_size)

    # Only the encoder's ``dense`` sub-module is optimized with the decoder.
    trainable = list(enc.dense.parameters()) + list(dec.parameters())
    criteria = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(trainable, lr=0.001)

    # NOTE(review): ``train_size`` is not defined in this function; it must
    # come from module scope — confirm.
    total_steps = int(np.ceil(train_size / batch_size))
    caption_len = captionLength(train_df)

    for epoch in range(iteration):
        total_loss = 0.0
        for step in range(total_steps):
            # Swap in a freshly sampled set of indices for this batch.
            train_indices, _ = randomSelect(caption_len, batch_size)
            trainLoader.batch_sampler.sampler = (
                torch.utils.data.SubsetRandomSampler(train_indices))
            original_img, caption = next(iter(trainLoader))

            enc.zero_grad()
            dec.zero_grad()

            features = enc(original_img)
            # NOTE(review): the features are cast to long before decoding —
            # confirm this is intended.
            prediction = dec(features.long(), caption)

            n_tokens = caption.size(0) * caption.size(1)
            loss = criteria(prediction.view(n_tokens, -1), caption.view(-1))
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            stats = "[%d/%d] Loss: %.4f, Perplexity: %5.4f " % (
                step, iteration, loss_value, np.exp(loss_value))
            print("\r" + stats, end="")
            sys.stdout.flush()
            total_loss += loss_value

            if step != 0 and step % 100 == 0:
                encoder_checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': enc.state_dict(),
                    'loss': total_loss / 100,
                }
                torch.save(encoder_checkpoint,
                           "loss_folder/encoder_" + str(epoch) + ".pth")
                decoder_checkpoint = {'model_state_dict': dec.state_dict()}
                torch.save(decoder_checkpoint,
                           "loss_folder/decoder_" + str(epoch) + ".pth")
                total_loss = 0.0
                print("\r" + stats)
예제 #11
0
    def __init__(self,
                 n_dim=2,
                 batch_size=100,
                 epochs=10,
                 log_freq=100,
                 results_path='./results',
                 make_gif=False):
        """Store settings, create output folders, load data and build models.

        Args:
            n_dim: Passed to ``encoder`` as ``n_dim``.
            batch_size: Batch size of the shuffled training dataset.
            epochs: Stored on the instance as ``self.epochs``.
            log_freq: Stored on the instance as ``self.log_freq``.
            results_path: Root folder for results; images go to
                ``<results_path>/imges`` and GIFs to ``<results_path>/gif``.
            make_gif: When true, the ``gif`` folder is created as well.
        """
        self.n_dim = n_dim
        self.batch_size = batch_size
        self.epochs = epochs
        self.log_freq = log_freq
        self.results_path = results_path
        self.results_img_path = results_path + "/imges"
        self.make_gif = make_gif

        # ``exist_ok=True`` replaces the check-then-create pair, which could
        # fail if the folder appeared between the check and the creation.
        os.makedirs(self.results_img_path, exist_ok=True)
        if self.make_gif:
            os.makedirs(self.results_path + "/gif", exist_ok=True)

        # Data: load_data() is expected to set x_train/y_train/x_test/y_test.
        self.load_data()
        self.dataset_train = tf.data.Dataset.from_tensor_slices(
            (self.x_train, self.y_train))
        self.dtrain_shuffle = self.dataset_train.shuffle(
            self.x_train.shape[0]).batch(self.batch_size)
        self.dataset_test = tf.data.Dataset.from_tensor_slices(
            (self.x_test, self.y_test))
        # The test set is always batched in chunks of 1000.
        self.dtest_shuffle = self.dataset_test.shuffle(
            self.x_test.shape[0]).batch(1000)

        # Models
        self.encoder = encoder(n_dim=self.n_dim)
        self.decoder = decoder()
        self.discriminator = discriminator()

        # Optimizers: one each for autoencoder, generator and discriminator.
        self.ae_opt = tf.keras.optimizers.Adam(0.0001)
        self.gen_opt = tf.keras.optimizers.Adam(0.0001, beta_1=0, beta_2=0.9)
        self.disc_opt = tf.keras.optimizers.Adam(0.0001, beta_1=0, beta_2=0.9)
        self.loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
예제 #12
0
def play(text, batch_size=1):
    """Predict the word that follows ``text`` using the language model.

    The text is lower-cased, digit runs are replaced by ``N``, punctuation is
    stripped (``.`` and ``'`` are kept; curly quotes and the em dash are
    removed), and common contractions are split off with a leading space
    before tokenization.

    Args:
        text: Raw input text.
        batch_size: Batch size used to initialise the hidden state.

    Returns:
        The vocabulary word with the highest log-probability at the last
        position.
    """
    model.eval()
    text = text.lower()
    # Raw string: ``'\d+'`` in a normal literal is an invalid escape sequence.
    text = re.sub(r'\d+', 'N', text)
    # Swap "." out of the strip set for the typographic characters, then drop
    # "'" from it so contractions survive.
    punc = string.punctuation.replace(".", "’—“”")
    punc = punc.replace("'", "")
    text = text.translate(str.maketrans('', '', punc))
    text = text.replace("n't", " n't")
    text = text.replace("'s", " 's")
    text = text.replace("'ve", " 've")
    text = text.replace("'d", " 'd")
    text = text.replace("'ll", " 'll")
    # NOTE(review): ``.cuda()`` is unconditional, so a GPU is required.
    data = new_tokenize(text).unsqueeze(1).cuda()
    hidden = model.init_hidden(batch_size)
    output, hidden = model(data, hidden)
    logits = model.decoder(output)
    logProba = nn.functional.log_softmax(logits, dim=1)
    pred_idxs = torch.argmax(logProba, dim=1)
    # Only the last prediction is needed, so look up just that index.
    next_word = corpus.dictionary.idx2word[pred_idxs[-1]]
    return next_word
예제 #13
0
def evaluate(data_source, corpus, batch_size=10, ood=False):
    """Return the mean loss and per-segment anomaly scores for ``data_source``.

    Each ``args.bptt`` segment is run with a freshly initialised hidden state.
    When ``ood`` is ``True`` the loop stops once the offset reaches
    ``ood_num_examples // test_batch_size``.

    Returns:
        ``(loss_accum / (len(data_source) // args.bptt), losses)`` where
        ``losses`` holds one anomaly score per evaluated segment.
    """
    # Evaluation mode disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()

    loss_accum = 0
    losses = []
    # Not used below; kept so the ``corpus.dictionary`` access still happens.
    ntokens = len(corpus.dictionary)

    for offset in range(0, data_source.size(0) - 1, args.bptt):
        if (offset >= ood_num_examples // test_batch_size) and (ood is True):
            break

        hidden = repackage_hidden(model.init_hidden(batch_size))

        data, targets = get_batch(data_source, offset, args, evaluation=True)
        output, hidden = model(data, hidden)

        logits = model.decoder(output)
        # Subtract the row maximum before the softmax.
        row_max = torch.max(logits, dim=1, keepdim=True)[0]
        smaxes = F.softmax(logits - row_max, dim=1)

        # Probability assigned to each target, then its mean log.
        target_probs = smaxes[range(targets.size(0)), targets]
        mean_log_prob = torch.log(target_probs).mean(0)
        loss_accum += -mean_log_prob.data.cpu().numpy()[0]

        # Experimental: negative KL to uniform as the anomaly score.
        weighted_log = smaxes.add(1e-18).log() * uniform_base_rates.unsqueeze(0)
        anomaly_score = weighted_log.sum(1).mean(0)
        losses.append(anomaly_score.data.cpu().numpy()[0])

    n_segments = len(data_source) // args.bptt
    return loss_accum / n_segments, losses
예제 #14
0
def test( batch_size, df, testLoader, index_to_word):
    """Caption the first batch of ``testLoader`` and show its first image.

    Loads the ``loss_folder/encoder_2.pth`` and ``loss_folder/decoder_2.pth``
    checkpoints, samples up to 27 tokens, prints the caption without the
    START/END markers and index 0, and displays the image.
    """
    # Build both networks in eval mode, then restore their weights.
    enc = encoder(512, batch_size)
    enc.eval()
    dec = decoder(len(index_to_word) + 1, 512, 1, 512)
    dec.eval()

    enc_state = torch.load("loss_folder/encoder_2.pth")
    dec_state = torch.load("loss_folder/decoder_2.pth")
    enc.load_state_dict(enc_state["model_state_dict"])
    dec.load_state_dict(dec_state["model_state_dict"])

    img, caption = next(iter(testLoader))
    print(img.shape)
    caption = caption[0]

    features = enc(img).unsqueeze(1)
    output = dec.sample(features.float(), 27)

    def _is_content(word):
        # Drop the START/END markers and index 0.
        tag = index_to_word.get(word)
        return tag != "START" and tag != "END" and word != 0

    sent = "".join(index_to_word[word] + "  "
                   for word in output if _is_content(word))
    print(sent)

    # Channels-first tensor -> channels-last array for matplotlib.
    plt.imshow(img[0].permute(1, 2, 0).detach().numpy())
    plt.show()
예제 #15
0
def main():
    """Translate the WMT16 test set with separately built encoder/decoder programs.

    Builds one fluid ``Program`` for the encoder and one for the decoder,
    loads their parameters from ``InferTaskConfig.model_path``, clones both
    programs for testing, and prints the post-processed output of
    ``translate_batch`` for every test batch.
    """
    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Encoder graph lives in its own Program.
    encoder_program = fluid.Program()
    with fluid.program_guard(main_program=encoder_program):
        enc_output = encoder(ModelHyperParams.src_vocab_size,
                             ModelHyperParams.max_length + 1,
                             ModelHyperParams.n_layer, ModelHyperParams.n_head,
                             ModelHyperParams.d_key, ModelHyperParams.d_value,
                             ModelHyperParams.d_model,
                             ModelHyperParams.d_inner_hid,
                             ModelHyperParams.dropout)

    # Decoder graph lives in a second, separate Program.
    decoder_program = fluid.Program()
    with fluid.program_guard(main_program=decoder_program):
        predict = decoder(ModelHyperParams.trg_vocab_size,
                          ModelHyperParams.max_length + 1,
                          ModelHyperParams.n_layer, ModelHyperParams.n_head,
                          ModelHyperParams.d_key, ModelHyperParams.d_value,
                          ModelHyperParams.d_model,
                          ModelHyperParams.d_inner_hid,
                          ModelHyperParams.dropout)

    # Load model parameters of encoder and decoder separately from the saved
    # transformer model.
    # Collect every op input name, keep those that resolve to Parameters.
    # NOTE(review): under Python 3 ``filter``/``map`` return lazy iterators,
    # not lists — confirm ``load_vars`` accepts that.
    encoder_var_names = []
    for op in encoder_program.block(0).ops:
        encoder_var_names += op.input_arg_names
    encoder_param_names = filter(
        lambda var_name: isinstance(
            encoder_program.block(0).var(var_name), fluid.framework.Parameter),
        encoder_var_names)
    encoder_params = map(encoder_program.block(0).var, encoder_param_names)
    decoder_var_names = []
    for op in decoder_program.block(0).ops:
        decoder_var_names += op.input_arg_names
    decoder_param_names = filter(
        lambda var_name: isinstance(
            decoder_program.block(0).var(var_name), fluid.framework.Parameter),
        decoder_var_names)
    decoder_params = map(decoder_program.block(0).var, decoder_param_names)
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=encoder_params)
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=decoder_params)

    # This is used here to set dropout to the test mode.
    encoder_program = encoder_program.clone(for_test=True)
    decoder_program = decoder_program.clone(for_test=True)

    # Batched reader over the WMT16 test split.
    test_data = paddle.batch(paddle.dataset.wmt16.test(
        ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size),
                             batch_size=InferTaskConfig.batch_size)

    # Reverse dictionary (index -> word) for the "de" side.
    trg_idx2word = paddle.dataset.wmt16.get_dict(
        "de", dict_size=ModelHyperParams.trg_vocab_size, reverse=True)

    def post_process_seq(seq,
                         bos_idx=ModelHyperParams.bos_idx,
                         eos_idx=ModelHyperParams.eos_idx,
                         output_bos=InferTaskConfig.output_bos,
                         output_eos=InferTaskConfig.output_eos):
        """
        Post-process the beam-search decoded sequence. Truncate from the first
        <eos> and remove the <bos> and <eos> tokens currently.
        """
        # Default to keeping the whole sequence when no <eos> is present.
        eos_pos = len(seq) - 1
        for i, idx in enumerate(seq):
            if idx == eos_idx:
                eos_pos = i
                break
        seq = seq[:eos_pos + 1]
        # <bos>/<eos> are kept only when the matching output_* flag is set.
        return filter(
            lambda idx: (output_bos or idx != bos_idx) and \
                (output_eos or idx != eos_idx),
            seq)

    for batch_id, data in enumerate(test_data()):
        # Only item[0] of each sample is passed on as translation input.
        batch_seqs, batch_scores = translate_batch(
            exe,
            [item[0] for item in data],
            encoder_program,
            encoder_data_input_fields + encoder_util_input_fields,
            [enc_output.name],
            decoder_program,
            decoder_data_input_fields[:-1] + decoder_util_input_fields +
            (decoder_data_input_fields[-1], ),
            [predict.name],
            InferTaskConfig.beam_size,
            InferTaskConfig.max_length,
            InferTaskConfig.n_best,
            len(data),
            ModelHyperParams.n_head,
            ModelHyperParams.d_model,
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.bos_idx,
            ModelHyperParams.eos_idx,
            ModelHyperParams.unk_idx,
            output_unk=InferTaskConfig.output_unk)
        for i in range(len(batch_seqs)):
            # Post-process the beam-search decoded sequences.
            seqs = map(post_process_seq, batch_seqs[i])
            scores = batch_scores[i]
            # Print each candidate as space-separated target words.
            for seq in seqs:
                print(" ".join([trg_idx2word[idx] for idx in seq]))
예제 #16
0
def main():
    """Train an AdaIN style-transfer model with MNIST content and MNIST-M style.

    Loads both datasets, builds the encoder / AdaIN / decoder graph with a
    content loss plus 0.01 times the style loss, and trains for ``EPISODE``
    passes over the content images.  Every 100 batches, 10 test content
    images are evaluated and the merged summaries are written to ``logs/``.
    """
    ## prepare data
    # load MNIST and MNIST-M
    (m_train, m_train_y), (m_test, m_test_y) = tf.keras.datasets.mnist.load_data()
    # Use a context manager so the pickle file handle is closed right after
    # loading instead of being left to the garbage collector.
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # a trusted ``mnistm_data.pkl``.
    with open('data/mnistm_data.pkl', 'rb') as mnistm_file:
        mm = pkl.load(mnistm_file)
    mm_train, mm_train_y = mm['train'], mm['train_label']

    # Alternative setup kept for reference:
    # #  keep numbers 0-4 in MNIST as content, and numbers 5-9 in MNIST-M as style
    # content_image = m_train[m_train_y < 5, ...]
    # content_image_y = m_train_y[m_train_y < 5]
    # content_image = resize_image(content_image, size=(32, 32))
    # content_image = np.repeat(content_image[..., np.newaxis], 3, axis=-1)
    # test_content_image = m_train[m_train_y >= 5, ...]

    # style_image = mm_train[mm_train_y >= 5, ...]
    # style_image_y = mm_train[mm_train_y >= 5]
    # style_image, style_image_y = generate_few_shot_style(style_image, style_image_y, num_sample=5)
    # style_image = resize_image(style_image, size=(32, 32))

    # use all train data in MNIST as content image, and all train data in MNIST-M as style image
    content_image = resize_image(m_train, size=(32, 32))
    # Repeat the single channel three times to match the 3-channel input.
    content_image = np.repeat(content_image[..., np.newaxis], 3, axis=-1)
    test_content_image = m_test

    style_image = resize_image(mm_train, size=(32, 32))

    ## prepare model
    # inputs placeholder
    c_img = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    s_img = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])

    # establish model: the same encoder is reused for content, style and
    # the stylised output.
    c_encode, _ = encoder(c_img)
    s_encode, s_layers = encoder(s_img, reuse=True)

    c_adain_encode = adain(c_encode, s_encode)
    styled_img = decoder(c_adain_encode)
    styled_encode, styled_layers = encoder(styled_img, reuse=True)

    # loss
    content_loss = compute_content_loss(styled_encode, c_adain_encode)
    style_loss = compute_style_loss(styled_layers, s_layers)
    total_loss = content_loss + 0.01 * style_loss

    # optimizer
    optimizer = tf.train.AdamOptimizer(1e-4)
    train_op = optimizer.minimize(total_loss)

    model_summary()

    ## training
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        # Creates a file writer for the log directory.
        logdir = "logs/"
        file_writer = tf.summary.FileWriter(logdir, sess.graph)

        # store variables
        tf.summary.image("Content image", c_img, max_outputs=10)
        tf.summary.image("Style image", s_img, max_outputs=10)
        tf.summary.image("Styled image", styled_img, max_outputs=10)
        tf.summary.scalar("Content loss", content_loss)
        tf.summary.scalar("Style loss", style_loss)
        tf.summary.scalar("Total loss", total_loss)
        merged = tf.summary.merge_all()

        sess.run(init)

        # total number of data
        num_data = content_image.shape[0]
        batch_size = 8
        num_batch = num_data // batch_size

        for i_episode in range(EPISODE):
            # shuffle data (in place)
            np.random.shuffle(content_image)
            np.random.shuffle(style_image)

            for i_batch in range(num_batch):
                # get a batch of content, scaled to [0, 1]
                c_image = content_image[i_batch*batch_size: (i_batch+1)*batch_size, ...]
                c_image = c_image / 255

                # random sample a batch of style
                idx = np.random.choice(style_image.shape[0], batch_size, replace=False)
                s_image = style_image[idx, ...]
                s_image = s_image / 255

                # training
                _, train_loss = sess.run([train_op, total_loss], feed_dict={
                    c_img: c_image,
                    s_img: s_image
                })

                if i_batch % 100 == 0:
                    # evaluation on test content image
                    np.random.shuffle(test_content_image)

                    test_c_image = test_content_image[:10, ...]
                    test_c_image = resize_image(test_c_image, size=(32, 32))
                    test_c_image = np.repeat(test_c_image[..., np.newaxis], 3, axis=-1)
                    test_c_image = test_c_image / 255

                    test_s_image = style_image[:10, ...] / 255

                    summary, test_loss = sess.run([merged, total_loss], feed_dict={
                        c_img: test_c_image,
                        s_img: test_s_image
                    })

                    # log all variables at the global batch index
                    file_writer.add_summary(summary, global_step=i_episode * num_batch + i_batch)

                    print('Episode: %d, batch: %d, training cost: %g, test cost: %g' %
                          (i_episode, i_batch, train_loss, test_loss))

        file_writer.close()
예제 #17
0
def train(base_rates):
    """Train ``model`` for one pass over ``train_data`` with an Outlier Exposure term.

    Every step takes one ``args.bptt``-long segment from ``train_data`` and the
    segment at the same offset from ``oe_dataset``, concatenates them along
    dim 1 and runs them through ``model`` in a single forward pass.  The first
    half of the last layer's dropped hidden states feeds the language-model
    loss (``criterion``) plus the optional activation regularizers; the second
    half is decoded and compared against ``base_rates`` with a cross-entropy
    term, which is added with weight 0.5 when ``args.use_OE == 'yes'``.

    Args:
        base_rates: sequence of floats accepted by ``torch.FloatTensor``.  It
            is expanded to the shape of the softmax over the OE logits and
            used as the target distribution of the OE cross-entropy.

    Returns:
        None.  A progress line is printed every ``args.log_interval`` batches.

    Note:
        Relies on the module-level names ``args``, ``model``, ``optimizer``,
        ``criterion``, ``params``, ``train_data``, ``oe_dataset``,
        ``get_batch``, ``repackage_hidden`` and ``epoch``.
    """
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    # Running sums are kept as Python floats (see the .item() calls below).
    total_loss = 0.0
    total_oe_loss = 0.0
    start_time = time.time()
    batch = 0

    # indices for randomizing order of segments
    # NOTE(review): neither index array is read below.  The shuffles are kept
    # only because they advance NumPy's global RNG, which later code may rely
    # on for reproducibility.
    train_indices = np.arange(train_data.size(0) // args.bptt)
    np.random.shuffle(train_indices)

    oe_indices = np.arange(oe_dataset.size(0) // args.bptt)
    np.random.shuffle(oe_indices)

    seq_len = args.bptt

    # Target distribution for the OE term; built lazily on the first trained
    # batch and reused afterwards.
    br = None

    for i in range(
            0, train_data.size(0), args.bptt
    ):  # Assume OE dataset is larger. It is, because we're using wikitext-2.

        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        data_oe, _ = get_batch(oe_dataset, i, args, seq_len=seq_len)

        # Don't train on this batch if the sequence lengths are different
        # (happens at end of epoch).  This is checked before the learning rate
        # is touched so that a skipped batch leaves the optimizer unchanged.
        if data.size(0) != data_oe.size(0):
            continue

        # Temporarily rescale the learning rate by the segment length; the
        # saved value is restored after the optimizer step.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt

        # We need a new hidden state for each segment, because this makes evaluation easier and more meaningful.
        hidden = model.init_hidden(2 * args.batch_size)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(
            torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        # Split the joint pass back into its in-distribution and OE halves.
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output,
                             targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                              for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta *
                              (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                              for rnn_h in rnn_hs[-1:])

        # OE loss
        logits_oe = model.decoder(output_oe)
        # Subtracting the per-position maximum before the softmax keeps the
        # exponentials in a safe numeric range without changing the result.
        smaxes_oe = F.softmax(logits_oe -
                              torch.max(logits_oe, dim=-1, keepdim=True)[0],
                              dim=-1)
        if br is None:
            br = Variable(
                torch.FloatTensor(base_rates).unsqueeze(0).unsqueeze(0).
                expand_as(smaxes_oe)).cuda()
        loss_oe = -(smaxes_oe.log() * br).sum(-1)  # for cross entropy
        loss_oe = loss_oe.mean()  # for ERM

        if args.use_OE == 'yes':
            loss_bp = loss + 0.5 * loss_oe
        else:
            loss_bp = loss

        optimizer.zero_grad()
        loss_bp.backward()

        # Gradient-norm clipping helps prevent the exploding gradient problem
        # in RNNs / LSTMs.  `clip_grad_norm_` is the in-place successor of the
        # deprecated `clip_grad_norm`.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        # `.item()` yields a plain float.  Accumulating the 0-dim tensors and
        # indexing the sum with [0] raises IndexError on PyTorch >= 0.4.
        total_loss += raw_loss.item()
        total_oe_loss += loss_oe.item()
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            cur_oe_loss = total_oe_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | oe_loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.
                format(epoch, batch,
                       len(train_data) // args.bptt,
                       optimizer.param_groups[0]['lr'],
                       elapsed * 1000 / args.log_interval, cur_loss,
                       cur_oe_loss, math.exp(cur_loss),
                       cur_loss / math.log(2)))
            total_loss = 0.0
            total_oe_loss = 0.0
            start_time = time.time()
        ###
        batch += 1
예제 #18
0
################
# Build the VAE graph from scratch: inputs -> encoder -> sampled codes -> decoder.
img_shape = [32, 32, 3]
tf.reset_default_graph()
# Batch of images; the leading (batch) dimension is left unspecified.
inputs = tf.placeholder(tf.float32,
                        shape=[None] + img_shape,
                        name='encoder_input')
# Min-max normalisation using the minimum and maximum taken over the whole
# fed tensor (not per image): (inputs - min) / (max - min).
inputs_norm = tf.div(tf.subtract(inputs, tf.reduce_min(inputs)),
                     tf.subtract(tf.reduce_max(inputs), tf.reduce_min(inputs)))
# NOTE(review): named `drop_prob` but defaults to 1.0 -- presumably used as a
# keep probability by the model functions; confirm against `model`.
drop_prob = tf.placeholder_with_default(1.0, shape=())
## ENCODER
# NOTE(review): the encoder is fed the raw `inputs`; `inputs_norm` is not
# used anywhere in the visible code.
means, log_scales = model.gaussian_encoder(inputs, FLAGS.latent_size,
                                           drop_prob)  # (?, 4, 4, 8)
codes = model.gaussian_sample(means, log_scales)  # (?, 4, 4, 8)
# Identity op only gives the tensor a stable name in the graph.
tf.identity(codes, name='encoder_output')
## DECODER
outputs = model.decoder(codes, drop_prob)
tf.identity(outputs, name='decoder_output')

# calculate loss with learnable parameter for output log_scale
with tf.name_scope('loss') as scope:
    reconstruction_loss, latent_loss = util.vae_loss(inputs, outputs, means,
                                                     log_scales, 'bernoulli')
    # The latent term is averaged here; the reconstruction term is used as
    # returned by util.vae_loss.
    total_loss = reconstruction_loss + tf.reduce_mean(latent_loss)

################
# Training VAE #
################
# Scalar, non-trainable step counter initialised to zero.
global_step_tensor = tf.get_variable('global_step',
                                     trainable=False,
                                     shape=[],
                                     initializer=tf.zeros_initializer)
예제 #19
0
# Boolean CLI flag: args.draw is True only when --draw is passed.
argparser.add_argument('--draw',
                       action='store_true',
                       help='whether draw output')

args = argparser.parse_args()

# Checkpoint to restore: model/<args.model>.ckpt
model_path = os.path.join('model', args.model + '.ckpt')

# Input placeholder holding 28 * 28 values per example.
x = tf.placeholder(tf.float32, [None, 28 * 28])
global_step = tf.Variable(0, name='global_step', trainable=False)

mnist = read_data_sets('tmp/MNIST_data')

with tf.Session() as sess:
    # Encode x into c and decode it back into x_; the second return value of
    # each model call is discarded.
    c, _ = model.encoder(x)
    x_, _ = model.decoder(c)
    loss = model.loss(x, x_)

    # Initialise every variable first, then restore the checkpoint on top.
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    saver.restore(sess, model_path)
    print('"%s" loaded' % (model_path))

    # Single evaluation pass over all MNIST test images.
    eval_x_, eval_loss, step = sess.run([x_, loss, global_step],
                                        feed_dict={x: mnist.test.images})
    print('loss: %g' % (eval_loss))

    if args.draw:
        # Output directory: tmp/<args.model>/<restored global step>
        dirpath = os.path.join('tmp', args.model, str(step))
        # NOTE(review): the body of the following `if` is missing from the
        # file at this point (the snippet is cut off).
        if not os.path.exists(dirpath):
def train(z_dim=None, model_name=None):
    """
    Build and train the adversarial autoencoder with a Wasserstein-style
    critic and gradient penalty.

    The function constructs the TensorFlow graph (encoder, decoder,
    discriminator, losses, optimizers and summaries) and then, for ``EPOCHS``
    epochs, runs the training updates on ``X_train`` followed by a loss-only
    pass over ``y_train``.  Losses are printed and appended to ``log.txt``
    under ``log_path``; every 50 training batches a checkpoint is saved and a
    TensorBoard summary is written.

    :param z_dim: size of the latent code; it is the second dimension of the
        ``Real_distribution`` and ``Decoder_input`` placeholders and part of
        the dataset task name.
    :param model_name: only printed together with the periodic training report.
    :return: does not return anything
    """
    X_train, y_train = datasets.create_datasets(retrain=0,
                                                task="aae_wgan_" + str(z_dim),
                                                num_aug=0)

    batch_size = BATCH_SIZE
    # The last axis of X_train is used as both image height and width below.
    input_dim = X_train.shape[-1]

    with tf.device("/gpu:0"):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        # All placeholders have a fixed batch dimension of `batch_size`.
        x_input = tf.placeholder(dtype=tf.float32,
                                 shape=[batch_size, input_dim, input_dim, 1],
                                 name='Input')
        x_target = tf.placeholder(dtype=tf.float32,
                                  shape=[batch_size, input_dim, input_dim, 1],
                                  name='Target')
        real_distribution = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, z_dim],
                                           name='Real_distribution')
        # NOTE(review): only referenced by the commented-out `decoder_image`
        # line below.
        decoder_input = tf.placeholder(dtype=tf.float32,
                                       shape=[1, z_dim],
                                       name='Decoder_input')

        # The first call of each network uses reuse=False; every later call
        # passes reuse=True.  The *_test tensors are built with is_train=False.
        encoder_output = encoder(x_input, reuse=False, is_train=True)
        encoder_output_test = encoder(x_input, reuse=True, is_train=False)
        d_fake, d_fake_logits = discriminator(encoder_output, reuse=False)
        d_real, d_real_logits = discriminator(real_distribution, reuse=True)

        # NOTE(review): the "test" critic outputs are computed from
        # `encoder_output` (is_train=True), not `encoder_output_test`.
        d_fake_test, d_fake_logits_test = discriminator(encoder_output,
                                                        reuse=True)
        d_real_test, d_real_logits_test = discriminator(real_distribution,
                                                        reuse=True)

        decoder_output, std = decoder(encoder_output,
                                      reuse=False,
                                      is_train=True)
        # Re-encode the reconstruction; used for the latent consistency loss.
        encoder_output_z = encoder(decoder_output, reuse=True, is_train=False)
        # NOTE(review): the test decoder is also fed `encoder_output`
        # (is_train=True) rather than `encoder_output_test`.
        decoder_output_test, std_ = decoder(encoder_output,
                                            reuse=True,
                                            is_train=False)
        encoder_output_z_test = encoder(decoder_output_test,
                                        reuse=True,
                                        is_train=False)

        #decoder_image = decoder(decoder_input, reuse=True, is_train=False)

        # Autoencoder loss
        # Per-example L2 distance between reconstruction and target, averaged
        # over the batch; 1e-8 keeps the square root away from zero.
        # summed = tf.reduce_mean(tf.square(decoder_output-x_target),[1,2,3])
        summed = tf.reduce_sum(tf.square(decoder_output - x_target), [1, 2, 3])
        # sqrt_summed = summed
        sqrt_summed = tf.sqrt(summed + 1e-8)
        autoencoder_loss = tf.reduce_mean(sqrt_summed)

        summed_test = tf.reduce_sum(tf.square(decoder_output_test - x_target),
                                    [1, 2, 3])
        # sqrt_summed_test = summed_test
        sqrt_summed_test = tf.sqrt(summed_test + 1e-8)
        autoencoder_loss_test = tf.reduce_mean(sqrt_summed_test)

        # l2 loss of z
        # Squared distance between the code of the input and the code of its
        # reconstruction, summed over the latent axis and batch-averaged.
        enc = tf.reduce_sum(tf.square(encoder_output - encoder_output_z), [1])
        encoder_l2loss = tf.reduce_mean(enc)
        enc_test = tf.reduce_sum(
            tf.square(encoder_output_test - encoder_output_z_test), [1])
        encoder_l2loss_test = tf.reduce_mean(enc_test)

        # Critic loss: mean difference between the logits on prior samples and
        # on encoder outputs (the gradient penalty is added below).
        dc_loss = tf.reduce_mean(d_real_logits - d_fake_logits)
        dc_loss_test = tf.reduce_mean(d_real_logits_test - d_fake_logits_test)

        with tf.name_scope("Gradient_penalty"):
            # `eta` holds one interpolation coefficient per example.
            eta = tf.placeholder(tf.float32, shape=[batch_size, 1], name="Eta")
            interp = eta * real_distribution + (1 - eta) * encoder_output
            _, c_interp = discriminator(interp, reuse=True)

            # taking the zeroth and only element because tf.gradients returns a list
            c_grads = tf.gradients(c_interp, interp)[0]

            # L2 norm, reshaping to [batch_size]
            slopes = tf.sqrt(tf.reduce_sum(tf.square(c_grads), axis=[1]))
            tf.summary.histogram("Critic gradient L2 norm", slopes)

            # Penalise the deviation of the gradient norm from 1, weight 10.
            grad_penalty = tf.reduce_mean((slopes - 1)**2)
            lambd = 10.0
            dc_loss += lambd * grad_penalty

        # Generator loss
        # generator_loss = tf.reduce_mean(
        #    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake_logits))
        generator_loss = tf.reduce_mean(d_fake_logits)
        generator_loss_test = tf.reduce_mean(d_fake_logits_test)

        all_variables = tf.trainable_variables()
        dc_var = tl.layers.get_variables_with_name('Discriminator', True, True)
        en_var = tl.layers.get_variables_with_name('Encoder', True, True)
        #print en_var
        # dc_var = [var for var in all_variables if 'dc' in var.name]
        # en_var = [var for var in all_variables if 'encoder' in var.name]
        # Only the gradient w.r.t. the FIRST variable of each list is kept;
        # these tensors are used for the histogram summaries below.
        var_grad_autoencoder = tf.gradients(autoencoder_loss, all_variables)[0]
        var_grad_discriminator = tf.gradients(dc_loss, dc_var)[0]
        var_grad_generator = tf.gradients(generator_loss, en_var)[0]

        # Optimizers
        with tf.device("/gpu:0"):
            # No var_list is given for the two autoencoder optimizers, so
            # TensorFlow's default variable collection is used for them.
            autoencoderl2_optimizer = tf.train.AdamOptimizer(
                learning_rate=LR, beta1=0.5,
                beta2=0.9).minimize(autoencoder_loss + 0.5 * encoder_l2loss)
            # NOTE(review): only referenced in a commented-out line of the
            # training loop below.
            autoencoder_optimizer = tf.train.AdamOptimizer(
                learning_rate=LR, beta1=0.5,
                beta2=0.9).minimize(autoencoder_loss)
            discriminator_optimizer = tf.train.AdamOptimizer(
                learning_rate=LR, beta1=0.5,
                beta2=0.9).minimize(dc_loss, var_list=dc_var)
            generator_optimizer = tf.train.AdamOptimizer(learning_rate=LR,
                                                         beta1=0.5,
                                                         beta2=0.9).minimize(
                                                             generator_loss,
                                                             var_list=en_var)

            tl.layers.initialize_global_variables(sess)
        # Reshape images to display them
        input_images = tf.reshape(x_input, [-1, input_dim, input_dim, 1])
        generated_images = tf.reshape(decoder_output,
                                      [-1, input_dim, input_dim, 1])
        # generated_images = tf.reshape(decoder_output, [-1, 28, 28, 1])
        tensorboard_path, saved_model_path, log_path, folder_name = form_results(
        )
        # bp()
        writer = tf.summary.FileWriter(logdir=tensorboard_path,
                                       graph=sess.graph)
        # Tensorboard visualization
        tf.summary.scalar(name='Autoencoder Loss', tensor=autoencoder_loss)
        tf.summary.scalar(name='Autoencoder Test Loss',
                          tensor=autoencoder_loss_test)
        tf.summary.scalar(name='Discriminator Loss', tensor=dc_loss)
        tf.summary.scalar(name='Generator Loss', tensor=generator_loss)
        tf.summary.scalar(name='Autoencoder z Loss', tensor=encoder_l2loss)
        tf.summary.histogram(name='Encoder Distribution',
                             values=encoder_output)
        tf.summary.histogram(name='Real Distribution',
                             values=real_distribution)
        tf.summary.histogram(name='Gradient AE', values=var_grad_autoencoder)
        tf.summary.histogram(name='Gradient D', values=var_grad_discriminator)
        tf.summary.histogram(name='Gradient G', values=var_grad_generator)
        tf.summary.image(name='Input Images',
                         tensor=input_images,
                         max_outputs=10)
        tf.summary.image(name='Generated Images',
                         tensor=generated_images,
                         max_outputs=10)
        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver()
    # Saving the model

    # `step` counts training batches across all epochs; it is the global step
    # for checkpoints and summaries.
    step = 0
    # with tf.Session() as sess:
    # Record the run configuration at the top of the (appended) log file.
    with open(log_path + '/log.txt', 'a') as log:
        log.write("input_dim: {}\n".format(input_dim))
        log.write("z_dim: {}\n".format(z_dim))
        log.write("batch_size: {}\n".format(batch_size))
        log.write("\n")

    for i in range(EPOCHS):
        # ---- training pass over X_train ----
        b = 0
        # The all-zero `targets` array only satisfies the iterator's
        # signature; the target half of every batch is discarded.
        for batch in tl.iterate.minibatches(inputs=X_train,
                                            targets=np.zeros(X_train.shape),
                                            batch_size=batch_size,
                                            shuffle=True):
            # Standard-normal samples fed as the "real" latent distribution.
            z_real_dist = np.random.normal(0, 1, (batch_size, z_dim)) * 1.
            z_real_dist = z_real_dist.astype("float32")

            batch_x, _ = batch
            # Append a trailing channel axis to match the 4-D placeholders.
            batch_x = batch_x[:, :, :, np.newaxis]
            #lambda_x = np.max(lambda_grow_max / np.float(i), lambda_grow_max)
            # 1) One reconstruction (+ latent consistency) update.
            sess.run(autoencoderl2_optimizer,
                     feed_dict={
                         x_input: batch_x,
                         x_target: batch_x
                     })
            # 2) Critic updates: 10 * 20 = 200 per batch during the first 20
            #    epochs, 20 per batch afterwards.
            if i < 20:
                # sess.run(autoencoder_optimizer, feed_dict={x_input: batch_x, x_target: batch_x})
                for t in range(10):
                    for _ in range(20):
                        eta1 = np.random.rand(
                            batch_size,
                            1)  # sampling from uniform distribution
                        eta1 = eta1.astype("float32")
                        sess.run(discriminator_optimizer,
                                 feed_dict={
                                     x_input: batch_x,
                                     x_target: batch_x,
                                     real_distribution: z_real_dist,
                                     eta: eta1
                                 })
            else:
                # sess.run(autoencoderl2_optimizer, feed_dict={x_input: batch_x, x_target: batch_x})
                for _ in range(20):
                    eta1 = np.random.rand(
                        batch_size, 1)  # sampling from uniform distribution
                    eta1 = eta1.astype("float32")
                    sess.run(discriminator_optimizer,
                             feed_dict={
                                 x_input: batch_x,
                                 x_target: batch_x,
                                 real_distribution: z_real_dist,
                                 eta: eta1
                             })

            # 3) One generator (encoder) update.
            sess.run(generator_optimizer,
                     feed_dict={
                         x_input: batch_x,
                         x_target: batch_x
                     })
            # Every 50 batches: evaluate the losses on the current batch,
            # save a checkpoint and write the summary.  `eta1` is the last
            # value drawn in the critic loop above.
            if b % 50 == 0:
                a_loss, e_loss, d_loss, g_loss, a_grad, d_grad, g_grad, en_output, d_real_logits_, d_fake_logits_, de_output, summary = sess.run(
                    [
                        autoencoder_loss, encoder_l2loss, dc_loss,
                        generator_loss, var_grad_autoencoder,
                        var_grad_discriminator, var_grad_generator,
                        encoder_output, d_real_logits, d_fake_logits,
                        decoder_output, summary_op
                    ],
                    feed_dict={
                        x_input: batch_x,
                        x_target: batch_x,
                        real_distribution: z_real_dist,
                        eta: eta1
                    })
                print(model_name)
                saver.save(sess, save_path=saved_model_path, global_step=step)
                writer.add_summary(summary, global_step=step)

                print("Epoch: {}, iteration: {}".format(i, b))
                print("Autoencoder Loss: {}".format(a_loss))
                print("Autoencoder enc Loss: {}".format(e_loss))
                print("Discriminator Loss: {}".format(d_loss))
                print("Generator Loss: {}".format(g_loss))
                with open(log_path + '/log.txt', 'a') as log:
                    log.write("Epoch: {}, iteration: {}\n".format(i, b))
                    log.write("Autoencoder Loss: {}\n".format(a_loss))
                    log.write("Autoencoder enc Loss: {}\n".format(e_loss))
                    log.write("Discriminator Loss: {}\n".format(d_loss))
                    log.write("Generator Loss: {}\n".format(g_loss))
            b += 1
            step += 1

        # ---- loss-only pass over y_train (no optimizer is run) ----
        # NOTE(review): `y_train` is iterated as image data here; presumably
        # it is the held-out split returned by create_datasets -- confirm.
        b = 0
        for batch in tl.iterate.minibatches(inputs=y_train,
                                            targets=np.zeros(y_train.shape),
                                            batch_size=batch_size,
                                            shuffle=True):
            z_real_dist = np.random.normal(0, 1, (batch_size, z_dim)) * 1.
            z_real_dist = z_real_dist.astype("float32")
            batch_x, _ = batch
            batch_x = batch_x[:, :, :, np.newaxis]
            eta1 = np.random.rand(batch_size, 1)
            # Report the *_test losses every 20 batches, prefixed with "v_".
            if b % 20 == 0:
                a_loss, e_loss, d_loss, g_loss = sess.run(
                    [
                        autoencoder_loss_test, encoder_l2loss_test,
                        dc_loss_test, generator_loss_test
                    ],
                    feed_dict={
                        x_input: batch_x,
                        x_target: batch_x,
                        real_distribution: z_real_dist,
                        eta: eta1
                    })
                print("v_Epoch: {}, iteration: {}".format(i, b))
                print("v_Autoencoder Loss: {}".format(a_loss))
                print("v_Autoencoder enc Loss: {}".format(e_loss))
                print("v_Discriminator Loss: {}".format(d_loss))
                print("v_Generator Loss: {}".format(g_loss))
                with open(log_path + '/log.txt', 'a') as log:
                    log.write("v_Epoch: {}, iteration: {}\n".format(i, b))
                    log.write("v_Autoencoder Loss: {}\n".format(a_loss))
                    log.write("v_Autoencoder enc Loss: {}\n".format(e_loss))
                    log.write("v_Discriminator Loss: {}\n".format(d_loss))
                    log.write("v_Generator Loss: {}\n".format(g_loss))
예제 #21
0
def train(epoch):
    """Run one training epoch over ``train_loader`` and print progress.

    For every batch the images are encoded, the captions are decoded and a
    cross-entropy loss against the packed caption targets is back-propagated.
    Every ``args.log_interval`` batches the model is also evaluated on 10
    validation batches and a progress line is printed.  At the end the
    epoch-average training and validation losses are printed.

    Args:
        epoch: epoch number; only used in the printed messages.

    Returns:
        None.

    Note:
        Relies on the module-level names ``model``, ``train_loader``,
        ``optimizer``, ``args`` and ``evaluate``.
    """
    #  model is a class that inherits nn.Module
    # This puts the model in train mode as opposed to eval mode, so it knows which one to use.
    print("check 5")
    model.encoder.train()
    model.decoder.train()
    print("check 6")

    # The loss module holds no per-batch state, so it is built once instead of
    # once per iteration.
    criterion = torch.nn.CrossEntropyLoss()

    cum_train_loss = 0
    cum_val_loss = 0
    # For each batch of training images,
    for batch_idx, batch in enumerate(train_loader):
        # Read images and their target labels in the current batch.
        images, captions, lengths = Variable(batch[0]), Variable(
            batch[1]), batch[2]

        # Element 0 of the packed sequence is its flattened data tensor.
        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

        # Load the current training example in the CUDA core if available.
        # NOTE(review): only `images` is moved to the GPU here; confirm that
        # captions/targets end up on the matching device elsewhere.
        if args.cuda:
            images = images.cuda()

        features = model.encoder(images)
        output = model.decoder(features, captions, lengths)

        loss = criterion(output, targets)

        model.decoder.zero_grad()
        model.encoder.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain float.  Adding the loss tensor itself would chain
        # every batch's autograd history onto the running sum for the whole
        # epoch.
        train_loss = loss.item()
        cum_train_loss += train_loss

        # Print out the loss and accuracy on the first 10 batches of the validation set.
        #  adjusting the printing frequency by changing --log-interval option in the command-line.
        if batch_idx % args.log_interval == 0:
            # Compute the average validation loss and accuracy.
            # NOTE(review): if `evaluate` switches the model to eval mode it
            # is not switched back here -- confirm.
            val_loss = evaluate('val', n_batches=10)

            # Compute the number of examples in this batch.
            examples_this_epoch = batch_idx * len(images)

            # Compute the progress rate in terms of the batch.
            epoch_progress = 100. * batch_idx / len(train_loader)

            # Print out the training loss, validation loss, and accuracy with epoch information.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t'
                  'Train Loss: {:.6f}\tVal Loss:{:.6f}\t'.format(
                      epoch, examples_this_epoch, len(train_loader.dataset),
                      epoch_progress, train_loss, val_loss))
        # `val_loss` is refreshed only on logging batches (batch 0 always is
        # one); the latest value is re-added for every batch in between.
        cum_val_loss += val_loss
    avg_val_loss = cum_val_loss / (batch_idx + 1)
    avg_train_loss = cum_train_loss / (batch_idx + 1)
    print('Train Epoch: {}\t'
          'Avg Train Loss: {:.6f}\t Val Loss:{:.6f}\t'.format(
              epoch, avg_train_loss, avg_val_loss))
예제 #22
0
        orgin = tf.reshape(x, (x.shape[0], -1))
        reconstruct_loss = 0.0005*tf.reduce_mean(tf.square(orgin-decoded))
        total_loss = margin_loss+reconstruct_loss
    return total_loss


# Script entry point: builds the input pipeline, network, loss, summaries and
# train op in the default graph, then opens a session.
if __name__ == "__main__":
    g = tf.get_default_graph()
    ds, ds_val = mnist_dataset()
    iterator = ds.make_one_shot_iterator()
    next_x, next_y = iterator.get_next()
    # The placeholders default to the dataset iterator's output but can be
    # overridden through feed_dict; shapes are fixed to a batch of 100.
    batch_x = tf.placeholder_with_default(next_x, shape=[100, 28, 28, 1])
    batch_y = tf.placeholder_with_default(next_y, shape=[100, 10])
    logits, caps_out = capsnet(batch_x)
    decoded = decoder(caps_out, batch_y)
    """ define loss """
    loss = calc_loss(logits, caps_out, batch_x, batch_y, decoded)
    """ define summary """
    # NOTE(review): tf.metrics.accuracy returns (accuracy, update_op), so
    # `acc_op` is bound to the value tensor and `acc` to the update op --
    # confirm the naming is intended.
    acc_op, acc = tf.metrics.accuracy(tf.argmax(batch_y, -1), tf.argmax(logits, -1))
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('acc', acc)
    tf.summary.image('reconstruction_img', tf.reshape(decoded, (100, 28, 28, 1)))
    summ = tf.summary.merge_all()
    """ define train op """
    steps = tf.train.get_or_create_global_step(g)
    # Adam with its default hyper-parameters; `steps` is incremented on every
    # run of train_op.
    train_op = tf.train.AdamOptimizer().minimize(loss, global_step=steps)

    config = tf.ConfigProto()
    # Let TensorFlow grow its GPU memory allocation on demand.
    config.gpu_options.allow_growth = True
    # NOTE(review): the body of this `with` block is missing from the file at
    # this point (the snippet is cut off).
    with tf.Session(config=config) as sess:
예제 #23
0
# Initialize image batch
# Fixed-size buffers for a batch of 300x300 inputs: 3-channel image,
# per-class label map, 1-channel mask and 1-channel integer label index.
imBatch = Variable(torch.FloatTensor(opt.batchSize, 3, 300, 300) )
labelBatch = Variable(torch.FloatTensor(opt.batchSize, opt.numClasses, 300, 300) )
maskBatch = Variable(torch.FloatTensor(opt.batchSize, 1, 300, 300) )
labelIndexBatch = Variable(torch.LongTensor(opt.batchSize, 1, 300, 300) )

# Initialize network
# opt.isDilation takes precedence over opt.isSpp; the plain encoder/decoder
# pair is the fallback.
if opt.isDilation:
    encoder = model.encoderDilation()
    decoder = model.decoderDilation()
elif opt.isSpp:
    encoder = model.encoderSPP()
    decoder = model.decoderSPP()
else:
    encoder = model.encoder()
    decoder = model.decoder()

# Load the weights saved for epoch opt.epochId from opt.modelRoot and switch
# both networks to evaluation mode.
encoder.load_state_dict(torch.load('%s/encoder_%d.pth' % (opt.modelRoot, opt.epochId) ) )
decoder.load_state_dict(torch.load('%s/decoder_%d.pth' % (opt.modelRoot, opt.epochId) ) )
encoder = encoder.eval()
decoder = decoder.eval()

# Move network and containers to gpu
if not opt.noCuda:
    imBatch = imBatch.cuda(opt.gpuId )
    labelBatch = labelBatch.cuda(opt.gpuId )
    labelIndexBatch = labelIndexBatch.cuda(opt.gpuId )
    maskBatch = maskBatch.cuda(opt.gpuId )
    encoder = encoder.cuda(opt.gpuId )
    decoder = decoder.cuda(opt.gpuId )
예제 #24
0
파일: run.py 프로젝트: haebeom-lee/vae
# get data
xtr, ytr, xte, yte = mnist_1000(args.mnist_path)

# placeholders
# Input placeholder with 784 values per example.
x = tf.placeholder(tf.float32, [None, 784])
# NOTE(review): 1000 is hard-coded -- presumably the number of examples in
# each split returned by mnist_1000; confirm.
n_train_batches = int(1000/args.batch_size)
n_test_batches = int(1000/args.batch_size)

# models
# Both networks are built on the same placeholder; the test network is
# created with reuse=True.
net = autoencoder(x, args.zdim, True) # train
tnet = autoencoder(x, args.zdim, False, reuse=True) # test

# for visualization
# Separate latent placeholder so the decoder can be run on arbitrary codes.
z = tf.placeholder(tf.float32, [None, args.zdim])
tennet = encoder(x, args.zdim, reuse=True) # test encoder
tdenet = decoder(z, reuse=True) # test decoder

def train():
    """Set up the optimisation of ``net`` and open an initialised session.

    NOTE(review): only the setup is visible at this point of the file (train
    op, saver, log file and session); the function appears to be cut off
    before any training step is run.
    """
    loss = -net['elbo'] # negative ELBO

    global_step = tf.train.get_or_create_global_step()
    # Step-wise learning rate: 1e-3 until half of the total number of training
    # steps (n_train_batches * args.n_epochs / 2), 1e-4 afterwards.
    lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32),
            [int(n_train_batches*args.n_epochs/2)], [1e-3, 1e-4])
    train_op = tf.train.AdamOptimizer(lr).minimize(loss,
            global_step=global_step)

    # Only the variables listed in net['weights'] are handled by this saver.
    saver = tf.train.Saver(net['weights'])
    # Opened in 'w' mode, so an existing train.log is overwritten.
    # NOTE(review): the handle is not closed in the visible part of the
    # function.
    logfile = open(os.path.join(savedir, 'train.log'), 'w')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
예제 #25
0
def py_infer(test_data, trg_idx2word, use_wordpiece):
    """
    Inference by beam search implemented in Python, while the calculations
    from symbols to probabilities are executed by Fluid operators.

    The encoder and the decoder are built as two separate programs, their
    parameters are loaded from ``InferTaskConfig.model_path``, and every batch
    produced by ``test_data.batch_generator()`` is translated with
    ``translate_batch``.  Each decoded sequence is printed on its own line.

    Args:
        test_data: object providing ``batch_generator()``; element 0 of every
            item in a batch is passed on to ``translate_batch``.
        trg_idx2word: target vocabulary lookup, indexed by token id (or handed
            to ``util.subword_ids_to_str`` when ``use_wordpiece`` is true).
        use_wordpiece: if true, sequences are rendered with
            ``util.subword_ids_to_str``; otherwise the looked-up words are
            joined with single spaces.

    Returns:
        None.
    """

    def _program_params(program):
        # Collect the Parameter variables read by the ops of block 0, in op
        # order and including repeats.  The result is a real list: on Python 3
        # ``filter``/``map`` would only give lazy, single-use iterators.
        block = program.block(0)
        var_names = []
        for op in block.ops:
            var_names += op.input_arg_names
        params = []
        for var_name in var_names:
            var = block.var(var_name)
            if isinstance(var, fluid.framework.Parameter):
                params.append(var)
        return params

    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    encoder_program = fluid.Program()
    with fluid.program_guard(main_program=encoder_program):
        enc_output = encoder(
            ModelHyperParams.src_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.dropout, ModelHyperParams.weight_sharing)

    decoder_program = fluid.Program()
    with fluid.program_guard(main_program=decoder_program):
        predict = decoder(
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.dropout, ModelHyperParams.weight_sharing)

    # Load model parameters of encoder and decoder separately from the saved
    # transformer model.
    encoder_params = _program_params(encoder_program)
    decoder_params = _program_params(decoder_program)
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=encoder_params)
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=decoder_params)

    # This is used here to set dropout to the test mode.
    encoder_program = encoder_program.inference_optimize()
    decoder_program = decoder_program.inference_optimize()

    for batch_id, data in enumerate(test_data.batch_generator()):
        # The scores returned next to the sequences are not used for output.
        batch_seqs, batch_scores = translate_batch(
            exe,
            [item[0] for item in data],
            encoder_program,
            encoder_data_input_fields + encoder_util_input_fields,
            [enc_output.name],
            decoder_program,
            decoder_data_input_fields[:-1] + decoder_util_input_fields +
            (decoder_data_input_fields[-1], ),
            [predict.name],
            InferTaskConfig.beam_size,
            InferTaskConfig.max_out_len,
            InferTaskConfig.n_best,
            len(data),
            ModelHyperParams.n_head,
            ModelHyperParams.d_model,
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.bos_idx,
            ModelHyperParams.eos_idx,
            ModelHyperParams.unk_idx,
            output_unk=InferTaskConfig.output_unk)
        for beam_seqs in batch_seqs:
            # Post-process the beam-search decoded sequences.
            for raw_seq in beam_seqs:
                seq = post_process_seq(raw_seq)
                if use_wordpiece:
                    print(util.subword_ids_to_str(seq, trg_idx2word))
                else:
                    print(" ".join([trg_idx2word[idx] for idx in seq]))
                                               width=opt.width,
                                               keep_ratio=opt.keep_ratio))

# Validation set: images are resized/normalised to (opt.width, opt.height).
val_dataset = dataset.listDataset(list_file=opt.valList,
                                  transform=dataset.resizeNormalize(
                                      (opt.width, opt.height)))

nclass = len(alphabet) + 3  # number of classes the decoder needs; the extra 3 are for SOS, EOS and blank
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# NOTE(review): this buffer is allocated with 3 channels although nc = 1 is
# passed to the encoder below -- confirm it is resized before use.
image = torch.FloatTensor(opt.batchSize, 3, opt.width, opt.height)
criterion = torch.nn.NLLLoss()  # the final output must be log_softmax

encoder = model.encoder(opt.height, nc=nc, nh=256)
decoder = model.decoder(nh=256, nclass=nclass, dropout_p=0.1)

# continue training or use the pretrained model to initial the parameters of the encoder and decoder
# Weights are first initialised with weights_init and then overwritten by any
# checkpoint selected below.
encoder.apply(weights_init)
decoder.apply(weights_init)
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))
# An explicit epoch number takes effect after (and therefore overrides) the
# opt.encoder checkpoint.
if opt.loadModelEpoch > 0:
    encoder_path = 'model/encoder_%d.pth' % opt.loadModelEpoch
    print('loading pretrained encoder model from %s' % encoder_path)
    encoder.load_state_dict(torch.load(encoder_path))
    # NOTE(review): decoder_path is computed but no decoder load follows in
    # the visible code -- the snippet appears to be cut off here.
    decoder_path = 'model/decoder_%d.pth' % opt.loadModelEpoch
# Warn when a CUDA device is present but opt.noCuda is set.
# NOTE(review): the message refers to a --cuda flag while the option tested
# is opt.noCuda -- confirm the wording matches the actual CLI.
if torch.cuda.is_available() and opt.noCuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

# Initialize image batch
# Fixed-size buffers for a batch of 300x300 inputs: 3-channel image,
# per-class label map, 1-channel mask and 1-channel integer label index.
imBatch = Variable(torch.FloatTensor(opt.batchSize, 3, 300, 300))
labelBatch = Variable(
    torch.FloatTensor(opt.batchSize, opt.numClasses, 300, 300))
maskBatch = Variable(torch.FloatTensor(opt.batchSize, 1, 300, 300))
labelIndexBatch = Variable(torch.LongTensor(opt.batchSize, 1, 300, 300))

# Initialize network
# Baseline encoder/decoder, restored from the epoch-181 checkpoint and put
# into evaluation mode.
encoder_normal = model.encoder()
decoder_normal = model.decoder()
model_root_normal = '/datasets/cse152-252-sp20-public/unet_checkpoints/unet_original_zq'
epoch_id_normal = 181
encoder_normal.load_state_dict(
    torch.load('%s/encoder_%d.pth' % (model_root_normal, epoch_id_normal)))
decoder_normal.load_state_dict(
    torch.load('%s/decoder_%d.pth' % (model_root_normal, epoch_id_normal)))
encoder_normal = encoder_normal.eval()
decoder_normal = decoder_normal.eval()

# Dilation variant, restored from the epoch-180 checkpoint.
# NOTE(review): only the encoder weights are loaded in the visible code; the
# decoder load and the eval() calls are not present at this point of the file.
encoder_dilation = model.encoderDilation()
decoder_dilation = model.decoderDilation()
model_root_dilation = '/datasets/cse152-252-sp20-public/unet_checkpoints/unet_original_zq_dilation'
epoch_id_dilation = 180
encoder_dilation.load_state_dict(
    torch.load('%s/encoder_%d.pth' % (model_root_dilation, epoch_id_dilation)))
예제 #28
0
def train():
    """Train the language model for one pass over ``train_data``.

    Works on module-level state rather than parameters: ``args``, ``model``,
    ``model_r``, ``model_mlp``, ``optimizer``, ``criterion``, ``corpus``,
    ``train_data`` and ``epoch`` (the last one is only used for logging).

    Every step draws a randomized sequence length around ``args.bptt``,
    temporarily rescales the learning rate by ``seq_len / args.bptt`` and
    minimizes the ``criterion`` loss plus:

    * activation regularization (``args.alpha``) and temporal activation
      regularization (``args.beta``) on the last entry of the RNN outputs;
    * an ``args.theta``-weighted term relating ``dropped_rnn_hs[-2]`` to the
      span embedding produced by ``model_r`` and ``model_mlp``: a sigmoid
      positive/negative-sample loss when ``args.ns`` is set, otherwise a mean
      squared difference.

    Returns:
        None. Progress is printed every ``args.log_interval`` batches.
    """
    if args.model == 'QRNN':
        model.reset()
    # Running sum of the raw (unregularized) loss, kept as a Python float so
    # the logging code never has to index a 0-dim tensor.
    total_loss = 0.0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        # Scale the learning rate with the sampled length for this step only;
        # the saved value is restored after `optimizer.step()`.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        # Turn on training mode which enables dropout.
        model.train()
        model_r.train()
        model_mlp.train()
        data, targets, _ = get_batch(train_data, i, args, seq_len=seq_len)
        # NOTE(review): `data_long` is only referenced by the disabled
        # alternatives below; kept because `get_batch` is opaque from here.
        data_long, _, _ = get_batch(train_data, i, args, seq_len=seq_len)
        seq_len_data = data.size(0)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data,
                                                       hidden,
                                                       return_h=True)
        output = model.decoder(output)

        input_emb = model.encoder(data)
        # Disabled alternatives:
        # input_emb = model.encoder(data).detach()
        # input_emb = model.encoder(data_long)
        # input_emb = model.encoder(data_long).detach()

        # `model_r` yields attention weights and may replace `seq_len_data`;
        # the weighted sum over dim 1 gives the span embedding.
        attention, seq_len_data, reg_len = model_r(input_emb, seq_len_data)
        span_emb = (input_emb.unsqueeze(0) * attention).sum(1)
        span_emb = model_mlp(span_emb)

        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                          for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                          for rnn_h in rnn_hs[-1:])

        # Second-to-last entry of the dropped RNN outputs, cut to the length
        # reported by `model_r`.
        context_emb = dropped_rnn_hs[-2][:seq_len_data]
        if args.ns:
            span_emb_t = span_emb.transpose(0, 1)
            pos_loss = (1 - (context_emb * span_emb).sum(2).sigmoid()).mean()
            neg_loss = 0

            # Negatives are the span embeddings rotated along dim 1 and then
            # along dim 0 (presumably batch and time -- TODO confirm).
            split_idx_batch = int(torch.randint(args.batch_size, []))
            # split_idx_batch = int(torch.randint(1, args.batch_size, []))
            least_ns_seq = 0
            if split_idx_batch == 0:
                # No rotation along dim 1: force a non-trivial rotation along
                # dim 0 so the "negative" is not simply the positive again.
                least_ns_seq = 10 if data.size(0) > 15 else int(
                    data.size(0) / 2)
            split_idx_seq = int(torch.randint(least_ns_seq, data.size(0), []))

            # A single negative sample per step.
            for _ in range(1):
                span_emb_neg = torch.cat([
                    span_emb_t[split_idx_batch:], span_emb_t[:split_idx_batch]
                ], 0).transpose(0, 1)
                span_emb_neg = torch.cat([
                    span_emb_neg[split_idx_seq:], span_emb_neg[:split_idx_seq]
                ], 0)
                neg_loss += (context_emb *
                             span_emb_neg).sum(2).sigmoid().mean()

            loss += args.theta * (pos_loss + neg_loss)  # + 1e-6 * reg_len

        else:
            loss = loss + args.theta * (context_emb - span_emb).pow(2).mean()

        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.item()
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0.0
            start_time = time.time()
        batch += 1
        i += seq_len
예제 #29
0
def create_generation_batch(model, num_words, random_choice_frequency,
                            trunc_size, bs, bptt, prompts, params, TEXT):
    """Prime the model with random prompts and greedily generate a batch.

    Args:
      model: pretrained generator model.
      num_words: number of generation steps.
      random_choice_frequency: intended rate (0 to 1) of picking a random
        choice instead of the top one. Currently unused: the sampling code is
        disabled, so the top choice is always taken.
      trunc_size: intended cut-off for the random choice (1 to vocab size).
        Currently unused for the same reason.
      bs: batch size, i.e. the number of samples to generate.
      bptt: prompt length in tokens.
      prompts: list of training or test folder texts to draw prompts from.
      params: generator parameters; ``params["n_tok"]`` is the vocab size.
      TEXT: field providing ``numericalize`` and the ``vocab.itos`` lookup.

    Returns:
      ``(musical_prompts, results)``: the randomly selected (human-composed)
      prompts used to prime the model, and the generated samples, each a
      string of tokens with a space after every token.

    Loosely based on an example in the FastAI notebooks, adapted to use
    randomized prompts and to generate a whole batch at a time.
    """
    with torch.no_grad():
        hidden = model.init_hidden(bs)
        musical_prompts = generate_musical_prompts(prompts, bptt, bs)
        model.eval()

        # Tokenize the prompts, cut them to `bptt` tokens and map to indices.
        tokenized = [music_tokenizer(text)[:bptt] for text in musical_prompts]
        prompt_steps = TEXT.numericalize(tokenized)

        print("Prompting network")
        # One timestep at a time: each item holds that step's index for
        # every prompt in the batch.
        for step_indices in prompt_steps:
            res, hidden = model(step_indices.unsqueeze(0).cuda(), hidden)

        print("Generating new sample")
        results = [''] * bs
        for _ in range(num_words):
            res = model.decoder(res)
            # Rank the whole vocabulary (n_tok entries) for every sample.
            _scores, ranked = res.topk(params["n_tok"])

            # Greedy choice: the best-ranked token for each sample.
            # (Truncated random sampling driven by random_choice_frequency
            # and trunc_size is not active here.)
            w = ranked[:, 0]

            # Map each index back to its token (itos is index to string)
            # and append it to that sample's running text.
            results = [
                text + (TEXT.vocab.itos[w[j].item()] + " ")
                for j, text in enumerate(results)
            ]

            # Feed this step's predictions back in to get the next step.
            res, hidden = model(w.unsqueeze(0).cuda(), hidden)
        return musical_prompts, results
# ----------------- number of batches per epoch --------------------
# Author's example: 400 inputs with a batch size of 10 give 40 batches.
batch_per_ep = input_file.shape[0] // batch_size

# ----------------- network input --------------------
ae_inputs = tf.placeholder(
    tf.float32, (None, 32, 32, 32, 1), name="encoder_input")
#dicForShape = tf.placeholder(tf.string, shape=None, name="volume_name")

# --------- encoder: enable exactly one of the two variants below ---------
# Variational auto encoder (keep commented out for the simple auto encoder):
#z_mean, z_std, l_space = md.encoder(ae_inputs)
# Simple auto encoder (comment out when the variational model is used):
l_space = md.encoder(ae_inputs, dim_of_z)

# --------- decoder output ---------------------
ae_outputs = md.decoder(l_space)

# ----------------- calculate the loss and optimize variational auto encoder network ------------------------

#generation_loss = -tf.reduce_sum(ae_inputs * tf.log(1e-8 + ae_outputs) + (1-ae_inputs) * tf.log(1e-8 + 1 - ae_outputs), 1)

#latent_loss = 0.5 * tf.reduce_sum(tf.square(z_mean) + tf.square(z_std) - tf.log(tf.square(z_std)) - 1,1)

# Voxel-Wise Reconstruction Loss
# Note that the output values are clipped to prevent the BCE from evaluating log(0).
'''ae_outputs = tf.clip_by_value(ae_outputs, 1e-8, 1 - 1e-8)
bce_loss = tf.reduce_sum(weighted_binary_crossentropy(ae_outputs, ae_inputs), [1,2])
bce_loss = tf.reduce_mean(bce_loss)
# KL Divergence from isotropic gaussian prior
kl_div = 0.5 * tf.reduce_sum(tf.square(z_mean) + tf.square(z_std) - tf.log(1e-8 + tf.square(z_std)) - 1, [1])
kl_div = tf.reduce_mean(kl_div)