Example #1
def train_base(train_loader, model, criterion, optimizer_a, epoch, print_freq,
               writer):
    losses = AverageMeter()
    top1 = AverageMeter()
    model.train()

    begin_step = (epoch - 1) * len(train_loader)
    # total_t = 0

    for i, (input, target) in enumerate(train_loader):
        input_var = to_var(input, requires_grad=False)
        target_var = to_var(target, requires_grad=False)

        # t1 = time.time()

        output = model(input_var)
        loss = criterion(output, target_var)  # reduction='mean', [1,]
        prec_train = accuracy(output.data, target_var.data, topk=(1, ))[0]

        # standard update
        optimizer_a.zero_grad()
        loss.backward()
        optimizer_a.step()

        # CE loss, reduction='mean'
        losses.update(loss.item(), input.size(0))
        top1.update(prec_train.item(), input.size(0))

        writer.add_scalar('Train/loss', losses.avg, global_step=begin_step + i)
        writer.add_scalar('Train/top1_acc',
                          top1.avg,
                          global_step=begin_step + i)

        # total_t += time.time() - t1

        # idx in trainloader
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), loss=losses, top1=top1))
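Every example on this page calls a small to_var helper (sometimes as utils.to_var) that the snippets themselves never define. A minimal sketch of the usual pattern, assuming the common pre-0.4 PyTorch idiom; the exact helper differs from project to project, and some take a second argument (volatile or requires_grad):

import torch
from torch.autograd import Variable

def to_var(x, requires_grad=False):
    # Sketch only: move the tensor to the GPU when one is available and
    # wrap it for autograd. Several projects on this page also accept a
    # volatile=True flag for inference in pre-0.4 PyTorch.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, requires_grad=requires_grad)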
Example #2
def validate_model(model, dataloader, criterion):
    model.train(False)

    steps = len(dataloader.dataset) // dataloader.batch_size

    running_loss = 0.0
    running_cls_loss = 0.0
    running_loc_loss = 0.0
    running_corrects = 0

    for i, (inputs, labels, bboxes, _) in enumerate(dataloader):
        inputs = to_var(inputs, True)
        labels = to_var(labels, True)
        bboxes = to_var(bboxes, True)

        # forward
        scores, locs = model(inputs)
        _, preds = torch.max(scores.data, 1)
        cls_loss, loc_loss = criterion(scores, locs, labels, bboxes)
        loss = cls_loss + 10.0 * loc_loss

        # statistics
        running_cls_loss = (running_cls_loss * i + cls_loss.item()) / (i + 1)
        running_loc_loss = (running_loc_loss * i + loc_loss.item()) / (i + 1)
        running_loss = (running_loss * i + loss.item()) / (i + 1)
        running_corrects += torch.sum(preds == labels.data).item()

        # report
        sys.stdout.flush()
        sys.stdout.write(
            "\r  Step %d/%d | Loss: %.5f (%.5f + %.5f)" %
            (i, steps, running_loss, running_cls_loss, running_loc_loss))

    epoch_loss = running_loss
    epoch_acc = running_corrects / len(dataloader.dataset)

    sys.stdout.flush()
    print('\r{} Loss: {:.5f} ({:.5f} + {:.5f}), Acc: {:.5f}'.format(
        '  valid', epoch_loss, running_cls_loss, running_loc_loss, epoch_acc))

    return epoch_acc
Example #3
    def test(self):
        self.model.eval()
        true_scores_list = list()
        false_scores_list = list()
        for batch_i, (contexts, res_true, res_ns1, res_ns2, res_ns3, res_ns4) in \
                enumerate(tqdm(self.eval_data_loader, ncols=80)):
            ns_list = [None, res_ns1, res_ns2, res_ns3, res_ns4]
            res_ns = ns_list[self.config.test_target_ng]
            with torch.no_grad():
                contexts = to_var(torch.FloatTensor(contexts))
                res_trues = to_var(torch.FloatTensor(res_true))
                res_falses = to_var(torch.FloatTensor(res_ns))

            # Call forward function
            true_scores = self.model.score(contexts, res_trues)
            false_scores = self.model.score(contexts, res_falses)

            true_scores_list += true_scores.data.cpu().numpy().tolist()
            false_scores_list += false_scores.data.cpu().numpy().tolist()

        return true_scores_list, false_scores_list
Example #4
    def encode(self, x, o_cond=None):
        kwargs = {}
        batch_size = x.size(0)
        means, log_var = self.encoder(x, o_cond)
        if o_cond is not None:
            means_cond, log_var_cond = self.prior(o_cond)
            kwargs['means_cond'] = means_cond
            kwargs['log_var_cond'] = log_var_cond
        # reparameterization trick: z = mean + std * eps
        std = torch.exp(0.5 * log_var)
        eps = to_var(torch.randn([batch_size, self.latent_size]))
        z = eps * std + means
        return z, means, log_var, kwargs
Example #5
    def __init__(self, *args, **kwargs):
        super().__init__()
        ignore = nn.Conv2d(*args, **kwargs)

        self.in_channels = ignore.in_channels
        self.out_channels = ignore.out_channels
        self.stride = ignore.stride
        self.padding = ignore.padding
        self.dilation = ignore.dilation
        self.groups = ignore.groups
        self.kernel_size = ignore.kernel_size

        # register_buffer: store the params as buffers
        self.register_buffer('weight',
                             to_var(ignore.weight.data, requires_grad=True))

        if ignore.bias is not None:
            self.register_buffer('bias',
                                 to_var(ignore.bias.data, requires_grad=True))
        else:
            self.register_buffer('bias', None)
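Example #5 stores the convolution parameters as buffers rather than nn.Parameters, a common trick in meta-learning code so the weights can later be swapped for graph-connected tensors. The snippet stops before the forward pass; a plausible companion forward, assuming the standard functional convolution (not necessarily the original author's):

    def forward(self, x):
        # Assumes torch.nn.functional is imported as F. The buffered
        # weight/bias are used exactly like Parameters here, but can be
        # overwritten with graph-connected tensors between steps.
        return F.conv2d(x, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)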
Example #6
def gen_score(adaptive, res_loader):
    LMcriterion = nn.CrossEntropyLoss(ignore_index=0)
    if torch.cuda.is_available():
        LMcriterion.cuda()

    adaptive.eval()
    total_scores = []
    print('--------------Start Scoring on Generated dataset---------------')
    for i, (word, sememes, definition) in enumerate(res_loader):
        word = to_var(word)
        sememes = to_var(sememes)
        definition = to_var(definition)
        targets = definition[:, 1:]

        scores, _ = adaptive(word, sememes, definition)
        scores = scores[:, :-1, :].transpose(1, 2)
        loss = LMcriterion(scores, targets)
        total_scores.append(str(np.exp(loss.item())))
        if (i + 1) % 10 == 0:
            print('[%s/%s]' % (i + 1, len(res_loader)))
    return total_scores
Example #7
    def __init__(self, hps, data_loader, g_mode, enc_mode, log_dir='./log/'):
        self.hps = hps
        self.data_loader = data_loader
        self.model_kept = []
        self.max_keep = hps.max_to_keep
        self.logger = Logger(log_dir)
        self.g_mode = g_mode
        self.enc_mode = enc_mode
        if self.g_mode != 'naive':
            self.shift_c = to_var(
                torch.from_numpy(np.array(
                    [int(hps.n_speakers - hps.n_target_speakers)
                     for _ in range(hps.batch_size)])),
                requires_grad=False)
        self.build_model()
Example #8
def get_data_from_batch(batch, w2i, act2i):
    uttrs_list = [d[0] for d in batch]
    dialog_maxlen = max([len(uttrs) for uttrs in uttrs_list])
    uttr_maxlen = max([len(u) for uttrs in uttrs_list for u in uttrs])
    uttr_var = make_word_vector(uttrs_list, w2i, dialog_maxlen, uttr_maxlen)

    batch_labels = [d[1] for d in batch]
    labels_var = []
    for labels in batch_labels:
        vec_labels = [act2i[l] for l in labels]
        pad_len = dialog_maxlen - len(labels)
        for _ in range(pad_len):
            vec_labels.append(act2i[SILENT])
        labels_var.append(torch.LongTensor(vec_labels))
    labels_var = to_var(torch.stack(labels_var, 0))

    batch_prev_acts = [d[4] for d in batch]
    prev_var = []
    for prev_acts in batch_prev_acts:
        vec_prev_acts = []
        for act in prev_acts:
            tmp = [0] * len(act2i)
            tmp[act2i[act]] = 1
            vec_prev_acts.append(tmp)
        pad_len = dialog_maxlen - len(prev_acts)
        for _ in range(pad_len):
            vec_prev_acts.append([0] * len(act2i))
        prev_var.append(torch.FloatTensor(vec_prev_acts))
    prev_var = to_var(torch.stack(prev_var, 0))

    context = copy.deepcopy([d[2] for d in batch])
    context = padding(context, 1, dialog_maxlen)

    bow = copy.deepcopy([d[3] for d in batch])
    bow = padding(bow, 0, dialog_maxlen)

    act_filter = copy.deepcopy([d[5] for d in batch])
    act_filter = padding(act_filter, 0, dialog_maxlen)

    return uttr_var, labels_var, context, bow, prev_var, act_filter
Example #9
def train(model, train_set, eval_set, dt_logger):

    if torch.cuda.is_available():
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # for epoch in range(num_of_epochs):

    for data_file in train_set:

        model.train()

        data_iter = data_loader.get_loader(data_file, 1)

        running_loss = 0.0

        for idx, (stats, temporal, spatial, dr_state, short_ttf, long_ttf,
                  helpers) in enumerate(data_iter):

            stats = utils.to_var(stats)
            temporal = utils.to_var(temporal)
            spatial = utils.to_var(spatial)
            dr_state = utils.to_var(dr_state)
            short_ttf = utils.to_var(short_ttf)
            long_ttf = utils.to_var(long_ttf)

            loss = model.evaluate(stats, temporal, spatial, dr_state,
                                  short_ttf, long_ttf, helpers)
            optimizer.zero_grad()
            loss.sum().backward()
            optimizer.step()

            running_loss += loss.mean().data.item()
Example #10
def get_interpolations(vae, sample_start, sample_end, args):
    model = vae['model']
    tokenizer = vae['tokenizer']
    w2i = vae['w2i']
    i2w = vae['i2w']
    # Initialize semantic loss
    # sl = Semantic_Loss()

    start_encode = tokenizer.encode(sample_start)
    end_encode = tokenizer.encode(sample_end)
    with torch.no_grad():
        z1 = model._encode(**start_encode)
        z1_hidden = z1['z'].cpu()[0]

        z2 = model._encode(**end_encode)
        z2_hidden = z2['z'].cpu()[0]

    z_hidden = to_var(torch.from_numpy(interpolate(start=z1_hidden, end=z2_hidden, steps=args.steps)).float())

    if args.rnn_type == "lstm":
        z1_cell_state = z1['z_cell_state'].cpu()[0].squeeze()
        z2_cell_state = z2['z_cell_state'].cpu()[0].squeeze()

        # print(z1_cell_state.shape)

        z_cell_states = \
            to_var(torch.from_numpy(interpolate(start=z1_cell_state, end=z2_cell_state, steps=args.steps)).float())

        samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states)
    else:
        samples, _ = model.inference(z=z_hidden, z_cell_state=None)
    # print('-------INTERPOLATION-------')

    interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])
    # For each sentence, get the perplexity and show it
    # for sentence in interpolated_sentences:
        # print(sentence + "\t\t" + str(sl.get_perplexity(sentence)))
        # print(sentence)

    return interpolated_sentences
Example #11
def validate(valid_loader,
             model,
             criterion,
             epoch,
             print_freq,
             writer=None,
             prefix='Test'):
    """Perform validation on the validation set"""
    model.eval()
    losses = AverageMeter()
    top1 = AverageMeter()

    for i, (input, target) in enumerate(valid_loader):
        input_var = to_var(input, requires_grad=False)
        target_var = to_var(target, requires_grad=False)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target_var.data, topk=(1, ))[0]

        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        # measure elapsed time

        if (i + 1) % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      i + 1, len(valid_loader), loss=losses, top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

    if writer:
        writer.add_scalar(f'{prefix}/test_loss', losses.avg, global_step=epoch)
        writer.add_scalar(f'{prefix}/test_acc', top1.avg, global_step=epoch)

    return losses.avg, top1.avg
Example #12
    def evaluate(self):
        self.model.eval()
        true_scores_list = list()
        false_scores_list = list()
        batch_loss_history = list()
        for batch_i, (contexts, res_true, res_ns1, res_ns2, res_ns3, res_ns4) in \
                enumerate(tqdm(self.eval_data_loader, ncols=80)):
            with torch.no_grad():
                contexts = to_var(torch.FloatTensor(contexts))
                res_trues = to_var(torch.FloatTensor(res_true))
                res_ns1 = to_var(torch.FloatTensor(res_ns1))
                res_ns2 = to_var(torch.FloatTensor(res_ns2))
                res_ns3 = to_var(torch.FloatTensor(res_ns3))
                res_ns4 = to_var(torch.FloatTensor(res_ns4))

            # Call forward function
            true_scores = self.model.score(contexts, res_trues)
            false_scores = self.model.score(contexts, res_ns4)

            true_scores_list += true_scores.data.cpu().numpy().tolist()
            false_scores_list += false_scores.data.cpu().numpy().tolist()

            # Call forward function
            batch_loglikelihood = self.model(contexts, res_trues, res_ns1,
                                             res_ns2, res_ns3, res_ns4)
            batch_loss = -torch.sum(batch_loglikelihood)

            assert not isnan(batch_loss.item())
            batch_loss_history.append(batch_loss.item())

        epoch_loss = np.sum(batch_loss_history)

        return np.mean(true_scores_list), np.mean(
            false_scores_list), epoch_loss
Example #13
    def get_style_content_space(self, input_sequence):

        batch_size = input_sequence.size(0)  # get batch size
        input_embedding = self.embedding(input_sequence)  # convert to embeddings

        ################### encoder ##################
        _, hidden = self.encoder(input_embedding)  # hidden -> (B, H)

        ###### if the RNN is bidirectional or has multiple layers, merge the hidden states
        if self.bidirectional or self.num_layers > 1:
            hidden = torch.mean(hidden, axis=1)
        else:
            hidden = hidden.squeeze()

        ############## REPARAMETERIZATION of style and content###############

        ############style component############

        style_mean = self.hidden2stylemean(hidden)  # latent mean
        style_logv = self.hidden2stylelogv(hidden)  # latent log-variance
        style_std = torch.exp(0.5 * style_logv)  # standard deviation

        style_z = to_var(torch.randn([batch_size, self.style_space_size]))  # random vector
        style_z = style_z * style_std + style_mean  # reparameterized sample

        ############content component###############

        content_mean = self.hidden2contentmean(hidden)  # latent mean
        content_logv = self.hidden2contentlogv(hidden)  # latent log-variance
        content_std = torch.exp(0.5 * content_logv)  # standard deviation

        content_z = to_var(torch.randn([batch_size, self.content_space_size]))  # random vector
        content_z = content_z * content_std + content_mean  # reparameterized sample

        return style_z, content_z
Example #14
    def eval(self, images_path, caption_path, name):
        eval_size = self.args.eval_size
        beam_size = self.args.beam_size

        # Get image data loader
        cocoFolder = CocoImageFolder(images_path, caption_path, self.transform)
        data_loader = torch.utils.data.DataLoader(cocoFolder,
                                                  batch_size=eval_size,
                                                  shuffle=False,
                                                  num_workers=self.num_workers,
                                                  drop_last=False)

        num_batches = len(data_loader)
        res = []
        # every item in list is a batch of imgs, imgids, filenames
        for i, (images, image_ids, filenames) in enumerate(data_loader):
            if i % 100 == 0:
                print("Processed {}/{}".format(i, num_batches))

            images = to_var(images)
            # generated_captions, attention, beta = self.model.sampler( images )
            # with beam search
            generated_captions, attention, beta = self.model.mysampler(
                images, beam_size=beam_size)

            captions = generated_captions.cpu().data.numpy()

            for image_idx in range(captions.shape[0]):

                sampled_ids = captions[image_idx]
                sampled_caption = []

                for word_id in sampled_ids:

                    word = self.vocab.idx2word[word_id]
                    if word == '<end>':
                        break
                    else:
                        sampled_caption.append(word)

                sentence = ' '.join(sampled_caption)

                res.append({
                    "image_id": image_ids[image_idx],
                    "caption": sentence
                })

        # save results to file
        resName = "./results/" + name + ".json"
        with open(resName, 'w') as file:
            json.dump(res, file)
        print "{} is saved!".format(resName)
Example #15
    def forward(self, input_ids, attention_mask, length):
        batch_size = input_ids.shape[0]
        hidden = self.encoder(input_ids,
                              attention_mask).last_hidden_state[:, 0, :]

        # REPARAMETERIZATION
        mean = self.hidden2mean(hidden)
        logv = self.hidden2logv(hidden)
        std = torch.exp(0.5 * logv)

        z = to_var(torch.randn([batch_size, self.latent_size]))
        z = z * std + mean

        # DECODER
        hidden = self.latent2hidden(z)

        hidden = hidden.view(self.hidden_factor, batch_size, self.hidden_size)

        decoder_input_sequence = input_ids.clone()
        # decoder input
        if self.word_dropout_rate > 0:
            # randomly replace decoder input with <unk>
            prob = torch.rand(input_ids.size())
            if torch.cuda.is_available():
                prob = prob.cuda()
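            # setting prob to 1 at <sos>/<pad> positions exempts them from masking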
            prob[(input_ids.data - self.sos_idx) *
                 (input_ids.data - self.pad_idx) == 0] = 1
            decoder_input_sequence[
                prob < self.word_dropout_rate] = self.mask_idx
        input_embedding = self.embedding(decoder_input_sequence)
        input_embedding = self.embedding_dropout(input_embedding)

        # rnn input preparation
        sorted_lengths, sorted_idx = torch.sort(length, descending=True)
        packed_input = rnn_utils.pack_padded_sequence(
            input_embedding, sorted_lengths.data.tolist(), batch_first=True)

        # decoder forward pass
        outputs, _ = self.decoder_rnn(packed_input, hidden)

        # process outputs
        padded_outputs = rnn_utils.pad_packed_sequence(
            outputs, batch_first=True, padding_value=self.pad_idx)[0]
        padded_outputs = padded_outputs.contiguous()
        _, reversed_idx = torch.sort(sorted_idx)
        padded_outputs = padded_outputs[reversed_idx]

        # project outputs to vocab
        logp = nn.functional.log_softmax(self.outputs2vocab(padded_outputs),
                                         dim=-1)

        return logp, mean, logv, z
Example #16
    def forward(self,
                input_utterances,
                input_utterance_length,
                input_conversation_length,
                target_utterances,
                decode=False):
        """
        Forward of HRED
        :param input_utterances: [num_utterances, max_utter_len]
        :param input_utterance_length: [num_utterances]
        :param input_conversation_length: [batch_size]
        :param target_utterances: [num_utterances, seq_len]
        :param decode: True or False
        :return: decoder_outputs
        """
        num_utterances = input_utterances.size(0)
        max_conv_len = input_conversation_length.data.max().item()

        encoder_outputs, encoder_hidden = self.encoder(input_utterances,
                                                       input_utterance_length)
        encoder_hidden = encoder_hidden.transpose(1, 0).contiguous().view(
            num_utterances, -1)
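        # cumulative offsets: where each conversation starts in the flat batch of utterances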
        start = torch.cumsum(
            torch.cat((to_var(input_conversation_length.data.new(1).zero_()),
                       input_conversation_length[:-1])), 0)

        encoder_hidden = torch.stack([
            pad(encoder_hidden.narrow(0, s, l), max_conv_len) for s, l in zip(
                start.data.tolist(), input_conversation_length.data.tolist())
        ], 0)

        context_outputs, context_last_hidden = self.context_encoder(
            encoder_hidden, input_conversation_length)
        context_outputs = torch.cat([
            context_outputs[i, :l, :]
            for i, l in enumerate(input_conversation_length.data)
        ])

        decoder_init = self.context2decoder(context_outputs)
        decoder_init = decoder_init.view(self.decoder.num_layers, -1,
                                         self.decoder.hidden_size)

        if not decode:
            decoder_outputs = self.decoder(target_utterances,
                                           init_h=decoder_init,
                                           decode=decode)
            return decoder_outputs

        else:
            prediction, final_score, length = self.decoder.beam_decode(
                init_h=decoder_init)
            return prediction
Example #17
def geom_prior_step(y_prev, z_pres_prev):
    """ The AIR inference network produces three sets of variables for each entity at every time-step:
          * a 1-dimensional Bernoulli variable indicating the entity’s presence (z^{i}_{pres})
          * a C-dimensional distributed vector describing its class or appearance (z^{i}_{what})
          * a 3-dimensional vector specifying the affine parameters of its position and scale (z^{i}_{where})
        [Eslami et al., 2016]
    """
    # z_pres: bernoulli random variable indicating if sampled digit should be included
    z_pres = np.random.binomial(1, 0.5 * z_pres_prev) # if z_pres_prev = 0, z_pres = 0
    z_pres = utils.to_var(torch.from_numpy(z_pres).float())
    y = sample_glimpse_prior() * z_pres # sample a single digit, according to z_pres

    return y_prev + y, z_pres
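Since z_pres is drawn with probability 0.5 * z_pres_prev, it can only switch from 1 to 0 and stays 0 afterwards, so the number of digits placed on the canvas follows a truncated geometric distribution. A hypothetical driver (names and shapes are illustrative, not from the source) that unrolls the prior:

def sample_geom_prior(max_steps=3, canvas_shape=(1, 50, 50)):
    # Illustrative only: start from an empty canvas with z_pres = 1 and
    # let geom_prior_step decide when to stop adding digits.
    y = utils.to_var(torch.zeros(canvas_shape))
    z_pres_prev = np.ones(1, dtype=np.float32)
    for _ in range(max_steps):
        y, z_pres = geom_prior_step(y, z_pres_prev)
        z_pres_prev = z_pres.data.cpu().numpy()
    return y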
Example #18
def validate_step(model, valid_dl, criterion):
    model.eval()

    N = len(valid_dl.dataset)
    steps = N // valid_dl.batch_size
    avg_loss = 0.0
    for i, (anc, pos, neg) in enumerate(valid_dl):
        anc = to_var(anc, volatile=True)
        pos = to_var(pos, volatile=True)
        neg = to_var(neg, volatile=True)

        f_anc, f_pos, f_neg = model(anc, pos, neg)
        loss = criterion(f_anc, f_pos, f_neg)
        avg_loss = (avg_loss * i + loss.item()) / (i + 1)

        # report
        sys.stdout.flush()
        sys.stdout.write("\r Validation Step [{}/{}]: loss {:.5f}  ".format(
            i + 1, steps + 1, avg_loss))
    print()

    return avg_loss
Example #19
    def inference(self, o_cond=None, n_samples=1, layer_cond=True):
        """
        :param o_cond: if we want to condition on real images, n x C x H x W
        :param n_samples: the number of z samples per o_cond
        :return: sample from the learned P_theta
        """
        batch_size = n_samples if o_cond is None else n_samples * o_cond.size(0)
        z = to_var(torch.randn([1, n_samples, self.latent_size])) \
            .repeat(batch_size // n_samples, 1, 1) \
            .permute(1, 0, 2).reshape(batch_size, -1)
        o_cond_rep = None
        if o_cond is not None:
            o_cond_rep = o_cond.repeat(n_samples, 1, 1, 1)
            mu_cond, logvar_cond = self.prior(o_cond_rep)
            std_cond = torch.exp(0.5 * logvar_cond)
            eps = to_var(torch.randn([batch_size, self.latent_size]))
            z = eps * std_cond + mu_cond
        # now both z and o_cond_rep have batch_size as their first dimension
        recon_x = self.decoder(z, o_cond_rep)
        if o_cond is not None and layer_cond:
            recon_x = torch.cat([o_cond, recon_x])
        return recon_x
Example #20
def test():
    model.eval()
    test_loss = 0.
    test_accuracy = 0.
    counter = 0
    correct = 0
    for data, target in test_dataset:

        data = to_var(torch.LongTensor(data))  # (bs, seq_len)
        target = to_var(torch.LongTensor(target))  # (bs,)
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target).item()
        _, pred_ids = torch.max(output, 1)
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += torch.sum(pred_ids == target).data.item()

        counter += data.size(0)
        test_accuracy += pred.eq(target.data.view_as(pred)).cpu().float().sum()

    # print('Test Acc: {:.2f} % ({}/{})'.format(100 * correct / counter, correct, counter))
    # print('Test Loss: {:.4f}'.format(losses/counter))

    # # Horovod: use test_sampler to determine the number of examples in
    # # this worker's partition.
    test_loss /= counter
    test_accuracy /= counter

    # Horovod: average metric values across workers.
    test_loss = metric_average(test_loss, 'avg_loss')
    test_accuracy = metric_average(test_accuracy, 'avg_accuracy')

    # Horovod: print output only on first rank.
    if hvd.rank() == 0:
        print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
            test_loss, 100. * test_accuracy))
Example #21
def sample_noise(batch_size, dim):
    """
    Generate a PyTorch Variable of uniform random noise.

    Input:
    - batch_size: Integer giving the batch size of noise to generate.
    - dim: Integer giving the dimension of noise to generate.

    Output:
    - A PyTorch Variable of shape (batch_size, dim, 1, 1) containing uniform
      random noise in the range (-1, 1).
    """
    return utils.to_var(torch.rand(batch_size, dim) * 2 - 1).unsqueeze(2).unsqueeze(3)
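A quick shape check (the numbers are arbitrary), given the batch_size parameter restored in the signature above to match the docstring:

noise = sample_noise(batch_size=16, dim=100)
assert noise.size() == (16, 100, 1, 1)  # uniform values in (-1, 1)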
Example #22
def train(model, data, test_data, optimizer, loss_fn, n_epoch=10):
    print('=========training=========')
    model.train()
    for epoch in range(n_epoch):
        print('----epoch', epoch)
        random.shuffle(data)
        for batch_ct, (X, Y) in enumerate(data):
            X = to_var(torch.LongTensor(X))  # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y))  # (bs,)
            # print(X.size(), Y.size())
            # print(X)
            pred = model(X)  # (bs, ans_size)
            # _, pred_ids = torch.max(pred, 1)
            loss = loss_fn(pred, Y)
            if batch_ct % 100 == 0:
                print('loss: {:.4f}'.format(loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('current performance at epoch', epoch)
        test(model, test_data)
Example #23
def test(model, data):
    model.eval()
    counter = 0
    correct = 0
    losses = 0.0

    for batch_ct, (X, Y) in enumerate(data):
        X = to_var(torch.LongTensor(X))  # (bs, seq_len)
        Y = to_var(torch.LongTensor(Y))  # (bs,)
        pred = model(X)  # (bs, ans_size)
        loss = loss_fn(pred, Y)
        losses += torch.sum(loss).data.item() * X.size(0)
        _, pred_ids = torch.max(pred, 1)
        # print('loss: {:.4f}'.format(loss.data[0]))
        correct += torch.sum(pred_ids == Y).data.item()
        counter += X.size(0)

    print('Test Acc: {:.2f} % ({}/{})'.format(100 * correct / counter, correct,
                                              counter))
    print('Test PPL: {:.4f}'.format(math.exp(losses / counter)))
Example #24
    def embed(self, x):
        """word index: [batch_size] => word vectors: [batch_size, hidden_size]"""

        if self.training and self.word_drop > 0.0:
            if random.random() < self.word_drop:
                embed = self.embedding(to_var(x.data.new([UNK_ID] *
                                                         x.size(0))))
            else:
                embed = self.embedding(x)
        else:
            embed = self.embedding(x)

        return embed
Example #25
    def get_position_embedding(self, story_sent_len, story_len, batch_size):
        J = story_sent_len
        d = self.embd_size
        pe = to_var(torch.zeros(J, d))  # (story_sent_len, embd_size)
        for j in range(1, J + 1):
            for k in range(1, d + 1):
                l_kj = (1 - j / J) - (k / d) * (1 - 2 * j / J)
                pe[j - 1][k - 1] = l_kj
        pe = pe.unsqueeze(0).unsqueeze(0)  # (1, 1, story_sent_len, embd_size)
        pe = pe.repeat(batch_size, story_len, 1,
                       1)  # (bs, story_len, story_sent_len, embd_size)

        return pe
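The nested loop fills the position-encoding table l_kj = (1 - j/J) - (k/d)(1 - 2j/J) used in end-to-end memory networks. For larger J and d the same table can be built by broadcasting; a sketch of an equivalent vectorized method, assuming the same to_var helper:

    def get_position_embedding_vectorized(self, story_sent_len, story_len, batch_size):
        # Same l_kj table as above, built without Python loops.
        J, d = story_sent_len, self.embd_size
        j = to_var(torch.arange(1, J + 1).float()).unsqueeze(1)  # (J, 1)
        k = to_var(torch.arange(1, d + 1).float()).unsqueeze(0)  # (1, d)
        pe = (1 - j / J) - (k / d) * (1 - 2 * j / J)  # (J, d)
        return pe.unsqueeze(0).unsqueeze(0).repeat(batch_size, story_len, 1, 1)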
Example #26
def save_gradient_pic(D, fixed_generated_images, iteration, opts):
    pic = utils.to_var(fixed_generated_images)
    pic.requires_grad_(True)
    loss = mse_loss(D(pic), 1)
    path = os.path.join(opts.sample_dir,
                        'gradients-{:06d}.png'.format(iteration))
    loss.backward()
    gradients = utils.to_data(pic.grad)

    # Only red channel
    gradients[:, :] = np.sqrt(np.sum(gradients**2, axis=1, keepdims=True))
    grid = create_image_grid(gradients)
    scipy.misc.imsave(path, grid)
Example #27
def plot_errors(model, dataloader):
    model.train(False)
    
    plt.figure(figsize=(12, 24))
    count = 0
    
    for (inputs, labels, _) in tqdm(dataloader):
        inputs, labels = to_var(inputs, volatile=True), to_var(labels, volatile=True)
        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)
        incorrect_idxs = np.flatnonzero(preds.cpu().numpy() != labels.data.cpu().numpy())
        
        for idx in incorrect_idxs:
            count += 1
            if count <= 30:  # the 10x3 grid only has 30 slots
                ax = plt.subplot(10, 3, count)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(dataloader.dataset.classes[preds[idx]]))
                imshow(inputs.cpu().data[idx])
    plt.show()

    print("{} images out of {} were misclassified.".format(count, len(dataloader.dataset)))
Example #28
def main(args):

    with open(args.data_dir + '/poems.vocab.json', 'r') as file:
        vocab = json.load(file)

    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(vocab_size=len(w2i),
                        sos_idx=w2i['<sos>'],
                        eos_idx=w2i['<eos>'],
                        pad_idx=w2i['<pad>'],
                        unk_idx=w2i['<unk>'],
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=0)

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(
        torch.load(args.load_checkpoint, map_location=torch.device('cpu')))
    print("Model loaded from %s" % (args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()
    samples, z = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
    # while True:
    #     samples, z = model.inference(n=1, condition=torch.Tensor([[1, 0, 0, 0, 0, 0, 0]]).cuda())
    #     poem = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])[0]
    #     if 'love' in poem:
    #         breakpoint()

    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(
        torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    # samples, _ = model.inference(z=z, condition=torch.Tensor([[1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0]]).cuda())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
Example #29
    def visualize_results(self, X=None, epoch=0):
        print("visualize results...")
        image_num = 100
        batch_size = image_num // 2  # integer division so torch.randn gets an int
        # row = int(sqrt(image_num))
        row = 10
        nrows = 8
        ncols = 8
        reconstruc = True
        save_dir = os.path.join(self.root, self.result_dir, self.dataset, self.model_name)

        self.G.eval()
        #self.E.eval()
        self.FC.eval()

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # Reconstruction and generation
        z = utils.to_var(torch.randn(batch_size * 2, self.z_dim))
        X_hat = self.G(z)  # randomly generated sample

        self.G.train()
        #self.E.train()
        self.FC.train()

        if torch.cuda.is_available():

            samples = X_hat.cpu().data.numpy().transpose(0, 2, 3, 1)  # 1
            origins = X.cpu().data.numpy().transpose(0, 2, 3, 1)  # 2

        else:

            samples = X_hat.data.numpy().transpose(0, 2, 3, 1)
            origins = X.data.numpy().transpose(0, 2, 3, 1)

        # Save images
        utils.save_images(origins[:image_num, :, :, :], [row, row],
                          os.path.join(save_dir, 'original' + '_epoch%03d' % epoch + '.png'))
        utils.save_images(samples[:image_num, :, :, :], [row, row],
                          os.path.join(save_dir, 'random' + '_epoch%03d' % epoch + '.png'))
Example #30
def train(epoch):
    a = time.time()
    model.train()
    b = time.time()
    print("train: ", b - a)
    a = time.time()
    # Horovod: set epoch to sampler for shuffling.
    train_sampler.set_epoch(epoch)
    b = time.time()
    print("set_epoch: ", b - a)
    for batch_idx, (data, target) in enumerate(train_dataset):
        a = time.time()
        # for i in range(len(data)):
        #     print(len(data[0][0]))
        #     data[i] = to_var(torch.stack(data[i]))

        # data = torch.stack(data)
        # target = torch.stack(target)

        data = to_var(torch.LongTensor(data))  # (bs, seq_len)
        target = to_var(torch.LongTensor(target))  # (bs,)
        # print( data.size(), target.size())
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = F.nll_loss(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        b = time.time()
        if batch_idx % args.log_interval == 0:
            # Horovod: use train_sampler to determine the number of examples in
            # this worker's partition.
            if hvd.rank() == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_dataset),
                    100. * batch_idx / len(train_dataset), loss.item()))
                print("Train time: ", b - a)