Example #1
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        batch_one_hots = torch.cat([one_hot(t.data[0], ntokens) for t in targets])
        next_word_history = batch_one_hots if next_word_history is None else torch.cat([next_word_history, batch_one_hots])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
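All of these snippets detach the recurrent state between truncated-BPTT windows through a repackage_hidden helper that the listing itself never shows. A minimal sketch in the style of the standard PyTorch word_language_model example (the recursive handling of (h, c) tuples is an assumption here):

import torch

def repackage_hidden(h):
    # Detach hidden states from their history so gradients stop at the window boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    # LSTM states arrive as (h, c) tuples; recurse over nested containers.
    return tuple(repackage_hidden(v) for v in h)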
Example #2
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
    # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
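A note on the loop above: the learning rate is temporarily rescaled in proportion to the sampled window, lr * seq_len / args.bptt, and restored after the step. With args.bptt = 70 and a sampled seq_len = 35, for example, the step runs at half the base rate, so shorter truncated-BPTT windows take correspondingly smaller steps.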
Example #3
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
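Most of the evaluation loops also rely on a get_batch helper that slices a bptt-sized window and its one-step-shifted targets out of the batchified corpus. A minimal sketch in the style of the awd-lstm-lm utilities (the exact signature and the unused evaluation flag are assumptions):

def get_batch(source, i, args, seq_len=None, evaluation=False):
    # Take at most seq_len (default args.bptt) tokens starting at position i.
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    # Targets are the same window shifted by one token, flattened for the loss.
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target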
Example #4
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += len(data) * loss
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #5
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _ = model(data, hidden, reset_experience=True)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        if 'dnc' not in args.model.lower():
            hidden = repackage_hidden(hidden)
        else:
            hidden = repackage_hidden_dnc(hidden)
    return total_loss[0] / len(data_source)
Example #6
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden = model(data, tn_m_hidden(hidden, hidden_previous), decoded=True)
            hidden_previous = hidden
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #7
def evaluate(data_source_words, data_source_langs, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source_words.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source_words, i, args, evaluation=True)
        langData, langTargets = get_batch(data_source_langs,
                                          i,
                                          args,
                                          evaluation=True)
        output, _, hidden = model(data, langData, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source_words)
Example #8
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)
            ### culprit in memory leak
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)),
                                          targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #9
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus['words'].idx2word)
    for i in range(0, len(data_source['sentences']) - 1, batch_size):
        data, lengths, max_length, targets = get_batch(data_source, i,
                                                       batch_size)
        cur_batch_size = data.size(1)
        hidden = model.init_hidden(cur_batch_size)
        output, hidden = model(data, lengths, max_length, hidden)
        loss = batch_size * criterion(output, targets.long())
        total_loss += loss
        hidden = repackage_hidden(hidden)
    # return total_loss.item() / batch_size
    return total_loss.item() / len(data_source['sentences'])
Example #10
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #11
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        logits = model.decoder(output)
        # logProba = nn.functional.log_softmax(logits, dim=1)
        # pred_idxs = torch.argmax(logProba, dim=1)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #12
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN' and getattr(model, 'reset', None): model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            #output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #13
def validate(model, val_loader, loss_fn, n_batchs, eval_batch_size=50):

    model.eval()
    batch_index = 0
    hidden = model.init_hidden(eval_batch_size)
    val_loss = 0
    counter = 0
    while (batch_index < n_batchs - 1):

        X, y, seq_len = next(val_loader)
        out, hidden = model(X, hidden)
        val_loss += loss_fn(out, y)
        hidden = utils.repackage_hidden(hidden)
        batch_index += seq_len
        counter += 1

    return val_loss / counter
Example #14
def evaluate(genotype, data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    logging.info('Genotype: {}'.format(genotype))

    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden, genotype)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)),
                                          targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #15
def evaluate_dist(x, support):
    y_vec = np.arange(x - 7.5, x + 7.5, 0.05)
    y_givenx = np.zeros(len(y_vec))
    for k, y in enumerate(y_vec):
        batch_size = 1
        # Creating test data - for x,y respectively
        data_xy = {
            'features': (torch.ones(50, batch_size, args.ndim) * x),
            'labels': (torch.ones(50, batch_size, args.ndim) * y)
        }
        test_data_f2 = batchify_f2(data_xy, batch_size, args)
        # setting uniform distribution for y~
        test_randata_f2 = batchify_f2(data_xy,
                                      batch_size,
                                      args,
                                      uniformly=True)

        # Turn on evaluation mode which disables dropout.
        model_f2.eval()
        hidden_f2 = model_f2.init_hidden(batch_size, model_f2.ncell)
        dist_vector = torch.FloatTensor()

        for i in range(0, test_data_f2.size(0) - 1, args.bptt):
            data_f2 = get_batch_dine(test_data_f2, i, args, evaluation=True)
            randata_f2 = get_batch_dine(test_randata_f2,
                                        i,
                                        args,
                                        evaluation=True)
            # forward
            out_f2, out_reused_f2, hidden_f2 = parallel_model_f2(
                data_f2, randata_f2, hidden_f2)
            # distribution calculation
            if i:
                dist = torch.exp(out_f2) * (1 / support)
                dist_vector = torch.cat((dist_vector, dist), 0)
            else:
                dist_vector = torch.exp(out_f2) * (1 / support)

            # hidden repackage
            hidden_f2 = repackage_hidden(hidden_f2)

        y_givenx[k] = torch.mean(dist_vector).detach().cpu().numpy()

    y_givenx = y_givenx / np.sum(y_givenx)
    return y_vec, y_givenx
Example #16
def evaluate(model, test_data, test_data_loader, batch_size):
    model.eval()    
    sum_losses_syll = AverageMeter()
    sum_losses_lyric = AverageMeter()

    """ Build Optimizers """
    # lr = 0.001
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr) # lr = 0.001
    loss_criterion = nn.CrossEntropyLoss() # Combines LogSoftmax() and NLLLoss() (Negative log likelihood loss)

    hidden = model.init_hidden(batch_size)

    for i, (syllable, lyric, melody, lengths) in enumerate(test_data_loader):
        local_bs = lyric.size(0)
        if local_bs != batch_size:
            continue

        """ Move dataloaders to GPU """
        syllable = syllable.to(device)
        lyric = lyric.to(device)
        melody = melody.to(device).float()
        lengths = lengths.to(device)

        """ Remove first melody feature """
        melody = melody[:, 1:] # We don't really want to do this?

        """ Detach hidden layers """
        hidden = repackage_hidden(hidden) # Function from PyTorch NLP official example

        """ Feedforward """
        # Feedforward
        syllable_output, lyrics_output, hidden = model(lyric[:, :-1], melody, lengths, hidden)
        
        # Define packed padded targets
        target_syllable = pack_padded_sequence(syllable[:, 1:], lengths-1, batch_first=True)[0]
        target_lyrics = pack_padded_sequence(lyric[:, 1:], lengths-1, batch_first=True)[0]
        
        # Calculate and update Cross-Entropy loss
        loss_syllable = loss_criterion(syllable_output, target_syllable)
        sum_losses_syll.update(loss_syllable)

        loss_lyrics = loss_criterion(lyrics_output, target_lyrics)
        sum_losses_lyric.update(loss_lyrics)
    
    return sum_losses_lyric, sum_losses_syll
Example #17
def train_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size)):
        baskets, lens, _ = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = bpr_loss(baskets, dynamic_user, dr_model.encode.weight, dr_config)
        loss.backward()

        # Clip to avoid gradient exploding
        torch.nn.utils.clip_grad_norm(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters(): # Update parameters by -lr*grad
        #    p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss[0] / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print(
                '[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(epoch, i,
                                                                                                              num_batchs,
                                                                                                              elapsed,
                                                                                                              cur_loss))
            writer.add_scalar('model/train_loss', cur_loss, epoch * num_batchs + i)
            writer.add_scalar('model/grad_norm', grad_norm, epoch * num_batchs + i)
            writer.add_scalar('model/weight_update_ratio', weight_update_ratio, epoch * num_batchs + i)
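One caveat for Example #17: torch.nn.utils.clip_grad_norm is the pre-0.4 spelling of the clipping helper; current PyTorch releases deprecate it in favour of the in-place clip_grad_norm_, which the other training loops in this collection already call. A drop-in sketch with the names used above:

# Equivalent clipping with the current API; mutates gradients in place and returns the total norm.
total_norm = torch.nn.utils.clip_grad_norm_(dr_model.parameters(), dr_config.clip)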
Example #18
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    model_now = model.module
    criterion_now = criterion.module
    if args.model == 'QRNN': model_now.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_now.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model_now(data, hidden)
        criterion_now.replicate_weight_and_bias(model.module.decoder.weight,
                                                model.module.decoder.bias)
        total_loss += len(data) * criterion_now(hiddens=output,
                                                targets=targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #19
def evaluate(data_source_src, data_source_trg, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_eval_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source_src.size(0) - 1, args.bptt):
        data, prev_targets, targets = get_batch(data_source_src,
                                                data_source_trg,
                                                i,
                                                args,
                                                evaluation=True)
        output, hidden = model(data, prev_targets, hidden)
        output_flat = output.view(-1, ntokens)
        total_eval_loss += len(data) * criterion(output_flat, targets).data

        hidden = repackage_hidden(hidden)
    return total_eval_loss.item() / len(data_source_src)
Example #20
def evaluate(data_source, batch_size=10, temperature=1.0):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        # model.reset()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)

            output = output / temperature

            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #21
def evaluate(data_source, use_dropout=False, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if not use_dropout:
        model.eval()
    else:
        model.train()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)

    # turn on eval mode at the end because we expect eval mode
    model.eval()
    return total_loss.item() / len(data_source)
Example #22
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(eval_batch_size)

    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args.bptt, evaluation=True)
        #> output has size seq_length x batch_size x vocab_size
        output, hidden = model(data, hidden)
        #> output_flat has size num_targets x vocab_size (batches are stacked together)
        #> ! important, otherwise softmax computation (e.g. with F.softmax()) is incorrect
        output_flat = output.view(-1, ntokens)
        #output_candidates_info(output_flat.data, targets.data)
        total_loss += len(data) * nn.CrossEntropyLoss()(output_flat, targets).data
        hidden = repackage_hidden(hidden)

    return total_loss[0] /len(data_source)
Example #23
def evaluate(test, batch_size=1):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(1)
    for i in range(0, test.size(0) - 1, 70):
        data, targets = get_batch(test, i, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)),
                                      targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return math.exp(total_loss[0] / len(test))
Example #24
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        if isinstance(criterion, SplitCrossEntropyLoss):
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
        else:
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
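Example #24 dispatches on the criterion type: SplitCrossEntropyLoss takes the decoder weight and bias directly so that it can evaluate a softmax over vocabulary splits instead of the full output matrix. A hedged construction sketch in the style of awd-lstm-lm (the import path and split boundaries are assumptions that depend on the corpus size):

from splitcross import SplitCrossEntropyLoss

splits = [2800, 20000, 76000]  # coarse buckets for a large vocabulary; smaller corpora use fewer splits
criterion = SplitCrossEntropyLoss(args.emsize, splits=splits, verbose=False)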
Example #25
def evaluate(args, model, data_iterator, criterion):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    example_count = 0

    hidden = model.init_hidden(args.batch_size)
    with torch.no_grad():
        for _, batch in tqdm(enumerate(data_iterator), total=len(data_iterator), disable=True):
            data, targets = batch.text.t(), batch.target.t().contiguous()
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, model.vocab_size)
            total_loss += len(data) * criterion(output_flat,
                                                targets.view(-1)).item()
            example_count += len(data)
            hidden = repackage_hidden(hidden)
    model.train()
    return total_loss / example_count
Example #26
    def init_hidden(self, batch_size: int, init_batch: Tensor) -> HiddenState:
        padded_aliases = init_batch
        batched_aliases = []
        max_len_aliases = padded_aliases.size(1)
        for i in range((max_len_aliases - 1) // self.bptt_size + 1):
            batched_aliases.append(
                padded_aliases[:, i * self.bptt_size:(i + 1) * self.bptt_size])

        hidden = self.rnn.init_hidden(batch_size)
        for batched_alias in batched_aliases:
            batched_alias = batched_alias.to(self.device)
            if hidden is not None:
                hidden = repackage_hidden(hidden)

            alias_embeds = self.word_embed(batched_alias)
            _, hidden = self.rnn.forward(alias_embeds, hidden)

        return hidden
Example #27
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)

    for source_sample, target_sample in zip(source_sampler, target_sampler):
        model.train()
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #28
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model_lm.eval()
    # model_mlp.eval()
    if args.model == 'QRNN': model_lm.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_lm.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _, all_outputs = model_lm(data, hidden, return_h=True)
        # output = model_mlp(all_outputs[-1]) + all_outputs[-1]
        # output = output.view(output.size(0)*output.size(1), output.size(2))
        total_loss += len(data) * criterion(model_lm.decoder.weight,
                                            model_lm.decoder.bias, output,
                                            targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #29
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0,
                   data_source.size(0) - 1,
                   args.bptt):  # Jump forwards in bptt (70) increments
        data, targets = get_batch(
            data_source, i, args, evaluation=True
        )  # Gets the data and the target data to be produced
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #30
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        if args.split_cross:
            total_loss += len(data) * criterion(model.decoder, output,
                                                targets).data
        else:
            total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #31
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()

    hidden = model.init_hidden(args.batch_size)

    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i, args.bptt)
        sflg = corpus.dictionary.idx2sflg[data]

        # truncated BPTT
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model((data, sflg), hidden)

        #logging.info("sizes")
        #logging.info(model.emb_size)
        #logging.info(model.input_size)
        #logging.info(model.output_size)

        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            logging.info(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Example #32
def evaluate(model, data_source, batch_size=10):
    model.eval()
    if args.model == 'QRNN':
        model.reset()

    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.seq_len):
        data, targets = get_batch(data_source, i, args, evaluation=True)

        output = model(data, hidden)
        if isinstance(output, tuple):
            output, hidden = output

        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #33
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        input = torch.mm(model.decoder.weight, output.transpose(
            0, 1)).transpose(0, 1) + model.decoder.bias
        if args.loss == 'splitcrossentropy':
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
        elif args.loss == 'focal':
            total_loss += len(data) * criterion(input, targets, test=True).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #34
def evaluate_reorder_dream():
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_config.batch_size) 

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(test_ub) / dr_config.batch_size)
    for i,x in enumerate(batchify(test_ub, dr_config.batch_size, is_reordered = True)):
        baskets, lens, _, r_baskets, h_baskets = x
        dynamic_user, _  = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)
        dr_hidden = repackage_hidden(dr_hidden)
        total_loss += loss.data

    # Logging
    elapsed = (time() - start_time) * 1000 / num_batchs
    total_loss = total_loss[0] / num_batchs
    print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'.format(epoch, elapsed, total_loss))
    return total_loss
Example #35
def train():
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = [model.init_hidden(args.small_batch_size) for _ in range(args.batch_size // args.small_batch_size)]
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        optimizer.zero_grad()

        start, end, s_id = 0, args.small_batch_size, 0
        while start < args.batch_size:
            cur_data, cur_targets = data[:, start: end], targets[:, start: end].contiguous().view(-1)

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden[s_id] = repackage_hidden(hidden[s_id])

            log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = parallel_model(cur_data, hidden[s_id], return_h=True)
            raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), cur_targets)

            loss = raw_loss
            # Activation Regularization
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
            # Temporal Activation Regularization (slowness)
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
            loss *= args.small_batch_size / args.batch_size
            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            loss.backward()

            s_id += 1
            start = end
            end = start + args.small_batch_size

            gc.collect()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        # total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logging('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
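Example #35 splits each batch of args.batch_size sequences into chunks of args.small_batch_size and calls loss.backward() once per chunk. Scaling each chunk's loss by small_batch_size / batch_size makes the accumulated gradients equal to those of a single full-batch step: with batch_size = 80 and small_batch_size = 20, for instance, each of the four backward passes contributes one quarter of the full-batch gradient before optimizer.step() runs once.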