Code example #1
def evaluate_sents(data_source, uids, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    sent_loss = defaultdict(list)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        batch_uids = get_ids(uids, i, args, evaluation=True)
        # pdb.set_trace()
        output, hidden = model(data, hidden, decode=True)
        output_flat = output.view(-1, ntokens)
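        # Assumes `criterion` returns per-token losses (reduction='none') so they
        # can be grouped by sentence uid below.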
        per_word_loss = criterion(output_flat, targets)
        batch_uids_list = batch_uids.reshape(-1).tolist()
        loss_list = per_word_loss.tolist()
        for loss, uid in zip(loss_list, batch_uids_list):
            sent_loss[uid].append(loss)
        incre = (torch.mean(per_word_loss).item()*len(data))
        total_loss += incre
        # print('incre=',incre)
        hidden = repackage_hidden(hidden)
        # pdb.set_trace()
    avg_sent_loss = {}
    for (uid, losses) in sent_loss.items():
        avg_sent_loss[uid]=float(np.mean(losses))
    # pdb.set_trace()
    return total_loss / len(data_source), avg_sent_loss
Code example #2
File: main.py  Project: madlag/Frequency-Agnostic
def evaluate(data_source, batch_size=10, valid_or_test="valid"):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_embedding_loss = 0.0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)

    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        loss = len(data) * criterion(model.decoder.weight, model.decoder.bias,
                                     output, targets).item()
        total_loss += loss
        if args.embedder != "classic":
            total_embedding_loss += len(data) * float(
                embedder.last_batch_loss().cpu().detach().item())

        hidden = repackage_hidden(hidden)

    ret = total_loss / len(data_source)

    writer.add_scalar('Loss/%s/main' % valid_or_test, ret / math.log(2))
    writer.add_scalar('Loss/%s/embedder' % valid_or_test,
                      total_embedding_loss / len(data_source) / math.log(2))

    return ret
Code example #3
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    epoch_loss = 0  #running mean
    total_loss = 0  #running sum
    hidden = model.init_hidden(batch_size)
    batch = 0
    for i in range(0, data_source.size(0) - 1, args.bptt):  #batch loop
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)

        if not test:
            epoch_loss = (epoch_loss * batch +
                          (criterion(model.decoder.weight, model.decoder.bias,
                                     output, targets).data)).item() / (
                                         batch + 1)  #
            batch += 1
    if not test:
        global valid_loss, valid_ppl, valid_bpc
        valid_loss = np.append(valid_loss, epoch_loss)
        valid_ppl = np.append(valid_ppl, np.exp(epoch_loss))
        valid_bpc = np.append(valid_bpc, epoch_loss / np.log(2))
    return total_loss.item() / len(data_source)
Code example #4
File: finetune.py  Project: stjordanis/drill-1
def store_word_cediff(data_source, model, batch_size=10, fname='out'):
    """
       Store the cross-entropy loss per word in the vocabulary.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)

    # Initialize vocabulary structure to store the crossentropy losses.
    vocab, words = {}, corpus.dictionary.idx2word

    # Add the loss per word in the vocabulary structure for each different context.
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        for j, target in enumerate(targets):
            target_loss = criterion(pred_targets[j:j + 1],
                                    targets[j:j + 1]).data
            word = words[target.tolist()]
            if word in vocab:
                vocab[word].append(target_loss.tolist())
            else:
                vocab[word] = [target_loss.tolist()]
        hidden = repackage_hidden(hidden)

    # Store the vocabulary to the disk.
    with open(fname + '.pkl', 'wb') as f:
        pickle.dump(vocab, f)
Code example #5
File: main.py  Project: aiedward/PyTorch-NLP-1
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    for source_sample, target_sample in zip(train_source_sampler,
                                            train_target_sampler):
        model.train()
        data = torch.stack([train_data[i]
                            for i in source_sample]).t_().contiguous()
        targets = torch.stack([train_data[i] for i in target_sample
                               ]).t_().contiguous().view(-1)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data,
                                                       hidden,
                                                       return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output,
                             targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                              for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta *
                              (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                              for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                    epoch, batch,
                    len(train_source_sampler) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
Code example #6
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

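        # Scale the learning rate in proportion to the sampled sequence length;
        # the original value (lr2) is restored right after optimizer.step().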
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha: loss = loss + sum(
            args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            message = f"\033[K"
            message += f"| epoch {epoch} "
            message += f"| {batch}/{len(train_data) // args.bptt} batches "
            message += f"| lr {optimizer.param_groups[0]['lr']} "
            message += f"| ms/batch {(elapsed * 1000 / args.log_interval):.3f} "
            message += f"| loss {cur_loss:.3f} "
            message += f"| ppl {math.exp(cur_loss):.3f} "
            message += f"| bpc {(cur_loss / math.log(2)):.3f}"
            print(message, end='\r', flush=True)
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
    print('')
Code example #7
File: finetune.py  Project: stjordanis/drill-1
def store_datadist(data_source, model, batch_size=10, fname='datamatrix.h5'):
    """
       Store the log-probability matrix for a given method.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)

    # Initialize a data matrix structure which can be stored directly to the disk.
    f = tables.open_file(fname, mode='w')
    atom = tables.Float64Atom()
    array_c = f.create_earray(f.root, 'data', atom, (0, 10000))

    # Add a row sequentially to the matrix for each different context.
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        hidden = repackage_hidden(hidden)
        datadist = nn.LogSoftmax(dim=-1)(pred_targets)
        array_c.append(datadist.detach().cpu().numpy())

    # Close file.
    f.close()
Code example #8
File: main.py  Project: mzr/awd-lstm-lm
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            if args.logging == 'default':
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)), file=sys.stderr)
            else:
                print('{:3d}, {:5d}, {:5d}, {:02.2f}, {:5.2f}, {:5.2f}, {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)), file=sys.stderr)
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Code example #9
File: pointer.py  Project: jb33k/awd-lstm-lm-ThinkNet
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden, rnn_outs, _ = model(data, tn_m_hidden(hidden, hidden_previous), return_h=True, decoded=True)
            hidden_previous = hidden
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data.item(), ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data.item(), ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data.item()
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
Code example #10
    async def post(self):

        if self.get_argument('clear_cache', None):
            helpers.clear_cache()
            self.logger.info('Cleared cache.')
            self.flash('Cache Cleared')

        elif self.get_argument('make_admin', None):
            form_data, errors, valid_data = self.validate()
            if not errors:
                user = model.User.getByEmail(valid_data["email"])
                if user:
                    user.is_admin = True
                    user.save()

                    # the user may currently be signed in so invalidate its cache to get the new permissions
                    helpers.uncache(user.slug)
                    self.logger.info('Made user admin: ' + valid_data['email'])
                    self.flash('User successfully made admin.',
                               level='success')
                else:
                    errors['exists'] = True
            if errors:
                return self.redisplay(form_data, errors)

        elif self.get_argument('migrate', None):
            self.logger.info('Beginning migration.')

            # FUTURE: probably want to move this to a script outside the webserver
            # change and uncomment to do migration work
            # can also use a dictionary instead of kwargs here
            # q = model.User.update(model.User.prop='value').where()
            total = 0  # q.execute()

            self.logger.info('Migration finished. Modified ' + str(total) +
                             ' items.')
            self.flash('Migrations Complete', level='success')

        elif self.get_argument('reset', None) and self.debug:
            # use model.py to reset the db, then you can run this to add fixture data
            model.reset()

            # add any fixtures needed for development here
            password_salt, hashed_password = model.User.changePassword('test')
            user = model.User(first_name='Test',
                              last_name='Testerson',
                              email='*****@*****.**',
                              password_salt=password_salt,
                              hashed_password=hashed_password)
            user.save()

            # auto signout since the IDs and keys have all changed
            self.clear_all_cookies(domain=self.host)
            helpers.clear_cache()
            self.flash('Data Reset')

        self.redisplay()
Code example #11
File: main.py  Project: Ada520/topic_lms
def evaluate(data_source, batch_size=10):
    print("EVALUATION")
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(args.batch_size)
    #_, batch_len = data_source.shape
    #n_batches = (batch_len -1) // seq_len
    b_n = 0
    for batch_n in range(0,
                         len(data_source) - args.batch_size, args.batch_size):
        b_n += 1
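        # Pad the sentences in this batch to a common length and build next-word
        # targets by shifting the padded matrix one position to the left.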
        sub = data_source[batch_n:batch_n + args.batch_size]
        padded = np.array(list(itertools.zip_longest(*sub, fillvalue=0))).T
        targets = np.roll(padded, -1)
        targets[:, -1] = 0
        if args.cuda:
            #data = Variable(torch.from_numpy(data_source[:, batch_n * seq_len: (batch_n + 1) * seq_len])).transpose(0, 1).cuda()
            #targets = Variable(torch.from_numpy(data_source[:, batch_n * seq_len + 1: (batch_n + 1) * seq_len + 1].transpose(1, 0).flatten())).cuda()
            data = Variable(torch.from_numpy(padded.T)).cuda()
            targets = Variable(torch.from_numpy(targets.T.flatten())).cuda()
        else:
            #data = Variable(torch.from_numpy(data_source[:, batch_n * seq_len: (batch_n + 1) * seq_len])).transpose(0, 1)
            #targets = Variable(torch.from_numpy(data_source[:, batch_n * seq_len + 1: (batch_n + 1) * seq_len + 1].transpose(1, 0).flatten()))
            data = Variable(torch.from_numpy(padded.T))
            targets = Variable(torch.from_numpy(targets.T.flatten()))
        #print len(data), len(targets)
        #print data.size()

        #print "evaluating!"
        #comment out this line to get the original lda vector
        if args.cuda:
            inp_topic = get_theta(data.data.cpu().numpy(), lda_model,
                                  lda_dictionary, idx2word).cuda()
            inp_topic = inp_topic.type(torch.cuda.FloatTensor)
        else:
            inp_topic = get_theta(data.data.cpu().numpy(), lda_model,
                                  lda_dictionary, idx2word)
            inp_topic = inp_topic.type(torch.FloatTensor)
        #inp_topic = torch.from_numpy(np.zeros((args.batch_size, 50))).cuda()

        topic_var = torch.autograd.Variable(inp_topic, requires_grad=False)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        if args.mit_topic:
            output = model(data, topic_var, hidden)
        else:
            output = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += criterion(output_flat, targets).data
        #hidden = repackage_hidden(hidden)
    return total_loss[0] / b_n
Code example #12
File: main.py  Project: flennerhag/alstm
def train():
    def getseq():
        lr_original = optimizer.param_groups[0]['lr']
        if args.var_seq:
            # Vary sequence length
            seq_len = args.seq_len if np.random.random() < 0.95 else args.seq_len / 2.
            seq_len = max(5, int(np.random.normal(seq_len, 5)))
            optimizer.param_groups[0]['lr'] = lr_original * seq_len / args.seq_len
        else:
            seq_len = args.seq_len
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        return data, targets, seq_len, lr_original

    if args.model == 'QRNN':
        model.reset()

    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        model.train()
        data, targets, seq_len, lro = getseq()

        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        if args.clip:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data

        # Ensure learning rate is reset (only applicable with var_seq)
        optimizer.param_groups[0]['lr'] = lro

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logger.info(
                'TRAIN | epoch {:3d} | {:5d}/{:5d} batches | lr {:01.8f} '
                '| ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.seq_len,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    ppl(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Code example #13
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
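        # Perturb the decoder (softmax) weights with Gaussian noise before computing the loss:
        # the noise is kept only for a random ~10% of vocabulary rows plus the rows of the
        # current targets, and scaled by 0.2.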
        weight_noise = torch.distributions.normal.Normal(torch.zeros_like(model.decoder.weight), torch.ones_like(model.decoder.weight) * 1).sample() * 0.2
        binary_mask = torch.distributions.bernoulli.Bernoulli(torch.ones(model.decoder.weight.size(0)) * 0.1).sample().cuda()
        binary_mask[targets.view(-1)] = 1
        weight_noise = binary_mask.view([-1, 1]) * weight_noise 
        raw_loss = criterion(model.decoder.weight + weight_noise, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Code example #14
File: pointer.py  Project: batermj/awd-lstm-lm
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
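        # For each position, mix the neural-cache (pointer) distribution over the most recent
        # `window` targets with the model's softmax distribution, weighted by args.lambdasm.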
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
Code example #15
    def setUp(self):
        # an error in a previous test could prevent this from shutting down correctly
        try:
            model.peewee_db.connect()
        except OperationalError:
            pass

        model.reset()

        import helpers
        helpers.clear_cache()
Code example #16
File: main_2.py  Project: Dhiraj100892/awd-lstm-lm
def evaluate(batch_size=args.batch_size):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for i in range(0, len(val_data) - 1, args.bptt):
        data, targets = val_data.get_batch(i, args.bptt)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(val_data)
Code example #17
File: main.py  Project: batermj/awd-lstm-lm
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Code example #18
File: main.py  Project: batermj/awd-lstm-lm
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Code example #19
File: routes.py  Project: abhi-vik/mis281n-proj
    def reset():
        if 'user_id' not in session:
            return redirect('/login')

        if not session['user_admin']:
            return router['main']()

        if request.method == 'POST':
            model.reset()

            return redirect('/reports')

        return router['reset']()
Code example #20
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Code example #21
File: finetune.py  Project: stjordanis/drill-1
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        total_loss += len(data) * criterion(pred_targets, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Code example #22
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.train()
    total_loss = 0
    hidden = model.init_hidden(args.batch_size)
    start_time = time.time()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i, args)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden = model(data, hidden)
        loss = criterion(model.decoder.weight, model.decoder.bias, output,
                         targets)
        # Activation Regularization
        if args.alpha:
            loss = loss + args.alpha * output.pow(2).mean()
        # TODO: Temporal Activation Regularization (slowness)
        # if args.beta:
        #     loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += loss.data.item()
        if batch % args.log_interval == 0 and batch > 0:
            elapsed = time.time() - start_time
            cur_loss = total_loss / args.log_interval
            log_loss(
                os.path.join(os.path.dirname(args.save), 'train_loss.pkl'),
                cur_loss, batch == args.log_interval)
            start_time = time.time()
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | {:5.2f} ms/batch  | '
                'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
Code example #23
def evaluate(data_iter, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_len = 0
    hidden = model.init_hidden(batch_size)
    for i in np.arange(len(data_iter)):
        ((data, data_l), (targets, targets_l)), _ = next(iter(data_iter))
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        total_len += data_l.sum()
        hidden = repackage_hidden(hidden)

    return total_loss.item() / total_len
Code example #24
File: finetune_span.py  Project: luohongyin/PILM
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output = model.decoder(output)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Code example #25
File: mainlz.py  Project: NightmareVoid/LSTM_for_EEG
def one_fold_evaluate(data_source, data_source_target, batch_size=128):
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    data, targets = get_batchlz(data_source,
                                data_source_target,
                                0,
                                batch_size,
                                evaluation=True)
    output, hidden, rnn_hs, dropped_rnn_hs = model(data, None, return_h=True)
    pred_y = torch.max(output, 1)[1].data
    accuracy = (pred_y == targets).float().sum() / len(targets)

    wri.add_scalar('one_fold_accuracy', accuracy, epoch)

    print('|   one_fold_accuracy:{:5.2f}   |'.format(accuracy), '\n')
Code example #26
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            data = data.unsqueeze(-1)
            data_oe = data_oe.unsqueeze(-1)

        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(
            torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output,
                         targets).data

        # OE loss
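        # Outlier-exposure term: mean negative log-probability over the vocabulary,
        # i.e. the cross-entropy of the OE batch against a uniform target distribution.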
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe -
                              torch.max(logits_oe, dim=-1, keepdim=True)[0],
                              dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        #

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
Code example #27
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN' and getattr(model, 'reset', None): model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            #output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Code example #28
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus['words'].idx2word)
    for i in range(0, len(data_source['sentences']) - 1, batch_size):
        data, lengths, max_length, targets = get_batch(data_source, i,
                                                       batch_size)
        cur_batch_size = data.size(1)
        hidden = model.init_hidden(cur_batch_size)
        output, hidden = model(data, lengths, max_length, hidden)
        loss = batch_size * criterion(output, targets.long())
        total_loss += loss
        hidden = repackage_hidden(hidden)
    # return total_loss.item() / batch_size
    return total_loss.item() / len(data_source['sentences'])
Code example #29
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden = model(data, tn_m_hidden(hidden, hidden_previous), decoded=True)
            hidden_previous = hidden
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Code example #30
File: mainlz.py  Project: NightmareVoid/LSTM_for_EEG
def evaluate_all_data(data_source, data_source_target, batch_size=128):
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    e = 0
    while e < len(data_source_target):
        data, targets = get_batchlz(data_source,
                                    data_source_target,
                                    e,
                                    batch_size,
                                    evaluation=True)
        output, hidden, rnn_hs, dropped_rnn_hs = model(data,
                                                       None,
                                                       return_h=True)
        pred_y = torch.max(output, 1)[1].data
        accuracy = (pred_y == targets).float().sum() / len(targets)
        e += batch_size
        print('|   all_accuracy:{:5.2f}   |'.format(accuracy), '\n')
Code example #31
File: main.py  Project: aiedward/PyTorch-NLP-1
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)

    for source_sample, target_sample in zip(source_sampler, target_sampler):
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Code example #32
File: main.py  Project: flennerhag/alstm
def evaluate(model, data_source, batch_size=10):
    model.eval()
    if args.model == 'QRNN':
        model.reset()

    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.seq_len):
        data, targets = get_batch(data_source, i, args, evaluation=True)

        output = model(data, hidden)
        if isinstance(output, tuple):
            output, hidden = output

        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Code example #33
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    loss_measure = AverageMeter()
    acc_measure = AverageMeter()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        loss = criterion(model.decoder.weight, model.decoder.bias, output,
                         targets).data
        loss_measure.update(float(loss), targets.nelement())
        acc = float(accuracy(output.data, targets.data)[0])
        acc_measure.update(acc, targets.nelement())
        hidden = repackage_hidden(hidden)
    return loss_measure.avg, acc_measure.avg
Code example #34
File: gaps.py  Project: schorlet/javabox
 def DELETE(self, version, user, from_day, to_day):
     model.reset()
     web.ctx.status = '204 No Content'
Code example #35
File: gaps.py  Project: schorlet/javabox
 def DELETE(self):
     model.reset()
     web.ctx.status = '204 No Content'
Code example #36
File: tasktest.py  Project: hoozifachi/hubbub
 def tearDown(self):
     model.reset()