def evaluate_sents(data_source, uids, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    sent_loss = defaultdict(list)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data_batch = torch.load(open('test.pickle', 'rb'))
        data_test = data_batch['data']
        targets_test = data_batch['targets']
        data, targets = get_batch(data_source, i, args, evaluation=True)
        batch_uids = get_ids(uids, i, args, evaluation=True)
        output, hidden = model(data, hidden, decode=True)
        output_flat = output.view(-1, ntokens)
        per_word_loss = criterion(output_flat, targets)
        batch_uids_list = batch_uids.reshape(-1).tolist()
        loss_list = per_word_loss.tolist()
        for loss, uid in zip(loss_list, batch_uids_list):
            sent_loss[uid].append(loss)
        incre = torch.mean(per_word_loss).item() * len(data)
        total_loss += incre
        hidden = repackage_hidden(hidden)
    avg_sent_loss = {}
    for uid, losses in sent_loss.items():
        avg_sent_loss[uid] = float(np.mean(losses))
    return total_loss / len(data_source), avg_sent_loss
def evaluate(data_source, batch_size=10, valid_or_test="valid"):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_embedding_loss = 0.0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        loss = len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).item()
        total_loss += loss
        if args.embedder != "classic":
            total_embedding_loss += len(data) * float(embedder.last_batch_loss().cpu().detach().item())
        hidden = repackage_hidden(hidden)
    ret = total_loss / len(data_source)
    writer.add_scalar('Loss/%s/main' % valid_or_test, ret / math.log(2))
    writer.add_scalar('Loss/%s/embedder' % valid_or_test,
                      total_embedding_loss / len(data_source) / math.log(2))
    return ret
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    epoch_loss = 0  # running mean
    total_loss = 0  # running sum
    hidden = model.init_hidden(batch_size)
    batch = 0
    for i in range(0, data_source.size(0) - 1, args.bptt):  # batch loop
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
        if test == False:
            epoch_loss = (epoch_loss * batch +
                          (criterion(model.decoder.weight, model.decoder.bias, output, targets).data)).item() / (batch + 1)
        batch += 1
    if test == False:
        global valid_loss, valid_ppl, valid_bpc
        valid_loss = np.append(valid_loss, epoch_loss)
        valid_ppl = np.append(valid_ppl, np.exp(epoch_loss))
        valid_bpc = np.append(valid_bpc, epoch_loss / np.log(2))
    return total_loss.item() / len(data_source)
def store_word_cediff(data_source, model, batch_size=10, fname='out'):
    """ Store the cross-entropy loss per word in the vocabulary. """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    # Initialize vocabulary structure to store the cross-entropy losses.
    vocab, words = {}, corpus.dictionary.idx2word
    # Add the loss per word in the vocabulary structure for each different context.
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        for j, target in enumerate(targets):
            target_loss = criterion(pred_targets[j:j + 1], targets[j:j + 1]).data
            word = words[target.tolist()]
            if word in vocab:
                vocab[word].append(target_loss.tolist())
            else:
                vocab[word] = [target_loss.tolist()]
        hidden = repackage_hidden(hidden)
    # Store the vocabulary to the disk.
    pickle.dump(vocab, open(fname + '.pkl', 'wb'))
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    for source_sample, target_sample in zip(train_source_sampler, train_target_sampler):
        model.train()
        data = torch.stack([train_data[i] for i in source_sample]).t_().contiguous()
        targets = torch.stack([train_data[i] for i in target_sample]).t_().contiguous().view(-1)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch, len(train_source_sampler) // args.bptt, optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            message = f"\033[K"
            message += f"| epoch {epoch} "
            message += f"| {batch}/{len(train_data) // args.bptt} batches "
            message += f"| lr {optimizer.param_groups[0]['lr']} "
            message += f"| ms/batch {(elapsed * 1000 / args.log_interval):.3f} "
            message += f"| loss {cur_loss:.3f} "
            message += f"| ppl {math.exp(cur_loss):.3f} "
            message += f"| bpc {(cur_loss / math.log(2)):.3f}"
            print(message, end='\r', flush=True)
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
    print('')
def store_datadist(data_source, model, batch_size=10, fname='datamatrix.h5'):
    """ Store the log-probability matrix for a given method. """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    # Initialize a data matrix structure which can be stored directly to the disk.
    f = tables.open_file(fname, mode='w')
    atom = tables.Float64Atom()
    array_c = f.create_earray(f.root, 'data', atom, (0, 10000))
    # Add a row sequentially to the matrix for each different context.
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        hidden = repackage_hidden(hidden)
        datadist = nn.LogSoftmax()(pred_targets)
        array_c.append(datadist.detach().cpu().numpy())
    # Close file.
    f.close()
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)
        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            if args.logging == 'default':
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f}'.format(
                          epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                          elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)), file=sys.stderr)
            else:
                print('{:3d}, {:5d}, {:02.2f}, {:5.2f}, {:5.2f}, {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)), file=sys.stderr)
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden, rnn_outs, _ = model(data, tn_m_hidden(hidden, hidden_previous),
                                                return_h=True, decoded=True)
            hidden_previous = hidden
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data.item(), ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data.item(), ntokens) for t in targets])])
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data.item()
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
async def post(self):
    if self.get_argument('clear_cache', None):
        helpers.clear_cache()
        self.logger.info('Cleared cache.')
        self.flash('Cache Cleared')
    elif self.get_argument('make_admin', None):
        form_data, errors, valid_data = self.validate()
        if not errors:
            user = model.User.getByEmail(valid_data["email"])
            if user:
                user.is_admin = True
                user.save()
                # the user may currently be signed in so invalidate its cache to get the new permissions
                helpers.uncache(user.slug)
                self.logger.info('Made user admin: ' + valid_data['email'])
                self.flash('User successfully made admin.', level='success')
            else:
                errors['exists'] = True
        if errors:
            return self.redisplay(form_data, errors)
    elif self.get_argument('migrate', None):
        self.logger.info('Beginning migration.')
        # FUTURE: probably want to move this to a script outside the webserver
        # change and uncomment to do migration work
        # can also use a dictionary instead of kwargs here
        # q = model.User.update(model.User.prop='value').where()
        total = 0
        # q.execute()
        self.logger.info('Migration finished. Modified ' + str(total) + ' items.')
        self.flash('Migrations Complete', level='success')
    elif self.get_argument('reset', None) and self.debug:
        # use model.py to reset the db, then you can run this to add fixture data
        model.reset()
        # add any fixtures needed for development here
        password_salt, hashed_password = model.User.changePassword('test')
        user = model.User(first_name='Test', last_name='Testerson', email='*****@*****.**',
                          password_salt=password_salt, hashed_password=hashed_password)
        user.save()
        # auto signout since the IDs and keys have all changed
        self.clear_all_cookies(domain=self.host)
        helpers.clear_cache()
        self.flash('Data Reset')
    self.redisplay()
def evaluate(data_source, batch_size=10):
    print("EVALUATION")
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    hidden = model.init_hidden(args.batch_size)
    b_n = 0
    for batch_n in range(0, len(data_source) - args.batch_size, args.batch_size):
        b_n += 1
        # Pad each batch of variable-length sentences and shift by one to build targets.
        sub = data_source[batch_n:batch_n + args.batch_size]
        padded = np.array(list(itertools.zip_longest(*sub, fillvalue=0))).T
        targets = np.roll(padded, -1)
        targets[:, -1] = 0
        if args.cuda:
            data = Variable(torch.from_numpy(padded.T)).cuda()
            targets = Variable(torch.from_numpy(targets.T.flatten())).cuda()
        else:
            data = Variable(torch.from_numpy(padded))
            targets = Variable(torch.from_numpy(targets.flatten()))
        # comment out this line to get the original lda vector
        if args.cuda:
            inp_topic = get_theta(data.data.cpu().numpy(), lda_model, lda_dictionary, idx2word).cuda()
            inp_topic = inp_topic.type(torch.cuda.FloatTensor)
        else:
            inp_topic = get_theta(data.data.cpu().numpy(), lda_model, lda_dictionary, idx2word)
            inp_topic = inp_topic.type(torch.FloatTensor)
        # inp_topic = torch.from_numpy(np.zeros((args.batch_size, 50))).cuda()
        topic_var = torch.autograd.Variable(inp_topic, requires_grad=False)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        if args.mit_topic:
            output = model(data, topic_var, hidden)
        else:
            output = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += criterion(output_flat, targets).data
    return total_loss[0] / b_n
def train():
    def getseq():
        lr_original = optimizer.param_groups[0]['lr']
        if args.var_seq:
            # Vary sequence length
            seq_len = args.seq_len if np.random.random() < 0.95 else args.seq_len / 2.
            seq_len = max(5, int(np.random.normal(seq_len, 5)))
            optimizer.param_groups[0]['lr'] = lr_original * seq_len / args.seq_len
        else:
            seq_len = args.seq_len
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        return data, targets, seq_len, lr_original

    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        model.train()
        data, targets, seq_len, lro = getseq()
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        if args.clip:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data
        # Ensure learning rate is reset (only applicable with var_seq)
        optimizer.param_groups[0]['lr'] = lro
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logger.info(
                'TRAIN | epoch {:3d} | {:5d}/{:5d} batches | lr {:01.8f} '
                '| ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.seq_len, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss, ppl(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        # Sample Gaussian noise for the decoder weights, then keep it only on a random
        # ~10% of the vocabulary rows plus the rows of the current targets.
        weight_noise = torch.distributions.normal.Normal(
            torch.zeros_like(model.decoder.weight),
            torch.ones_like(model.decoder.weight) * 1).sample() * 0.2
        binary_mask = torch.distributions.bernoulli.Bernoulli(
            torch.ones(model.decoder.weight.size(0)) * 0.1).sample().cuda()
        binary_mask[targets.view(-1)] = 1
        weight_noise = binary_mask.view([-1, 1]) * weight_noise
        raw_loss = criterion(model.decoder.weight + weight_noise, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
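# --- Helper sketch (not part of the snippets above) ---
# The two pointer/cache evaluation functions above assume a `one_hot(idx, size)` helper that
# returns a (1, ntokens) one-hot row for each target token, which is then concatenated into
# `next_word_history`. A minimal sketch of such a helper in plain modern PyTorch; the original
# repositories may instead build the vector via numpy and wrap it in a Variable, and may move
# it to the GPU.
import torch

def one_hot(idx, size):
    """Return a (1, size) float tensor with a 1 at position `idx` and 0 elsewhere."""
    v = torch.zeros(1, size)
    v[0, idx] = 1
    return v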
def setUp(self):
    # an error in a previous test could prevent this from shutting down correctly
    try:
        model.peewee_db.connect()
    except OperationalError:
        pass
    model.reset()
    import helpers
    helpers.clear_cache()
def evaluate(batch_size=args.batch_size):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for i in range(0, len(val_data) - 1, args.bptt):
        data, targets = val_data.get_batch(i, args.bptt)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(val_data)
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
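# --- Helper sketch (not part of the snippets above) ---
# Nearly all of the evaluate/train functions in this collection call a `repackage_hidden`
# utility to detach the recurrent hidden state from the previous batch's graph so that
# backpropagation does not reach back to the start of the dataset. A minimal sketch of the
# usual AWD-LSTM-style helper; the exact version in any given repository may differ (older
# ones operate on Variables rather than Tensors).
import torch

def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detaching them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)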
def reset():
    if 'user_id' not in session:
        return redirect('/login')
    if not session['user_admin']:
        return router['main']()
    if request.method == 'POST':
        model.reset()
        return redirect('/reports')
    return router['reset']()
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        total_loss += len(data) * criterion(pred_targets, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    model.train()
    total_loss = 0
    hidden = model.init_hidden(args.batch_size)
    start_time = time.time()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i, args)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        # Activation Regularization
        if args.alpha:
            loss = loss + args.alpha * output.pow(2).mean()
        # TODO: Temporal Activation Regularization (slowness)
        # if args.beta:
        #     loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += loss.data.item()
        if batch % args.log_interval == 0 and batch > 0:
            elapsed = time.time() - start_time
            cur_loss = total_loss / args.log_interval
            log_loss(os.path.join(os.path.dirname(args.save), 'train_loss.pkl'),
                     cur_loss, batch == args.log_interval)
            start_time = time.time()
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | {:5.2f} ms/batch | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
def evaluate(data_iter, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_len = 0
    hidden = model.init_hidden(batch_size)
    for i in np.arange(len(data_iter)):
        ((data, data_l), (targets, targets_l)), _ = next(iter(data_iter))
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        total_len += data_l.sum()
        hidden = repackage_hidden(hidden)
    return total_loss.item() / total_len
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output = model.decoder(output)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
def one_fold_evaluate(data_source, data_source_target, batch_size=128):
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    data, targets = get_batchlz(data_source, data_source_target, 0, batch_size, evaluation=True)
    output, hidden, rnn_hs, dropped_rnn_hs = model(data, None, return_h=True)
    pred_y = torch.max(output, 1)[1].data
    accuracy = (pred_y == targets).float().sum() / len(targets)
    wri.add_scalar('one_fold_accuracy', accuracy, epoch)
    print('| one_fold_accuracy:{:5.2f} |'.format(accuracy), '\n')
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)
        if len(data.size()) == 1:  # happens for test set?
            data = data.unsqueeze(-1)
            data_oe = data_oe.unsqueeze(-1)
        if data.size(0) != data_oe.size(0):
            continue
        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)
        output, hidden, rnn_hs, dropped_rnn_hs = model(torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))
        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        # OE loss
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN' and getattr(model, 'reset', None):
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            # output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias,
                                                output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus['words'].idx2word)
    for i in range(0, len(data_source['sentences']) - 1, batch_size):
        data, lengths, max_length, targets = get_batch(data_source, i, batch_size)
        cur_batch_size = data.size(1)
        hidden = model.init_hidden(cur_batch_size)
        output, hidden = model(data, lengths, max_length, hidden)
        loss = batch_size * criterion(output, targets.long())
        total_loss += loss
        hidden = repackage_hidden(hidden)
    # return total_loss.item() / batch_size
    return total_loss.item() / len(data_source['sentences'])
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden = model(data, tn_m_hidden(hidden, hidden_previous), decoded=True)
            hidden_previous = hidden
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate_all_data(data_source, data_source_target, batch_size=128):
    model.eval()
    if args.model == 'QRNN': model.reset()
    hidden = model.init_hidden(batch_size)
    e = 0
    while e < len(data_source_target):
        data, targets = get_batchlz(data_source, data_source_target, e, batch_size, evaluation=True)
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, None, return_h=True)
        pred_y = torch.max(output, 1)[1].data
        accuracy = (pred_y == targets).float().sum() / len(targets)
        e += batch_size
        print('| all_accuracy:{:5.2f} |'.format(accuracy), '\n')
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for source_sample, target_sample in zip(source_sampler, target_sampler):
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
def evaluate(model, data_source, batch_size=10):
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.seq_len):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output = model(data, hidden)
        if isinstance(output, tuple):
            output, hidden = output
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    loss_measure = AverageMeter()
    acc_measure = AverageMeter()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        loss_measure.update(float(loss), targets.nelement())
        acc = float(accuracy(output.data, targets.data)[0])
        acc_measure.update(acc, targets.nelement())
        hidden = repackage_hidden(hidden)
    return loss_measure.avg, acc_measure.avg
def DELETE(self, version, user, from_day, to_day):
    model.reset()
    web.ctx.status = '204 No Content'
def DELETE(self):
    model.reset()
    web.ctx.status = '204 No Content'
def tearDown(self):
    model.reset()