def evaluate(data_source, batch_size=10, window=args.window):
    """Evaluate the LM on `data_source` with a neural-cache / pointer mixture.

    For each position, the softmax vocabulary distribution is interpolated
    with a pointer distribution computed over the last `window` hidden states
    (attention of the current RNN output against the stored history).
    Returns the summed manual cross-entropy divided by len(data_source).
    NOTE(review): uses `.data[0]` / `Variable` — written for PyTorch <= 0.3.
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    # Rolling history of the previous `window` targets (one-hot) and RNN outputs.
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0:
            # Progress print: running perplexity so far.
            print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()  # outputs of the last RNN layer
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            # Only mix in the pointer once a full window of history exists.
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                # Attention logits: dot product of current output with history.
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta  # softmax temperature for the pointer
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                # Pointer distribution over the vocabulary via one-hot targets.
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm  # mixture weight between pointer and softmax
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        # Keep only the most recent `window` entries of the history.
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
def train():
    """Train `model` for one epoch over `train_data` (AWD-LSTM style).

    Uses a randomized BPTT length per batch, temporarily rescales the
    learning rate proportionally to the sampled length, and applies
    activation (AR) and temporal-activation (TAR) regularization.
    Relies on module-level globals: model, optimizer, criterion, args,
    train_data, corpus, params, epoch.
    """
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        # 95% of the time use the full BPTT length, otherwise half of it.
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        # Temporarily scale the LR so shorter/longer sequences contribute
        # proportionally; restored after optimizer.step().
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activiation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2  # restore the base learning rate
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def evaluate(data_source, batch_size=10):
    """Return the mean per-token loss of `model` over `data_source`.

    Evaluation mode disables dropout; the hidden state is carried across
    BPTT windows but detached between them via repackage_hidden.
    """
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    ntokens = len(corpus.dictionary)
    state = model.init_hidden(batch_size)
    running_loss = 0
    limit = data_source.size(0) - 1
    pos = 0
    while pos < limit:
        inputs, labels = get_batch(data_source, pos, args, evaluation=True)
        logits, state = model(inputs, state)
        batch_loss = criterion(model.decoder.weight, model.decoder.bias, logits, labels).data
        running_loss = running_loss + len(inputs) * batch_loss
        state = repackage_hidden(state)
        pos += args.bptt
    return running_loss.item() / len(data_source)
def train(epoch, X_train, mask_train, Y_train, batch_size, seq_len, ntokens, char_vocab_size, args):
    """Train `model` for one epoch.

    args.num selects the objective:
      1      -> word-prediction cross-entropy only.
      2 or 3 -> word prediction plus a character-sequence auxiliary loss
                weighted by args.beta (beta is halved every args.betapoint epochs).
    Returns total epoch loss divided by the number of rows in X_train.
    Relies on globals: model, optimizer, criterion, seq_criterion, device,
    utils, output_s, message_filename.
    """
    if epoch % args.betapoint == 0:
        # Anneal the auxiliary-loss weight.
        args.beta /= 2
        print ('Decrease beta = {}'.format(args.beta))
    model.train()
    start_time = time.time()
    total_loss = 0
    if args.num in [2, 3]:
        total_seq_loss = 0
        total_pred_loss = 0
    for batch, i in enumerate(range(0, X_train.size(0) - 1, batch_size)):
        X, mask, Y = utils.get_batch(X_train, mask_train, Y_train, batch_size, i)
        X = X.to(device)
        mask = mask.to(device)
        Y = Y.to(device)
        optimizer.zero_grad()
        if args.num == 1:
            output, hidden = model(X, mask)
            loss = criterion(output.view(-1, ntokens), Y.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if args.num in [2, 3]:
            output, hidden, seq_output = model(X, mask)
            # seq_output = b, l, c-1, char_vocab_size
            loss_pred = criterion(output.view(-1, ntokens), Y.view(-1))
            seq_pred = seq_output.view(-1, char_vocab_size)
            # Character-sequence loss: predict each char from the previous ones.
            loss_seq = seq_criterion(seq_pred, X[:,:,1:].contiguous().view(-1))
            loss = loss_pred + args.beta*loss_seq
            loss.backward()
            optimizer.step()
            total_pred_loss += loss_pred.item()
            total_seq_loss += loss_seq.item()
            total_loss += loss.item()
    elapsed = time.time() - start_time
    if args.num == 1:
        s = ('| epoch {:3d} | ms/epoch {:5.2f} | '
             'loss {:5.3f}'.format(epoch, elapsed * 1000, total_loss))
        output_s(s, message_filename)
    if args.num in [2,3]:
        s = ('| epoch {:3d} | ms/epoch {:5.2f} | '
             'pred_loss {:5.3f} | {:5.3f} x seq_loss {:5.3f} | loss {:5.3f} '.format(epoch, elapsed * 1000, total_pred_loss, args.beta, total_seq_loss, total_loss))
        output_s(s, message_filename)
    return total_loss / X_train.size(0)
def evaluate_copy(data_source, batch_size=10):
    """Return the mean per-token loss of `model_copy` over `data_source`."""
    # Turn on evaluation mode which disables dropout.
    model_copy.eval()
    accumulated = 0
    state = model_copy.init_hidden(batch_size)
    for offset in range(0, data_source.size(0) - 1, args.bptt):
        inputs, labels = get_batch(data_source, offset, args, evaluation=True)
        logits, state = model_copy(inputs, state)
        step_loss = criterion(model_copy.decoder.weight, model_copy.decoder.bias, logits, labels).data
        accumulated = accumulated + len(inputs) * step_loss
        state = repackage_hidden(state)
    return accumulated.item() / len(data_source)
def valid_model(epoch, best_acc, learning_rate):
    """Validate the LM for one epoch; maybe decay LR and checkpoint.

    best_acc actually tracks the best *perplexity* (lower is better).
    Halves learning_rate when perplexity has nearly plateaued; saves the
    model when a new best perplexity is reached.
    Returns (best_acc, learning_rate).
    Relies on globals: model, corpus, valid_inputs, valid_targets, args,
    eval_batch_size, device, writer, get_batch.
    """
    model.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()
    ntokens = len(corpus.dictionary)
    # Fresh zeroed (h, c) LSTM state; 2 here is presumably num_layers*directions
    # — TODO confirm against the model definition.
    hidden = (torch.zeros(2, eval_batch_size, args.lstm_dim).to(device),
              torch.zeros(2, eval_batch_size, args.lstm_dim).to(device))
    with torch.no_grad():
        for batch, i in enumerate(
                range(0, valid_inputs.size(0) - 1, args.seq_length)):
            data, targets = get_batch(valid_inputs, valid_targets, i, args)
            data = data.to(device)
            targets = targets.to(device)
            # Detach so the state does not retain graph history.
            hidden = [state.detach() for state in hidden]
            output, hidden = model(data, hidden)
            loss = F.cross_entropy(output, targets)
            losses.update(loss.item(), args.batch_size)
            batch_time.update(time.time() - end)
            end = time.time()
            if batch % args.print_freq == 0:
                print(
                    'Test Epoch: {} [{}]| Loss: {:.3f} | pexplexity: {:.3f} | batch time: {:.3f}'
                    .format(epoch, batch, losses.avg, np.exp(losses.avg),
                            batch_time.avg))
    # acc = 100.0 * (correct / total)
    writer.add_scalar('log/test loss', losses.avg, epoch)
    writer.add_scalar('log/test perplexity', np.exp(losses.avg), epoch)
    # Decay LR when perplexity is within 1 of the best so far (plateau).
    if abs(np.exp(losses.avg) - best_acc) < 1 and learning_rate > 0.001:
        learning_rate *= 0.5
    if np.exp(losses.avg) < best_acc:
        print('==> Saving model..')
        if not os.path.isdir('save_model'):
            os.mkdir('save_model')
        torch.save(model.state_dict(), './save_model/' + args.name + '.pth')
        best_acc = np.exp(losses.avg)
    return best_acc, learning_rate
def test(test_data):
    """Compute and print classification accuracy of `model` on `test_data`."""
    print("test the model...")
    model.eval()
    n_correct = 0
    step = config.batch_size
    for start in range(0, len(test_data), step):
        chunk = test_data[start:start + step]
        X_tensor, Y_tensor = utils.get_batch(chunk, use_cuda)
        preds = torch.max(model(X_tensor), 1)[1]
        n_correct += sum(int(p == g) for p, g in zip(preds, Y_tensor))
    acc = n_correct / len(test_data)
    print("test model: accuarcy : %.4f " % acc)
    return acc
def evaluate(data_source):
    """Average the per-token loss of `model` over `data_source` (no grad)."""
    # Dropout off for evaluation.
    model.eval()
    loss_sum = 0.
    vocab = len(corpus.dictionary)
    state = model.init_hidden(test_batch_size)
    with torch.no_grad():
        for offset in range(0, data_source.size(0) - 1, args_bptt):
            inputs, labels = utils.get_batch(data_source, offset, args_bptt)
            preds, state = model(inputs, state)
            flat = preds.view(-1, vocab)
            loss_sum += len(inputs) * criterion(flat, labels).item()
            state = utils.repackage_hidden(state)
    return loss_sum / (len(data_source) - 1)
def run(self):
    """Advance this task by one batch.

    State machine: if no further run is required the task is marked
    COMPLETED (and the COMPLETED branch below then raises TaskCompleted).
    From YET_TO_START/IDLE the task transitions to RUNNING, fetches the
    next batch and executes it; an empty batch flags that no further
    run is required. Calling run() on an already-RUNNING task raises.
    NOTE(review): statement grouping reconstructed from a collapsed
    source line — confirm that execute() is intended to be skipped (not
    called) for an empty batch, and that state is not reset to IDLE here.
    """
    if not self.next_run_required:
        self.state = TaskState.COMPLETED
    if self.state == TaskState.YET_TO_START or self.state == TaskState.IDLE:
        self.state = TaskState.RUNNING
        # here run func with actual input for current batch
        batch = utils.get_batch(self.input.data, self.batch_size)
        if batch is None or len(batch) == 0:
            # Data exhausted: the next call will mark the task COMPLETED.
            self.next_run_required = False
            return
        self.execute(batch)
    elif self.state == TaskState.COMPLETED:
        raise TaskCompleted()
    elif self.state == TaskState.RUNNING:
        raise RunningAlreadyRunningTaskError()
def evaluate(data_source, batch_size=10):
    """Mean per-token loss over `data_source` for a model that threads an
    extra embedding matrix `w_e` and auxiliary state `vt_1` through calls."""
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    ntokens = len(corpus.dictionary)
    state, w_e, vt_1 = model.init_hidden(batch_size)
    loss_sum = 0
    for offset in range(0, data_source.size(0) - 1, args.bptt):
        inputs, labels = get_batch(data_source, offset, args, evaluation=True)
        preds, state, vt_1 = model(inputs, state, w_e, vt_1)
        step = criterion(model.decoder.weight, model.decoder.bias, preds, labels).data
        loss_sum = loss_sum + len(inputs) * step
        state = repackage_hidden(state)
    return loss_sum.item() / len(data_source)
def evaluate(data_source, batch_size=10):
    """Return the mean per-token loss of `model` over `data_source`.

    The model returns its (output, decoder weight, decoder bias, hidden);
    logits are formed here as output @ weight.T + bias.
    Fix: use `.item()` instead of `total_loss[0]` — indexing a 0-dim
    tensor raises IndexError on PyTorch >= 0.5 (same fix the sibling
    `evaluate` at the end of this file already applies).
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        # Project hidden outputs onto the vocabulary.
        pred_targets = torch.mm(output, weight.t()) + bias
        total_loss += len(data) * criterion(pred_targets, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(data_source, batch_size=10):
    """Return the mean per-token cross-entropy of `model` over `data_source`.

    Fix: use `.item()` instead of `total_loss[0]` — indexing a 0-dim
    tensor raises IndexError on PyTorch >= 0.5, matching the convention
    used by the other evaluate() variants in this file.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def demo():
    """Train a U-Net segmentation model on image/mask pairs and save it.

    Loads images from DATASET_TRAIN_PATH/image and masks (same basename,
    .png) from DATASET_TRAIN_PATH/mask, previews a few pairs, then trains
    for MAX_EPOCH epochs, periodically displaying and writing comparison
    images to OUT_IMAGE_DIR, and finally saves the model to MODEL_SAVE_PATH.
    Relies on module-level constants and the `unet`/`utils` helpers.
    """
    #############################
    # 1. load dataset
    #############################
    image_dir = os.path.join(DATASET_TRAIN_PATH, 'image')
    mask_dir = os.path.join(DATASET_TRAIN_PATH, 'mask')
    image_path_list = [os.path.join(image_dir, v) for v in os.listdir(image_dir)]
    # Mask shares the image's basename with a .png extension.
    mask_path_list = [os.path.join(mask_dir, v.replace('.jpg', '.png')) for v in os.listdir(image_dir)]
    image_num = len(image_path_list)
    print("Training image num -> ", image_num)
    # Sanity-check the first few pairs visually.
    for image_path, mask_path in list(zip(image_path_list, mask_path_list))[:3]:
        print(image_path, mask_path)
        image = utils.cv_imread(image_path)
        mask = utils.cv_imread(mask_path, 1)
        print(image.shape, mask.shape)
        cv2.imshow('Sample', np.hstack([image, mask]))
        cv2.waitKey(1)
        pass
    batch_num = image_num // BATCH_SIZE
    data_idx_list = list(range(image_num))
    if not os.path.exists(OUT_IMAGE_DIR):
        os.makedirs(OUT_IMAGE_DIR)
    #############################
    # 2. Create Model
    #############################
    sess, tf_x, tf_y, tf_lr, tf_train, tf_logit, tf_predict, tf_cost, tf_optimizer, tf_saver = unet.model(H_IN, W_IN, C_IN, 8)
    global_step = 0
    for epoch in range(MAX_EPOCH):
        # Reshuffle sample order each epoch.
        np.random.shuffle(data_idx_list)
        for step in range(batch_num):
            idx_list = data_idx_list[step * BATCH_SIZE: (step + 1) * BATCH_SIZE]
            image_batch, mask_batch = utils.get_batch(idx_list, image_path_list, mask_path_list, H_IN, W_IN)
            _, cost = sess.run([tf_optimizer, tf_cost],
                               feed_dict={tf_x: image_batch, tf_y: mask_batch, tf_train: True, tf_lr: LEARNING_RATE})
            if global_step % 10 == 0:
                print("Epoch %d: Step %d -> loss: %.5g" % (epoch, step, cost))
                # Visual progress check on the current batch (no dropout/BN update).
                predict_mask = sess.run(tf_predict, feed_dict={tf_x: image_batch, tf_train: False})
                compare_result_image = utils.create_compare_image(image_batch[0], mask_batch[0], predict_mask[0])
                cv2.imshow('Sample', compare_result_image)
                cv2.waitKey(1)
                cv2.imwrite(os.path.join(OUT_IMAGE_DIR, "train_step_%d.png" % global_step), compare_result_image)
            global_step += 1
    print("Finished. Save model to %s ..." % MODEL_SAVE_PATH)
    tf_saver.save(sess, MODEL_SAVE_PATH)
def evaluate(data_source, batch_size=10):
    """Mean NLL per token of `parallel_model` over `data_source` (no grad)."""
    model.eval()
    vocab = len(corpus.dictionary)
    state = model.init_hidden(batch_size)
    accumulated = 0
    with torch.no_grad():
        for offset in range(0, data_source.size(0) - 1, args.bptt):
            inputs, labels = get_batch(data_source, offset, args, evaluation=True)
            labels = labels.view(-1)
            log_prob, state = parallel_model(inputs, state)
            flat = log_prob.view(-1, log_prob.size(2))
            step_loss = nn.functional.nll_loss(flat, labels).data
            accumulated = accumulated + step_loss * len(inputs)
            state = repackage_hidden(state)
    return accumulated.item() / len(data_source)
def evaluate(data_source, tokens):
    """Emit per-token surprisal for `data_source` via output_surprisal.

    The hidden state is kept continuous across all windows of the input
    file (detached between windows). Returns nothing.
    """
    model.eval()
    state = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for offset in range(0, data_source.size(0) - 1, seq_len):
            # Clamp the token slice at the end of the token list.
            span = min(seq_len, len(tokens) - 1 - offset)
            batch_tokens = tokens[offset:offset + span]
            # keep continuous hidden state across all sentences in the input file
            inputs, labels = get_batch(data_source, offset, seq_len)
            preds, state = model(inputs, state)
            flat = preds.view(-1, vocab_size)
            output_surprisal(flat, labels, batch_tokens)
            state = repackage_hidden(state)
def evaluate(data_source, batch_size=10):
    """Return the mean per-token loss of `model` over `data_source`.

    With args.no_warm_start the hidden state is re-initialized after every
    window instead of being carried over.
    Fix: the re-initialization now passes args.cuda, matching the
    init_hidden call above — the original omitted it, inconsistently
    creating the reset state with a different signature.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size, args.cuda)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden, optimizer)
        total_loss += len(data) * criterion(output, targets).data
        if args.no_warm_start:
            # Discard the carried state: each window starts cold.
            hidden = model.init_hidden(batch_size, args.cuda)
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(data_source, batch_size=10):
    """Mean per-token cross-entropy of `model` over `data_source`."""
    model.eval()
    with torch.no_grad():
        vocab = len(corpus.dictionary)
        state = model.init_hidden(batch_size)
        loss_sum = 0
        for offset in range(0, data_source.size(0) - 1, args.bptt):
            inputs, labels = get_batch(data_source, offset, args, evaluation=True)
            preds, state = model(inputs, state)
            step = criterion(preds.view(-1, vocab), labels).data
            loss_sum = loss_sum + len(inputs) * step
            state = repackage_hidden(state)
        return loss_sum.item() / len(data_source)
def evaluate(data_source, mask):
    """Evaluate `model` on `data_source`, reporting candidate probabilities.

    For each window, accumulates cross-entropy and calls
    output_candidates_probs with the flat logits, targets, and the target
    mask taken from `mask`. Returns total loss / (len(data_source) - 1).
    Fixes: the CrossEntropyLoss module is now constructed once outside the
    loop instead of once per window, and the leftover debug prints of
    data/targets (one pair per window) have been removed.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ce_loss = nn.CrossEntropyLoss()  # hoisted: was re-created every iteration
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, seq_len):
            # keep continuous hidden state across all sentences in the input file
            data, targets = get_batch(data_source, i, seq_len)
            _, targets_mask = get_batch(mask, i, seq_len)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, vocab_size)
            total_loss += len(data) * ce_loss(output_flat, targets)
            output_candidates_probs(output_flat, targets, targets_mask)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / (len(data_source) - 1)
def evaluate(data_source, batch_size=10):
    """Mean per-token loss for a model with a secondary context state."""
    model.eval()
    loss_sum = 0
    with torch.no_grad():
        state = model.init_hidden(batch_size)
        ctx_state = model.init_c_hidden(batch_size)
        for offset in range(0, data_source.size(0) - 1, args.bptt):
            inputs, _, labels = get_batch(data_source, offset, args=args)
            # Detach both states between windows.
            state = repackage_hidden(state)
            ctx_state = repackage_hidden(ctx_state)
            preds, _, state, ctx_state = model(inputs, state, ctx_state)
            step = criterion(model.decoder.weight, model.decoder.bias, preds, labels).data
            loss_sum = loss_sum + len(inputs) * step
    return loss_sum.item() / len(data_source)
def evaluate(_model, criterion, valid_data, eval_batch_size):
    """Mean per-token loss of `_model` over `valid_data` (no grad).

    The final window is shortened so it never reads past the data.
    """
    _model.eval()
    loss_sum = .0
    state = _model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for offset in range(0, valid_data.size(0) - 1, args.sequence_length):
            window = min(args.sequence_length, len(valid_data) - 1 - offset)
            inputs, labels = utils.get_batch(valid_data, offset, window)
            preds, state = _model(inputs, state)
            state = utils.repackage_hidden(state)
            loss_sum += len(inputs) * criterion(preds, labels).item()
    return loss_sum / (len(valid_data) - 1)
def train(self):
    """Train self.MODEL, logging train/validation metrics and checkpointing.

    Iterates self.EPOCH epochs of mini-batches from utils.get_batch,
    printing train metrics every TRAIN_PRINT iterations and validation
    metrics on the same cadence; saves a checkpoint at the end.
    Fix: the training feed_dict now feeds the fetched mini-batch
    (batch_x, batch_y) instead of the full self.INPUT / self.LABEL —
    the original fed the whole dataset every step, making the batch
    loop pointless and each step a full-dataset pass.
    """
    NUM_EPOCH = []
    self.TRAIN_COLLECT = 50
    self.TRAIN_PRINT = self.TRAIN_COLLECT * 2
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        iter = 100
        for e in range(self.EPOCH):
            for batch_x, batch_y in utils.get_batch(
                    self.INPUT, self.LABEL, self.BATCH_SIZE):
                iter += 1
                feed = {
                    self.MODEL.inputs: batch_x,   # was self.INPUT (full set)
                    self.MODEL.labels: batch_y,   # was self.LABEL (full set)
                    self.MODEL.learning_rate: self.LEARNING_RATE,
                    self.MODEL.is_training: True
                }
                TRAIN_LOSS, _, TRAIN_ACC = sess.run([
                    self.MODEL.cost, self.MODEL.optimizer, self.MODEL.accuracy
                ], feed_dict=feed)
                if iter % self.TRAIN_COLLECT == 0:
                    NUM_EPOCH.append(e)
                    if iter % self.TRAIN_PRINT == 0:
                        print("Epoch: {}/{}".format(e + 1, self.EPOCH),
                              "Train Loss: {:.4f}".format(TRAIN_LOSS),
                              "Train Accuracy: {:.4f}".format(TRAIN_ACC))
                    # Validation uses the full held-out split.
                    feed = {
                        self.MODEL.inputs: self.VAL_INPUT,
                        self.MODEL.labels: self.VAL_LABEL,
                        self.MODEL.is_training: False
                    }
                    VAL_LOSS, VAL_ACC = sess.run(
                        [self.MODEL.cost, self.MODEL.accuracy], feed_dict=feed)
                    if iter % self.TRAIN_PRINT == 0:
                        print(
                            "Epoch: {}/{}".format(e + 1, self.EPOCH),
                            "Validation Loss: {:.4f}".format(VAL_LOSS),
                            "Validation Accuracy: {:.4f}".format(VAL_ACC))
        saver.save(sess, "checkpoint/porto_pilsa.ckpt")
def evaluate(data_source, batch_size=10):
    """Return the mean per-token loss of `model_lm` over `data_source`.

    The commented-out lines are a disabled experiment that post-processed
    the last hidden layer with `model_mlp` before decoding.
    """
    # Turn on evaluation mode which disables dropout.
    model_lm.eval()
    # model_mlp.eval()
    if args.model == 'QRNN':
        model_lm.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_lm.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # get_batch here returns a third value which is unused during eval.
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _, all_outputs = model_lm(data, hidden, return_h=True)
        # output = model_mlp(all_outputs[-1]) + all_outputs[-1]
        # output = output.view(output.size(0)*output.size(1), output.size(2))
        total_loss += len(data) * criterion(model_lm.decoder.weight, model_lm.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(model, criterion, data_source, batch_size=10):
    """Mean per-token loss for a DataParallel-wrapped model and criterion.

    Evaluation runs on the unwrapped `.module` of both the model and the
    criterion; the criterion's decoder weight/bias replicas are refreshed
    each window before computing the loss.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    # Unwrap nn.DataParallel to run single-device evaluation.
    model_now = model.module
    criterion_now = criterion.module
    if args.model == 'QRNN':
        model_now.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_now.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model_now(data, hidden)
        # Keep the criterion's copy of the decoder parameters in sync.
        # NOTE(review): doing this every window looks redundant if the
        # decoder is frozen during eval — confirm before hoisting.
        criterion_now.replicate_weight_and_bias(torch.nn.Parameter(model.module.decoder.weight), torch.nn.Parameter(model.module.decoder.bias))
        total_loss += len(data) * criterion_now(hiddens = output, targets = targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(data_source, batch_size=10):
    """Return the mean NLL per token of `parallel_model` over `data_source`.

    Fix: use `.item()` instead of `total_loss[0]` — indexing a 0-dim
    tensor raises IndexError on PyTorch >= 0.5, matching the identical
    evaluate() variant earlier in this file that already uses `.item()`.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += len(data) * loss
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(opt, valid_data, model, criterion):
    """Return the accumulated validation loss averaged over time steps.

    Iterates `valid_data` (a numpy array, time on axis 1) in windows of
    opt.bptt_len, carrying the hidden state across windows.
    Fix: use `loss.item()` instead of the deprecated `loss.data[0]`,
    which raises IndexError on 0-dim tensors under PyTorch >= 0.5.
    """
    accu_loss = 0.0
    model.eval()
    hidden = model.init_hidden(opt.batch_size)
    for i in range(0, valid_data.shape[1] - 1, opt.bptt_len):
        origin, target = get_batch(opt, valid_data, i)
        origin = np2tensor(opt, origin)
        target = np2tensor(opt, target)
        hidden = repackage_hidden(hidden)
        predict, hidden = model(origin, hidden)
        loss = criterion(predict, target)
        accu_loss += loss.item()
    accu_loss /= valid_data.shape[1]
    return accu_loss
def test_loss(self, model, args):
    """Mean per-token loss of `model` over self.data_source."""
    model.eval()
    loss_sum = 0
    state = model.init_hidden(self.batch_size)
    limit = self.data_source.size(0) - 1
    for offset in range(0, limit, args.bptt):
        inputs, labels = get_batch(self.data_source, offset, args, evaluation=True)
        preds, state = model(inputs, state)
        step = criterion(model.decoder.weight, model.decoder.bias, preds, labels).data
        loss_sum = loss_sum + len(inputs) * step
        state = repackage_hidden(state)
    return loss_sum.item() / len(self.data_source)
def evaluate(data_source, batch_size=10):
    """Mean per-token loss of `model` over `data_source`.

    Note: returns a 0-dim tensor (total_loss / len), not a Python float —
    the old `total_loss[0]` form errored under modern PyTorch and was
    deliberately replaced by plain division.
    """
    model.eval()
    with torch.no_grad():
        if args.model == 'QRNN':
            model.reset()
        vocab = len(corpus.dictionary)
        state = model.init_hidden(batch_size)
        loss_sum = 0
        for offset in range(0, data_source.size(0) - 1, args.bptt):
            inputs, labels = get_batch(data_source, offset, args, evaluation=True)
            preds, state = model(inputs, state)
            step = criterion(model.decoder.weight, model.decoder.bias, preds, labels).data
            loss_sum = loss_sum + len(inputs) * step
            state = repackage_hidden(state)
        return loss_sum / len(data_source)
def rvae_estimate(self, data_x, num_iter):
    """Run `num_iter` optimization steps of the RVAE on batches of data_x.

    Each step feeds a random batch plus the matching rows of the item
    latent matrix self.m_V, and runs the combined optimizer. Returns the
    generation loss of the final step.
    NOTE: Python 2 print-statement syntax.
    """
    for i in range(num_iter):
        b_x, ids = utils.get_batch(data_x, self.params.batch_size)
        # Fetch total, generation, and latent-regularization losses.
        _, l, gen_loss, v_loss = self.sess.run(
            (self.optimizer, self.loss, self.gen_loss, self.v_loss),
            feed_dict={
                self.x: b_x,
                self.v: self.m_V[ids, :]
            })
        # Display logs per epoch step
        if i % self.print_step == 0 and self.verbose:
            print "Iter:", '%04d' % (i+1), \
                "loss=", "{:.5f}".format(l), \
                "genloss=", "{:.5f}".format(gen_loss), \
                "vloss=", "{:.5f}".format(v_loss)
    return gen_loss
def rvae_estimate(self, data_x, links, num_iter): gradBuffer = self.sess.run(self.tvars) for ix, grad in enumerate(gradBuffer): gradBuffer[ix] = grad * 0 for i in range(num_iter): b_x, ids = utils.get_batch(data_x, self.params.batch_size) num = 0 gen_loss = 0 for j in range(self.params.batch_size): x = b_x[j].reshape((1, -1)) id = ids[j] link_ids = links[id] if len(link_ids) == 0: continue link_v = self.m_V[link_ids] tGrad, gen_loss_ins = self.sess.run( (self.newGrads, self.gen_loss), feed_dict={ self.x: x, self.linked_v: link_v, self.eta_vae: self.eta }) gen_loss += gen_loss_ins for ix, grad in enumerate(tGrad): gradBuffer[ix] += grad num += 1 gen_loss = gen_loss / num tGrad = self.sess.run(self.regGrads) for ix, grad in enumerate(tGrad): gradBuffer[ ix] += gradBuffer[ix] / num + grad * self.params.lambda_w feed_dict = {} for j in range(len(self.batchGrad)): feed_dict[self.batchGrad[j]] = gradBuffer[j] self.sess.run(self.updateGrads, feed_dict=feed_dict) for ix, grad in enumerate(gradBuffer): gradBuffer[ix] = grad * 0 # Display logs per epoch step if i % self.print_step == 0 and self.verbose: print "Iter:", '%04d' % (i+1), \ "loss=", "{:.5f}".format(l), \ "genloss=", "{:.5f}".format(gen_loss), \ "vloss=", "{:.5f}".format(v_loss) return gen_loss
def train(cur_epoch):
    """Train `my_model` for one epoch with an exponentially decaying LR.

    The LR is set each batch to lr_start * exp(-total_batch / total_batches)
    where total_batch counts batches across all epochs, so the decay spans
    the whole training run. Relies on globals: my_model, optimizer,
    criterion, env, train_data, params, lr_start, epoch.
    """
    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    final_hidden_states = my_model.get_first_hidden(env.batch_size, env)
    batch, i = 0, 0
    seq_len = env.seq_len
    batches_in_epoch = len(train_data) // env.seq_len
    total_batches = batches_in_epoch * env.epochs
    while i < train_data.size(0) - 1 - 1:
        # Global batch index across all epochs drives the LR decay.
        cur_total_batch = (cur_epoch - 1) * batches_in_epoch + batch
        optimizer.param_groups[0]['lr'] = lr_start * (math.exp(
            -cur_total_batch / total_batches))
        my_model.train()
        data, targets = get_batch(train_data, i, env, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        initial_hidden_states = repackage_hidden(final_hidden_states)
        optimizer.zero_grad()
        output, final_hidden_states = my_model(data, initial_hidden_states)
        loss = criterion(output, targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if env.clip:
            torch.nn.utils.clip_grad_norm_(params, env.clip)
        optimizer.step()
        total_loss += loss.data
        #optimizer.param_groups[0]['lr'] = lr2
        if batch % env.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / env.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                'ppl {:8.2f}'.format(epoch, batch,
                                     len(train_data) // env.seq_len,
                                     optimizer.param_groups[0]['lr'],
                                     elapsed * 1000 / env.log_interval,
                                     math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def evaluate(data_source, batch_size=10):
    """Mean per-token loss for a model that threads memory (`mems`) state.

    Both `hidden` and `mems` start as None and are produced by the model's
    first call; `hidden` is detached between windows when present.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    # Only call reset() when the model actually exposes it.
    if args.model == 'QRNN' and getattr(model, 'reset', None):
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            #output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def train():
    """Train `model` for one epoch over `train_data` with AR/TAR regularization.

    Fixes relative to the original:
    - `optimizer.zero_grad()` is now called each batch; the original never
      zeroed gradients, so they accumulated across every step.
    - `b_n` is now incremented per batch; it previously stayed 0, so the
      logging branch (`b_n > 0`) was unreachable.
    - `clip_grad_norm_` replaces the deprecated `clip_grad_norm`, matching
      the other train() in this file.
    - `total_loss.item()` replaces `total_loss[0]` (0-dim indexing fails
      on modern PyTorch).
    Relies on globals: model, optimizer, criterion, args, train_data,
    ntokens, seq_len, hidden init below, epoch.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    #m, batch_len = train_data.shape
    #n_batches = (batch_len -1) // seq_len
    data_len = len(train_data)
    b_n = 0
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Temporarily rescale LR in proportion to the sequence length.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        data, targets = get_batch(
            train_data, i,
            args)  # data size SEQ X BATCH_SIZE, targets: SEQ X BATCH_SIZE, 1
        optimizer.zero_grad()  # BUG FIX: gradients previously accumulated
        output, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)
        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                          for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                          for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2  # restore the base LR
        if b_n % args.log_interval == 0 and b_n > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, b_n, len(train_data) // args.batch_size,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        b_n += 1  # BUG FIX: was never incremented, so logging never ran
def evaluate(data_source, batch_size=10):
    """Return the mean per-token loss of `model` over `data_source`.

    Fix: removed the dead per-batch `logits = model.decoder(output)`
    projection (and the commented log-softmax/argmax lines that used it) —
    its result was never read, so it only wasted a full vocabulary
    projection every window; the criterion receives the raw outputs plus
    the decoder weight/bias and projects internally.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def train_gan(self, epochs, batch_size, sample_interval, train_data):
    """Adversarially train the generator and discriminator.

    Each epoch trains the discriminator on one real and one generated
    batch, then trains the generator (via the combined GAN) to fool it.
    Losses are sampled every `sample_interval` epochs; models are saved
    every 1000 epochs; data and loss plots are written at the end.
    NOTE(review): placement of savedata/showLoss after the loop is
    reconstructed from a collapsed source line — confirm.
    """
    # Create labels for real and fake data
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))
    for epoch in range(epochs):
        # Get batch of real data
        real_seqs = get_batch(train_data, batch_size)
        # Generate batch of fake data using random noise
        noise = np.random.normal(0, 1, (batch_size, self.model.latent_dim))
        gen_seqs = self.model.generator.predict(noise)
        # Train the discriminator to accept real data and reject fake data
        d_loss_real = self.model.discriminator.train_on_batch(
            real_seqs, real)
        d_loss_fake = self.model.discriminator.train_on_batch(
            gen_seqs, fake)
        # Train the generator such that when it takes random noise as an
        # input, it will produce fake data which the discriminator accepts
        # as real
        noise = np.random.normal(0, 1, (batch_size, self.model.latent_dim))
        g_loss = self.model.gan.train_on_batch(noise, real)
        if epoch % sample_interval == 0:
            print("""%d [DiscLoss/Acc Real: (%10f, %10f)]
                  [DiscLoss/Acc Fake: (%10f, %10f)]
                  [DiscAcc %10f][GenLoss = %10f]""" %
                  (epoch, d_loss_real[0], d_loss_real[1], d_loss_fake[0],
                   d_loss_fake[1], 0.5 * (d_loss_real[1] + d_loss_fake[1]),
                   g_loss))
            self.disc_loss_r.append(d_loss_real)
            self.disc_loss_f.append(d_loss_fake)
            self.gen_loss.append(g_loss)
            sample_image(self.model, epoch, real_seqs, self.path)
        if (epoch % 1000 == 0):
            self.save_models(self.path, epoch, self.model.generator,
                             self.model.discriminator)
    self.savedata(self.path, train_data)
    self.showLoss(self.path, save=True)
def test(step, verbose=None):
    """Compute test-set accuracy and log it as a TF summary at `step`.

    Batches q_test/a_test, compares argmax answers against predictions,
    and reduces the per-example correctness list with TF ops so the value
    can be written through the summary writer.
    NOTE: uses legacy TF APIs (tf.to_float, tf.scalar_summary) — TF < 1.0.
    Relies on globals: q_test, a_test, batch_size, max_q, Na, sess,
    model_outputs, writer, get_batch.
    """
    N_test = len(q_test)
    n_batches = N_test // batch_size
    acc = []
    for idx in range(n_batches):
        if verbose:
            if idx%20==0:
                # np.mean([]) on the first print yields nan — cosmetic only.
                print("%d/%d - accuracy = %1.3f"%(idx,n_batches, np.mean(acc)))
        begin = idx*batch_size
        end = min((idx+1)*batch_size, N_test)
        Q, mask, A = get_batch(begin,end,q_test,a_test,batch_size,max_q,Na)
        a_pred = sess.run(model_outputs['answer_pred'],
                          feed_dict={model_outputs['question']:Q,
                                     model_outputs['mask']:mask,
                                     model_outputs['answer']:A})
        # 1 where the gold argmax matches the prediction, else 0.
        equals = 1*np.equal(A.argmax(axis=1),a_pred)
        # Drop padding rows beyond the true batch extent.
        equals = list(equals[:end-begin])
        acc += equals
    # Reduce inside the graph so the value can also go to the summary writer.
    acc = tf.reduce_mean(tf.to_float(acc))
    acc_s = tf.scalar_summary("acc_tf",acc,name="acc_tf")
    acc,acc_s = sess.run([acc,acc_s])
    writer.add_summary(acc_s,step)
    return acc
# Top-level training loop: shuffle each epoch, train in mini-batches,
# then evaluate on the test set and checkpoint.
n_batches = N_train // batch_size + 1
for epoch in range(n_epochs):
    epoch_loss = []
    times = 0.
    # Shuffle questions and answers with the same permutation.
    indexes = np.arange(N_train)
    np.random.shuffle(indexes)
    q_train = q_train[indexes]
    a_train = a_train[indexes]
    for idx in range(n_batches):
        tic = time()
        if idx%(n_batches//10)==0:
            # NOTE(review): `n_train` here differs in case from `N_train`
            # above — verify it is defined, otherwise this print raises.
            print("Epoch %d - %d/%d : loss = %1.4f - time = %1.3fs"%(epoch,idx,
                                                                    n_batches,np.mean(epoch_loss),
                                                                    times/((n_train//10)*batch_size)))
            times = 0.
        begin = idx*batch_size
        end = min((idx+1)*batch_size, N_train)
        Q, mask, A = get_batch(begin,end,q_train,a_train,batch_size,max_q,Na)
        # One optimization step; also fetch loss and its summary.
        _,l,l_s = sess.run([model_outputs['train_op'],
                            model_outputs['loss'],
                            model_outputs['loss_summary']],
                           feed_dict={model_outputs['question']:Q,
                                      model_outputs['mask']:mask,
                                      model_outputs['answer']:A})
        epoch_loss.append(l)
        writer.add_summary(l_s,idx+epoch*n_batches)
        times += time() - tic
    # Evaluate on CPU to leave the GPU free.
    with tf.device('/cpu:0'):
        test_acc = test((1+epoch)*n_batches)
    print("Epoch %d - Test accuracy = %1.3f" % (epoch+1, test_acc))
    saver.save(sess, join('/home/hbenyounes/vqa/saved_models/','model'), global_step=epoch)
def train():
    """Train for one epoch, splitting each batch into small sub-batches.

    Each full batch of args.batch_size columns is processed as
    batch_size // small_batch_size slices, each with its own hidden
    state; sub-batch losses are scaled so their sum equals the full-batch
    loss, and a single optimizer step is taken per full batch.
    NOTE: uses the deprecated clip_grad_norm and `total_loss[0]`
    (pre-0.4 PyTorch conventions), consistent with this code's vintage.
    """
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    # One hidden state per sub-batch slice.
    hidden = [model.init_hidden(args.small_batch_size) for _ in range(args.batch_size // args.small_batch_size)]
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        # 95% of the time use the full BPTT length, otherwise half of it.
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)

        # Temporarily rescale the LR in proportion to the sampled length.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        optimizer.zero_grad()

        start, end, s_id = 0, args.small_batch_size, 0
        while start < args.batch_size:
            cur_data, cur_targets = data[:, start: end], targets[:, start: end].contiguous().view(-1)
            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden[s_id] = repackage_hidden(hidden[s_id])
            log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = parallel_model(cur_data, hidden[s_id], return_h=True)
            raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), cur_targets)
            loss = raw_loss
            # Activiation Regularization
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
            # Temporal Activation Regularization (slowness)
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
            # Scale so the sum over sub-batches equals the full-batch loss.
            loss *= args.small_batch_size / args.batch_size
            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            loss.backward()

            s_id += 1
            start = end
            end = start + args.small_batch_size

            gc.collect()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        # total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2  # restore the base LR
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logging('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len