def train(self, ts, dropout, batchsz=1):
    """Run one shuffled training epoch over *ts* and print loss/accuracy.

    :param ts: training set, sliceable via ``data.batch``
    :param dropout: dropout probability; fed to the model as keep-prob ``1 - dropout``
    :param batchsz: examples per batch
    """
    total_loss = total_corr = total = 0
    start_time = time.time()
    # Fix: `steps` was computed twice in the original; the duplicate is removed.
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    for i in range(steps):
        si = shuffle[i]
        ts_i = data.batch(ts, si, batchsz)
        feed_dict = self.model.ex2dict(ts_i, 1.0 - dropout)
        _, step, summary_str, lossv, accv = self.sess.run(
            [self.train_op, self.global_step, self.summary_op,
             self.loss, self.acc],
            feed_dict=feed_dict)
        self.train_writer.add_summary(summary_str, step)
        total_corr += accv
        total_loss += lossv
        # `total` counts examples seen (assumes len(ts_i) is the batch's
        # example count — TODO confirm against data.batch).
        total += len(ts_i)
    duration = time.time() - start_time
    print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (float(total_loss) / total, total_corr, total,
           float(total_corr) / total, duration))
def train(self, ts, cm, dropout, batchsz=1):
    """One shuffled training epoch; accuracy is tracked via the confusion
    matrix *cm*, which is reset before the epoch and printed afterwards."""
    total_loss = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    order = np.random.permutation(np.arange(steps))
    pg = ProgressBar(steps)
    cm.reset()
    fetches = [self.train_op, self.global_step, self.summary_op,
               self.loss, self.model.best]
    for batch_idx in order:
        ts_i = data.batch(ts, batch_idx, batchsz)
        feed = self.model.ex2dict(ts_i, 1.0 - dropout)
        _, step, summary_str, lossv, guess = self.sess.run(fetches,
                                                           feed_dict=feed)
        self.train_writer.add_summary(summary_str, step)
        total_loss += lossv
        cm.add_batch(ts_i.y, guess)
        pg.update()
    pg.done()
    total = cm.get_total()
    total_corr = cm.get_correct()
    elapsed = time.time() - start_time
    print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (float(total_loss) / total, total_corr, total,
           float(total_corr) / total, elapsed))
    print(cm)
def train(self, ts, batchsz):
    """One shuffled training epoch for the seq2seq model; prints average
    loss and perplexity.

    :param ts: training set, sliceable via ``data.batch``
    :param batchsz: examples per batch
    """
    self.model.train()
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    total_loss = total = 0
    pg = ProgressBar(steps)
    for i in range(steps):
        self.optimizer.zero_grad()
        si = shuffle[i]
        ts_i = data.batch(ts, si, batchsz, long_tensor_alloc,
                          tensor_shape, tensor_max)
        src, dst, tgt = self._wrap(ts_i)
        pred = self.model((src, dst))
        loss = self.crit(pred, tgt)
        # Fix: `.item()` replaces deprecated `loss.data[0]` (removed for
        # 0-dim tensors in modern PyTorch); consistent with the other
        # PyTorch train loop in this file.
        total_loss += loss.item()
        loss.backward()
        # Fix: `clip_grad_norm_` is the in-place, non-deprecated spelling,
        # matching the other loop in this file.
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        # `_total(tgt)` presumably counts target tokens for per-token
        # averaging — TODO confirm.
        total += self._total(tgt)
        self.optimizer.step()
        pg.update()
    pg.done()
    duration = time.time() - start_time
    avg_loss = float(total_loss) / total
    print('Train (Loss %.4f) (Perplexity %.4f) (%.3f sec)' %
          (avg_loss, np.exp(avg_loss), duration))
def train(self, ts, dropout, batchsz):
    """One shuffled training epoch; prints total loss normalized by the
    dataset size.

    :param ts: training set, sliceable via ``batch``
    :param dropout: dropout probability; fed as keep-prob ``1 - dropout``
    :param batchsz: examples per batch
    """
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    # Fix: removed unused accumulators `total_err` and `total_sum`.
    total_loss = 0
    for i in range(steps):
        si = shuffle[i]
        ts_i = batch(ts, si, batchsz)
        feed_dict = self.model.ex2dict(ts_i, 1.0 - dropout)
        _, step, summary_str, lossv = self.sess.run(
            [self.train_op, self.global_step, self.summary_op, self.loss],
            feed_dict=feed_dict)
        self.train_writer.add_summary(summary_str, step)
        total_loss += lossv
    duration = time.time() - start_time
    # NOTE(review): loss is divided by len(ts) (examples), not by `steps`;
    # assumes self.loss is summed per batch — TODO confirm.
    print('Train (Loss %.4f) (%.3f sec)' %
          (float(total_loss) / len(ts), duration))
def predict(self, input_generator, batch_size=1):
    """Run inference over *input_generator* and return the 'target'
    predictions (no fitting, no masking of missing values)."""
    batched = batch(input_generator,
                    batch_size=batch_size,
                    targets=self.inputs + self.targets)
    preds, _ = self.__consume_generator(batched,
                                        fit=False,
                                        mask_missing=False)
    return preds['target']
def evaluate(self, test_gen, batch_size=1):
    """Score the model on *test_gen* and print the evaluation scores
    (missing values are masked; no fitting occurs)."""
    batched = batch(test_gen,
                    batch_size=batch_size,
                    targets=self.inputs + self.targets)
    preds, targets = self.__consume_generator(batched,
                                              fit=False,
                                              mask_missing=True)
    print("Evaluation scores: ", self.score(preds, targets))
def show_examples(use_gpu, model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip, max_examples):
    """Decode up to *max_examples* source sentences and print source, gold
    target and the model's guess.

    Decoding is token-by-token: greedy argmax when ``sample`` is False,
    otherwise multinomial sampling restricted to the ``prob_clip`` most
    probable tokens.

    :param use_gpu: move tensors to CUDA when True
    :param model: seq2seq model called as ``model((src, dst))``
    :param es: example set, sliced via ``data.batch``
    :param rlut1: reverse lookup table for source vocabulary
    :param rlut2: reverse lookup table for target vocabulary
    :param embed2: target embeddings; supplies '<GO>'/'<EOS>' ids
    :param mxlen: maximum decode length
    :param sample: sample from the output distribution instead of argmax
    :param prob_clip: top-k cutoff used when sampling
    :param max_examples: number of examples to show
    """
    batch = data.batch(es, 0, max_examples, long_tensor_alloc, tensor_shape, tensor_max)
    GO = embed2.vocab['<GO>']
    EOS = embed2.vocab['<EOS>']
    src_array = batch['src']
    tgt_array = batch['tgt']
    if use_gpu:
        src_array = src_array.cuda()
    for src_i, tgt_i in zip(src_array, tgt_array):
        print(
            '========================================================================'
        )
        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=True)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i)
        print('[Actual] %s' % sent)
        # dst_i accumulates the decoded prefix; position j is filled just
        # before each forward pass.
        dst_i = torch.zeros(1, mxlen).long()
        if use_gpu:
            dst_i = dst_i.cuda()
        next_value = GO
        src_i = src_i.view(1, -1)
        for j in range(mxlen):
            dst_i[0, j] = next_value
            probv = model((Variable(src_i), Variable(dst_i)))
            # Take the model output at the current decode position.
            output = probv.squeeze()[j]
            if sample is False:
                # Greedy: pick the single most likely token.
                _, next_value = torch.max(output, 0)
                next_value = int(next_value.data[0])
            else:
                # Assumes `output` holds log-probabilities, so exp() recovers
                # probabilities — TODO confirm against the model's head.
                probs = output.data.exp()
                # This is going to zero out low prob. events so they are not
                # sampled from
                best, ids = probs.topk(prob_clip, 0, largest=True, sorted=True)
                probs.zero_()
                probs.index_copy_(0, ids, best)
                probs.div_(torch.sum(probs))
                fv = torch.multinomial(probs, 1)[0]
                next_value = fv
            if next_value == EOS:
                break
        sent = lookup_sentence(rlut2, dst_i.squeeze())
        print('Guess: %s' % sent)
        print(
            '------------------------------------------------------------------------'
        )
def _batch(self, ts, si, batchsz):
    """Pull batch *si* from *ts* and wrap the char, word and label tensors
    as Variables, moving them to the GPU when configured."""
    ds = data.batch(ts, si, batchsz, long_tensor_alloc, tensor_shape)
    xch, x, y = ds["xch"], ds["x"], ds["y"]
    if self.gpu:
        xch, x, y = xch.cuda(), x.cuda(), y.cuda()
    return Variable(xch), Variable(x), Variable(y)
def evaluate(model, epoch):
    """Evaluate after a train epoch"""
    print('Epoch [{}] -- Evaluate'.format(epoch))
    x_val, y_val = batch(4)
    # No teacher forcing at evaluation time.
    out, _ = model(x_val, y_val, teacher_force_ratio=0.)
    out = out.permute(1, 0)
    for row in range(out.size(0)):
        # input --> input gathered by predicted indices --> input gathered
        # by gold indices (presumably a pointer/permutation task — confirm).
        print('{} --> {} --> {}'.format(x_val[row],
                                        x_val[row].gather(0, out[row]),
                                        x_val[row].gather(0, y_val[row])))
def _run_epoch(self, phase, ts, pkeep, is_training=False):
    """Runs the model on the given data.

    Iterates *ts* in truncated-BPTT batches, threading the recurrent state
    from one batch into the next, and returns the epoch perplexity.

    :param phase: label used in the printed report (e.g. 'Train', 'Test')
    :param ts: dataset consumed via the module-level ``batch`` generator
    :param pkeep: keep probability fed to the model
    :param is_training: when True, also runs the train op and writes summaries
    :return: perplexity, exp(total cost / total timesteps)
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    # Initial recurrent state; carried across batches below.
    state = self.sess.run(self.model.initial_state)
    fetches = {
        "loss": self.loss,
        "final_state": self.model.final_state,
    }
    if is_training:
        fetches["train_op"] = self.train_op
        fetches["global_step"] = self.global_step
        fetches["summary_str"] = self.summary_op
    step = 0
    nbptt = self.model.batch_info['nbptt']
    maxw = self.model.batch_info['maxw']
    batchsz = self.model.batch_info['batchsz']
    for next_batch in batch(ts, nbptt, batchsz, maxw):
        feed_dict = {
            self.model.x: next_batch[0],
            self.model.xch: next_batch[1],
            self.model.y: next_batch[2],
            self.model.pkeep: pkeep
        }
        # Feed the previous batch's final LSTM state as this batch's
        # initial state (truncated BPTT).
        for i, (c, h) in enumerate(self.model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        vals = self.sess.run(fetches, feed_dict)
        cost = vals["loss"]
        state = vals["final_state"]
        if is_training:
            summary_str = vals["summary_str"]
            # NOTE(review): `step` is overwritten with the global step here,
            # then incremented below — so the %500 report below tracks
            # global_step during training but the local count otherwise.
            step = vals["global_step"]
            self.train_writer.add_summary(summary_str, step)
        costs += cost
        # Each batch contributes `nbptt` timesteps to the denominator.
        iters += nbptt
        step += 1
        if step % 500 == 0:
            print("step [%d] perplexity: %.3f" % (step, np.exp(costs / iters)))
    duration = time.time() - start_time
    avg_loss = costs / iters
    perplexity = np.exp(costs / iters)
    print('%s (Loss %.4f) (Perplexity = %.4f) (%.3f sec)' % (phase, avg_loss, perplexity, duration))
    return perplexity
def test_batch():
    """batch() should pad to a common length and return a 0/1 validity mask."""
    seqs = [np.random.rand(length, 3) for length in (10, 11, 12)]
    b, m = batch(seqs)
    # NOTE(review): mask rows are length 11 while the longest sequence has 12
    # steps — presumably batch() drops one timestep; confirm against batch().
    expected_mask = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
                              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
                              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]).transpose()
    print(m)
    print(expected_mask)
    print(m == expected_mask)
    assert (m == expected_mask).all()
def _batch(self, ts, si, batchsz):
    """Fetch batch *si* as (x, y) torch Variables, on the GPU when enabled."""
    ds = data.batch(ts, si, batchsz,
                    vec_alloc=long_tensor_alloc,
                    ExType=TorchExamples)
    x, y = ds.x, ds.y
    if self.gpu:
        x, y = x.cuda(), y.cuda()
    return Variable(x), Variable(y)
def best_in_batch(self, ts, sess, batchsz):
    """Score one randomly-chosen batch of *ts* and return the model's best
    guesses for it (dropout disabled)."""
    num_batches = int(math.floor(len(ts) / float(batchsz)))
    start_time = time.time()
    random_order = np.random.permutation(np.arange(num_batches))
    # Only the first batch of the random permutation is used.
    ts_i = batch(ts, random_order[0], batchsz)
    feed = self.model.ex2dict(ts_i, 1.0)
    best = sess.run(self.model.best, feed_dict=feed)
    elapsed = time.time() - start_time
    print('Show (%.3f sec)' % elapsed)
    return best
def test_compute_loss_batch_different_length_long():
    """Run a training step on a 50-sequence batch where one sequence is much
    longer (500 steps) than the rest (2 steps each)."""
    rnn = GeneratorRNN(1)
    optimizer = create_optimizer(rnn)
    max_len = 500
    # 49 short sequences plus one very long one.
    seq_len = [2] * 49 + [max_len]
    batch_len = 50  # NOTE(review): unused below — presumably documentary.
    features = 3
    strokes = [np.random.rand(l, features) for l in seq_len]
    batched_strokes, mask = batch(strokes)
    train_batch(rnn, optimizer, strokes, batched_strokes, mask)
    # NOTE(review): unconditional failure — presumably left in so pytest
    # shows output / marks this WIP; confirm whether it should be removed.
    assert False
def fit(self, train_gen, val_gen=None, batch_size=32, verbose=True):
    """Fit on *train_gen*, optionally score *val_gen*, then checkpoint.

    Raises Exception if the model has not been compiled.
    """
    if not self.compiled:
        raise Exception("Model not compiled")
    train_batches = batch(train_gen,
                          batch_size=batch_size,
                          targets=self.inputs + self.targets)
    preds, targets = self.__consume_generator(train_batches,
                                              fit=True,
                                              mask_missing=True)
    if verbose:
        print("Train scores: ", self.score(preds, targets))
    if val_gen is not None:
        val_batches = batch(val_gen,
                            batch_size=batch_size,
                            targets=self.inputs + self.targets)
        preds, targets = self.__consume_generator(val_batches,
                                                  fit=False,
                                                  mask_missing=True)
        if verbose:
            print("Validation scores: ", self.score(preds, targets))
    self.checkpoint()
def test(self, ts, sess, batchsz=1):
    """Compute and print the mean per-batch loss over *ts* (no dropout)."""
    total_loss = 0
    num_batches = int(math.floor(len(ts) / float(batchsz)))
    start_time = time.time()
    for batch_idx in range(num_batches):
        ts_i = batch(ts, batch_idx, batchsz)
        feed = self.model.ex2dict(ts_i, 1.0)
        total_loss += sess.run(self.loss, feed_dict=feed)
    elapsed = time.time() - start_time
    avg_loss = total_loss / num_batches
    print('Test (Loss %.4f) (%.3f sec)' % (avg_loss, elapsed))
    return avg_loss
def train(model, optimizer, epoch, clip=1.):
    """Train single epoch"""
    print('Epoch [{}] -- Train'.format(epoch))
    for it in range(STEPS_PER_EPOCH):
        optimizer.zero_grad()
        # Forward pass on a freshly drawn batch.
        x, y = batch(BATCH_SIZE)
        out, loss = model(x, y)
        # Backward pass with gradient clipping.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        if (it + 1) % 100 == 0:
            print('Epoch [{}] loss: {}'.format(epoch, loss.item()))
def test(self, ts, batchsz=1, phase='Test', conll_file=None, txts=None):
    """Evaluate tagging accuracy (and optionally F-score) over *ts*.

    :param ts: evaluation set, sliceable via ``batch``
    :param batchsz: examples per batch
    :param phase: label used in the printed report
    :param conll_file: if given together with *txts*, write CONLL output here
    :param txts: raw texts matching *ts*; required for CONLL output
    :return: ``(total_acc, fscore)``; fscore stays 0 unless ``self.fscore > 0``
    """
    total_correct = total_sum = fscore = 0
    total_gold_count = total_guess_count = total_overlap_count = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    # Only if they provide a file and the raw txts, we can write CONLL file
    handle = None
    if conll_file is not None and txts is not None:
        handle = open(conll_file, "w")
    # Fix: close the CONLL handle even if _batch raises mid-loop — the
    # original leaked the open file on any exception.
    try:
        for i in range(steps):
            ts_i = batch(ts, i, batchsz)
            correct, count, overlaps, golds, guesses = self._batch(
                ts_i, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps
    finally:
        if handle is not None:
            handle.close()
    duration = time.time() - start_time
    total_acc = total_correct / float(total_sum)
    # Only show the fscore if requested
    if self.fscore > 0:
        fscore = f_score(total_overlap_count, total_gold_count,
                         total_guess_count, self.fscore)
        print('%s (F%d = %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, self.fscore, fscore, total_correct, total_sum,
               total_acc, duration))
    else:
        print('%s (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, total_correct, total_sum, total_acc, duration))
    return total_acc, fscore
def test(self, ts, batchsz, phase='Test'):
    """Evaluate average loss and perplexity of the seq2seq model over *ts*.

    :param ts: evaluation set, sliceable via ``data.batch``
    :param batchsz: examples per batch
    :param phase: label used in the printed report
    :return: average loss per unit counted by ``self._total``
    """
    self.model.eval()
    total_loss = total = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    for i in range(steps):
        ts_i = data.batch(ts, i, batchsz, long_tensor_alloc,
                          tensor_shape, tensor_max)
        src, dst, tgt = self._wrap(ts_i)
        pred = self.model((src, dst))
        loss = self.crit(pred, tgt)
        # Fix: `.item()` replaces deprecated `loss.data[0]` (removed for
        # 0-dim tensors in modern PyTorch), matching the modern API usage
        # elsewhere in this file.
        total_loss += loss.item()
        total += self._total(tgt)
    duration = time.time() - start_time
    avg_loss = float(total_loss) / total
    print('%s (Loss %.4f) (Perplexity %.4f) (%.3f sec)' %
          (phase, avg_loss, np.exp(avg_loss), duration))
    return avg_loss
def test(self, ts, batchsz=1, phase='Test'):
    """Report loss and accuracy over *ts*, processed sequentially."""
    total_loss = total_corr = total = 0
    start_time = time.time()
    num_batches = int(math.floor(len(ts) / float(batchsz)))
    for batch_idx in range(num_batches):
        ts_i = data.batch(ts, batch_idx, batchsz)
        feed = self.model.ex2dict(ts_i, 1)
        lossv, accv = self.sess.run([self.loss, self.acc], feed_dict=feed)
        total_corr += accv
        total_loss += lossv
        total += len(ts_i)
    elapsed = time.time() - start_time
    accuracy = float(total_corr) / total
    print('%s (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (phase, float(total_loss) / total, total_corr, total,
           accuracy, elapsed))
    return accuracy
def test(self, ts, cm, batchsz=1, phase='Test'):
    """Score *ts* sequentially, filling the confusion matrix *cm*, then
    print loss/accuracy plus the matrix itself."""
    total_loss = 0
    start_time = time.time()
    num_batches = int(math.floor(len(ts) / float(batchsz)))
    cm.reset()
    for idx in range(num_batches):
        ts_i = data.batch(ts, idx, batchsz)
        feed = self.model.ex2dict(ts_i, 1)
        lossv, guess = self.sess.run([self.loss, self.model.best],
                                     feed_dict=feed)
        cm.add_batch(ts_i.y, guess)
        total_loss += lossv
    total = cm.get_total()
    total_corr = cm.get_correct()
    elapsed = time.time() - start_time
    accuracy = float(total_corr) / total
    print('%s (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (phase, float(total_loss) / total, total_corr, total,
           accuracy, elapsed))
    print(cm)
    return accuracy
def train(self, ts, sess, summary_writer, dropout, batchsz):
    """One shuffled training epoch; summaries are logged through
    *summary_writer* and the mean per-batch loss is printed."""
    total_loss = 0
    num_steps = int(math.floor(len(ts) / float(batchsz)))
    order = np.random.permutation(np.arange(num_steps))
    start_time = time.time()
    pg = ProgressBar(num_steps)
    fetches = [self.train_op, self.global_step, self.summary_op, self.loss]
    for si in order:
        ts_i = batch(ts, si, batchsz)
        feed = self.model.ex2dict(ts_i, 1.0 - dropout)
        _, step, summary_str, lossv = sess.run(fetches, feed_dict=feed)
        summary_writer.add_summary(summary_str, step)
        total_loss += lossv
        pg.update()
    pg.done()
    elapsed = time.time() - start_time
    print('Train (Loss %.4f) (%.3f sec)' % (total_loss / num_steps, elapsed))
# --- MNIST training setup (Theano symbolic graph) ---
batch_size = 10
train_data, valid_data, test_data = data.load_mnist('mnist.pkl.gz')
index = T.lscalar()  # symbolic minibatch index (not used in this visible chunk)
x = T.matrix('x')    # input features
y = T.ivector('y')   # integer class labels
p_y_given_x, layer_params = model.meta(x)
# Flatten the per-layer parameter lists into one list for training updates.
params = list(itertools.chain(*layer_params))
cost = model.negative_log_likelihood(p_y_given_x, y)
errors = model.errors(p_y_given_x, y)
validate_model = data.build_validation_function(
    data.batch(valid_data, batch_size=1000), errors, x, y)
n_epochs = 500
learning_rate = 0.01
L1_lambda = 0.001  # L1 penalty weight, applied to params[0] below
L2_lambda = 0.001  # L2 penalty weight, applied to params[1] below
train_batched = data.batch(train_data, batch_size)
# Regularized cost: L1 on the first parameter tensor, L2 on the second.
train_model = model.build_train_function(
    train_batched, cost + L1_lambda * T.sum(abs(params[0])) + L2_lambda * T.sum(params[1]**2),
    x, y, params)


def save_model(name):
    # NOTE(review): body appears truncated in this view — only the import is
    # visible; the actual pickling presumably follows outside this chunk.
    import cPickle
import itertools

# --- MNIST training setup (Theano symbolic graph; Python 2 — uses cPickle) ---
batch_size = 10
train_data, valid_data, test_data = data.load_mnist('mnist.pkl.gz')
index = T.lscalar()  # symbolic minibatch index (not used in this visible chunk)
x = T.matrix('x')    # input features
y = T.ivector('y')   # integer class labels
p_y_given_x, layer_params = model.meta(x)
# Flatten the per-layer parameter lists into one list for training updates.
params = list(itertools.chain(*layer_params))
cost = model.negative_log_likelihood(p_y_given_x, y)
errors = model.errors(p_y_given_x, y)
validate_model = data.build_validation_function(data.batch(valid_data, batch_size=1000), errors, x, y)
n_epochs = 500
learning_rate = 0.01
L1_lambda = 0.001  # L1 penalty weight, applied to params[0] below
L2_lambda = 0.001  # L2 penalty weight, applied to params[1] below
train_batched = data.batch(train_data, batch_size)
# Regularized cost: L1 on the first parameter tensor, L2 on the second.
train_model = model.build_train_function(train_batched, cost + L1_lambda*T.sum(abs(params[0])) + L2_lambda*T.sum(params[1]**2), x, y, params)


def save_model(name):
    """Pickle the (module-level) model parameters to '<name>-params.pkl'."""
    # cPickle exists only on Python 2; this module targets Python 2.
    import cPickle
    with open(name+'-params.pkl', 'wb') as f:
        cPickle.dump(params, f)
def train_full_batch(rnn: GeneratorRNN, optimizer: torch.optim.Optimizer,
                     strokes):
    """Pad/mask all of *strokes* into one batch and run a single training
    step on it."""
    padded, mask = batch(strokes)
    train_batch(rnn, optimizer, strokes, padded, mask)