def train_epoch(epoch, samples, labels, sess, Z, X, CG, CD, CS, accuracy, D_loss, G_loss, D_solver, G_solver, batch_size, use_time, D_rounds, G_rounds, seq_length, latent_dim, num_generated_features, cond_dim, max_val, WGAN_clip, one_hot): """ Train generator and discriminator for one epoch. """ for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0)*G_rounds), D_rounds + (cond_dim > 0)*G_rounds): # update the discriminator for d in range(D_rounds): X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + d, labels) Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time) if cond_dim > 0: # CGAN Y_mb = Y_mb.reshape(-1, cond_dim) # if one_hot: # # change all of the labels to a different one # offsets = np.random.choice(cond_dim-1, batch_size) + 1 # new_labels = (np.argmax(Y_mb, axis=1) + offsets) % cond_dim # Y_wrong = np.zeros_like(Y_mb) # Y_wrong[np.arange(batch_size), new_labels] = 1 # else: # # flip all of the bits (assuming binary...) # # Y_wrong = 1 - Y_mb _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb, CD: Y_mb, CG: Y_mb}) else: _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb}) if WGAN_clip: # clip the weights _ = sess.run([clip_disc_weights]) # update the generator for g in range(G_rounds): if cond_dim > 0: # note we are essentially throwing these X_mb away... X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + D_rounds + g, labels) Y_mb = Y_mb.reshape(-1,1) _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb}) else: _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) # at the end, get the loss if cond_dim > 0: D_loss_curr, G_loss_curr, acc = sess.run([D_loss, G_loss, accuracy], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb, CD: Y_mb}) print("discriminator accuracy:", acc) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) else: D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) return D_loss_curr, G_loss_curr
def step(self, dataset, training=False): ''' one epoch ''' if (training): next_batch = data_utils.get_batch(dataset, self.batch_size, self.input_dim, self.output_dim) self.cur_batch_size = self.batch_size else: self.cur_batch_size = len(dataset) next_batch = data_utils.get_batch(dataset, self.cur_batch_size, self.input_dim, self.output_dim, shuffle=False) loss = 0 while True: try: input_data, output_data, seq_length, seq_length2 = self.sess.run( next_batch) if (input_data.shape[0] != self.cur_batch_size): continue if (training): pre_loss, _ = self.sess.run( [self.loss, self.train_op], feed_dict={ self.train_input: input_data, self.train_output: output_data, self.seq_length: seq_length }) loss = self.sess.run(self.loss, feed_dict={ self.train_input: input_data, self.train_output: output_data, self.seq_length: seq_length }) print('batch: %d preloss %.4f loss: %.4f' % (input_data.shape[0], pre_loss, loss)) else: loss = self.sess.run(self.loss, feed_dict={ self.train_input: input_data, self.train_output: output_data, self.seq_length: seq_length }) except tf.errors.OutOfRangeError: #if (not training): # loss = loss / ind break return loss
def get_response(message): line = str.encode(message) if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': response = 'What did you say?' output_file.write('Human: ' + message + '\n' + 'Bot: ' + str(response) + '\n') return message, response token_ids = data_utils.sentence2id(enc_vocab, line) if len(token_ids) > max_length: response = 'The maximum length I can handle is %d' % max_length output_file.write('Human: ' + message + '\n' + 'Bot: ' + str(response) + '\n') return message, response bucket_id = chatbot.find_right_bucket(len(token_ids)) encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( [(token_ids, [])], bucket_id, batch_size=1) _, _, output_logits = chatbot.run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = chatbot.construct_response(output_logits, inv_dec_vocab) output_file.write('Human: ' + message + '\n' + 'Bot: ' + str(response) + '\n') return message, response
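# --- Illustrative sketch (not taken from any of the projects in this corpus) ---
# This snippet and several later ones call
#     data_utils.get_batch([(token_ids, [])], bucket_id, batch_size=1)
# to turn one tokenized sentence into model-ready tensors. Each project ships its
# own helper; the sketch below only illustrates what such a bucketed batcher
# typically does (pad and reverse the encoder input, prepend GO to the decoder
# input, build loss masks). BUCKETS, PAD_ID and GO_ID here are assumed values,
# not the originals.
import random
import numpy as np

PAD_ID, GO_ID = 0, 1                      # assumed special token ids
BUCKETS = [(8, 10), (12, 14), (16, 19)]   # assumed (encoder, decoder) sizes

def get_batch(data_bucket, bucket_id, batch_size=1):
    """Pad a batch of (encoder_ids, decoder_ids) pairs drawn from one bucket."""
    encoder_size, decoder_size = BUCKETS[bucket_id]
    encoder_inputs, decoder_inputs = [], []
    for _ in range(batch_size):
        enc, dec = random.choice(data_bucket)
        enc_pad = [PAD_ID] * (encoder_size - len(enc))
        encoder_inputs.append(list(reversed(enc + enc_pad)))   # reversed + padded
        dec_pad = [PAD_ID] * (decoder_size - len(dec) - 1)
        decoder_inputs.append([GO_ID] + dec + dec_pad)         # GO + target + pad
    # transpose to time-major: one array of shape [batch_size] per time step
    batch_encoder = [np.array([encoder_inputs[b][t] for b in range(batch_size)])
                     for t in range(encoder_size)]
    batch_decoder = [np.array([decoder_inputs[b][t] for b in range(batch_size)])
                     for t in range(decoder_size)]
    # mask out steps whose target (the next decoder token) is padding
    batch_masks = []
    for t in range(decoder_size):
        mask = np.ones(batch_size, dtype=np.float32)
        for b in range(batch_size):
            if t == decoder_size - 1 or decoder_inputs[b][t + 1] == PAD_ID:
                mask[b] = 0.0
        batch_masks.append(mask)
    return batch_encoder, batch_decoder, batch_masks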
def train(train_set, test_set, vocabulary): with tf.Session() as session: model = create_model(session, False) loss = 0.0 current_step = 0 previous_losses = [] while True: current_step += 1 encoder_inputs, decoder_inputs, target_weights = data_utils.get_batch( train_set, BATCH_SIZE, MODEL_LENGTH) _, step_loss, _ = model.step(session, encoder_inputs, decoder_inputs, target_weights, False) loss += step_loss / STEPS_PER_CHECKPOINT if current_step % STEPS_PER_CHECKPOINT == 0: perplexity = math.exp(loss) if loss < 300 else float('inf') print "global step %d learning rate %.4f perplexity %.2f" % ( current_step, model.learning_rate.eval(), perplexity) if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): session.run(model.learning_rate_decay_op) previous_losses.append(loss) loss = 0.0 checkpoint_path = os.path.join(DATA_PATH, "spell_correction.ckpt") model.saver.save(session, checkpoint_path, global_step=model.global_step) test(session, model, vocabulary, test_set)
def train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_solver, G_solver, batch_size, use_time, D_rounds, G_rounds, seq_length, latent_dim, num_signals): """ Train generator and discriminator for one epoch. """ # for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds): for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + G_rounds), D_rounds + G_rounds): # update the discriminator X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels) Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time) for d in range(D_rounds): # run the discriminator solver _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb}) # update the generator for g in range(G_rounds): # run the generator solver _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) # at the end, get the loss D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) return D_loss_curr, G_loss_curr
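# --- Illustrative sketch (assumptions, not the actual RGAN utilities) ----------
# The two GAN epochs above call data_utils.get_batch(samples, batch_size,
# batch_idx, labels) and sample_Z(batch_size, seq_length, latent_dim, use_time).
# Assuming `samples` is an array of shape [N, seq_length, num_signals] and that
# batches are consecutive slices, minimal versions of those helpers could look
# like this; the real ones may differ.
import numpy as np

def get_batch(samples, batch_size, batch_idx, labels=None):
    """Return the batch_idx-th consecutive slice of samples (and labels)."""
    start, end = batch_idx * batch_size, (batch_idx + 1) * batch_size
    if labels is None:
        return samples[start:end], None
    return samples[start:end], labels[start:end]

def sample_Z(batch_size, seq_length, latent_dim, use_time=False):
    """Sample the latent noise fed to the generator, one vector per time step."""
    Z = np.random.normal(size=(batch_size, seq_length, latent_dim))
    if use_time:
        # assumed convention: overwrite the first latent channel with a time ramp
        Z[:, :, 0] = np.linspace(0.0, 1.0, seq_length)
    return Z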
def test(sess, dataset, out_dir, input_dim, output_dim, apply_cmvn=False, param_cmvn=None): next_batch = data_utils.get_batch(dataset, 1, input_dim, output_dim, shuffle=False) ind = 0 while True: try: input, output, seq_length = sess.run(next_batch) output = numpy.reshape(output, (-1, output_dim)) output = output[:seq_length[0]] print(seq_length[0]) if (apply_cmvn): output = output * param_cmvn[1] + param_cmvn[0] filename = os.path.basename(dataset[ind]).split('.')[0] + '.cmp' numpy.savetxt(out_dir + '/' + filename, output, fmt='%f') print('write one %s' % filename) ind += 1 except tf.errors.OutOfRangeError: break
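# --- Illustrative sketch (assumed tf.data pipeline) ----------------------------
# The speech-synthesis snippets (`step` and `test` above, plus the class-based
# `test` further down) pull batches with sess.run(next_batch) until
# tf.errors.OutOfRangeError, which matches a tf.data input pipeline. A rough
# sketch of such a get_batch, assuming each file holds float32 frames of
# input_dim + output_dim features (the parser here is hypothetical):
import tensorflow as tf

def get_batch(file_list, batch_size, input_dim, output_dim, shuffle=True):
    """Build a one-shot tf.data pipeline and return its get_next() tensors."""
    def parse_fn(path):
        raw = tf.read_file(path)
        feats = tf.decode_raw(raw, tf.float32)
        feats = tf.reshape(feats, [-1, input_dim + output_dim])
        # split each utterance into input features, output features, frame count
        return feats[:, :input_dim], feats[:, input_dim:], tf.shape(feats)[0]

    dataset = tf.data.Dataset.from_tensor_slices(tf.constant(file_list))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(file_list))
    dataset = dataset.map(parse_fn)
    dataset = dataset.padded_batch(
        batch_size, padded_shapes=([None, input_dim], [None, output_dim], []))
    # callers run these tensors repeatedly until OutOfRangeError is raised
    return dataset.make_one_shot_iterator().get_next()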
def train(epoch): model.train() # Turn on the train mode total_loss = 0. start_time = time.time() for batch, i in enumerate(range(0, train_data.size(0)-1, bptt)): inputs, targets = get_batch(train_data, i, bptt) inputs = inputs.to(device) targets = targets.to(device) model.zero_grad() optimizer.zero_grad() output = model(inputs) loss = criterion(output.view(-1, ntokens), targets) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) optimizer.step() total_loss += loss.item() if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss / args.log_interval elapsed = time.time() - start_time print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | ' 'loss {:5.2f} | ppl {:8.2f}'.format( epoch, batch, len(train_data) // args.bptt, scheduler.get_lr()[0], elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))) total_loss = 0 start_time = time.time()
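# --- Illustrative sketch (PyTorch word-language-model convention) --------------
# `train` above and `evaluate` further down use get_batch(source, i, bptt), where
# `source` has already been reshaped to [num_steps, batch_size]; each call
# returns a bptt-long chunk and the same chunk shifted by one token as the
# flattened targets. A sketch consistent with that usage (the batchify helper is
# included for context and is an assumption):
import torch

def batchify(data, batch_size):
    """Trim a 1-D token tensor and reshape it to [num_steps, batch_size]."""
    num_steps = data.size(0) // batch_size
    data = data[:num_steps * batch_size]
    return data.view(batch_size, -1).t().contiguous()

def get_batch(source, i, bptt):
    """Return a bptt-long input chunk and the next-token targets."""
    seq_len = min(bptt, len(source) - 1 - i)
    inputs = source[i:i + seq_len]                       # [seq_len, batch_size]
    targets = source[i + 1:i + 1 + seq_len].reshape(-1)  # flattened for the loss
    return inputs, targets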
def single_test(bin_id, model, sess, nprint, batch_size, dev, p, print_out=True, offset=None, beam_model=None): """Test model on test data of length l using the given session.""" if not dev[p][bin_id]: data.print_out(" bin %d (%d)\t%s\tppl NA errors NA seq-errors NA" % (bin_id, data.bins[bin_id], p)) return 1.0, 1.0, 0.0 inpt, target = data.get_batch( bin_id, batch_size, dev[p], FLAGS.height, offset) if FLAGS.beam_size > 1 and beam_model: loss, res, new_tgt, scores = m_step( model, beam_model, sess, batch_size, inpt, target, bin_id, FLAGS.eval_beam_steps, p) score_avgs = [sum(s) / float(len(s)) for s in scores] score_maxs = [max(s) for s in scores] score_str = ["(%.2f, %.2f)" % (score_avgs[i], score_maxs[i]) for i in xrange(FLAGS.eval_beam_steps)] data.print_out(" == scores (avg, max): %s" % "; ".join(score_str)) errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint, new_tgt, scores[-1]) else: loss, res, _, _ = model.step(sess, inpt, target, False) errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out(" bin %d (%d)\t%s\tppl %.2f errors %.2f seq-errors %.2f" % (bin_id, data.bins[bin_id], p, data.safe_exp(loss), 100 * errors, 100 * seq_err)) return (errors, seq_err, loss)
def test(sess, model, vocabulary, test_set): rev_vocabulary = {v: k for k, v in vocabulary.items()} rev_vocabulary[data_utils.PAD_ID] = '' rev_vocabulary[data_utils.GO_ID] = '' rev_vocabulary[data_utils.UNK_ID] = '' encoder_inputs, decoder_inputs, target_weights = data_utils.get_batch( test_set, BATCH_SIZE, MODEL_LENGTH) _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, True) output_matrix = np.empty((len(output_logits), BATCH_SIZE)) for lenIdx in xrange(len(output_logits)): output_matrix[lenIdx] = np.array( [int(np.argmax(logit)) for logit in output_logits[lenIdx]]) typos = visualize(rev_vocabulary, encoder_inputs, should_reverse=True) rewrites = visualize(rev_vocabulary, decoder_inputs) guesses = visualize(rev_vocabulary, output_matrix) total = 0 correct = 0 for i in xrange(len(typos)): total += 1 correct += 1 if rewrites[i] == guesses[i] else 0 print typos[i], ' - ', rewrites[i], ' - ', guesses[i] print 'total: ', total, ' corrected: ', correct, ' acc: ', correct / ( total + 0.0)
def train(): """ Train the bot. """ # test_buckets, data_buckets: <type "list">: # [[[[Context], [Response]], ], ]] # test_buckets[0]: first bucket # test_buckets[0][0]: first pair of the first bucket # test_buckets[0][0][0], test_buckets[0][0][1]: Context and response # test_buckets[0][0][0][0]: word index of the first words # train_buckets_scale: list of increasing numbers from 0 to 1 that # we"ll use to select a bucket. len(train_buckets_scale) = len(BUCKETS) test_buckets, data_buckets, train_buckets_scale = _get_buckets() # in train mode, we need to create the backward path, so forward_only is False model = ChatBotModel(False, config.BATCH_SIZE) # build graph model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: print("Running session...") sess.run(tf.global_variables_initializer()) check_restore_parameters(sess, saver) iteration = model.global_step.eval() total_loss = 0 logging.info("Training...") try: while True: skip_step = _get_skip_step(iteration) bucket_id = _get_random_bucket(train_buckets_scale) encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( data_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) start = time.time() _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False) total_loss += step_loss iteration += 1 if iteration % skip_step == 0: logging.info( "Training @ iter {:d}: loss {:.4f}, time {:.4f}". format(iteration, total_loss / skip_step, time.time() - start)) total_loss = 0 saver.save(sess, os.path.join(config.CPT_PATH, "chatbot"), global_step=model.global_step) if iteration % (10 * skip_step) == 0: logging.info("Testing...") # Run evals on development set and print their loss _eval_test_set(sess, model, test_buckets) sys.stdout.flush() except KeyboardInterrupt: logging.info("Training interrupted.")
def chat(question): """ In test mode, we don't need to create the backward path. """ _, enc_vocab = data_utils.load_vocab( os.path.join(config.DATA_PATH, "vocab.enc")) # `inv_dec_vocab` <type "list">: id2word. inv_dec_vocab, _ = data_utils.load_vocab( os.path.join(config.DATA_PATH, "vocab.dec")) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) check_restore_parameters(sess, saver) output_file = open(os.path.join(config.DATA_PATH, config.TERMINAL_OUTPUT), "a+", encoding="utf-8") # Decode from standard input. max_length = config.BUCKETS[-1][0] print( "Welcome to TensorBro. Say something. Enter to exit. Max length is", max_length) line = question if hasattr(line, "decode"): # If using Python 2 # FIXME: UnicodeError when deleting Chinese in terminal. line = line.decode("utf-8") if len(line) > 0 and line[-1] == "\n": line = line[:-1] if not line: pass output_file.write("HUMAN ++++ " + line + "\n") # Get token-ids for the input sentence. token_ids = data_utils.sentence2id(enc_vocab, line) if len(token_ids) > max_length: print("Max length I can handle is:", max_length) # line = _get_user_input() pass # Which bucket does it belong to? bucket_id = find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = construct_response(output_logits, inv_dec_vocab) print(response) output_file.write("BOT ++++ " + response + "\n") output_file.write("=============================================\n") output_file.close()
def train(): """ Train the bot """ test_buckets, data_buckets, train_buckets_scale = get_buckets() model = ChatBotModel(False, config.BATCH_SIZE) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: print('Running session') sess.run(tf.global_variables_initializer()) check_restore_parameters(sess, saver) iteration = model.global_step.eval() total_loss = 0 file_writer = tf.summary.FileWriter( os.path.join(config.LOG_PATH, 'tensorboard'), sess.graph) training_loss_summary = tf.Summary() testing_loss_summary = tf.Summary() while True: skip_step = get_skip_step(iteration) bucket_id = get_random_bucket(train_buckets_scale) encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( data_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) start = time.time() _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False) total_loss += step_loss iteration += 1 if iteration % skip_step == 0: print('Iter {}: loss {}, time {}'.format( iteration, total_loss / skip_step, time.time() - start)) bucket_value = training_loss_summary.value.add() bucket_value.tag = "training_loss_bucket_%d" % bucket_id bucket_value.simple_value = step_loss file_writer.add_summary(training_loss_summary, model.global_step.eval()) start = time.time() total_loss = 0 saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step) if iteration % (10 * skip_step) == 0: # Run evals on development set and print their loss eval_test_set(sess, model, test_buckets, testing_loss_summary, file_writer) start = time.time() sys.stdout.flush()
def __gen_training_data(for_training): x = [] y = [] for index, seq_len in enumerate(cnf.bins): data, labels = data_gen.get_batch(seq_len, cnf.batch_size, for_training, cnf.task) x += [data] y += [labels] return x, y
def single_test(l, model, sess, task, nprint, batch_size, print_out=True, offset=None): """Test model on test data of length l using the given session.""" inpt, target = data.get_batch(l, batch_size, False, task, offset) _, res, _, steps = model.step(sess, inpt, target, False) errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out(" %s len %d errors %.2f sequence-errors %.2f" % (task, l, 100*errors, 100*seq_err)) return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
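# --- Illustrative sketch (not the Neural GPU data generator) -------------------
# In the Neural GPU snippets, data.get_batch(l, batch_size, for_train, task,
# offset) draws input/target pairs of length l for an algorithmic task (and
# data_gen.get_batch in the bin-based snippets plays the same role). The real
# generator caches per-length train/test sets and supports many tasks; the
# sketch below only shows the idea for two toy tasks and is an assumption.
import numpy as np

NUM_SYMBOLS = 10  # assumed vocabulary of digits, 0 reserved for padding

def get_batch(length, batch_size, for_train, task, offset=None):
    """Generate a batch of random input/target sequences of the given length."""
    rng = np.random.RandomState(None if for_train else 1234)  # fixed test data
    inputs = rng.randint(1, NUM_SYMBOLS, size=(batch_size, length))
    if task == "rev":
        targets = inputs[:, ::-1].copy()   # reverse the input sequence
    elif task == "copy":
        targets = inputs.copy()            # identity task
    else:
        raise ValueError("unknown task: %s" % task)
    return inputs, targets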
def evaluate(eval_model, data_source): eval_model.eval() # Turn on the evaluation mode total_loss = 0. # ntokens = len(TEXT.vocab.stoi) with torch.no_grad(): for i in range(0, data_source.size(0) - 1, bptt): inputs, targets = get_batch(data_source, i, bptt) inputs = inputs.to(device) targets = targets.to(device) output = eval_model(inputs) output_flat = output.view(-1, ntokens) total_loss += len(inputs) * criterion(output_flat, targets).item() return total_loss / (len(data_source) - 1)
def single_test(l, model, sess, task, nprint, batch_size, print_out=True, offset=None, ensemble=None, get_steps=False): """Test model on test data of length l using the given session.""" inpt, target = data.get_batch(l, batch_size, False, task, offset) _, res, _, steps = model.step(sess, inpt, target, False, get_steps=get_steps) errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out(" %s len %d errors %.2f sequence-errors %.2f" % (task, l, 100 * errors, 100 * seq_err)) # Ensemble eval. if ensemble: results = [] for m in ensemble: model.saver.restore(sess, m) _, result, _, _ = model.step(sess, inpt, target, False) m_errors, m_total, m_seq_err = data.accuracy( inpt, result, target, batch_size, nprint) m_seq_err = float(m_seq_err) / batch_size if total > 0: m_errors = float(m_errors) / m_total data.print_out( " %s len %d m-errors %.2f m-sequence-errors %.2f" % (task, l, 100 * m_errors, 100 * m_seq_err)) results.append(result) ens = [sum(o) for o in zip(*results)] errors, total, seq_err = data.accuracy(inpt, ens, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out( " %s len %d ens-errors %.2f ens-sequence-errors %.2f" % (task, l, 100 * errors, 100 * seq_err)) return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
def _eval_test_set(sess, model, test_buckets): for bucket_id in range(len(config.BUCKETS)): if len(test_buckets[bucket_id]) == 0: print(" Test: empty bucket {:d}".format(bucket_id)) continue start = time.time() encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( test_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) logging.info("Test bucket {:d}: loss {:.4f}, time {:.4f}".format( bucket_id, step_loss, time.time() - start))
def chat(): """ in test mode, we don't need to create the backward path """ _, enc_vocab = data_utils.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data_utils.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) check_restore_parameters(sess, saver) output_file = open( '/Users/EleanorLeung/Documents/CITS4404/chatbot/output_convo.txt', 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Talk to me! Enter to exit. Max length is', max_length) while True: line = str.encode(get_user_input()) if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN: ' + str(line) + '\n') token_ids = data_utils.sentence2id(enc_vocab, line) if len(token_ids) > max_length: print('Max length I can handle is:', max_length) line = get_user_input() continue bucket_id = find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = construct_response(output_logits, inv_dec_vocab) print(response) output_file.write('BOT: ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def interactive(): """Interactively probe an existing model.""" with tf.Session() as sess: model, _, _, _, _, _ = initialize(sess) sys.stdout.write("Input to Neural GPU, e.g., 0 1. Use -1 for PAD.\n") sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline() while inpt: ids = [data.to_id(s) for s in inpt.strip().split()] inpt, target = data.get_batch(len(ids), 1, False, "", preset=(ids, [0 for _ in ids])) _, res, _, _ = model.step(sess, inpt, target, False) res = [np.argmax(o, axis=1) for o in res] res = [o for o in res[:len(ids)] if o > 0] print " " + " ".join([data.to_symbol(output[0]) for output in res]) sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline()
def interactive(): """Interactively probe an existing model.""" with tf.Session() as sess: model, _, _, _, _, _ = initialize(sess) sys.stdout.write("Input to Neural GPU, e.g., 0 1. Use -1 for PAD.\n") sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline() while inpt: ids = [data.to_id(s) for s in inpt.strip().split()] inpt, target = data.get_batch(len(ids), 1, False, "", preset=(ids, [0 for _ in ids])) _, res, _, _ = model.step(sess, inpt, target, False) res = [np.argmax(o, axis=1) for o in res] res = [o for o in res[:len(ids)] if o > 0] print(" " + " ".join([data.to_symbol(output[0]) for output in res])) sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline()
def evaluate_CNN(X_test, Y_test, X_raw): graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=ALLOW_SOFT_PLACEMENT, log_device_placement=LOG_DEVICE_PLACEMENT ) sess = tf.Session(config=session_conf) with sess.as_default(): # Load the saved meta graph and restore variables tf.saved_model.loader.load(sess, [tag_constants.SERVING], MODEL_DIR) # Get the placeholders from the graph by name input_x = graph.get_operation_by_name("input_x").outputs[0] input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name("output/predictions").outputs[0] # Generate batches for one epoch batches = get_batch(list(X_test), BATCH_SIZE, 1, shuffle=False) # Collect the predictions here all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0}) all_predictions = np.concatenate([all_predictions, batch_predictions]) # Print the accuracy if Y_test is provided if Y_test is not None: correct_predictions = float(sum(all_predictions == Y_test)) print("total number of test examples: {}".format(len(Y_test))) print("Accuracy: {:g}".format(correct_predictions/float(len(Y_test)))) predictions_csv = np.column_stack((np.array(X_raw), all_predictions)) out_path = os.path.join(MODEL_DIR, "..", "prediction.csv") print("Saving evaluation to {0}".format(out_path)) with open(out_path, 'w', encoding="utf8", newline="") as f: csv.writer(f).writerows(predictions_csv)
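# --- Illustrative sketch (batch generator consumed by the CNN evaluation) ------
# evaluate_CNN above iterates directly over get_batch(data, batch_size,
# num_epochs, shuffle), so that helper is a generator yielding one batch of
# examples at a time for the requested number of epochs, in the style of the
# common batch_iter utility. A sketch under that assumption:
import numpy as np

def get_batch(data, batch_size, num_epochs, shuffle=True):
    """Yield successive minibatches over the data for num_epochs epochs."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for _ in range(num_epochs):
        shuffled = data[np.random.permutation(data_size)] if shuffle else data
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield shuffled[start:end]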
def test(self, dataset, out_dir, apply_cmvn=False, param_cmvn=None): self.cur_batch_size = 2 next_batch = data_utils.get_batch(dataset, self.cur_batch_size, self.input_dim, self.output_dim, shuffle=True) ind = 0 while True: try: input_data, output_data, seq_length, _ = self.sess.run( next_batch) if (input_data.shape[0] != self.cur_batch_size): continue output, loss = self.sess.run( [self.logits, self.loss], feed_dict={ self.train_input: input_data, self.train_output: output_data, self.seq_length: seq_length }) output_data = output_data[1] output = output[0] output = output[:seq_length[0]] output_data = output_data[:seq_length[1]] if (apply_cmvn): output = output * param_cmvn[1] + param_cmvn[0] output_data = output_data * param_cmvn[1] + param_cmvn[0] filename = os.path.basename( dataset[ind]).split('.')[0] + '.cmp' tr_filename = os.path.basename( dataset[ind]).split('.')[0] + '_tr.cmp' print('%.4f %s' % (loss, filename)) numpy.savetxt(out_dir + '/' + filename, output, fmt='%f') numpy.savetxt(out_dir + '/' + tr_filename, output_data, fmt='%f') ind += 1 except tf.errors.OutOfRangeError: break
def eval_test_set(sess, model, test_buckets, testing_loss_summary, file_writer): """ Evaluate on the test set. """ for bucket_id in range(len(config.BUCKETS)): if len(test_buckets[bucket_id]) == 0: print(" Test: empty bucket %d" % (bucket_id)) continue start = time.time() encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( test_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) bucket_value = testing_loss_summary.value.add() bucket_value.tag = "testing_loss_bucket_%d" % bucket_id bucket_value.simple_value = step_loss file_writer.add_summary(testing_loss_summary, model.global_step.eval()) print('Test bucket {}: loss {}, time {}'.format( bucket_id, step_loss, time.time() - start))
def wechat_text(msg): ''' get the response to msg :param msg: the type of the msg is 'Text' :return: response to msg by using seq2seq model ''' text = msg["Text"] token_ids = data_utils.sentence2id(enc_vocab, text) if len(token_ids) > max_length: print("Max length I can handle is:", max_length) # line = _get_user_input() # Which bucket does it belong to? bucket_id = find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = construct_response(output_logits, inv_dec_vocab) return response
def seq_pred(question): _, enc_vocab = data_utils.load_vocab(os.path.join(config.DATA_PATH, "vocab.enc")) inv_dec_vocab, _ = data_utils.load_vocab(os.path.join(config.DATA_PATH, "vocab.dec")) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) check_restore_parameters(sess, saver) max_length = config.BUCKETS[-1][0] line = question if hasattr(line, "decode"): # If using Python 2 # FIXME: UnicodeError when deleting Chinese in terminal. line = line.decode("utf-8") if len(line) > 0 and line[-1] == "\n": line = line[:-1] if not line: pass token_ids = data_utils.sentence2id(enc_vocab, line) if len(token_ids) > max_length: line = question pass bucket_id = find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = construct_response(output_logits, inv_dec_vocab) answer = response return answer
def single_test(l, model, sess, task, nprint, batch_size, print_out=True, offset=None, ensemble=None, get_steps=False): """Test model on test data of length l using the given session.""" inpt, target = data.get_batch(l, batch_size, False, task, offset) _, res, _, steps = model.step(sess, inpt, target, False, get_steps=get_steps) errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out(" %s len %d errors %.2f sequence-errors %.2f" % (task, l, 100*errors, 100*seq_err)) # Ensemble eval. if ensemble: results = [] for m in ensemble: model.saver.restore(sess, m) _, result, _, _ = model.step(sess, inpt, target, False) m_errors, m_total, m_seq_err = data.accuracy(inpt, result, target, batch_size, nprint) m_seq_err = float(m_seq_err) / batch_size if total > 0: m_errors = float(m_errors) / m_total data.print_out(" %s len %d m-errors %.2f m-sequence-errors %.2f" % (task, l, 100*m_errors, 100*m_seq_err)) results.append(result) ens = [sum(o) for o in zip(*results)] errors, total, seq_err = data.accuracy(inpt, ens, target, batch_size, nprint) seq_err = float(seq_err) / batch_size if total > 0: errors = float(errors) / total if print_out: data.print_out(" %s len %d ens-errors %.2f ens-sequence-errors %.2f" % (task, l, 100*errors, 100*seq_err)) return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
vis_sample = sess.run(G_sample, feed_dict={Z: vis_Z, CG: vis_C}) else: vis_sample = sess.run(G_sample, feed_dict={Z: vis_Z}) plotting.visualise_at_epoch(vis_sample, data, predict_labels, one_hot, epoch, identifier, num_epochs, resample_rate_in_min, multivariate_mnist, seq_length, labels=vis_C) # compute mmd2 and, if available, prob density if epoch % eval_freq == 0: t = time() - t0 print('%d\t%.2f\t%.4f\t%.4f' % (epoch, t, D_loss_curr, G_loss_curr)) if 'eICU' in data: gen_samples = [] labels_gen_samples = [] for batch_idx in range(int(len(train_seqs) / batch_size)): X_mb, Y_mb = data_utils.get_batch(train_seqs, batch_size, batch_idx, train_targets) z_ = model.sample_Z(batch_size, seq_length, latent_dim, use_time=use_time) gen_samples_mb = sess.run(G_sample, feed_dict={Z: z_, CG: Y_mb}) gen_samples.append(gen_samples_mb) labels_gen_samples.append(Y_mb) for batch_idx in range(int(len(vali_seqs) / batch_size)): X_mb, Y_mb = data_utils.get_batch(vali_seqs, batch_size, batch_idx, vali_targets) z_ = model.sample_Z(batch_size, seq_length, latent_dim, use_time=use_time) gen_samples_mb = sess.run(G_sample, feed_dict={Z: z_, CG: Y_mb}) gen_samples.append(gen_samples_mb) labels_gen_samples.append(Y_mb) gen_samples = np.vstack(gen_samples) labels_gen_samples = np.vstack(labels_gen_samples)
def train(): """Train the model.""" batch_size = FLAGS.batch_size tasks = FLAGS.task.split("-") with tf.Session() as sess: (model, min_length, max_length, checkpoint_dir, curriculum, _) = initialize(sess) quant_op = neural_gpu.quantize_weights_op(512, 8) max_cur_length = min(min_length + 3, max_length) prev_acc_perp = [1000000 for _ in xrange(3)] prev_seq_err = 1.0 # Main traning loop. while True: global_step, pull, max_cur_length, learning_rate = sess.run( [model.global_step, model.pull, model.cur_length, model.lr]) acc_loss, acc_total, acc_errors, acc_seq_err = 0.0, 0, 0, 0 acc_grad_norm, step_count, step_time = 0.0, 0, 0.0 for _ in xrange(FLAGS.steps_per_checkpoint): global_step += 1 task = random.choice(tasks) # Select the length for curriculum learning. l = np.random.randint(max_cur_length - min_length + 1) + min_length # Prefer longer stuff 60% of time. if np.random.randint(100) < 60: l1 = np.random.randint(max_cur_length - min_length + 1) + min_length l = max(l, l1) # Mixed curriculum learning: in 25% of cases go to any larger length. if np.random.randint(100) < 25: l1 = np.random.randint(max_length - min_length + 1) + min_length l = max(l, l1) # Run a step and time it. start_time = time.time() inp, target = data.get_batch(l, batch_size, True, task) noise_param = math.sqrt( math.pow(global_step, -0.55) * prev_seq_err) * FLAGS.grad_noise_scale loss, res, gnorm, _ = model.step(sess, inp, target, True, noise_param) step_time += time.time() - start_time acc_grad_norm += float(gnorm) # Accumulate statistics only if we did not exceed curriculum length. if l < max_cur_length + 1: step_count += 1 acc_loss += loss errors, total, seq_err = data.accuracy( inp, res, target, batch_size, 0) acc_total += total acc_errors += errors acc_seq_err += seq_err # Normalize and print out accumulated statistics. acc_loss /= step_count step_time /= FLAGS.steps_per_checkpoint acc_seq_err = float(acc_seq_err) / (step_count * batch_size) prev_seq_err = max(0.0, acc_seq_err - 0.02) # No noise at error < 2%. acc_errors = float( acc_errors) / acc_total if acc_total > 0 else 1.0 msg1 = "step %d step-time %.2f" % (global_step, step_time) msg2 = "lr %.8f pull %.3f" % (learning_rate, pull) msg3 = ("%s %s grad-norm %.8f" % (msg1, msg2, acc_grad_norm / FLAGS.steps_per_checkpoint)) data.print_out( "%s len %d ppx %.8f errors %.2f sequence-errors %.2f" % (msg3, max_cur_length, data.safe_exp(acc_loss), 100 * acc_errors, 100 * acc_seq_err)) # If errors are below the curriculum threshold, move curriculum forward. if curriculum > acc_seq_err: if FLAGS.quantize: # Quantize weights. data.print_out(" Quantizing parameters.") sess.run([quant_op]) # Increase current length (until the next with training data). do_incr = True while do_incr and max_cur_length < max_length: sess.run(model.cur_length_incr_op) for t in tasks: if data.train_set[t]: do_incr = False # Forget last perplexities if we're not yet at the end. if max_cur_length < max_length: prev_acc_perp.append(1000000) # Either increase pull or, if it's large, average parameters. if pull < 0.1: sess.run(model.pull_incr_op) else: data.print_out(" Averaging parameters.") sess.run(model.avg_op) if acc_seq_err < (curriculum / 3.0): sess.run(model.lr_decay_op) # Lower learning rate if we're worse than the last 3 checkpoints. acc_perp = data.safe_exp(acc_loss) if acc_perp > max(prev_acc_perp[-3:]): sess.run(model.lr_decay_op) prev_acc_perp.append(acc_perp) # Save checkpoint. 
checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) # Run evaluation. bound = data.bins[-1] + 1 for t in tasks: l = min_length while l < max_length + EXTRA_EVAL and l < bound: _, seq_err, _ = single_test(l, model, sess, t, FLAGS.nprint, batch_size) l += 1 while l < bound + 1 and not data.test_set[t][l]: l += 1 if seq_err < 0.05: # Run larger test if we're good enough. _, seq_err = multi_test(data.forward_max, model, sess, t, FLAGS.nprint, batch_size * 4) if seq_err < 0.01: # Super-large test on 1-task large-forward models. if data.forward_max > 4000 and len(tasks) == 1: multi_test(data.forward_max, model, sess, tasks[0], FLAGS.nprint, batch_size * 16, 0)
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE) train_step = optimizer.minimize(loss, global_step=global_step) session.run([tf.global_variables_initializer(), tf.tables_initializer()]) saver = tf.train.Saver() for epoch in range(NUM_EPOCH_): # set total epochs print("Epoch: ", epoch) for epoch2 in range(2): # epochs per mini batch Image_x = [] Image_x2 = [] Text_x = [] Label_y = [] j = 0 if (getBatch): ran = random.randint(0, len(df) - 200) mini_batch = data_utils.get_batch(ran, ran + 200, df) for d in mini_batch: Label_y_ = [] for i in range(80): Label_y_.append(0) Image_x.append(d.img_feat) r = random.randint(0, 4) Text_x.append(d.sentences[r]) Label_y_[j] = 1 r2 = random.randint(0, 4) Text_x.append(d.sentences[r2]) Label_y_[j + 1] = 1 Label_y.append(Label_y_) j += 2 Image_x = np.asarray(Image_x) Label_y = np.asarray(Label_y)
def supply_test_data(self, length, batch_size): data, labels = data_gen.get_batch(length, batch_size, False, cnf.task) return [data], [labels]
def lstm_cell(): cell = tf.contrib.rnn.LSTMCell(params.hidden_size) return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob) with tf.name_scope('rnn_layer'): mlstm_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(params.layer_num)], state_is_tuple=True) # init_state = mlstm_cell.zero_state(tf.shape(x)[0], dtype=tf.float32) outputs, state = tf.nn.dynamic_rnn(mlstm_cell, inputs=x, sequence_length=None, initial_state=None, dtype=tf.float32, time_major=False) output = state[-1][1] with tf.name_scope('calculate_accuracy'): cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output)) correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) with tf.name_scope('train'): train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy) with tf.name_scope('calculate_loss'): loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=output)) # y_ = tf.nn.sigmoid(output) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for i in range(params.epoch): # train_x, train_y are the raw training arrays prepared elsewhere (names assumed); the batch variables must not shadow the placeholders x, y for x_batch, y_batch in data_utils.get_batch(train_x, train_y, params.batch_size, params.seq_length): l, acc, _ = sess.run([loss, accuracy, train_step], feed_dict={x: x_batch, y: y_batch}) print("Step: {:>4}, Loss: {:.4f}, Acc: {:.4%}".format(i, l, acc))
def evaluate(): """Evaluate an existing model.""" batch_size = FLAGS.batch_size * FLAGS.num_gpus with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: (model, beam_model, _, _, _, (_, dev_set, en_vocab_path, fr_vocab_path), _, sess) = initialize(sess) for p in FLAGS.problem.split("-"): for bin_id in xrange(len(data.bins)): if (FLAGS.task >= 0 and bin_id > 4) or (FLAGS.nprint == 0 and bin_id > 8 and p == "wmt"): break single_test(bin_id, model, sess, FLAGS.nprint, batch_size, dev_set, p, beam_model=beam_model) path = FLAGS.test_file_prefix xid = "" if FLAGS.task < 0 else ("%.4d" % (FLAGS.task+FLAGS.decode_offset)) en_path, fr_path = path + ".en" + xid, path + ".fr" + xid # Evaluate the test file if they exist. if path and tf.gfile.Exists(en_path) and tf.gfile.Exists(fr_path): data.print_out("Translating test set %s" % en_path) # Read lines. en_lines, fr_lines = [], [] with tf.gfile.GFile(en_path, mode="r") as f: for line in f: en_lines.append(line.strip()) with tf.gfile.GFile(fr_path, mode="r") as f: for line in f: fr_lines.append(line.strip()) # Tokenize and convert to ids. en_vocab, _ = wmt.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = wmt.initialize_vocabulary(fr_vocab_path) if FLAGS.simple_tokenizer: en_ids = [wmt.sentence_to_token_ids( l, en_vocab, tokenizer=wmt.space_tokenizer, normalize_digits=FLAGS.normalize_digits) for l in en_lines] else: en_ids = [wmt.sentence_to_token_ids(l, en_vocab) for l in en_lines] # Translate. results = [] for idx, token_ids in enumerate(en_ids): if idx % 5 == 0: data.print_out("Translating example %d of %d." % (idx, len(en_ids))) # Which bucket does it belong to? buckets = [b for b in xrange(len(data.bins)) if data.bins[b] >= len(token_ids)] if buckets: result, result_cost = [], 100000000.0 for bucket_id in buckets: if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR: break # Get a 1-element batch to feed the sentence to the model. used_batch_size = 1 # batch_size inp, target = data.get_batch( bucket_id, used_batch_size, None, FLAGS.height, preset=([token_ids], [[]])) loss, output_logits, _, _ = model.step( sess, inp, target, None, beam_size=FLAGS.beam_size) outputs = [int(o[0]) for o in output_logits] loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm) if FLAGS.simple_tokenizer: cur_out = outputs if wmt.EOS_ID in cur_out: cur_out = cur_out[:cur_out.index(wmt.EOS_ID)] res_tags = [rev_fr_vocab[o] for o in cur_out] bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags) loss += 1000.0 * bad_words + 100.0 * bad_brack # print (bucket_id, loss) if loss < result_cost: result = outputs result_cost = loss final = linearize(result, rev_fr_vocab) results.append("%s\t%s\n" % (final, fr_lines[idx])) # print result_cost sys.stderr.write(results[-1]) sys.stderr.flush() else: sys.stderr.write("TOOO_LONG\t%s\n" % fr_lines[idx]) sys.stderr.flush() if xid: decode_suffix = "beam%dln%dn" % (FLAGS.beam_size, int(100 * FLAGS.length_norm)) with tf.gfile.GFile(path + ".res" + decode_suffix + xid, mode="w") as f: for line in results: f.write(line)
def fit(self, train, val=None, out_dir='log/', verbose=False): """ Trains the model using the input training set. Summarizes the loss and training (and validation) accuracies. Input ===== - train: a pair for the features and labels of the training set. - val: a pair for the features and labels of the validation set, optional. - out_dir: the directory location for summary files relative to root directory, optional. - verbose: a boolean flag. If set to true, loss values and execution time is printed to the console, optional. """ X_train, y_train = train if (val): X_val, y_val = val # All of the built ops will be associated with the default global graph instance with tf.Graph().as_default(): # Create a model instance features, labels = lu.input_placeholders() logits = self.inference(features, self.hidden_dim) loss = self.loss(logits, labels) train_step = self.training(loss) accuracy = self.evaluation(logits, labels) conf_matrix = tf.confusion_matrix( tf.argmax(labels, 1), tf.argmax(logits, 1), num_classes=config.NUM_ACTIVITIES) # Create summarizers loss_summary = tf.summary.scalar('loss', loss) weights_summary = lu.get_histogram_summary(len(self.hidden_dim)) train_acc_summary = tf.summary.scalar('train_acc', accuracy) val_acc_summary = tf.summary.scalar('val_acc', accuracy) train_summary = tf.summary.merge( [loss_summary, weights_summary, train_acc_summary]) # Create Model Saver saver = tf.train.Saver() # Create a session sess = tf.Session() summary_writer = tf.summary.FileWriter(out_dir, sess.graph) # train the model init_vars = tf.global_variables_initializer() sess.run(init_vars) for step in range(self.num_steps): start_time = time.time() X_batch, y_batch = du.get_batch(train, batch_size=self.batch_size) feed_dict = {features: X_batch, labels: y_batch} _, loss_value, summary_str = sess.run( [train_step, loss, loss_summary], feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) if (step % 100 == 0): # evaluate model on train dataset feed_dict = {features: X_train, labels: y_train} _, summary_str = sess.run([accuracy, train_summary], feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) # evaluate model on validation dataset if (val): feed_dict = {features: X_val, labels: y_val} _, summary_str = sess.run([accuracy, val_acc_summary], feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) if (verbose): duration = time.time() - start_time print 'Step %d, loss = %.3f (%.3f sec)' % ( step, loss_value, duration) summary_writer.flush() self.sess = sess self.acc = accuracy self.conf_matrix = conf_matrix self.features = features self.labels = labels # Save the model save_path = out_dir + 'model/' os.mkdir(save_path) saver.save(sess, save_path + 'model.ckpt')
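# --- Illustrative sketch (random minibatch sampler assumed by fit) -------------
# fit above calls du.get_batch(train, batch_size=...) once per step, where train
# is the (X_train, y_train) pair, so the helper presumably returns one random
# minibatch per call. A minimal sketch under that assumption:
import numpy as np

def get_batch(train, batch_size=64):
    """Sample a random minibatch from a (features, labels) pair."""
    X_train, y_train = train
    idx = np.random.choice(len(X_train), size=batch_size, replace=False)
    return X_train[idx], y_train[idx]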
def train(): """Train the model.""" batch_size = FLAGS.batch_size tasks = FLAGS.task.split("-") with tf.Session() as sess: (model, min_length, max_length, checkpoint_dir, curriculum, _) = initialize(sess) quant_op = neural_gpu.quantize_weights_op(512, 8) max_cur_length = min(min_length + 3, max_length) prev_acc_perp = [1000000 for _ in xrange(3)] prev_seq_err = 1.0 # Main traning loop. while True: global_step, pull, max_cur_length, learning_rate = sess.run( [model.global_step, model.pull, model.cur_length, model.lr]) acc_loss, acc_total, acc_errors, acc_seq_err = 0.0, 0, 0, 0 acc_grad_norm, step_count, step_time = 0.0, 0, 0.0 for _ in xrange(FLAGS.steps_per_checkpoint): global_step += 1 task = random.choice(tasks) # Select the length for curriculum learning. l = np.random.randint(max_cur_length - min_length + 1) + min_length # Prefer longer stuff 60% of time. if np.random.randint(100) < 60: l1 = np.random.randint(max_cur_length - min_length+1) + min_length l = max(l, l1) # Mixed curriculum learning: in 25% of cases go to any larger length. if np.random.randint(100) < 25: l1 = np.random.randint(max_length - min_length + 1) + min_length l = max(l, l1) # Run a step and time it. start_time = time.time() inp, target = data.get_batch(l, batch_size, True, task) noise_param = math.sqrt(math.pow(global_step, -0.55) * prev_seq_err) * FLAGS.grad_noise_scale loss, res, gnorm, _ = model.step(sess, inp, target, True, noise_param) step_time += time.time() - start_time acc_grad_norm += float(gnorm) # Accumulate statistics only if we did not exceed curriculum length. if l < max_cur_length + 1: step_count += 1 acc_loss += loss errors, total, seq_err = data.accuracy(inp, res, target, batch_size, 0) acc_total += total acc_errors += errors acc_seq_err += seq_err # Normalize and print out accumulated statistics. acc_loss /= step_count step_time /= FLAGS.steps_per_checkpoint acc_seq_err = float(acc_seq_err) / (step_count * batch_size) prev_seq_err = max(0.0, acc_seq_err - 0.02) # No noise at error < 2%. acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0 msg1 = "step %d step-time %.2f" % (global_step, step_time) msg2 = "lr %.8f pull %.3f" % (learning_rate, pull) msg3 = ("%s %s grad-norm %.8f" % (msg1, msg2, acc_grad_norm / FLAGS.steps_per_checkpoint)) data.print_out("%s len %d ppx %.8f errors %.2f sequence-errors %.2f" % (msg3, max_cur_length, data.safe_exp(acc_loss), 100*acc_errors, 100*acc_seq_err)) # If errors are below the curriculum threshold, move curriculum forward. if curriculum > acc_seq_err: if FLAGS.quantize: # Quantize weights. data.print_out(" Quantizing parameters.") sess.run([quant_op]) # Increase current length (until the next with training data). do_incr = True while do_incr and max_cur_length < max_length: sess.run(model.cur_length_incr_op) for t in tasks: if data.train_set[t]: do_incr = False # Forget last perplexities if we're not yet at the end. if max_cur_length < max_length: prev_acc_perp.append(1000000) # Either increase pull or, if it's large, average parameters. if pull < 0.1: sess.run(model.pull_incr_op) else: data.print_out(" Averaging parameters.") sess.run(model.avg_op) if acc_seq_err < (curriculum / 3.0): sess.run(model.lr_decay_op) # Lower learning rate if we're worse than the last 3 checkpoints. acc_perp = data.safe_exp(acc_loss) if acc_perp > max(prev_acc_perp[-3:]): sess.run(model.lr_decay_op) prev_acc_perp.append(acc_perp) # Save checkpoint. 
checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) # Run evaluation. bound = data.bins[-1] + 1 for t in tasks: l = min_length while l < max_length + EXTRA_EVAL and l < bound: _, seq_err, _ = single_test(l, model, sess, t, FLAGS.nprint, batch_size) l += 1 while l < bound + 1 and not data.test_set[t][l]: l += 1 if seq_err < 0.05: # Run larger test if we're good enough. _, seq_err = multi_test(data.forward_max, model, sess, t, FLAGS.nprint, batch_size * 4) if seq_err < 0.01: # Super-large test on 1-task large-forward models. if data.forward_max > 4000 and len(tasks) == 1: multi_test(data.forward_max, model, sess, tasks[0], FLAGS.nprint, batch_size * 16, 0)
with tf.name_scope("eval"): scores = tf.nn.softmax(logits) predictions = tf.argmax(scores, 1, name="predictions") correct_predictions = tf.equal(predictions, tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") # Note: if you prefer to one_hot encode in "train": # probabilities = tf.nn.softmax(logits, name="probabilities") # classes = tf.argmax(input=probabilities, axis=1) # accuracy = tf.contrib.metrics.accuracy(y,classes) with tf.name_scope("init_and_save"): init = tf.global_variables_initializer() saver = tf.train.Saver() with tf.Session(graph=graph) as sess: init.run() batch_iter = get_batch(x_train, y_train) for epoch in range(10): for iteration in range(x_train.shape[0] // batch_size): X_batch, y_batch = batch_iter.next_batch(128) sess.run(training_op, feed_dict={X: X_batch, y: y_batch, is_training: True}) acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch}) acc_test = accuracy.eval(feed_dict={X: x_val, y: y_val}) print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test) save_path = saver.save(sess, "model_nn/model")
def train_epoch( epoch, samples, labels, sess, Z, X, CG, CD, CS, D_loss, G_loss, #D_logit_real, D_logit_fake, conv1, D_solver, G_solver, batch_size, use_time, D_rounds, G_rounds, seq_length, latent_dim, #layer, w, D_solver, G_solver, batch_size, use_time, D_rounds, G_rounds, seq_length, num_generated_features, cond_dim, max_val, WGAN_clip, one_hot): """ Train generator and discriminator for one epoch. """ for batch_idx in range( 0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds): # update the discriminator for d in range(D_rounds): X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + d, labels) Z_mb = X_mb[:, : -latent_dim, :] #sample_Z(batch_size, seq_length, latent_dim, use_time) X_mb = X_mb[:, -latent_dim:, :] X_mb = X_mb.reshape(-1, latent_dim, num_generated_features) if cond_dim > 0: # CGAN Y_mb = Y_mb.reshape(-1, cond_dim) if one_hot: # change all of the labels to a different one offsets = np.random.choice(cond_dim - 1, batch_size) + 1 new_labels = (np.argmax(Y_mb, axis=1) + offsets) % cond_dim Y_wrong = np.zeros_like(Y_mb) Y_wrong[np.arange(batch_size), new_labels] = 1 else: # flip all of the bits (assuming binary...) Y_wrong = 1 - Y_mb _ = sess.run(D_solver, feed_dict={ X: X_mb, Z: Z_mb, CD: Y_mb, CS: Y_wrong, CG: Y_mb }) else: _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb}) if WGAN_clip: raise NotImplementedError("Not implemented WGAN") # clip the weights # _ = sess.run([clip_disc_weights]) # update the generator for g in range(G_rounds): if cond_dim > 0: # note we are essentially throwing these X_mb away... X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + D_rounds + g, labels) _ = sess.run(G_solver, feed_dict={ Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb }) else: Z_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels) Z_mb = Z_mb[:, :-latent_dim, :] _ = sess.run(G_solver, feed_dict={ Z: Z_mb }) #sample_Z(batch_size, seq_length, use_time=use_time)}) # at the end, get the loss if cond_dim > 0: D_loss_curr, G_loss_curr = sess.run( [D_loss, G_loss], feed_dict={ X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb, CD: Y_mb }) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) else: D_loss_curr, G_loss_curr =\ sess.run([D_loss, G_loss], feed_dict={X: X_mb, Z: Z_mb})#sample_Z(batch_size, seq_length, use_time=use_time)}) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) return D_loss_curr, G_loss_curr
def train(): """Train the model.""" batch_size = FLAGS.batch_size * FLAGS.num_gpus (model, beam_model, min_length, max_length, checkpoint_dir, (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize() with sess.as_default(): quant_op = model.quantize_op max_cur_length = min(min_length + 3, max_length) prev_acc_perp = [1000000 for _ in xrange(5)] prev_seq_err = 1.0 is_chief = FLAGS.task < 1 do_report = False # Main traning loop. while not sv.ShouldStop(): global_step, max_cur_length, learning_rate = sess.run( [model.global_step, model.cur_length, model.lr]) acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0 acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0 # For words in the word vector file, set their embedding at start. bound1 = FLAGS.steps_per_checkpoint - 1 if FLAGS.word_vector_file_en and global_step < bound1 and is_chief: assign_vectors(FLAGS.word_vector_file_en, "embedding:0", en_vocab_path, sess) if FLAGS.max_target_vocab < 1: assign_vectors(FLAGS.word_vector_file_en, "target_embedding:0", en_vocab_path, sess) if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief: assign_vectors(FLAGS.word_vector_file_fr, "embedding:0", fr_vocab_path, sess) if FLAGS.max_target_vocab < 1: assign_vectors(FLAGS.word_vector_file_fr, "target_embedding:0", fr_vocab_path, sess) for _ in xrange(FLAGS.steps_per_checkpoint): step_count += 1 step_c1 += 1 global_step = int(model.global_step.eval()) train_beam_anneal = global_step / float(FLAGS.train_beam_anneal) train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal) p = random.choice(FLAGS.problem.split("-")) train_set = global_train_set[p][-1] bucket_id = get_bucket_id(train_buckets_scale[p][-1], max_cur_length, train_set) # Prefer longer stuff 60% of time if not wmt. if np.random.randint(100) < 60 and FLAGS.problem != "wmt": bucket1 = get_bucket_id(train_buckets_scale[p][-1], max_cur_length, train_set) bucket_id = max(bucket1, bucket_id) # Run a step and time it. start_time = time.time() inp, target = data.get_batch(bucket_id, batch_size, train_set, FLAGS.height) noise_param = math.sqrt(math.pow(global_step + 1, -0.55) * prev_seq_err) * FLAGS.grad_noise_scale # In multi-step mode, we use best from beam for middle steps. state, new_target, scores, history = None, None, None, [] while (FLAGS.beam_size > 1 and train_beam_freq > np.random.random_sample()): # Get the best beam (no training, just forward model). new_target, new_first, new_inp, scores = get_best_beam( beam_model, sess, inp, target, batch_size, FLAGS.beam_size, bucket_id, history, p) history.append(new_first) # Training step with the previous input and the best beam as target. _, _, _, state = model.step(sess, inp, new_target, FLAGS.do_train, noise_param, update_mem=True, state=state) # Change input to the new one for the next step. inp = new_inp # If all results are great, stop (todo: not to wait for all?). if FLAGS.nprint > 1: print(scores) if sum(scores) / float(len(scores)) >= 10.0: break # The final step with the true target. loss, res, gnorm, _ = model.step( sess, inp, target, FLAGS.do_train, noise_param, update_mem=True, state=state) step_time += time.time() - start_time acc_grad_norm += 0.0 if gnorm is None else float(gnorm) # Accumulate statistics. 
acc_loss += loss acc_l1 += loss errors, total, seq_err = data.accuracy( inp, res, target, batch_size, 0, new_target, scores) if FLAGS.nprint > 1: print("seq_err: ", seq_err) acc_total += total acc_errors += errors acc_seq_err += seq_err # Report summary every 10 steps. if step_count + 3 > FLAGS.steps_per_checkpoint: do_report = True # Don't polute plot too early. if is_chief and step_count % 10 == 1 and do_report: cur_loss = acc_l1 / float(step_c1) acc_l1, step_c1 = 0.0, 0 cur_perp = data.safe_exp(cur_loss) summary = tf.Summary() summary.value.extend( [tf.Summary.Value(tag="log_perplexity", simple_value=cur_loss), tf.Summary.Value(tag="perplexity", simple_value=cur_perp)]) sv.SummaryComputed(sess, summary, global_step) # Normalize and print out accumulated statistics. acc_loss /= step_count step_time /= FLAGS.steps_per_checkpoint acc_seq_err = float(acc_seq_err) / (step_count * batch_size) prev_seq_err = max(0.0, acc_seq_err - 0.02) # No noise at error < 2%. acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0 t_size = float(sum([len(x) for x in train_set])) / float(1000000) msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f" % (global_step + 1, step_time, t_size, learning_rate, acc_grad_norm / FLAGS.steps_per_checkpoint)) data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f" % (msg, max_cur_length, data.safe_exp(acc_loss), 100*acc_errors, 100*acc_seq_err)) # If errors are below the curriculum threshold, move curriculum forward. is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss) is_good = is_good and FLAGS.curriculum_seq > acc_seq_err if is_good and is_chief: if FLAGS.quantize: # Quantize weights. data.print_out(" Quantizing parameters.") sess.run([quant_op]) # Increase current length (until the next with training data). sess.run(model.cur_length_incr_op) # Forget last perplexities if we're not yet at the end. if max_cur_length < max_length: prev_acc_perp.append(1000000) # Lower learning rate if we're worse than the last 5 checkpoints. acc_perp = data.safe_exp(acc_loss) if acc_perp > max(prev_acc_perp[-5:]) and is_chief: sess.run(model.lr_decay_op) prev_acc_perp.append(acc_perp) # Save checkpoint. if is_chief: checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) # Run evaluation. bin_bound = 4 for p in FLAGS.problem.split("-"): total_loss, total_err, tl_counter = 0.0, 0.0, 0 for bin_id in xrange(len(data.bins)): if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1: err, _, loss = single_test(bin_id, model, sess, FLAGS.nprint, batch_size * 4, dev_set, p, beam_model=beam_model) if loss > 0.0: total_loss += loss total_err += err tl_counter += 1 test_loss = total_loss / max(1, tl_counter) test_err = total_err / max(1, tl_counter) test_perp = data.safe_exp(test_loss) summary = tf.Summary() summary.value.extend( [tf.Summary.Value(tag="test/%s/loss" % p, simple_value=test_loss), tf.Summary.Value(tag="test/%s/error" % p, simple_value=test_err), tf.Summary.Value(tag="test/%s/perplexity" % p, simple_value=test_perp)]) sv.SummaryComputed(sess, summary, global_step)
def interactive(): """Interactively probe an existing model.""" with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: # Initialize model. (model, _, _, _, _, (_, _, en_path, fr_path), _, _) = initialize(sess) # Load vocabularies. en_vocab, rev_en_vocab = wmt.initialize_vocabulary(en_path) _, rev_fr_vocab = wmt.initialize_vocabulary(fr_path) # Print out vectors and variables. if FLAGS.nprint > 0 and FLAGS.word_vector_file_en: print_vectors("embedding:0", en_path, FLAGS.word_vector_file_en) if FLAGS.nprint > 0 and FLAGS.word_vector_file_fr: print_vectors("target_embedding:0", fr_path, FLAGS.word_vector_file_fr) total = 0 for v in tf.trainable_variables(): shape = v.get_shape().as_list() total += mul(shape) print(v.name, shape, mul(shape)) print(total) # Start interactive loop. sys.stdout.write("Input to Neural GPU Translation Model.\n") sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline(), "" while inpt: cures = [] # Get token-ids for the input sentence. if FLAGS.simple_tokenizer: token_ids = wmt.sentence_to_token_ids( inpt, en_vocab, tokenizer=wmt.space_tokenizer, normalize_digits=FLAGS.normalize_digits) else: token_ids = wmt.sentence_to_token_ids(inpt, en_vocab) print([rev_en_vocab[t] for t in token_ids]) # Which bucket does it belong to? buckets = [b for b in xrange(len(data.bins)) if data.bins[b] >= max(len(token_ids), len(cures))] if cures: buckets = [buckets[0]] if buckets: result, result_cost = [], 10000000.0 for bucket_id in buckets: if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR: break glen = 1 for gen_idx in xrange(glen): # Get a 1-element batch to feed the sentence to the model. inp, target = data.get_batch( bucket_id, 1, None, FLAGS.height, preset=([token_ids], [cures])) loss, output_logits, _, _ = model.step( sess, inp, target, None, beam_size=FLAGS.beam_size, update_mem=False) # If it is a greedy decoder, outputs are argmaxes of output_logits. if FLAGS.beam_size > 1: outputs = [int(o) for o in output_logits] else: loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm) outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] print([rev_fr_vocab[t] for t in outputs]) print(loss, data.bins[bucket_id]) print(linearize(outputs, rev_fr_vocab)) cures.append(outputs[gen_idx]) print(cures) print(linearize(cures, rev_fr_vocab)) if FLAGS.simple_tokenizer: cur_out = outputs if wmt.EOS_ID in cur_out: cur_out = cur_out[:cur_out.index(wmt.EOS_ID)] res_tags = [rev_fr_vocab[o] for o in cur_out] bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags) loss += 1000.0 * bad_words + 100.0 * bad_brack if loss < result_cost: result = outputs result_cost = loss print("FINAL", result_cost) print([rev_fr_vocab[t] for t in result]) print(linearize(result, rev_fr_vocab)) else: print("TOOO_LONG") sys.stdout.write("> ") sys.stdout.flush() inpt = sys.stdin.readline(), ""