def chat(args):
    with tf.Session() as sess:
        # Create model and load parameters.
        args.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            predicted_sentence = get_predicted_sentence(
                args, sentence, vocab, rev_vocab, model, sess)
            if isinstance(predicted_sentence, list):
                for sent in predicted_sentence:
                    print(" (%s) -> %s" % (sent['prob'], sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
def predict():
    def _get_test_dataset():
        with open(TEST_DATASET_PATH) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join(['results', str(FLAGS.num_layers), str(FLAGS.size), str(FLAGS.vocab_size)])
    results_path = os.path.join(FLAGS.results_dir, results_filename)

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        model = create_model(sess, forward_only=True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()

        for sentence in test_dataset:
            # Get token-ids for the input sentence and decode it.
            predicted_sentence = get_predicted_sentence(sentence, vocab, rev_vocab, model, sess)
            print(sentence + ' -> ' + predicted_sentence)
            results_fh.write(predicted_sentence + '\n')
def chat():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, forward_only=True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        # Keep only Chinese characters and punctuation, then insert a space after
        # each one so the input matches the vocabulary's tokenization.
        sentence = re.sub(u'[^\u4e00-\u9fa5,。;:?!‘’“”、]', '', sentence.decode('utf-8'))
        sentence = re.sub(u'(?P<chinese>[\u4e00-\u9fa5,。;:?!‘’“”、])', add_space, sentence)
        while sentence:
            predicted_sentence = get_predicted_sentence(sentence, vocab, rev_vocab, model, sess)
            print(predicted_sentence)
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = re.sub(u'[^\u4e00-\u9fa5,。;:?!‘’“”、]', '', sentence.decode('utf-8'))
            sentence = re.sub(u'(?P<chinese>[\u4e00-\u9fa5,。;:?!‘’“”、])', add_space, sentence)
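# add_space is referenced in chat() above but not shown here. A minimal sketch
# of a compatible re.sub callback, assuming the intent is to append a space
# after every matched Chinese character or punctuation mark (the named group
# 'chinese' in the pattern above) so the input is space-tokenized like the
# training data; the real helper may differ.
def add_space(match):
    return match.group('chinese') + ' '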
def predict(args, debug=False):
    def _get_test_dataset():
        # with open(args.test_dataset_path) as test_fh:
        with open(args.input_name) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join(['results', str(args.num_layers), str(args.size), str(args.vocab_size)])
    # results_path = os.path.join(args.results_dir, results_filename + '.txt')
    results_path = str(args.output_name)

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        args.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()

        for sentence in test_dataset:
            # Get token-ids for the input sentence and decode it.
            predicted_sentence = get_predicted_sentence(args, sentence, vocab, rev_vocab, model, sess, debug=debug)
            if isinstance(predicted_sentence, list):
                # Beam-search mode: print every candidate with its score, but
                # only write the decoded sentences to the results file.
                print("%s : (%s)" % (sentence, datetime.now()))
                for sent in predicted_sentence:
                    print(" (%s) -> %s" % (sent['prob'], sent['dec_inp']))
                    results_fh.write("%s\n" % (sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
                results_fh.write("%s\n" % (predicted_sentence))

    print("results written in %s" % results_path)
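# The list branch in predict() above assumes get_predicted_sentence (defined
# elsewhere) returns beam-search candidates shaped roughly like this -- the
# 'prob' and 'dec_inp' keys come from the code, the values are illustrative:
#
#   [{'prob': -4.21, 'dec_inp': 'i am fine thank you'},
#    {'prob': -5.87, 'dec_inp': 'not too bad'}]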
def train():
    print("Preparing dialog data in %s" % FLAGS.data_dir)
    train_data, dev_data, _ = data_utils.prepare_dialog_data(FLAGS.data_dir, FLAGS.vocab_size)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, forward_only=False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size)
        dev_set = read_data(dev_data)
        train_set = read_data(train_data, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(BUCKETS))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. The length of [scale[i], scale[i+1]] is proportional
        # to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        max_global_step = 120000

        while model.global_step.eval() < max_global_step:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, encoder1_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, encoder1_inputs, decoder_inputs,
                                         target_weights, bucket_id, forward_only=False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous checkpoint period.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity %.2f" %
                      (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint, dump the embeddings, and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.model_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                with open(FLAGS.emb_path, 'w') as f:
                    pickle.dump(sess.run(model.embeddings), f)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(BUCKETS)):
                    encoder_inputs, encoder1_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, encoder1_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
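# Hypothetical entry point for the FLAGS-based functions above; the real flag
# definitions and wiring live elsewhere in the repo, so treat this as a sketch
# only.
def main(_):
    train()


if __name__ == "__main__":
    tf.app.run()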
def predict():
    # Load the PMI statistics used to pick a keyword for each input sentence.
    f = open('pkl_tianya/q_table.pkl')
    qtable = pickle.load(f)
    f.close()
    f = open('pkl_tianya/co_table.pkl')
    cotable = pickle.load(f)
    f.close()
    f = open('/home/zyma/work/data_daily_punct/nouns2500.in')
    nouns = [ele.strip() for ele in f.readlines()]
    f.close()

    def _get_test_dataset():
        with open(TEST_DATASET_PATH) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join(['results', str(FLAGS.num_layers), str(FLAGS.size), str(FLAGS.vocab_size)])
    results_path = os.path.join(FLAGS.results_dir, results_filename)

    with tf.Session() as sess, open(results_path, 'a') as results_fh:
        # Create the backward and forward models and load their parameters.
        bw_model, fw_model = create_model(sess)
        bw_model.batch_size = 1
        fw_model.batch_size = 1

        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
        print(vocab.items()[:20])

        test_dataset = _get_test_dataset()

        for sentence in test_dataset:
            # Pick the keyword with the highest PMI against the input sentence.
            key_word = sentencePMI(sentence, cotable, qtable, nouns)
            print('key_word:%s' % key_word)

            # Generate with the backward model conditioned on the keyword,
            # then reverse its first 10 tokens ...
            bw_sentence = get_predicted_sentence(sentence, key_word, vocab, rev_vocab, bw_model, sess)
            print(bw_sentence)
            bw_sentence = bw_sentence.split()[:10]
            bw_sentence.reverse()
            bw_sentence = ' '.join(bw_sentence)
            print('bw_sentence:%s' % bw_sentence)

            # ... and feed the reversed prefix to the forward model for the final reply.
            predicted_sentences = get_predicted_sentence(sentence, bw_sentence, vocab, rev_vocab, fw_model, sess)
            print(sentence + ' -> ' + predicted_sentences)
            results_fh.write(predicted_sentences + '(%s)' % key_word + '\n')
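# sentencePMI is defined elsewhere in the repo. Purely to illustrate the kind
# of PMI-based keyword picker the loop above relies on, a minimal sketch under
# assumed table layouts (qtable: word -> count, cotable: (word, noun) ->
# co-occurrence count); the real function and tables may differ.
import math

def sentence_pmi(sentence, cotable, qtable, nouns):
    # Return the candidate noun with the highest summed PMI against the
    # words of the input sentence.
    total = float(sum(qtable.values()))
    words = sentence.split()
    best_noun, best_score = None, float('-inf')
    for noun in nouns:
        score = 0.0
        for w in words:
            co = cotable.get((w, noun), 0)
            if co and w in qtable and noun in qtable:
                p_xy = co / total
                p_x = qtable[w] / total
                p_y = qtable[noun] / total
                score += math.log(p_xy / (p_x * p_y))
        if score > best_score:
            best_noun, best_score = noun, score
    return best_noun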
def train():
    print FLAGS.time_slot, train_data_path, save_model_dir, FLAGS.hidden_dim

    # load data
    dataset = Data(FLAGS.batch_size, FLAGS.encoder_size, FLAGS.decoder_size, train_data_path, test_data_path)
    train_X, train_y, test_X, test_y = dataset.train_inputs, dataset.train_labels, \
        dataset.test_inputs, dataset.test_labels

    tolerance_count = 0
    checkpoint_dir = os.path.join(save_model_dir,
                                  "%dhidden_%ddecoder_bestmodel.ckpt" % (FLAGS.hidden_dim, FLAGS.decoder_size))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        global_test_error = 1000000
        best_model = sess
        model = create_model(sess, FLAGS.encoder_size, FLAGS.decoder_size, FLAGS.hidden_dim, FLAGS.input_dim,
                             FLAGS.output_dim, load_model, checkpoint_dir)

        # training
        for epoch in range(FLAGS.epoches):
            st = time.time()
            epoch_loss = 0.0
            total_step = train_X.shape[0] / FLAGS.batch_size

            for step in range(total_step):
                encode_inputs, decode_inputs = model.get_batch(train_X, train_y, FLAGS.batch_size, step)
                step_loss, predict_outputs = model.step(sess, encode_inputs, decode_inputs,
                                                        FLAGS.encoder_size, FLAGS.decoder_size, is_training=True)
                epoch_loss += step_loss
                if step % 20 == 0:
                    print 'train(step:%d/%d epoch:%d/%d)' % (step + 1, total_step, epoch + 1, FLAGS.epoches), '\t', \
                        predict_outputs[0][0], '\t', decode_inputs[0][0], '\t loss:', step_loss

            print "train loss %.6f in epoch=%d, time=%f" % (epoch_loss, epoch + 1, time.time() - st)

            # validation
            if (epoch + 1) % FLAGS.check_per_epoches == 0:
                print " validation (epoch:%d/%d)" % (epoch + 1, FLAGS.epoches)

                # test dataset
                test_loss = 0.0
                for step_test in range(len(test_X) / FLAGS.batch_size):
                    encode_inputs, decode_inputs = model.get_batch(test_X, test_y, FLAGS.batch_size, step_test)
                    step_loss, predict_outputs = model.step(sess, encode_inputs, decode_inputs,
                                                            FLAGS.encoder_size, FLAGS.decoder_size, is_training=False)
                    test_loss += step_loss

                # update min test loss
                if test_loss < global_test_error:
                    tolerance_count = 0
                    global_test_error = test_loss
                    model.saver.save(sess, os.path.join(save_model_dir,
                                                        "%dhidden_%ddecoder_bestmodel.ckpt" % (FLAGS.hidden_dim, FLAGS.decoder_size)))
                else:
                    tolerance_count += FLAGS.check_per_epoches

                print "test loss %.6f in epoch=%d" % (test_loss, epoch + 1)
                print "global min test loss %.6f in epoch=%d" % (global_test_error, epoch + 1)

                # early stopping once no improvement has been seen for long enough
                if tolerance_count >= 50:
                    break

    print 'The final global min test error: %f' % global_test_error