def interactive_comparison():
    """Interactively compare the encoder context vectors of pairs of sentences,
    entered one at a time."""
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("(1) > ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        contexts = []
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)
            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)
            # Get the output context vector.
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)
            # Append the context so we can compute the similarity metrics.
            contexts.append(output_context)
            # Display the output.
            print("bucket_id: ", bucket_id)
            print("output_context", output_context)
            # Once we have a pair, compute similarity metrics.
            if len(contexts) == 2:
                cosine_distance = cosine_similarity(*contexts)
                euclid_distance = np.linalg.norm(contexts[1] - contexts[0])
                print('cosine_similarity', cosine_distance)
                print('euclid_distance', euclid_distance)
                print('-------------------------------')
                contexts = []  # Start again.
            next_sentence = len(contexts) + 1
            print("(%i) > " % next_sentence, end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
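# `cosine_similarity` is called above but not defined in this file. A minimal
# sketch, assuming the contexts are numpy arrays (the flattening is an
# assumption about the shape `model.step_context` returns):
def cosine_similarity(a, b):
    """Cosine similarity between two context vectors, flattened to 1-D."""
    a = np.ravel(a)
    b = np.ravel(b)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))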
def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)
            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)
            # Get output logits for the sentence.
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)
            # Get the output context vector.
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)
            # Display the output.
            print("bucket_id: ", bucket_id)
            print("output_context", output_context)
            # This is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
            # If there is an EOS symbol in outputs, cut them at that point.
            if data_utils.EOS_ID in outputs:
                outputs = outputs[:outputs.index(data_utils.EOS_ID)]
            # Print out the French sentence corresponding to outputs.
            print(" ".join(
                [tf.compat.as_str(rev_fr_vocab[output]) for output in outputs]))
            print("> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
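# `get_bucket` is used above but not defined in this file. A minimal sketch
# following the standard translate-tutorial pattern; the module-level
# `_buckets` list of (source_size, target_size) pairs is an assumption, and
# this version raises if the sentence exceeds the largest bucket:
def get_bucket(vocab, sentence):
    """Pick the smallest bucket whose source size fits the tokenized sentence."""
    token_ids = data_utils.sentence_to_token_ids(
        tf.compat.as_bytes(sentence), vocab)
    return min(b for b in range(len(_buckets))
               if _buckets[b][0] > len(token_ids))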
def chat(args):
    with tf.Session() as sess:
        # Create model and load parameters.
        args.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            predicted_sentence = get_predicted_sentence(args, sentence, vocab,
                                                        rev_vocab, model, sess)
            if isinstance(predicted_sentence, list):
                for sent in predicted_sentence:
                    print(" (%s) -> %s" % (sent['prob'], sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
            # Write the prompt without a trailing newline.
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
def __init__(self, args, debug=False):
    start_time = datetime.now()

    # flow ctrl
    self.args = args
    self.debug = debug
    self.fbm_processed = []

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    self.sess = tf.InteractiveSession(
        config=tf.ConfigProto(gpu_options=gpu_options))

    # Create model and load parameters.
    self.args.batch_size = 1  # We decode one sentence at a time.
    self.model = create_model(self.sess, self.args)

    # Load vocabularies.
    self.vocab_path = os.path.join(self.args.data_dir,
                                   "vocab%d.in" % self.args.vocab_size)
    self.vocab, self.rev_vocab = data_utils.initialize_vocabulary(self.vocab_path)
    print("[ChatBot] model initialized, took %i secs"
          % (datetime.now() - start_time).seconds)

    # Load the YAML setup.
    self.FBM_API = "https://graph.facebook.com/v2.6/me/messages"
    with open("config.yaml", 'rt') as stream:
        try:
            # safe_load avoids constructing arbitrary Python objects.
            cfg = yaml.safe_load(stream)
            self.FACEBOOK_TOKEN = cfg.get('FACEBOOK_TOKEN')
            self.VERIFY_TOKEN = cfg.get('VERIFY_TOKEN')
        except yaml.YAMLError as exc:
            print(exc)
def get_english_vocab(directory, vocab_size):
    """Return the English vocabulary (token -> id) used to train this model."""
    en_vocab_path = os.path.join(directory, "vocab%d.from" % vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    return en_vocab
def my_predict():
    def _get_test_dataset():
        with open(TEST_DATASET_PATH) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        '1results_left',
        str(FLAGS.num_layers),
        str(FLAGS.size),
        str(FLAGS.vocab_size)
    ])
    results_path = os.path.join(FLAGS.results_dir, results_filename)

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        model = create_model(sess, forward_only=True)
        model.batch_size = 1
        vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()
        i = 0
        allright = 0
        for sentence in test_dataset:
            if i % 2 == 0:
                # Even lines are inputs: predict and remember the input.
                predicted_sentence = get_predicted_sentence(
                    sentence, vocab, rev_vocab, model, sess)
                print(predicted_sentence, ' -> ', sentence)
                end = sentence
            if i % 2 == 1:
                # Odd lines are references: compare against the prediction.
                sentence = sentence + ' ' + end
                pre_sentence = predicted_sentence + ' ' + end
                if sentence == pre_sentence:
                    allright += 1
                    print('^ is allright' + '\n')
                    results_fh.write(sentence + '\n'
                                     + predicted_sentence + ' ' + end + '\n')
                else:
                    print('Error~right is %s' % sentence)
                    results_fh.write(sentence + '\n'
                                     + predicted_sentence + ' ' + end + '\n')
            i = i + 1
        print('traj=', i // 2, ',allright=', allright,
              ',accuracy=', allright * 1.0 / (i * 1.0 / 2))
        results_fh.write('traj=%d,allright=%d,accuracy=%f'
                         % (i // 2, allright, allright * 1.0 / (i * 1.0 / 2)))
def predict(args, debug=False, parent=0):
    '''Tests the model on the questions stored in test_dataset.txt
    and writes the results to a file.
    '''
    def _get_test_dataset():
        with open(args.test_dataset_path) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        'results', str(args.num_layers), str(args.size), str(args.vocab_size)
    ])
    results_path = os.path.join(args.results_dir, results_filename + '.txt')

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        args.batch_size = 1
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()
        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            predicted_sentence = get_predicted_sentence(args, sentence, vocab,
                                                        rev_vocab, model, sess,
                                                        debug=debug)
            if isinstance(predicted_sentence, list):
                print("%s : (%s)" % (sentence, datetime.now()))
                results_fh.write("%s : (%s)\n" % (sentence, datetime.now()))
                for sent in predicted_sentence:
                    print(" (%s) -> %s" % (sent['prob'], sent['dec_inp']))
                    results_fh.write(" (%f) -> %s\n" % (sent['prob'], sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
                results_fh.write("%s -> %s\n" % (sentence, predicted_sentence))

    # The with-statement closes results_fh.
    print("Test results written to %s" % results_path)
    if parent:
        parent.clearWidget()
def __init__(self, args, session):
    self.args = args
    self.args.batch_size = 1
    self.session = session
    self.model = create_model(session, self.args)
    vocab_path = os.path.join(
        args.data_dir,
        "vocab%d.in" % args.vocab_size,
    )
    self.vocab, self.rev_vocab = data_utils.initialize_vocabulary(vocab_path)
def comparison_task(sess, model=None):
    """Compare the encoder state for two different English sentences.

    Cosine similarity is used as the distance metric.
    """
    sentences = [
        "There was a man with a red hat",
        "There was a man with a blue hat",
        "A man with a blue hat was there",
        "The sky is blue and the grass is green",
        "Why are you asking about the history of my life"
    ]

    # Load the vocab.
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.from" % FLAGS.from_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)

    # Create model and load parameters.
    if model is None:
        model = create_model(sess, True)

    # Persist the original batch size.
    original_batch_size = model.batch_size
    model.batch_size = 1  # We decode one sentence at a time.

    # Get all of the context vectors.
    context_vectors = []
    for sentence in sentences:
        context_vector = get_context(sess, model, en_vocab, sentence)
        context_vectors.append(context_vector)

    # Calculate the similarity matrix.
    similarity = np.zeros((len(sentences), len(sentences)))
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            similarity[i, j] = cosine_similarity(context_vectors[i],
                                                 context_vectors[j])

    # Display the output.
    print(80 * "=")
    print("For the following sentences:\n")
    for i, sentence in enumerate(sentences):
        print(i, sentence)
    print("The similarity matrix is:\n")
    print(similarity, "\n")
    print(80 * "=")

    # Reset the model.
    model.batch_size = original_batch_size
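# `get_context` is used above but not defined in this file. A minimal sketch
# following the pattern used in `interactive_comparison` (the helper name and
# return shape are assumptions):
def get_context(sess, model, en_vocab, sentence):
    """Run the encoder on one sentence and return its context vector."""
    token_ids = data_utils.sentence_to_token_ids(
        tf.compat.as_bytes(sentence), en_vocab)
    bucket_id = get_bucket(en_vocab, sentence)
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)
    return model.step_context(sess, encoder_inputs, decoder_inputs,
                              target_weights, bucket_id)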
def chat(args):
    if args.gpu_usage == 0:
        # Run on CPU only.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=config) as sess:
        # Create model and load parameters.
        args.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        # If the input is not already space-separated, run jieba segmentation.
        if len(sentence.split(' ')) < 2:
            sentence = sentence_split(sentence)
        while sentence:
            predicted_sentence = get_predicted_sentence(
                args, sentence, vocab, rev_vocab, model, sess)
            if isinstance(predicted_sentence, list):
                for sent in predicted_sentence:
                    print("%s: %s" % ('chatbot', sentence_combine(sent['dec_inp'])))
            else:
                print(sentence, ' -> ', predicted_sentence)
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if len(sentence.split(' ')) < 2:
                sentence = sentence_split(sentence)
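# `sentence_combine` is used above but not defined in this file. A minimal
# sketch, assuming it simply undoes the jieba-style space-joining performed by
# `sentence_split` (the exact behavior is an assumption):
def sentence_combine(sentence):
    """Join space-separated Chinese tokens back into plain text."""
    return ''.join(sentence.split())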
def chat(args):
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, args)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            predicted_sentence = get_predicted_sentence(args, sentence, vocab,
                                                        rev_vocab, model, sess)
            print(predicted_sentence)
            # Write the prompt without a trailing newline.
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
def __init__(self, args, debug=False):
    self.FACEBOOK_TOKEN = 'EAAbo2I31LgEBAB0aHd8O6UpQckEnIZBnZC4qN2ExZBGHcOXZAtNBttWPIizqpv5KrGZCbfZA7ZCJsuMZCADZAtrdVZB52ZCZCe9VsZAzFpKPF8H6Qxh4ARkviYJZA47VXVdHQ4wLLMCZCbPDTNiyWTPBeRdzhGKOwwWXyQWZALoZAyaKXt8Xj8lWfoWsYdMAt'
    self.VERIFY_TOKEN = 'my_token'
    self.FBM_API = "https://graph.facebook.com/v2.6/me/messages"

    # flow ctrl
    self.args = args
    self.debug = debug
    self.fbm_processed = []

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.gpu_usage)
    self.sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options))

    # Create model and load parameters.
    self.args.batch_size = 1  # We decode one sentence at a time.
    self.model = create_model(self.sess, self.args)

    # Load vocabularies.
    self.vocab_path = os.path.join(self.args.data_dir,
                                   "vocab%d.in" % self.args.vocab_size)
    self.vocab, self.rev_vocab = data_utils.initialize_vocabulary(
        self.vocab_path)
def predict(args, debug=False):
    def _get_test_dataset():
        with open(args.test_dataset_path) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        'results', str(args.num_layers), str(args.size), str(args.vocab_size)
    ])
    results_path = os.path.join(args.results_dir, results_filename + '.txt')

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        args.batch_size = 1
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()
        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            predicted_sentence = get_predicted_sentence(args, sentence, vocab,
                                                        rev_vocab, model, sess,
                                                        debug=debug)
            if isinstance(predicted_sentence, list):
                print("%s : (%s)" % (sentence, datetime.now()))
                results_fh.write("%s : (%s)\n" % (sentence, datetime.now()))
                for sent in predicted_sentence:
                    print(" (%s) -> %s" % (sent['prob'], sent['dec_inp']))
                    results_fh.write(" (%f) -> %s\n" % (sent['prob'], sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
                results_fh.write("%s -> %s\n" % (sentence, predicted_sentence))

    # The with-statement closes results_fh.
    print("results written in %s" % results_path)
def data_to_token_ids(data_path, target_path, vocabulary_path,
                      tokenizer=None, normalize_digits=True):
    """Tokenize data file and turn into token-ids using given vocabulary file.

    This function loads data line-by-line from data_path, calls the above
    sentence_to_token_ids, and saves the result to target_path. See comment
    for sentence_to_token_ids on the details of token-ids format.

    Args:
        data_path: path to the data file in one-sentence-per-line format.
        target_path: path where the file with token-ids will be created.
        vocabulary_path: path to the vocabulary file.
        tokenizer: a function to use to tokenize each sentence; if None,
            basic_tokenizer will be used.
        normalize_digits: Boolean; if true, all digits are replaced by 0s.
    """
    if not gfile.Exists(target_path):
        print("Tokenizing data in %s" % data_path)
        vocab, _ = data_utils.initialize_vocabulary(vocabulary_path)
        with gfile.GFile(data_path, mode="rb") as data_file:
            with gfile.GFile(target_path, mode="w") as tokens_file:
                counter = 0
                for line in data_file:
                    counter += 1
                    if counter % 100000 == 0:
                        print("  tokenizing line %d" % counter)
                    # The file is opened in binary mode, so split on a bytes
                    # separator (splitting bytes on a str fails under Python 3).
                    utterances = line.split(b'\t')
                    tokenized_utterances = []
                    for utter in utterances:
                        token_ids = data_utils.sentence_to_token_ids(
                            tf.compat.as_bytes(utter), vocab, tokenizer,
                            normalize_digits)
                        tokenized_utterances.append(
                            " ".join([str(tok) for tok in token_ids]))
                    tokens_file.write("\t".join(tokenized_utterances) + "\n")
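# A minimal usage sketch for data_to_token_ids; the paths and vocabulary size
# below are assumptions for illustration, not taken from this codebase:
data_to_token_ids(data_path="data/train/chat.txt",
                  target_path="data/train/chat.ids20000.txt",
                  vocabulary_path="data/vocab20000.in")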
def fight(args, args1):
    if args.gpu_usage == 0:
        # Run on CPU only.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    config = tf.ConfigProto(gpu_options=gpu_options)

    # Create two graphs to build nested sessions.
    model_graph = tf.Graph()
    adv_graph = tf.Graph()
    adv_sess = tf.Session(graph=adv_graph, config=config)
    sess = tf.Session(graph=model_graph, config=config)

    total_sent = open('%s/random_sent.txt' % args.work_root, 'r').readlines()
    total_sent_len = len(total_sent)

    chatbot_A = 'Chatbot_A'
    chatbot_B = 'Chatbot_B'
    fight_times = 5
    sleep_or_not = True

    # model_name
    if args.model_name[0] == 'g':
        chatbot_A = 'Gossiping_Bot'
    elif args.model_name[0] == 'w':
        chatbot_A = 'WomenTalk_Bot'
    # model_2_name
    if args.model_2_name[0] == 'g':
        chatbot_B = 'Gossiping_Bot'
    elif args.model_2_name[0] == 'w':
        chatbot_B = 'WomenTalk_Bot'

    # 1st session
    with sess.as_default():
        with model_graph.as_default():
            # Create model and load parameters.
            args.batch_size = 1  # We decode one sentence at a time.
            model = create_model(sess, args)
            # Load vocabularies.
            vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
            vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

    # 2nd session
    with adv_sess.as_default():
        with adv_graph.as_default():
            # Create model and load parameters.
            args1.batch_size = 1  # We decode one sentence at a time.
            model_r = create_model(adv_sess, args1)
            # Load vocabularies.
            vocab_path_r = os.path.join(args1.data_dir,
                                        "vocab%d.in" % args1.vocab_size)
            vocab_r, rev_vocab_r = data_utils.initialize_vocabulary(vocab_path_r)

    # Decode from standard input.
    print('\n')
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    sentence = sentence_split(sentence)  # make sure jieba splits the input
    while sentence:
        if sentence == 'random':
            # randint is inclusive on both ends, so cap the index at len - 1.
            sentence = total_sent[random.randint(0, total_sent_len - 1)]
            sentence = sentence_split(sentence)
            print('>> ', sentence)
        elif sentence == 'exit':
            print('\nBYE BYE ~ \n')
            break

        for turns in range(0, fight_times):
            # Chatbot A
            predicted_sentence = get_predicted_sentence(
                args, sentence, vocab, rev_vocab, model, sess)
            str1 = predicted_sentence[0]['dec_inp']
            str1 = sentence_split(str1)  # make sure jieba splits the reply
            # Chatbot B
            predicted_sentence1 = get_predicted_sentence(
                args1, str1, vocab_r, rev_vocab_r, model_r, adv_sess)

            # Chatbot A
            if isinstance(predicted_sentence, list):
                for sent in predicted_sentence:
                    print("%s: %s" % (chatbot_A, sentence_combine(sent['dec_inp'])))
            # Chatbot B
            if isinstance(predicted_sentence1, list):
                for sent in predicted_sentence1:
                    random_sleep(sleep_or_not)
                    print("%s: %s" % (chatbot_B, sentence_combine(sent['dec_inp'])))
            sentence = predicted_sentence1[0]['dec_inp']
            sentence = sentence_split(sentence)  # make sure jieba splits

        print('\n')
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = sentence_split(sentence)  # make sure jieba splits
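# `random_sleep` is used above but not defined in this file. A minimal sketch,
# assuming it pauses for a short random interval to pace the bot-vs-bot
# exchange (the timing bounds are assumptions; assumes `import time`):
def random_sleep(enabled, low=0.5, high=2.0):
    """Sleep for a random number of seconds when enabled."""
    if enabled:
        time.sleep(random.uniform(low, high))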
def train(args):
    print("[%s] Preparing dialog data in %s" % (args.model_name, args.data_dir))
    setup_workpath(workspace=args.workspace)
    train_data, dev_data, _ = data_utils.prepare_dialog_data(args.data_dir,
                                                             args.vocab_size)
    if args.reinforce_learn:
        args.batch_size = 1  # We decode one sentence at a time.

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (args.num_layers, args.size))
        model = seq2seq_model_utils.create_model(sess, args, forward_only=False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % args.max_train_data_size)
        dev_set = data_utils.read_data(dev_data, args.buckets,
                                       reversed=args.rev_model)
        train_set = data_utils.read_data(train_data, args.buckets,
                                         args.max_train_data_size,
                                         reversed=args.rev_model)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(args.buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)

            if args.reinforce_learn:
                _, step_loss, _ = model.step_rf(args, sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id, rev_vocab=rev_vocab)
            else:
                _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id,
                                             forward_only=False,
                                             force_dec_input=True)

            step_time += (time.time() - start_time) / args.steps_per_checkpoint
            loss += step_loss / args.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if (current_step % args.steps_per_checkpoint == 0) and (not args.reinforce_learn):
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f "
                      "perplexity %.2f @ %s"
                      % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity, datetime.now()))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(args.model_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(args.buckets)):
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs, target_weights,
                                                 bucket_id, forward_only=True,
                                                 force_dec_input=False)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def predict(args, debug=False):
    def _get_test_dataset():
        with open(args.test_dataset_path) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    def sentence_split(sentence):
        seg_list = jieba.cut(sentence, cut_all=False)
        sentence = ' '.join((' '.join(seg_list)).split())
        return sentence

    results_filename = '_'.join([
        'results', str(args.num_layers), str(args.size), str(args.vocab_size)
    ])
    results_path = os.path.join(args.results_dir, results_filename + '.txt')

    if args.gpu_usage == 0:
        # Run on CPU only.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=config) as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        args.batch_size = 1
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()
        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            sentence = sentence_split(sentence)
            predicted_sentence = get_predicted_sentence(args, sentence, vocab,
                                                        rev_vocab, model, sess,
                                                        debug=debug)
            if isinstance(predicted_sentence, list):
                print("input: %s (%s)" % (sentence, datetime.now()))
                results_fh.write("input: %s (%s)\n" % (sentence, datetime.now()))
                for sent in predicted_sentence:
                    print("chatbot: %s (%s)" % (sent['dec_inp'], sent['prob']))
                    results_fh.write("chatbot: %s (%f)\n" % (sent['dec_inp'], sent['prob']))
            else:
                print(sentence, ' -> ', predicted_sentence)
                results_fh.write("%s -> %s\n" % (sentence, predicted_sentence))

    # The with-statement closes results_fh.
    print("results written in %s" % results_path)
async def service_callback():
    async with websockets.connect('ws://localhost:9090') as websocket:
        # Advertise the service.
        await websocket.send("{ \"op\": \"advertise_service\","
                             " \"type\": \"roboy_communication_cognition/GenerateAnswer\","
                             " \"service\": \"/roboy/cognition/generative_nlp/answer\" }")
        i = 1  # counter for the service request IDs

        with tf.Session() as sess:
            # Create model and load parameters.
            logging.info("Loading the model")
            args = params_setup()
            args.batch_size = 1  # We decode one sentence at a time.
            model = create_model(sess, args)

            # Load vocabularies.
            vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
            vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
            logging.info("Service /roboy/cognition/generative_nlp/answer is ready")

            # Wait for the service request, generate the answer, and send it back.
            while True:
                try:
                    request = await websocket.recv()
                    sentence = json.loads(request)["args"]["text_input"]
                    model_response = get_predicted_sentence(
                        args, sentence, vocab, rev_vocab, model, sess)

                    srv_response = {}
                    answer = {}
                    if isinstance(model_response, list):
                        text = model_response[0]['dec_inp']
                    else:
                        text = model_response['dec_inp']
                    # Strip non-ASCII characters.
                    answer["text_output"] = ''.join(
                        ch if ord(ch) < 128 else '' for ch in text)
                    srv_response["values"] = answer
                    srv_response["op"] = "service_response"
                    srv_response["id"] = ("service_request:"
                                          "/roboy/cognition/generative_nlp/answer:"
                                          + str(i))
                    srv_response["result"] = True
                    srv_response["service"] = "/roboy/cognition/generative_nlp/answer"
                    i += 1

                    await websocket.send(json.dumps(srv_response))
                except Exception:
                    logging.exception("Oopsie! Got an exception in generative_nlp")
def train(args):
    print("[%s] Preparing dialog data in %s" % (args.model_name, args.data_dir))
    setup_workpath(workspace=args.workspace)
    train_data, dev_data, _ = data_utils.prepare_dialog_data(args.data_dir,
                                                             args.vocab_size)

    def get_gold(workspace=args.workspace):
        """Load the gold (reference) chat sentences used for reinforcement learning."""
        data_dir = "%s/data" % (workspace)
        full_path = str(sys.path[-1]) + "/" + data_dir + "/train/chat.txt.gz"
        print(full_path)
        with gzip.open(full_path, 'rb') as zi:
            test_sentences = zi.read()
        test_sentences = test_sentences.decode().split("\n")
        return test_sentences

    data_ = get_gold()

    if args.reinforce_learn:
        args.batch_size = 1  # We decode one sentence at a time.

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_usage)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (args.num_layers, args.size))
        model = seq2seq_model_utils.create_model(sess, args, forward_only=False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % args.max_train_data_size)
        dev_set = data_utils.read_data(dev_data, args.buckets,
                                       reversed=args.rev_model)
        train_set = data_utils.read_data(train_data, args.buckets,
                                         args.max_train_data_size,
                                         reversed=args.rev_model)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(args.buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir, "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step. This model variant also returns the
            # raw encoder/decoder inputs, which step_rf needs.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights, encoder_input, decoder_input = \
                model.get_batch(train_set, bucket_id)
            print("[shape]", np.shape(encoder_inputs), np.shape(decoder_inputs),
                  np.shape(target_weights))

            if args.reinforce_learn:
                _, step_loss, _ = model.step_rf(args, sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id, data_, encoder_input,
                                                decoder_input,
                                                rev_vocab=rev_vocab,
                                                forward_only=False)
            else:
                _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id,
                                             forward_only=False,
                                             force_dec_input=True)

            step_time += (time.time() - start_time) / args.steps_per_checkpoint
            loss += step_loss / args.steps_per_checkpoint
            current_step += 1
            print("Current step: " + str(current_step))

            # Once in a while, we save checkpoint, print statistics, and run evals
            # (in this variant, also during reinforcement learning).
            if current_step % args.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f "
                      "perplexity %.2f @ %s"
                      % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity, datetime.now()))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(args.model_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(args.buckets)):
                    encoder_inputs, decoder_inputs, target_weights, _, _ = \
                        model.get_batch(dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs, target_weights,
                                                 bucket_id, forward_only=True,
                                                 force_dec_input=False)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()