def ask():
    message = str(request.form['messageText'])
    # kernel = aiml.Kernel()
    # if os.path.isfile("bot_brain.brn"):
    #     kernel.bootstrap(brainFile="bot_brain.brn")
    # else:
    #     kernel.bootstrap(learnFiles=os.path.abspath("aiml/std-startup.xml"),
    #                      commands="load aiml b")
    #     kernel.saveBrain("bot_brain.brn")
    # kernel now ready for use
    if message == "quit":
        exit()
    idx_q = handleQueryFunc(message)
    output = model.predict(sess, idx_q.T)
    # decoded question (unused, kept for debugging)
    q = data_utils.decode(sequence=idx_q[0], lookup=metadata['idx2w'], separator=' ')
    bot_response = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'],
                                     separator=' ').split(' ')
    response = ' '.join(bot_response)
    print(response)
    return make_response(jsonify({'status': 'OK', 'answer': response}))
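# `handleQueryFunc` is not defined in the snippet above. Below is a minimal
# sketch of what such a helper usually does in these seq2seq chatbots:
# tokenize the message, map tokens to ids via metadata['w2idx'], and zero-pad
# to limit['maxq']. The helper name, the 'unk' fallback, and the (1, maxq)
# output shape are assumptions; the actual implementation may differ.
import numpy as np

def handle_query(message, w2idx, maxq):
    tokens = message.lower().split(' ')
    # map known words to their ids, unknown words to the 'unk' id (0 if absent)
    ids = [w2idx.get(tok, w2idx.get('unk', 0)) for tok in tokens[:maxq]]
    # right-pad with zeros so the query always has length maxq
    ids += [0] * (maxq - len(ids))
    return np.array(ids, dtype=np.int32).reshape(1, maxq)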
def send(message): #print("client",message) #print("send실행됐지롱!") print(message) data_len =limit['maxq'] print('1') q_refine_list = (kor_data2.disintegration_kor(message))[0] print('2') print(q_refine_list) #print("q_refine_list",q_refine_list) idx_q = np.zeros([data_len, limit['maxq']], dtype=np.int32) print('3') message_tokenized = q_refine_list.split(' ') indices = kor_data2.pad_seq(message_tokenized, metadata['w2idx'], limit['maxq']) print("messagetokenized",message_tokenized) #print("indices",indices) #print("np.array",np.array(indices)) #idx_q = idx_q+np.array(indices).reshape(1,limit['maxq']) #maxq행 1열 행렬만들기 idx_q = idx_q + np.array(indices).reshape(1,limit['maxq']) print("idx_q",idx_q) output = model.predict(sess, idx_q) print("output_origin",output) print("output",output[0]) output_decoded = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'], separator=' ').split(' ') reply = ' '.join(output_decoded).replace('unk', '') #print("출력되라") print(reply) return reply
def sample_replies(self, sess, valid_set, metadata, batch_n):
    test_x = valid_set.__next__()[0]
    test_y_pred = self.predict(sess, test_x)
    replies = []
    with open('logs/%d.txt' % batch_n, 'w') as log_file:
        for ii, oi in zip(test_x.T, test_y_pred):
            q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
            decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'], separator=' ')
            if decoded not in replies:
                log_file.write('q: "%s"; a: "%s"\n' % (q, decoded))
                replies.append(decoded)
        log_file.write('%d/%d\n' % (len(replies), test_x.shape[1]))
def process_line(self, line):
    w2idx = self.metadata['w2idx']
    idx2w = self.metadata['idx2w']
    # encode the input line as a (25, 1) column of word ids
    en = self.model.predict(self.sess, data.process_line(line, w2idx).reshape((25, 1)))
    # decode the predicted ids back into words
    words = data_utils.decode(en[0], idx2w)
    return ' '.join(words)
def reply():
    input_msg_ = request.form['msg']
    msg = str(input_msg_).lower()
    # keep only lowercase letters, digits and spaces
    msg = data.filter_line(msg, "0123456789abcdefghijklmnopqrstuvwxyz ")
    msg_arr = msg.split(' ')
    message = data.zero_pad_line(msg_arr, metadata['w2idx'])
    output = model.predict(sess, message.T)
    decoded = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'],
                                separator=' ').split(' ')
    return jsonify({'text': ' '.join(decoded)})
def save_checkpoint(self, global_step, generate_sample=False):
    '''Save the model state dict, and generate a MIDI sample if requested.'''
    checkpoint_name = os.path.join(self.checkpoints_dir,
                                   "model_checkpoint_step_{}.pt".format(global_step))
    torch.save(self.state_dict(), checkpoint_name)
    if generate_sample:
        generation = self.generate(length=120)
        stream = decode(generation)
        stream.write('midi', os.path.join(
            self.train_sample_dir,
            'train_sample_checkpoint_step_{}.mid'.format(global_step)))
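# A checkpoint written by save_checkpoint can be restored with load_state_dict,
# mirroring the loading code further below. A minimal sketch, assuming `model`
# is an instance of the same class and `checkpoints_dir` matches the directory
# used above (the helper name is hypothetical):
import os
import torch

def load_checkpoint(model, checkpoints_dir, global_step, device='cpu'):
    path = os.path.join(checkpoints_dir,
                        "model_checkpoint_step_{}.pt".format(global_step))
    model.load_state_dict(torch.load(path, map_location=device))
    return model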
def get_output(question, sess, w2idx, model, metadata):
    import data_utils
    from datasets.facebook2 import data

    # encode the input phrase and run it through the model
    idx_q, idx_a = data.process_input(question, w2idx)
    gen = data_utils.rand_batch_gen(idx_q, idx_a, 1)
    input_ = gen.__next__()[0]
    output = model.predict(sess, input_)
    # return the decoded output phrase
    for ii, oi in zip(input_.T, output):
        q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
        decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'],
                                    separator=' ').split(' ')
        return ' '.join(decoded)
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        content = load_file_content(FLAGS.predict_file)
        tag_result = []
        errors_map = {}
        num = 0
        for word_line in content:
            num += 1
            sentences = ''.join(word_line)
            # split on the Chinese full stop; remember whether the line ended with one
            sen_list = sentences.split('。')
            flag = False
            if sen_list[-1] == '':
                flag = True
                sen_list = sen_list[:-1]
            word_tag_map = {}
            errors = []
            offset = 0
            for i in range(len(sen_list)):
                line = sen_list[i] + '。'
                # the last sentence keeps its original, unterminated form
                if i == len(sen_list) - 1 and not flag:
                    line = line[:-1]
                result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                             id_to_tag)
                tag_, errors_ = decode(result, word_line, offset)
                word_tag_map.update(tag_)
                errors.extend(errors_)
                offset += len(line)
            errors_map[num] = errors
            # fill untagged positions with the 'O' (outside) tag
            tag_result_ = []
            for i in range(len(word_line)):
                if i in word_tag_map:
                    tag_result_.append(word_tag_map[i])
                else:
                    tag_result_.append('O')
            tag_result.append(' '.join(tag_result_) + '\n')
        dump_to_file(tag_result, os.path.join(FLAGS.result_path, 'predict_result.txt'), 'w')
def ask2(text):
    # note: the parameter was renamed from `str`, which shadows the builtin
    input_ = sentence_to_indexes(text)
    output = model.predict(sess, input_)
    for i in output:
        decoded = data_utils.decode(sequence=i, lookup=metadata['idx2w'],
                                    separator=' ').split(' ')
        print('>>> {}'.format(' '.join(decoded)))
def get_response(self, text, metadata, sess):
    questions = [text.lower()]
    questions = [filter_line(line, EN_WHITELIST) for line in questions]
    answers = questions
    qlines, alines = filter_data(questions, answers)
    qtokenized = [[w.strip() for w in wordlist.split(' ') if w] for wordlist in qlines]
    atokenized = [[w.strip() for w in wordlist.split(' ') if w] for wordlist in alines]
    w2idx = pickle.load(open("datasets/cornell_corpus/w2idx.pkl", "rb"))
    idx_q, idx_a = zero_pad(qtokenized, atokenized, w2idx)
    query = data_utils.rand_batch_gen(idx_q, idx_a, 1)
    input_q = query.__next__()[0]
    output = self.predict(sess, input_q)
    # replies = []
    for ii, oi in zip(input_q.T, output):
        q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
        decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'],
                                    separator=' ').split(' ')
        # disabled variant: filter out replies containing 'unk' and duplicates
        # if decoded.count('unk') == 0:
        #     if decoded not in replies:
        #         print('q : [{0}]; a : [{1}]'.format(q, ' '.join(decoded)))
        #         replies.append(decoded)
        return ' '.join(decoded)
def run(args):
    files = [INPUT_DIRECTORY + '/' + f for f in listdir(INPUT_DIRECTORY)
             if isfile(join(INPUT_DIRECTORY, f))]
    files.sort()
    if not os.path.exists(OUTPUT_DIRECTORY) or not os.path.isdir(OUTPUT_DIRECTORY):
        os.mkdir(OUTPUT_DIRECTORY)
    latex_output = ''
    for filename in files:
        latex_part = "\\paragraph{"
        latex_part += filename.split('/')[-1].split('.pdf.txt.txt')[0]
        latex_part += "}\n\\begin{enumerate}\n"
        with open(filename, "r", encoding='utf-8', errors='ignore') as f:
            sentences = f.readlines()
        sentences = [sentence.replace('\n', '') for sentence in sentences]
        for question in QUESTIONS:
            latex_part += "\\item " + question + "\\\\\n"
            latex_part += "$\\longrightarrow$ "
            reset_dict()
            testS, testQ, testA = process_data(sentences, question)
            answer, answer_probability, mem_probs = get_pred(testS, testQ)
            memory_probabilities = np.round(mem_probs, 4)
            # pick the sentence the final memory hop attends to most
            best_sentence_index = 0
            best_sentence_score = 0
            for index, mem in enumerate(memory_probabilities.tolist()):
                if mem[2] > best_sentence_score:
                    best_sentence_index = index
                    best_sentence_score = mem[2]
            words_l = []
            for idw in testS[0][best_sentence_index]:
                if idw == 0:
                    break
                words_l.append(decode(idw))
            sentence = ' '.join(words_l)
            # escape LaTeX special characters (str.replace returns a new string,
            # so the result must be reassigned)
            sentence = sentence.replace('%', '\\%').replace('_', '\\_')
            latex_part += sentence + "\n"
        latex_part += "\\end{enumerate}"
        latex_output += "\n" + latex_part
    with open(join(OUTPUT_DIRECTORY, 'latex_out.txt'), 'w') as f:
        f.write(latex_output)
# get last session
sess = model.restore_last_session()
# get a test batch
input_, answers = test_batch_gen.__next__()
output = model.predict(sess, input_)
modelsim = 0
usersim = 0
randomsim = 0
simcount = 0
lines = []
# get questions, real answers and model answers
for ii, ai, oi in zip(input_.T, answers.T, output):
    q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
    a = data_utils.decode(sequence=ai, lookup=metadata['idx2w'], separator=' ')
    d = data_utils.decode(sequence=oi, lookup=metadata['idx2w'], separator=' ')
    d = custom_dict.translateSentence(d).strip()
    qarr = q.split(' ')
    aarr = a.split(' ')
    darr = d.split(' ')
    # random answer of the same length as the model answer
    r = randomsentence(len(darr))
sort_order = np.argsort(step_numbers)
num_steps = step_numbers[sort_order[-1]]
# gets the checkpoint path with the greatest number of steps
last_checkpoint_path = checkpoints[sort_order[-1]]
full_path = last_checkpoint_path
print("Loading model weights from {}...".format(full_path))
lstm2.load_state_dict(torch.load(full_path, map_location=device))

for i in tqdm.tqdm(range(args.num_samples)):
    generation = lstm.generate(condition=args.condition, k=None,
                               length=args.sample_len, temperature=args.temp)
    stream = decode(generation)
    if args.logdir2 != '':
        generation2 = lstm2.generate(condition=args.condition, k=None,
                                     length=args.sample_len2, temperature=args.temp)
        stream2 = decode(generation2)
        # combine the samples into a two-part stream
        combined_stream = m21.stream.Stream()
        first_part = m21.stream.Part(id='1')
        first_part.append(stream)
        second_part = m21.stream.Part(id='2')
        second_part.append(stream2)
input_aux_ = train_batch_gen.__next__()[3]
output, output_bwd = model.predict(sess, input_, input_aux_)
print(output.shape)

# In[44]:

replies = []
for ii, ai, oi, oi_bwd in zip(input_.T, input_aux_.T, output, output_bwd):
    # pick the vocabulary that matches the genre flag on the auxiliary input
    if ai[0] > 0:
        genre = 'romance'
        lookup_ = rom_metadata
    else:
        genre = 'horror'
        lookup_ = hor_metadata
    q = data_utils.decode(sequence=ii, lookup=lookup_[genre + 'idx2w'], separator=' ')
    decoded = data_utils.decode(sequence=oi, lookup=lookup_[genre + 'idx2w'],
                                separator=' ').split(' ')
                               yseq_len=yseq_len,
                               xvocab_size=xvocab_size,
                               yvocab_size=yvocab_size,
                               ckpt_path='ckpt/cornell_corpus/',
                               emb_dim=emb_dim,
                               num_layers=3)

# In[8]:

# val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
# train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:

# sess = model.restore_last_session()
# sess = model.train(train_batch_gen, val_batch_gen)

with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, "./seq2seq_model.ckpt-44000")
    dic = metadata['w2idx']
    dic2 = metadata['idx2w']
    while True:
        line = input("Please enter your line: ")
        en = data.process_line(line, dic).reshape((25, 1))
        res = model.predict(sess, en)
        print(res)
        en2 = data_utils.decode(res[0], dic2)
        # join the decoded words into a single reply string
        print(' '.join(en2))
                loss_path='',
                metadata=i_metadata,
                emb_dim=1024,
                num_layers=3)
    d_sess = d_model.restore_last_session()
    i_sess = i_model.restore_last_session()
    return d_model, i_model, d_sess, i_sess, d_metadata, i_metadata


if __name__ == '__main__':
    dm, im, ds, i_s, dmt, imt = get_model()

    txt = 'I like to reading'
    d_q = d_data.split_sentence(txt, dmt)
    input_ = d_q.T
    output_ = dm.predict(ds, input_)
    print(output_)
    answer = data_utils.decode(sequence=output_[0], lookup=dmt['idx2w'], separator=' ')
    print(answer)

    txt = 'I like to reading'
    i_q = IE_data.split_sentence(txt, imt)
    input_ = i_q.T
    output_ = im.predict(i_s, input_)
    print(output_)
    answer = data_utils.decode(sequence=output_[0], lookup=imt['idx2w'], separator=' ')
    print(answer)
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper
import importlib
importlib.reload(seq2seq_wrapper)

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/opensubtitle/',
                                emb_dim=emb_dim,
                                num_layers=3)

sess = model.restore_last_session()

while True:
    query = input('Input:\t')
    if query == 'quit' or query == 'exit':
        exit(0)
    ids = data_utils.encode(sequence=query, lookup=metadata['w2idx'])
    output = model.predict(sess, ids)
    reply = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'], separator=' ')
    print('Output:' + reply)
                 model_name=model_name,
                 num_layers=num_layers)
sess = model.restore_last_session()

if train_model:
    sess = model.train(train_batch_gen, val_batch_gen)
else:
    input_, labels_ = val_batch_gen.__next__()
    output = model.predict(sess, input_)
    replies = []
    lbls = list()
    preds = list()
    for ii, il, oi in zip(input_.T, labels_.T, output):
        q = decode(sequence=ii, lookup=idx2block, separator=' ')
        l = decode(sequence=il, lookup=idx2block, separator=' ')
        o = decode(sequence=oi, lookup=idx2block, separator=' ')
        decoded = o.split(' ')
        if decoded.count('UNK') == 0:
            if decoded not in replies:
                if len(l) == len(o):
                    print('i: [{0}]\na: [{1}]\np: [{2}]\n'.format(q, l, ' '.join(decoded)))
                    print("-" * 80)
                    lsplits = l.split()
                    osplits = o.split()
                    for lspl in lsplits:
                        # split each label token into its block number and I/O type
                        match = re.match(r"(\d+)(\w)", lspl)
                        block, iotype = match.group(1), match.group(2)
def respond(msg):
    encoded_msg = data_utils.encode(msg, w2idx, limit['maxq'])
    response = model.predict(sess, encoded_msg)[0]
    return data_utils.decode(response, idx2w)
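# respond() leans on the data_utils.encode/decode pair used throughout these
# snippets. A minimal sketch of what that pair typically looks like, assuming
# id 0 is padding and 'unk' marks out-of-vocabulary words; the repo's real
# helpers may differ in signature and padding behaviour:
import numpy as np

def encode(sentence, w2idx, maxlen):
    ids = [w2idx.get(w, w2idx['unk']) for w in sentence.split(' ')][:maxlen]
    ids += [0] * (maxlen - len(ids))
    # shape (maxlen, 1): one time-major column per sentence
    return np.array(ids, dtype=np.int32).reshape(maxlen, 1)

def decode(sequence, idx2w, separator=' '):
    # skip padding ids and join the remaining words
    return separator.join(idx2w[i] for i in sequence if i != 0)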
def train(self, train_set, valid_set, sess=None):
    # we need to save the model periodically
    with self.g.as_default():
        saver = tf.train.Saver()
        # if no session is given, create one and init all variables
        if not sess:
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
        sys.stdout.write('\n<log> Training started </log>\n')
        # run M epochs
        for i in range(self.epochs):
            try:
                self.train_batch(sess, train_set)
                print(i)
                if i and i % (self.epochs // 10) == 0:  # TODO: make this tunable by the user
                    # save model to disk
                    saver.save(sess, self.ckpt_path + self.model_name + '.ckpt',
                               global_step=i)
                    # evaluate to get validation loss
                    val_loss, replies = self.eval_batches(sess, valid_set, 16)  # TODO: and this
                    # print stats
                    print('\nModel saved to disk at iteration #{}'.format(i))
                    print('val loss : {0:.6f}'.format(val_loss))
                    sys.stdout.flush()
                    # run the preset sentences through the model and save the answers
                    if not self.loss_path == '':
                        with open(self.loss_path + 'preset' + str(i) + '.txt', 'w') as f:
                            for sentence in PRESET_DATA:
                                question = data.split_sentence(sentence, self.meta_data)
                                input_ = question.T
                                output_ = self.predict(sess, input_)
                                answer = data_utils.decode(sequence=output_[0],
                                                           lookup=self.meta_data['idx2w'],
                                                           separator=' ')
                                f.write(sentence + '\n')
                                f.write(answer + '\n')
            except KeyboardInterrupt:
                # this will most definitely happen, so handle it
                print('Interrupted by user at iteration {}'.format(i))
                self.session = sess
                return sess
val_batch_gen = data_utils.rand_batch_gen(validX, validY, 256)
test_batch_gen = data_utils.rand_batch_gen(testX, testY, 256)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:

sess = model.train(train_batch_gen, val_batch_gen)

# In[7]:

sess = model.restore_last_session()

# In[10]:

input_ = test_batch_gen.__next__()[0]
output = model.predict(sess, input_)
print(output.shape)

# In[11]:

replies = []
for ii, oi in zip(input_.T, output):
    q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
    decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'],
                                separator=' ').split(' ')
    if decoded.count('unk') == 0:
        if decoded not in replies:
            print('q : [{0}]; a : [{1}]'.format(q, ' '.join(decoded)))
            replies.append(decoded)
# load data from pickle and npy files
data_ctl, idx_words, idx_phonemes = data.load_data(PATH='datasets/cmudict/')
(trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_phonemes, idx_words)

# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 128
xvocab_size = len(data_ctl['idx2pho'].keys())
yvocab_size = len(data_ctl['idx2alpha'].keys())
emb_dim = 128

importlib.reload(seq2seq_wrapper)
model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/cmudict/',
                                emb_dim=emb_dim,
                                num_layers=3)

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 16)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, 128)
model.train(train_batch_gen, val_batch_gen)

sess = model.restore_last_session()
output = model.predict(sess, val_batch_gen.__next__()[0])
print(output.shape)
for oi in output:
    print(data_utils.decode(sequence=oi, lookup=data_ctl['idx2alpha'], separator=''))
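# With separator='' the decoded symbols are joined with no spaces, which fits
# the cmudict task (phoneme sequence in, spelling out). A toy illustration with
# a made-up lookup table, not the repo's real vocabulary, and assuming decode
# drops or blanks the padding id 0:
idx2alpha_demo = {0: '', 1: 'h', 2: 'e', 3: 'l', 4: 'o'}
print(data_utils.decode(sequence=[1, 2, 3, 3, 4, 0, 0],
                        lookup=idx2alpha_demo, separator=''))  # -> 'hello'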
sort_order = np.argsort(step_numbers)
num_steps = step_numbers[sort_order[-1]]
# gets the checkpoint path with the greatest number of steps
last_checkpoint_path = checkpoints[sort_order[-1]]
full_path = last_checkpoint_path
print("Loading MELODY model weights from {}...".format(full_path))
melody_lstm.load_state_dict(torch.load(full_path, map_location=device))

for i in tqdm.tqdm(range(args.num_samples)):
    bass_out, melody_out = melody_lstm.generate(bassline_model=bassline_lstm,
                                                k=args.k,
                                                bass_temp=args.bass_temp,
                                                bass_length=args.bass_sample_len,
                                                melody_temp=args.melody_temp,
                                                melody_length=args.melody_sample_len)
    bass_stream = decode(bass_out)
    melody_stream = decode(melody_out)
    # combine bass and melody into a single two-part stream
    combined_stream = m21.stream.Stream()
    bass_part = m21.stream.Part(id='bass')
    bass_part.append(bass_stream)
    melody_part = m21.stream.Part(id='melody')
    melody_part.append(melody_stream)
    combined_stream.insert(0, melody_part)
    combined_stream.insert(0, bass_part)
    # melody_stream.mergeElements(bass_stream)
    # melody_stream.show('midi')
    sample_dir = './generated_samples/conditional'
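    # The combined two-part stream can then be written out as MIDI, matching
    # the stream.write('midi', ...) call in save_checkpoint above; the file
    # name is illustrative and `os` is assumed to be imported:
    combined_stream.write('midi',
                          os.path.join(sample_dir, 'sample_{}.mid'.format(i)))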