def generate_one_question_b_v(self, question, deploy_model): question_make_sense = isMakeSense(question) res, score = beam_search(question, self.cr, deploy_model, beam_size=100, search_scope=100, output_size=50) print res res = [' '.join(self.cr.transform_input_text(s)) for s in res] resorted_list = list() for r, s in zip(res, score): idf = 0.0 tokens = r.split(u' ') for token in tokens[1:-1]: idf += get_idf(token) # idf /= len(tokens) # idf_revise = 1 / (1 + np.exp(-2 / idf)) idf_revise = 4 * np.tanh(4 * idf) resorted_list.append((r, s, s)) if len(question) > 3: resorted_list = sorted(resorted_list, key=lambda x:x[2] / len(question) ** 1) else: resorted_list = sorted(resorted_list, key=lambda x:x[2]) candidates = list() if question_make_sense == 1: f = 0 for r, _, _ in resorted_list[:5]: ori_sentence = r.replace(u'<END>', u'').replace(u' ', u'') if isMakeSense(ori_sentence) == 1: f += 1 if f <= 1: question_make_sense = 0 for r, s1, s2 in resorted_list: ori_sentence = r.strip().replace(u'<END>', u'') ori_sentence = ori_sentence.replace(u' ', u'') answer_make_sense = isMakeSense(ori_sentence) r0 = r if isinstance(r, unicode) : r0 = r.encode(config.globalCharSet()) print r0, s1, s2, answer_make_sense, if len(ori_sentence) <= 3 \ and len(ori_sentence) < len(question) and ori_sentence in question: print 'continue1' continue if answer_make_sense == -1 or u'ϵͳ' in ori_sentence or u'NUM' in ori_sentence: print 'continue2' continue if question_make_sense == 1 and answer_make_sense <= 0: print 'continue3' continue # r_token_count = len(ori_sentence.strip().split(u' ')) # if question_word_count > 1 and r_token_count == 1: # print 'continue4' # continue candidates.append((r, s2)) print 'variousen' variousen_scope = 15 output_size = 5 high_fruq_left = 4 if len(candidates) == 0: return candidates, _ = zip(*candidates) # v_index = variousen_strings(candidates[:variousen_scope], output_size) # v_index = range(min(len(candidates), high_fruq_left)) + v_index # # print v_index # 
func = lambda x, y:x if y in x else x + [y] # v_index = reduce(func, [[], ] + v_index) # toReturn = [candidates[i] for i in v_index[:output_size]] toReturn = candidates[:output_size] return toReturn
def generate_b_v_t_g(self, input_file, output_file):
    """Batch-generate answers for every question line in input_file.

    For each (tab-separated) input line: writes the question line to
    output_file, runs beam search with a trailing-score variant
    (beam_search_t), re-ranks and filters the hypotheses like
    generate_one_question_b_v, diversifies the top candidates via
    variousen_strings, and appends the result lines to output_file.

    NOTE(review): the isMakeSense filters are deliberately disabled here
    (hard-coded to 1); both files are opened with the project-wide
    charset from config.globalCharSet().

    :param input_file: path to the question file, one entry per line,
                       fields separated by tabs.
    :param output_file: path the questions and ranked results are
                        written to.
    """
    deploy_model = self.model.get_deploy_function()
    #get_cost= self.model.get_cost()
    print output_file
    print 'generate_b_v_t_g'
    with codecs.open(output_file, 'w', config.globalCharSet()) as fw:
        with codecs.open(input_file, 'r', config.globalCharSet()) as fo:
            for line in fo.readlines() :
                # line_word, line_zi = SegProcess(line.strip())
                # line = line_word.decode("gb18030")
                # line = line_word
                line = line.strip()
                lines = line.strip().split('\t')
                #(question, question_mask) = self.cr.transform_input_data(lines[0])
                #(answer, answer_mask) = self.cr.transform_input_data(lines[1])
                #qa_cost=get_cost(question, question_mask,answer,answer_mask,[[string.atoi(lines[2])]])
                #fw.write(line+'\t'+str(qa_cost)+'\n')
                # Make-sense check disabled: every question is accepted.
                question_make_sense = 1#isMakeSense(line)
                print (line.encode(config.globalCharSet()))
                #fw.writelines('%s\n' % line)
                # Three-field lines: keep only fields 0 and 2 (field 1 is
                # dropped) before echoing and searching.
                if(len(lines)==3):
                    line=''
                    line+=lines[0]
                    line+='\t'
                    line+=lines[2]
                fw.writelines('%s\n' % line)
                res, score = beam_search_t(line, self.cr, deploy_model, beam_size=200, search_scope=100)
                print res
                # Each hypothesis carries an extra trailing element; split
                # it off (res1 = tokens, res2 = trailing value).
                res1= [s[:-1] for s in res]
                res2= [s[-1] for s in res]
                res = [' '.join(self.cr.transform_input_text(s)) for s in res1]
                # Re-attach the trailing value as a tab-separated suffix.
                for res_len in range(len(res)):
                    res[res_len]+='\t'
                    res[res_len]+=str(res2[res_len])
                resorted_list = list()
                for r, s in zip(res, score):
                    # Accumulate IDF over the inner tokens; idf_revise is
                    # computed but unused (raw beam score is kept).
                    idf = 0.0
                    tokens = r.split(u' ')
                    for token in tokens[1:-1]:
                        idf += get_idf(token)
                    # idf /= len(tokens)
                    # idf_revise = 1 / (1 + np.exp(-2 / idf))
                    idf_revise = 4 * np.tanh(4 * idf)
                    resorted_list.append((r, s, s))
                # Length-normalise the sort key for longer questions.
                if len(line) > 3:
                    resorted_list = sorted(resorted_list, key=lambda x:x[2] / len(line) ** 1)
                else:
                    resorted_list = sorted(resorted_list, key=lambda x:x[2])
                candidates = list()
                if question_make_sense == 1:
                    # Top-5 sanity vote; with the check disabled (if 1)
                    # f always reaches 5, so this never demotes.
                    f = 0
                    for r, _, _ in resorted_list[:5]:
                        ori_sentence = r.replace(u'<END>', u'').replace(u' ', u'')
                        #if isMakeSense(ori_sentence) == 1:
                        if 1:
                            f += 1
                    if f <= 1:
                        question_make_sense = 0
                for r, s1, s2 in resorted_list:
                    ori_sentence = r.strip().replace(u'<END>', u'')
                    ori_sentence = ori_sentence.replace(u' ', u'')
                    # Make-sense check disabled for answers as well.
                    answer_make_sense = 1#isMakeSense(ori_sentence)
                    r0 = r
                    if isinstance(r, unicode) :
                        r0 = r.encode(config.globalCharSet())
                    print r0, s1, s2, answer_make_sense
                    # Drop short answers that are substrings of the question.
                    if len(ori_sentence) <= 3 \
                        and len(ori_sentence) < len(line) and ori_sentence in line:
                        print 'continue1'
                        continue
                    if answer_make_sense == -1:
                        print 'continue2'
                        continue
                    if question_make_sense == 1 and answer_make_sense <= 0:
                        print 'continue3'
                        continue
                    # r_token_count = len(ori_sentence.strip().split(u' '))
                    # if question_word_count > 1 and r_token_count == 1:
                    #     print 'continue4'
                    #     continue
                    candidates.append((r, s2))
                print 'variousen'
                # Diversify: keep the first high_fruq_left candidates plus
                # indices chosen by variousen_strings, dedupe via reduce,
                # then emit the first output_size results.
                variousen_scope = 15
                output_size = 5
                high_fruq_left = 5
                v_index = variousen_strings(candidates[:variousen_scope], output_size)
                v_index = range(min(len(candidates), high_fruq_left)) + v_index
                # print v_index
                func = lambda x, y:x if y in x else x + [y]
                v_index = reduce(func, [[], ] + v_index)
                toReturn = [candidates[i] for i in v_index[:output_size]]
                for r, s in toReturn :
                    print ('result: %s, score: %f.' % (r.encode(config.globalCharSet()), s))
                    fw.writelines('result: %s, score: %f.\n' % (r, s))
# NOTE(review): stray triple-quote below appears to open a commented-out
# region that continues past this chunk -- confirm against the full file.
'''