def get_shuffle_indices_test(dh, step, train_part, model, train_step):
    """
    :param dh: data helper
    :param step: epoch index
    :param train_part: 'relation' or another training part
    :param model: train / valid / test
    :param train_step: global training step
    :return: list of sample ids to evaluate
    """
    if train_part == 'relation':
        if model == "valid":
            id_list = ct.get_static_id_list_debug(len(dh.train_question_list_index))
        else:
            id_list = ct.get_static_id_list_debug_test(len(dh.test_question_list_index))
        id_list = ct.random_get_some_from_list(id_list, FLAGS.evaluate_batchsize)
        id_list2 = [str(x) for x in id_list]
        # train_step, training mode, training part
        ct.just_log(config.cc_par('combine_test'),
                    '%s\t%s\t%s\t%s' % (train_step, model, train_part, '\t'.join(id_list2)))
    else:
        f1s = ct.file_read_all_lines_strip(config.cc_par('combine_test'))
        line = ''
        exist = False
        for l1 in f1s:
            if str(l1).split('\t')[0] == str(train_step) \
                    and str(l1).split('\t')[1] == model:
                line = str(l1)
                exist = True
                break
        if exist:
            line_split = line.split('\t')
            line_split = line_split[3:]
            line_split = [int(x) for x in line_split]
            id_list = np.array(line_split)
            ct.print('get_shuffle_indices_test exist %s %s ' % (train_step, model), 'shuffle_indices_test')
        else:  # not recorded yet, so generate our own
            if model == "valid":
                id_list = ct.get_static_id_list_debug(len(dh.train_question_list_index))
            else:
                id_list = ct.get_static_id_list_debug_test(len(dh.test_question_list_index))
            id_list = ct.random_get_some_from_list(id_list, FLAGS.evaluate_batchsize)
            ct.print('get_shuffle_indices_test not exist %s ' % train_step, 'shuffle_indices_test')
    return id_list
def get_shuffle_indices_test(dh, step, train_part, model, train_step):
    """
    :param dh: data helper
    :param step: epoch index
    :param train_part: 'relation' or another training part
    :param model: train / valid / test
    :param train_step: global training step
    :return: list of sample ids to evaluate
    """
    if True:  # train_part == 'relation':
        if model == "valid":
            if config.cc_compare('valid_model', 'only_error'):
                f1s = ct.file_read_all_lines_strip(config.cc_par('valid_only_error_valid'))
                id_list = [int(x) for x in f1s]
            else:
                id_list = ct.get_static_id_list_debug(len(dh.train_question_list_index))
        else:
            if config.cc_compare('valid_model', 'only_error'):
                f1s = ct.file_read_all_lines_strip(config.cc_par('valid_only_error_test'))
                id_list = [int(x) for x in f1s]
            else:
                id_list = ct.get_static_id_list_debug_test(len(dh.test_question_list_index))
    # (The previous combine_test read/write branch, identical to the version shown
    #  above, is commented out in this revision.)
    return id_list
def __init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
             need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first):
    bilstm.__init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
                    need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first)
    self.model_type = "Dis"
    self.learning_rate = FLAGS.gan_learn_rate

    with tf.name_scope("output"):
        # Plain pairwise hinge loss: max(0, 0.05 - (pos - neg))
        self.losses = tf.maximum(0.0, tf.subtract(0.05, tf.subtract(self.score12, self.score13)))
        self.loss = tf.reduce_sum(self.losses)  # + self.l2_reg_lambda * self.l2_loss
        # Reward for the generator: rescaled sigmoid, no log
        self.reward = 2.0 * (tf.sigmoid(tf.subtract(0.05, tf.subtract(self.score12, self.score13))) - 0.5)
        self.positive = tf.reduce_mean(self.score12)  # cosine(q, pos)
        self.negative = tf.reduce_mean(self.score13)  # cosine(q, neg)
        self.correct = tf.equal(0.0, self.losses)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, "float"), name="accuracy")

    if config.cc_par('optimizer_method') == optimizer_m.gan:
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)  # Adam optimizer
        grads_and_vars = optimizer.compute_gradients(self.loss)
        # Clip each gradient to [-1, 1]; skip variables that have no gradient
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                      for grad, var in grads_and_vars if grad is not None]
        self.train_op = optimizer.apply_gradients(capped_gvs, global_step=self.global_step)
    else:  # origin
        self.global_step = tf.Variable(0, name="globle_step", trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), FLAGS.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(1e-1)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step)
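# --- Illustration (not part of the model) ------------------------------------
# A minimal NumPy sketch of the pairwise hinge loss and reward built above,
# assuming `pos` / `neg` are cosine similarities; the sample values are
# hypothetical and only mirror the TF formulas in the constructor.
import numpy as np

def hinge_loss_and_reward(pos, neg, margin=0.05):
    """Return (loss per pair, reward per pair) for cosine scores pos/neg."""
    diff = margin - (pos - neg)                          # same term as tf.subtract(0.05, score12 - score13)
    loss = np.maximum(0.0, diff)                         # hinge: zero once pos beats neg by the margin
    reward = 2.0 * (1.0 / (1.0 + np.exp(-diff)) - 0.5)   # rescaled sigmoid in (-1, 1), no log
    return loss, reward

pos = np.array([0.9, 0.4])
neg = np.array([0.2, 0.6])
print(hinge_loss_and_reward(pos, neg))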
def __init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
             need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first):
    bilstm.__init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
                    need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first)
    self.model_type = "Gen"
    self.learning_rate = FLAGS.gan_learn_rate

    self.reward = tf.placeholder(tf.float32, shape=[None], name='reward')
    self.neg_index = tf.placeholder(tf.int32, shape=[None], name='neg_index')

    # minimize attention
    # self.gan_score = self.score13 - self.score12  # cosine(q, neg) - cosine(q, pos)
    self.gan_score = tf.subtract(self.score13, self.score12)  # cosine(q, neg) - cosine(q, pos)
    self.dns_score = self.score13
    # predicts = tf.nn.softmax(logits=logits, dim=-1): softmax amplifies the entries with a
    # larger share. By default it operates on a rank-1 tensor (dim can be set for higher
    # ranks, but not rank 0), whereas tf.nn.sigmoid works element-wise.
    # This only moves the computation that used to happen outside TF into the TF graph.
    # self.batch_scores = tf.nn.softmax(self.score13 - self.score12)
    self.batch_scores = tf.nn.softmax(tf.subtract(self.score13, self.score12))  # slightly changed
    # self.all_logits = tf.nn.softmax(self.score13)
    self.prob = tf.gather(self.batch_scores, self.neg_index)
    # Negative mean of log(prob of the sampled negative) * (reward of that negative).
    # "Incompatible shapes: [5] vs. [100]" means reward and neg_index were fed with
    # different batch sizes.
    self.gan_loss = -tf.reduce_mean(tf.log(self.prob) * self.reward)  # + l2_reg_lambda * self.l2_loss

    # Optimizer part
    if config.cc_par('optimizer_method') == optimizer_m.gan:
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        # Adam optimizer with the configured learning rate (was 0.05; suspected too fast, changed to 0.02)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.gan_loss)  # compute all gradients
        self.gan_updates = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)  # back-prop
    else:
        self.global_step = tf.Variable(0, name="globle_step", trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.gan_loss, tvars), FLAGS.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(1e-1)
        self.gan_updates = optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step)
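# --- Illustration (not part of the model) ------------------------------------
# A minimal NumPy sketch of the generator's REINFORCE-style objective above:
# softmax over (neg - pos) scores, gather the sampled negatives, then
# loss = -mean(log(prob) * reward). All arrays below are hypothetical.
import numpy as np

def gan_loss(score_neg, score_pos, neg_index, reward):
    logits = score_neg - score_pos                   # per-candidate gan_score
    probs = np.exp(logits) / np.exp(logits).sum()    # batch_scores (softmax over candidates)
    picked = probs[neg_index]                        # prob of the sampled negatives
    return -np.mean(np.log(picked) * reward)         # gan_loss

score_neg = np.array([0.3, 0.8, 0.1, 0.5])
score_pos = np.array([0.7, 0.7, 0.7, 0.7])
print(gan_loss(score_neg, score_pos, neg_index=np.array([1, 3]), reward=np.array([0.4, -0.1])))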
def get_shuffle_indices_train(total, step, train_part, model, train_step):
    """
    :param total: number of training samples
    :param step: epoch index
    :param train_part: 'relation' or another training part
    :param model: train / valid / test
    :param train_step: global training step
    :return: shuffled sample indices
    """
    if train_part == 'relation':
        shuffle_indices = np.random.permutation(np.arange(total))  # shuffle the sample indices
        shuffle_indices1 = [str(x) for x in list(shuffle_indices)]
        # train_step, training mode, training part
        ct.just_log(config.cc_par('combine'),
                    '%s\t%s\t%s\t%s' % (train_step, model, train_part, '\t'.join(shuffle_indices1)))
    else:
        f1s = ct.file_read_all_lines_strip(config.cc_par('combine'))
        line = ''
        exist = False
        for l1 in f1s:
            if str(l1).split('\t')[0] == str(train_step):
                line = str(l1)
                exist = True
                break
        if exist:
            line_split = line.split('\t')
            line_split = line_split[3:]
            line_split = [int(x) for x in line_split]
            shuffle_indices = np.array(line_split)
            ct.print('get_shuffle_indices_train exist %s' % train_step, 'shuffle_indices_train')
        else:  # not recorded yet, so generate our own
            shuffle_indices = np.random.permutation(np.arange(total))  # shuffle the sample indices
            ct.print('get_shuffle_indices_train not exist %s' % train_step, 'shuffle_indices_train')
        # train_step, training mode, training part
        # ct.file_wirte_list(config.cc_par('combine'),
        #                    '%s\t%s\t%s\t%s' % (train_step, model, train_part, '\t'.join(shuffle_indices)))
    return shuffle_indices
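# --- Illustration (not part of the model) ------------------------------------
# A minimal sketch of the idea behind the "combine" file above: one run writes its
# permutation keyed by train_step, a later run re-reads that permutation so both
# training parts visit the samples in the same order. The file name and format
# below are hypothetical simplifications of config.cc_par('combine').
import numpy as np

def save_permutation(path, train_step, indices):
    with open(path, 'a', encoding='utf-8') as f:
        f.write('%d\t%s\n' % (train_step, '\t'.join(str(i) for i in indices)))

def load_permutation(path, train_step, total):
    try:
        with open(path, encoding='utf-8') as f:
            for row in f:
                cols = row.rstrip('\n').split('\t')
                if cols[0] == str(train_step):
                    return np.array([int(x) for x in cols[1:]])
    except FileNotFoundError:
        pass
    return np.random.permutation(total)  # fall back to a fresh shuffle

save_permutation('combine_demo.txt', 0, np.random.permutation(5))
print(load_permutation('combine_demo.txt', 0, 5))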
def transe_calculate_loss(self):
    # train
    # ori_cand is the similarity.
    # Question features and pos / neg features:
    #   self.ori_q_feat,     self.cand_q_feat,     self.neg_q_feat
    #   self.ner_ori_q_feat, self.ner_cand_q_feat, self.ner_neg_q_feat
    #   self.ans_ori_q_feat, self.ans_cand_q_feat, self.ans_neg_q_feat
    margin = config.cc_par('loss_margin')
    # test
    #   self.test_q_feat_out, self.test_r_feat_out
    #   self.ner_test_q_out,  self.ner_test_r_out
    #   self.ans_test_q_out,  self.ans_test_r_out

    # distance = S + P - O
    distance_pos = self.ner_cand_q_feat + self.cand_q_feat - self.ans_cand_q_feat
    # |S_pos + P_pos - O_pos| - |S_neg + P_neg - O_pos|
    distance_neg = self.ner_neg_q_feat + self.neg_q_feat - self.ans_cand_q_feat
    # distance_pos = head_pos + relation_pos - tail_pos
    # distance_neg = head_neg + relation_neg - tail_neg
    self.score_func = 'L1'

    # Test-time distance: S + P
    self.distance_test = tf.abs(self.ner_test_r_out + self.test_r_feat_out)
    # Alternative: S + P - O = KB_S + KB_P - (s' + p')
    # self.distance_test = tf.abs(self.ner_test_q_out + self.test_q_out - self.ans_test_q_out)

    # Score
    if self.score_func == 'L1':  # L1 score
        self.transe_score = tf.abs(self.distance_test)  # , axis=1
        self.transe_score_for_cosine = tf.abs(self.distance_test)  # , axis=1
    else:
        self.transe_score = tf.square(self.distance_test)  # , axis=1

    # A list of 12 items, each a 200-dimensional vector
    with tf.name_scope('transe_loss'):
        if self.score_func == 'L1':  # L1 score
            score_pos = tf.reduce_sum(tf.abs(distance_pos))  # , axis=1
            score_neg = tf.reduce_sum(tf.abs(distance_neg))
        else:  # L2 score
            score_pos = tf.reduce_sum(tf.square(distance_pos))
            score_neg = tf.reduce_sum(tf.square(distance_neg))
            raise Exception('dead end: the L2 branch is not supported')
        # self.transe_loss = tf.reduce_sum(tf.nn.relu(margin + score_pos - score_neg), name='max_margin_loss')
        self.transe_loss = tf.nn.relu(margin + score_pos - score_neg)
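# --- Illustration (not part of the model) ------------------------------------
# A minimal NumPy sketch of the TransE margin loss computed above:
# score(h, r, t) = ||h + r - t||_1 and loss = max(0, margin + score_pos - score_neg).
# The vectors are hypothetical stand-ins for the learned S/P/O features.
import numpy as np

def transe_l1(h, r, t):
    return np.sum(np.abs(h + r - t))

h = np.array([0.1, 0.2]); r = np.array([0.3, -0.1]); t = np.array([0.4, 0.1])
t_neg = np.array([-0.5, 0.9])
margin = 1.0
score_pos = transe_l1(h, r, t)       # small for a true triple
score_neg = transe_l1(h, r, t_neg)   # larger for a corrupted triple
loss = max(0.0, margin + score_pos - score_neg)
print(score_pos, score_neg, loss)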
def __init__(self, f1=''):
    self.min_max_scaler = preprocessing.MinMaxScaler()
    self.all_datas = []
    # f1 = '../data/nlpcc2016/8-logistics/logistics-2018-03-10.txt_bak.txt'
    f1s = ct.file_read_all_lines_strip(f1)
    self.train_data = []
    self.test_data = []
    index = -1
    for f1_l in f1s:
        index += 1
        need_skip = False
        if str(f1_l).__contains__('NULL'):
            need_skip = True
        if str(f1_l).__contains__('####'):
            need_skip = True
        # changed: lines with this marker are no longer skipped, the marker is stripped instead
        if str(f1_l).__contains__('@@@@@@'):
            f1_l = str(f1_l).replace('1@@@@@@', '').replace('@@@@@@', '')
            # need_skip = True
        if need_skip:  # in practice nothing gets skipped here
            print(f1_l, 'skip')
            continue
        # if index < config.cc_par('real_split_train_test_skip'):  # <= int(len(f1s)*0.8):
        m1 = False
        if m1:
            is_train = index < config.cc_par('real_split_train_test_skip')
        else:
            is_train = index < int(len(f1s) * 0.8)
        if is_train:
            self.train_data.append(self.extract_line(f1_l))
        else:
            self.test_data.append(self.extract_line(f1_l))
    # normalization could be applied here
    print('init ok')
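# --- Illustration (not part of the model) ------------------------------------
# A minimal sketch of the 80/20 split used above: the first 80% of lines (in file
# order) become training data, the rest become test data. `lines` is a
# hypothetical stand-in for the file contents.
lines = ['sample %d' % i for i in range(10)]
cut = int(len(lines) * 0.8)
train_data = lines[:cut]   # first 80% by position, not a random split
test_data = lines[cut:]
print(len(train_data), len(test_data))  # 8 2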
def main():
    with tf.device("/gpu"):
        session_conf = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement,
                                      log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        # Important: whether to restore the model, the loss part, and the number of relations
        model = FLAGS.mode
        test_style = True
        ct.print("tf:%s should be 1.2.1 model:%s " % (str(tf.__version__), model))  # 1.2.1
        ct.print("mark:%s " % config.cc_par('mark'), 'mark')
        ct.just_log2("info", now)
        ct.just_log2("result", now)
        ct.just_log2("info", get_config_msg())
        ct.print(get_config_msg(), "mark")
        ct.just_log3("test_check",
                     "mode\tid\tglobal_id\tglobal_id_in_origin\tquestion\tentity\tpos\tanswer\tr1\tr2\tr3\n")
        ct.log3(now)
        msg1 = "t_relation_num:%d train_part:%s loss_part:%s" % \
               (config.cc_par('t_relation_num'), config.cc_par('train_part'), config.cc_par('loss_part'))
        ct.print(msg1)
        msg1 = 'restore:%s use_alias_dict:%s' % (config.cc_par('restore_model'), config.cc_par('use_alias_dict'))
        ct.print(msg1)
        if config.cc_par('restore_model'):
            ct.print(config.cc_par('restore_path'))
        embedding_weight = None
        error_test_dict = dict()
        valid_test_dict = dict()

        # 1 Read all the data and return labelled batches {data.x, data.label}
        dh = data_helper.DataClass(model, "test")
        if FLAGS.word_model == "word2vec_train":
            embedding_weight = dh.embeddings

        # 3 Build the LSTM model
        # loss_type = "pair"
        discriminator = Discriminator(
            max_document_length=dh.max_document_length,  # timesteps
            word_dimension=FLAGS.word_dimension,         # dimension of a single word
            vocab_size=dh.converter.vocab_size,          # embedding_size of the embedding matrix W
            rnn_size=FLAGS.rnn_size,                     # hidden layer size
            model=model,
            need_cal_attention=config.cc_par('d_need_cal_attention'),
            need_max_pooling=FLAGS.need_max_pooling,
            word_model=FLAGS.word_model,
            embedding_weight=embedding_weight,
            need_gan=True, first=True)
        # generator = Generator(...)  (disabled: same arguments but with
        #     need_cal_attention=config.cc_par('g_need_cal_attention') and first=False)

        ct.print("max_document_length=%s,vocab_size=%s " % (str(dh.max_document_length), str(dh.converter.vocab_size)))

        # Initialization
        init = tf.global_variables_initializer()
        merged = tf.summary.merge_all()
        with sess.as_default():
            writer = tf.summary.FileWriter(ct.log_path() + "\\log\\", sess.graph)
            sess.run(init)
            loss_dict = dict()
            loss_dict['loss'] = 0
            loss_dict['pos'] = 0
            loss_dict['neg'] = 0

            # Restore the model if required
            if config.cc_par('restore_model'):
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)
                save_path = config.cc_par('restore_path')
                ct.print('restore:%s' % save_path, 'model')
                saver.restore(sess, config.cc_par('restore_path'))

            # 1 NER, part 1
            print('Loading the alias dictionary:')
            dh.bh.stat_dict('../data/nlpcc2016/4-ner/extract_entitys_all.txt')
            dh.bh.init_ner(f_in2='../data/nlpcc2016/4-ner/extract_e/e1.tj.txt')
            print('input:')
            line = '红楼梦的作者是谁?'  # "Who is the author of Dream of the Red Chamber?"  # input()
            _best_p, _best_s = ner_rel_analyisis(dh, discriminator, line, sess)

            # 2 NER with the LSTM
            hh_dh = dh
            hh_discriminator = discriminator
            hh_sess = sess
            print(_best_s)
            print(_best_p)
            return hh_dh, hh_discriminator, hh_sess
from lib.baike_helper import baike_helper, baike_test
from lib.classification_helper import classification
from lib.data_helper import DataClass
from lib.config import config
from lib.ct import ct
from lib.pretreatment import pretreatment

# One-click error correction
if __name__ == '__main__':
    # bkt = baike_test()
    bkh = baike_helper(config.cc_par('alias_dict'))
    cf = classification()

    # -------------- Preprocess the test part
    if False:
        pretreatment.re_write(f1='../data/nlpcc2016/10-test/nlpcc2018.kbqa.test',
                              f2='../data/nlpcc2016/10-test/test.txt')
    if False:
        pretreatment.stat_all_space(f1='../data/nlpcc2016/10-test/test.txt')
    # Rewrite the alias (mention-to-id) references
    if False:
        pretreatment.re_write_m2id(
            f1='../data/nlpcc2016/1-origin/nlpcc-iccpol-2016.kbqa.kb.mention2id',
            f_out='../data/nlpcc2016/4-ner/extract_e/e1.dict.v2.txt')
    # Subject length statistics over the entities
    if False:
        bkh.statistics_subject_len(
            f_in='../data/nlpcc2016/4-ner/extract_e/e1.dict.v2.txt',
            f_out='../data/nlpcc2016/4-ner/extract_e/e1.tj.v2-2.txt')
def main():
    time.sleep(0.5)  # sleep 0.5 s so the previous process can exit
    now = "\n\n\n" + str(datetime.datetime.now().isoformat())
    # test runs the full data; small uses a small portion; debug runs only once
    model = FLAGS.mode
    ct.print("tf:%s should be 1.2.1 model:%s " % (str(tf.__version__), model))  # 1.2.1
    ct.just_log2("info", now)
    ct.just_log2("valid", now)
    ct.just_log2("test", now)
    ct.just_log2("info", get_config_msg())
    ct.log3(now)
    embedding_weight = None
    error_test_dict = dict()
    valid_test_dict = dict()

    # 1 Read all the data and return labelled batches {data.x, data.label}
    dh = data_helper.DataClass(model)
    if FLAGS.word_model == "word2vec_train":
        embedding_weight = dh.embeddings

    # 3 Build the LSTM model
    ct.print("max_document_length=%s,vocab_size=%s " % (str(dh.max_document_length), str(dh.converter.vocab_size)))
    lstm = mynn.CustomNetwork(max_document_length=dh.max_document_length,  # timesteps
                              word_dimension=FLAGS.word_dimension,         # dimension of a single word
                              vocab_size=dh.converter.vocab_size,          # embedding_size of the embedding matrix W
                              rnn_size=FLAGS.rnn_size,                     # hidden layer size
                              model=model,
                              need_cal_attention=FLAGS.need_cal_attention,
                              need_max_pooling=FLAGS.need_max_pooling,
                              word_model=FLAGS.word_model,
                              embedding_weight=embedding_weight,
                              need_gan=False)

    # 4 ----------------------------------- set up the loss -----------------------------------
    global_step = tf.Variable(0, name="globle_step", trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(lstm.loss, tvars), FLAGS.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(1e-1)
    train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    # Initialization
    init = tf.global_variables_initializer()
    merged = tf.summary.merge_all()
    with tf.Session().as_default() as sess:
        writer = tf.summary.FileWriter("log/", sess.graph)
        sess.run(init)
        embeddings = []
        use_error = False
        error_test_q_list = []
        error_test_pos_r_list = []
        error_test_neg_r_list = []
        # Dump all training and test questions for inspection
        # dh.build_train_test_q()
        train_step = 0
        max_acc = 0
        for step in range(FLAGS.epoches):
            toggle_line = ">>>>>>>>>>>>>>>>>>>>>>>>>step=%d,total_train_step=%d " % (step, len(dh.q_neg_r_tuple))
            ct.log3(toggle_line)
            ct.just_log2("info", toggle_line)

            # Prepare the data
            my_generator = ''
            if FLAGS.fix_model and len(error_test_q_list) != 0:
                my_generator = dh.batch_iter_wq_debug_fix_model(error_test_q_list, error_test_pos_r_list,
                                                                error_test_neg_r_list, FLAGS.batch_size)
                use_error = True
                toggle_line = "\n\n\n\n\n------------------use_error to train"
                ct.log3(toggle_line)
                ct.just_log2("info", toggle_line)
                ct.just_log2("valid", 'use_error to train')
                ct.just_log2("test", 'use_error to train')
            elif ct.is_debug_few():
                toggle_line = "\n------------------is_debug_few to train"
                ct.log3(toggle_line)
                ct.just_log2("info", toggle_line)
                train_part = config.cc_par('train_part')
                model = 'train'
                # For the relation part the questions are generated, otherwise they are read
                shuffle_indices = get_shuffle_indices_train(len(dh.q_neg_r_tuple_train), step, train_part,
                                                            model, train_step)
                if train_part == 'relation':
                    my_generator = dh.batch_iter_wq_debug(dh.train_question_list_index,
                                                          dh.train_relation_list_index,
                                                          shuffle_indices, FLAGS.batch_size, train_part)
                else:
                    my_generator = dh.batch_iter_wq_debug(dh.train_question_list_index,
                                                          dh.train_answer_list_index,
                                                          shuffle_indices, FLAGS.batch_size, train_part)
            else:  # unused
                train_q, train_cand, train_neg = \
                    dh.batch_iter_wq(dh.train_question_list_index, dh.train_relation_list_index, FLAGS.batch_size)

            toggle_line = "\n==============================train_step=%d\n" % train_step
            ct.just_log2("info", toggle_line)
            ct.log3(toggle_line)

            # Train on the batches
            for gen in my_generator:
                toggle_line = "\n==============================train_step=%d\n" % train_step
                ct.just_log2("info", toggle_line)
                ct.log3(toggle_line)
                if not use_error:
                    train_step += 1
                train_q = gen[0]
                train_cand = gen[1]
                train_neg = gen[2]
                run_step2(sess, lstm, step, train_step, train_op, train_q, train_cand, train_neg,
                          merged, writer, dh, use_error)
                if use_error:
                    continue

                # ------------------------- test
                # 1 Source data: training data OR validation data OR test data
                # Validation
                valid_test_dict, error_test_dict, max_acc, all_right, \
                error_test_q_list, error_test_pos_r_list, error_test_neg_r_list \
                    = valid_test_checkpoint(train_step, dh, step, sess, lstm, merged, writer, train_op,
                                            valid_test_dict, error_test_dict, max_acc)
                if config.cc_par('keep_run') and all_right and step > 2:
                    del lstm  # release resources
                    del sess
                    return True

            if use_error:
                error_test_q_list.clear()
                error_test_pos_r_list.clear()
                error_test_neg_r_list.clear()
                use_error = False

            toggle_line = "<<<<<<<<<<<<<<<<<<<<<<<<<<<<step=%d\n" % step
            # ct.just_log2("test", toggle_line)
            ct.just_log2("info", toggle_line)
            ct.log3(toggle_line)
def valid_test_checkpoint(train_step, dh, step, sess, lstm, merged, writer, train_op,
                          valid_test_dict, error_test_dict, acc_max=0):
    test_batchsize = FLAGS.test_batchsize  # use the same size for validation and test, for now

    # if (train_step + 1) % FLAGS.evaluate_every == 0:
    if True:
        model = "valid"
        train_part = config.cc_par('train_part')
        if train_part == 'relation':
            train_part_1 = dh.train_relation_list_index
        else:
            train_part_1 = dh.train_answer_list_index
        id_list = get_shuffle_indices_test(dh, step, train_part, model, train_step)
        # if model == "valid":
        #     id_list = ct.get_static_id_list_debug(len(dh.train_question_list_index))
        # else:
        #     id_list = ct.get_static_id_list_debug_test(len(dh.test_question_list_index))
        # id_list = ct.random_get_some_from_list(id_list, FLAGS.evaluate_batchsize)
        acc_valid, error_test_q_list, error_test_pos_r_list, error_test_neg_r_list, maybe_list_list, \
        maybe_global_index_list, questions_ok_dict = \
            valid_batch_debug(sess, lstm, 0, train_op, merged, writer, dh, test_batchsize,
                              dh.train_question_list_index, train_part_1, model,
                              dh.train_question_global_index, train_part, id_list)
        msg = "step:%d train_step %d valid_batchsize:%d acc:%f " % (step, train_step, test_batchsize, acc_valid)
        ct.print(msg)
        ct.just_log2("valid", msg)
        valid_test_dict = log_error_questions(dh, model, error_test_q_list, error_test_pos_r_list,
                                              error_test_neg_r_list, valid_test_dict, maybe_list_list,
                                              acc_valid, maybe_global_index_list)
        # ct.print("===========step=%d" % step, "maybe_possible")

    # if FLAGS.need_test and (train_step + 1) % FLAGS.test_every == 0:
    if True:
        model = "test"
        train_part = config.cc_par('train_part')
        if train_part == 'relation':
            train_part_1 = dh.test_relation_list_index
        else:
            train_part_1 = dh.test_answer_list_index
        id_list = get_shuffle_indices_test(dh, step, train_part, model, train_step)
        acc_test, _1, _2, _3, maybe_list_list, maybe_global_index_list, questions_ok_dict = \
            valid_batch_debug(sess, lstm, step, train_op, merged, writer, dh, test_batchsize,
                              dh.test_question_list_index, train_part_1, model,
                              dh.test_question_global_index, train_part, id_list)
        # The test set is not trained on, but its errors are recorded
        error_test_dict = log_error_questions(dh, model, _1, _2, _3, error_test_dict, maybe_list_list,
                                              acc_test, maybe_global_index_list)
        # _1.clear()
        # _2.clear()
        # _3.clear()
        msg = "step:%d train_step %d valid_batchsize:%d acc:%f " % (step, train_step, test_batchsize, acc_test)
        ct.print(msg)
        ct.just_log2("test", msg)
        ct.print("===========step=%d" % step, "maybe_possible")
        checkpoint(sess, step)

    # Record the results
    all_right = False
    if acc_test >= acc_max and len(dh.maybe_test_questions) > 0:
        msg_list = []
        acc_max = acc_test
        all_right = True
        for index in dh.maybe_test_questions:
            # try:
            ok = questions_ok_dict[int(index)]
            # except Exception as ee1:
            #     print(ee1)
            if not ok:
                all_right = False
            msg = "%s_%s" % (index, ok)
            msg_list.append(msg)
        acc_str = "%s_%s" % (acc_valid, acc_test)
        ct.just_log(config.cc_par('test_ps_result'),
                    "%s\t%s\t%s\t%s" % (step, ct.log_path().split('runs\\')[1], acc_str, '\t'.join(msg_list)))

    return valid_test_dict, error_test_dict, acc_max, all_right, error_test_q_list, error_test_pos_r_list, \
           error_test_neg_r_list
# Restart and keep running
if __name__ == '__main__':
    # for i in range(9693):
    main()
    if config.cc_par('keep_run'):
        gc.collect()
        os.system(config.cc_par('cmd_path'))
def cal_attention(self):
    with tf.name_scope("att_weight"):
        # attention params
        # Set up the weights; attention_matrix_size = embedding size.
        # One weight for every position of the bilstm output of size 2 * self.rnn_size.
        att_W = {
            'Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'Wms': tf.Variable(tf.truncated_normal([self.attention_matrix_size, 1], stddev=0.1))
        }
        q_side_att_W = {
            'q_side_Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'q_side_Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'q_side_Wms': tf.Variable(tf.truncated_normal([self.attention_matrix_size, 1], stddev=0.1))
        }
        # Extract the features
        # print("cal_attention")
        # print(self.ori_q)
        # self.ori_q_feat, self.cand_q_feat = get_feature(self.ori_q, self.cand_a, att_W)
        # self.ori_nq_feat, self.neg_q_feat = get_feature(self.ori_q, self.neg_a, att_W)
        weight_dict = dict()  # ['Wam', 'Wqm', 'Wms']
        weight_dict['Wam'] = 'Wam'
        weight_dict['Wqm'] = 'Wqm'
        weight_dict['Wms'] = 'Wms'
        _AM = config.cc_par('attention_model')  # question side, or (default) answer side
        if _AM == 'q_side':
            self.cand_q_feat, self.ori_q_feat = get_feature(self.cand_a, self.ori_q, att_W, weight_dict)
            self.neg_q_feat, self.ori_nq_feat = get_feature(self.neg_a, self.ori_q, att_W, weight_dict)
            self.test_r_feat_out, self.test_q_feat_out = get_feature(self.test_r_out, self.test_q_out, att_W, weight_dict)
        elif _AM == 'a_side':
            # self.ori_q_feat, self.cand_q_feat = get_feature(self.ori_q, self.cand_a, att_W, weight_dict)
            self.ori_q_feat, self.cand_q_feat = get_feature(self.ori_q, self.cand_a, att_W, weight_dict)
            self.ori_nq_feat, self.neg_q_feat, self.debug = get_feature_debug(self, self.ori_q, self.neg_a, att_W, weight_dict)
            self.test_q_feat_out, self.test_r_feat_out = get_feature(self.test_q_out, self.test_r_out, att_W, weight_dict)
            # NS.V2
            self.ns2_q_feat, self.ns2_r_feat = get_feature(self.ns2_q, self.ns2_r, att_W, weight_dict)
            # self.test_q_feat_out, self.test_r_feat_out, self.debug = get_feature_debug(self, self.test_q_out, self.test_r_out, att_W, weight_dict)
            # # NS.V2
            # self.ns2_q_feat, self.ns2_r_feat, self.debug1 = get_feature_debug2(self, self.ns2_q, self.ns2_r, att_W, weight_dict)
            # print(_AM)
        elif _AM == 'both':
            # a_side: keep the answer-side part
            _, self.cand_q_feat = get_feature(self.ori_q, self.cand_a, att_W, weight_dict)
            _, self.neg_q_feat, self.debug = get_feature_debug(self, self.ori_q, self.neg_a, att_W, weight_dict)
            _, self.test_r_out_bak = get_feature(self.test_q_out, self.test_r_out, att_W, weight_dict)
            # NS.V2
            _, self.ns2_r_feat = get_feature(self.ns2_q, self.ns2_r, att_W, weight_dict)
            # q_side
            q_side_weight_dict = dict()  # ['Wam', 'Wqm', 'Wms']
            q_side_weight_dict['Wam'] = 'q_side_Wam'
            q_side_weight_dict['Wqm'] = 'q_side_Wqm'
            q_side_weight_dict['Wms'] = 'q_side_Wms'
            _, self.ori_q_feat = get_feature(self.cand_a, self.ori_q, q_side_att_W, q_side_weight_dict)
            _, self.ori_nq_feat = get_feature(self.neg_a, self.ori_q, q_side_att_W, q_side_weight_dict)
            _, self.test_q_out_bak = get_feature(self.test_r_out, self.test_q_out, q_side_att_W, q_side_weight_dict)
            # NS.V2
            self.ns2_q_feat, _ = get_feature(self.ns2_q, self.ns2_r, q_side_att_W, q_side_weight_dict)
            self.test_r_out = self.test_r_out_bak
            self.test_q_out = self.test_q_out_bak
            raise Exception('NO NO ')
        else:
            raise Exception('NO NO ')
        # 20180916 negative-sampling experiment: top k
        # self.ns_test_r_pos_out = get_feature()
        # self.ns_test_r_cp_out
        # end
        # 20180906-1--start: NER with cosine similarity
        ner_att_W = {
            'ner_Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'ner_Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'ner_Wms': tf.Variable(tf.truncated_normal([self.attention_matrix_size, 1], stddev=0.1))
        }
        ner_weight_dict = dict()  # ['Wam', 'Wqm', 'Wms']
        ner_weight_dict['Wam'] = 'ner_Wam'
        ner_weight_dict['Wqm'] = 'ner_Wqm'
        ner_weight_dict['Wms'] = 'ner_Wms'
        # Extract the features
        self.ner_ori_q_feat, self.ner_cand_q_feat = get_feature(self.ner_ori_q, self.ner_cand_a, ner_att_W, ner_weight_dict)
        self.ner_ori_nq_feat, self.ner_neg_q_feat = get_feature(self.ner_ori_q, self.ner_neg_a, ner_att_W, ner_weight_dict)
        self.ner_test_q_out, self.ner_test_r_out = get_feature(self.ner_test_q_out, self.ner_test_r_out, ner_att_W, ner_weight_dict)
        # 20180906-1--end

        # 20180910 start: add the answer part
        ans_att_W = {
            'ans_Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'ans_Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.attention_matrix_size], stddev=0.1)),
            'ans_Wms': tf.Variable(tf.truncated_normal([self.attention_matrix_size, 1], stddev=0.1))
        }
        ans_weight_dict = dict()  # ['Wam', 'Wqm', 'Wms']
        ans_weight_dict['Wam'] = 'ans_Wam'
        ans_weight_dict['Wqm'] = 'ans_Wqm'
        ans_weight_dict['Wms'] = 'ans_Wms'
        # Extract the features
        self.ans_ori_q_feat, self.ans_cand_q_feat = get_feature(self.ans_ori_q, self.ans_cand_a, ans_att_W, ans_weight_dict)
        self.ans_ori_nq_feat, self.ans_neg_q_feat = get_feature(self.ans_ori_q, self.ans_neg_a, ans_att_W, ans_weight_dict)
        self.ans_test_q_out, self.ans_test_r_out = get_feature(self.ans_test_q_out, self.ans_test_r_out, ans_att_W, ans_weight_dict)
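# --- Illustration (not part of the model) ------------------------------------
# get_feature is not shown in this section; the sketch below is a plain NumPy
# version of the attentive pooling that the Wam/Wqm/Wms weight shapes suggest:
# M = tanh(H_a @ Wam + broadcast(q @ Wqm)), alpha = softmax(M @ Wms),
# feat = sum over time of alpha-weighted H_a. Shapes and names here are
# assumptions, not the project's exact implementation.
import numpy as np

def attentive_pool(H_a, q_vec, Wam, Wqm, Wms):
    """H_a: (steps, 2*rnn), q_vec: (2*rnn,); returns a weighted answer feature (2*rnn,)."""
    M = np.tanh(H_a @ Wam + q_vec @ Wqm)         # (steps, attn)
    scores = M @ Wms                              # (steps, 1)
    alpha = np.exp(scores) / np.exp(scores).sum() # attention weights over time steps
    return (H_a * alpha).sum(axis=0)              # attention-weighted sum

rng = np.random.default_rng(0)
steps, hidden, attn = 4, 6, 3
feat = attentive_pool(rng.normal(size=(steps, hidden)), rng.normal(size=hidden),
                      rng.normal(size=(hidden, attn)), rng.normal(size=(hidden, attn)),
                      rng.normal(size=(attn, 1)))
print(feat.shape)  # (6,)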
def __init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
             need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first):
    bilstm.__init__(self, max_document_length, word_dimension, vocab_size, rnn_size, model,
                    need_cal_attention, need_max_pooling, word_model, embedding_weight, need_gan, first)
    self.model_type = "Dis"

    with tf.name_scope("output"):
        # Plain pairwise hinge loss: max(0, margin - (pos - neg))
        loss_margin = float(config.cc_par('loss_margin'))
        self.rel_loss = tf.maximum(0.0, tf.subtract(loss_margin, tf.subtract(self.score12, self.score13)))
        # 20180906-1--start: NER with cosine similarity
        self.ner_losses = tf.maximum(0.0, tf.subtract(loss_margin, tf.subtract(self.ner_score12, self.ner_score13)))
        self.ans_losses = tf.maximum(0.0, tf.subtract(loss_margin, tf.subtract(self.ans_score12, self.ans_score13)))
        # 20180906-1--end

        # self.loss = 0
        # if config.cc_par('loss_part').__contains__('relation'):
        self.loss_rel = tf.reduce_sum(self.rel_loss)  # + self.l2_reg_lambda * self.l2_loss
        self.loss_ner = tf.reduce_sum(self.ner_losses)
        # if config.cc_par('loss_part').__contains__('entity'):
        self.loss_e_r = tf.reduce_sum(self.ner_losses) + tf.reduce_sum(self.rel_loss)
        # if config.cc_par('loss_part').__contains__('answer'):
        # self.loss += tf.reduce_sum(self.ans_losses)
        self.loss_ans = tf.reduce_sum(self.ans_losses)
        # if config.cc_par('loss_part').__contains__('transE'):  # temporarily commented out
        self.loss_transe = tf.reduce_sum(self.transe_loss)
        self.loss_e_r_transe = tf.reduce_sum(self.ner_losses) + tf.reduce_sum(self.rel_loss) + \
                               tf.reduce_sum(self.transe_loss)
        # print('three losses are used here')  # self.transe_loss
        # print(self.loss)

        self.correct = tf.equal(0.0, self.rel_loss)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, "float"), name="accuracy")
        self.ner_correct = tf.equal(0.0, self.ner_losses)
        self.ner_accuracy = tf.reduce_mean(tf.cast(self.ner_correct, "float"), name="ner_accuracy")

        # GAN part below
        # self.score12 = cosine(q, pos)
        # self.score13 = cosine(q, neg)
        self.pred_score = tf.subtract(loss_margin, tf.subtract(self.score12, self.score13))
        self.reward = 2.0 * (tf.sigmoid(self.pred_score) - 0.5)  # no log; keeps the value from going negative
        self.positive = tf.reduce_mean(self.score12)  # cosine(q, pos)
        self.negative = tf.reduce_mean(self.score13)  # cosine(q, neg)

    # Keep both optimizers so either can be chosen freely
    # if config.cc_par('optimizer_method') == optimizer_m.gan:
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    if False:  # use the previous optimizer
        # Optimize G
        self.learning_rate = FLAGS.gan_learn_rate  # only used for the GAN part
        optimizer = tf.train.AdamOptimizer(self.learning_rate)  # Adam optimizer
        grads_and_vars = optimizer.compute_gradients(self.loss_rel)
        # Clip each gradient to [-1, 1]; skip variables that have no gradient
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                      for grad, var in grads_and_vars if grad is not None]
        self.train_op_d = optimizer.apply_gradients(capped_gvs, global_step=self.global_step)

    # Optimize D
    # self.global_step = tf.Variable(0, name="globle_step", trainable=False)
    # if config.cc_par('loss_part') == 'entity_relation':
    # print('now training loss_e_r')
    self.train_op_e_r = self.train_op1(self.loss_e_r, self.global_step, FLAGS.max_grad_norm)
    # elif config.cc_par('loss_part') == 'entity':
    # print('now training loss_ner')
    self.train_op_ner = self.train_op1(self.loss_ner, self.global_step, FLAGS.max_grad_norm)
    # elif config.cc_par('loss_part') == 'entity':
    # print('now training loss_rel')
    self.train_op_rel = self.train_op1(self.loss_rel, self.global_step, FLAGS.max_grad_norm)
    self.train_op_e_r_transe = self.train_op1(self.loss_e_r_transe, self.global_step, FLAGS.max_grad_norm)
    # temporarily commented out
    self.train_op_transe = self.train_op1(self.loss_transe, self.global_step, FLAGS.max_grad_norm)
    print('end ')