def read_csv(input_file):
    """Read a labelled separated-value file and return shuffled training pairs.

    Args:
        input_file: Path handed through to ``load_csv`` (header row expected).

    Returns:
        A shuffled list of ``[label_key, content]`` pairs, where ``label_key``
        is the ``dict_label`` key whose value matches the row's label string.
        Rows whose raw content is not a string (e.g. NaN from pandas) are
        dropped.
    """
    df = load_csv(input_file, header=0)
    contents = list(df['content'])
    labels = list(df['label'])

    # Reverse-lookup table: label value -> first matching dict_label key.
    # Built once up front instead of rescanning dict_label's key/value lists
    # for every row (previously O(rows * labels)). setdefault keeps the FIRST
    # key for a duplicated value, matching list.index semantics.
    value_to_key = {}
    for key, value in dict_label.items():
        value_to_key.setdefault(value, key)

    # Filter on the RAW content before str() conversion: the original check
    # ran after str() and therefore could never reject anything.
    pairs = [
        [value_to_key[str(label)], str(content)]
        for label, content in zip(labels, contents)
        if isinstance(content, str)
    ]
    print('Read csv finished!(1)')
    return shuffle_one(pairs)
def _read_csv(cls, input_file):
    """Read a labelled separated-value file and return shuffled training pairs.

    Args:
        cls: Owning class (classmethod-style signature; not used here).
        input_file: Path handed through to ``load_csv`` (header row expected).

    Returns:
        A shuffled list of ``[label_key, content]`` pairs, where ``label_key``
        is the ``dict_label`` key whose value matches the row's label string.
        Rows whose raw content is not a string (e.g. NaN) are dropped.
    """
    df = load_csv(input_file, header=0)
    contents = list(df['content'])
    labels = list(df['label'])

    # Reverse-lookup table: label value -> first matching dict_label key,
    # built once instead of rescanning dict_label per row. setdefault keeps
    # the FIRST key for a duplicated value, matching list.index semantics.
    value_to_key = {}
    for key, value in dict_label.items():
        value_to_key.setdefault(value, key)

    # Filter on the RAW content before str() conversion: the original check
    # ran after str() and therefore could never reject anything.
    pairs = [
        [value_to_key[str(label)], str(content)]
        for label, content in zip(labels, contents)
        if isinstance(content, str)
    ]
    shuffled = shuffle_one(pairs)
    print('Head data:', pairs[0:2])
    # Report the raw row count, as the original did (it printed len(lines),
    # i.e. the pre-filter total).
    print('Length of data:', len(labels))
    print('Read csv finished(2)!')
    return shuffled
ids_test = np.arange(len(input_ids_test)) # 启动图和训练 saver = tf.train.Saver(max_to_keep=100) sess = tf.Session() sess.run(tf.global_variables_initializer()) # 恢复模型参数 MODEL_SAVE_PATH = os.path.join(pwd, 'model') ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) print('Restored model!') with sess.as_default(): for i in range(hp.num_train_epochs): indexs = shuffle_one(ids_train) for batch_num in range(num_batches - 1): i1 = indexs[batch_num * hp.batch_size:min((batch_num + 1) * hp.batch_size, N_train)] # Get features input_id_ = select(input_ids, i1) input_mask_ = select(input_masks, i1) segment_id_ = select(segment_ids, i1) label_id_ = select(label_ids, i1) # Feed dict fd = { MODEL.input_ids: input_id_, MODEL.input_masks: input_mask_, MODEL.segment_ids: segment_id_, MODEL.label_ids: label_id_ }
saver = tf.train.Saver(max_to_keep=hp.max_to_keep) sess = tf.Session() sess.run(tf.global_variables_initializer()) # Load model saved before MODEL_SAVE_PATH = os.path.join(pwd, hp.file_save_model) ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) print('Restored model!') with sess.as_default(): # Tensorboard writer writer = tf.summary.FileWriter(hp.logdir, sess.graph) for i in range(hp.num_train_epochs): indexs = shuffle_one(arr) for j in range(num_batchs - 1): i1 = indexs[j * hp.batch_size:min((j + 1) * hp.batch_size, num_train_samples)] # Get features input_id_ = select(input_ids, i1) input_mask_ = select(input_masks, i1) segment_id_ = select(segment_ids, i1) label_id_ = select(label_ids, i1) # Feed dict fd = { MODEL.input_ids: input_id_, MODEL.input_masks: input_mask_, MODEL.segment_ids: segment_id_, MODEL.label_ids: label_id_