def train(self):
    ''' build the BiLSTM-CRF training graph '''
    with tf.variable_scope('Inputs'):
        self.X_inputs = tf.placeholder(
            tf.int32, [None, self.timestep_size], name='X_input')
        self.y_inputs = tf.placeholder(
            tf.int32, [None, self.timestep_size], name='y_input')
    bilstm_output = self.bi_lstm(self.X_inputs)
    echo(1, 'The shape of BiLSTM Layer output:', bilstm_output.shape)
    with tf.variable_scope('outputs'):
        softmax_w = self.weight_variable(
            [self.hidden_size * 2, self.num_tag])
        softmax_b = self.bias_variable([self.num_tag])
        self.y_pred = tf.matmul(bilstm_output, softmax_w) + softmax_b
    self.scores = tf.reshape(self.y_pred,
                             [-1, self.timestep_size, self.num_tag])
    print('The shape of Output Layer:', self.scores.shape)
    log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
        self.scores, self.y_inputs, self.length)
    self.loss = tf.reduce_mean(-log_likelihood)
    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.train_op = optimizer.minimize(self.loss)
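# --- A minimal, self-contained sketch (not part of the model) of the
# tf.contrib.crf training objective used in train() above, on toy random
# scores; TensorFlow 1.x is assumed, matching the tf.contrib usage here.
import numpy as np
import tensorflow as tf

toy_scores = tf.constant(
    np.random.rand(2, 5, 4).astype(np.float32))           # [batch, steps, tags]
toy_labels = tf.constant(
    np.random.randint(4, size=(2, 5)).astype(np.int32))   # [batch, steps]
toy_lengths = tf.constant(np.array([5, 3], np.int32))     # true lengths

toy_ll, toy_transition = tf.contrib.crf.crf_log_likelihood(
    toy_scores, toy_labels, toy_lengths)
toy_loss = tf.reduce_mean(-toy_ll)  # same form as self.loss above
with tf.Session() as sess:
    # the transition matrix is an internal variable, so initialize first
    sess.run(tf.global_variables_initializer())
    print(sess.run(toy_loss))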
def bert(self, word_set: List, seq: List, n_gram: int = 4):
    ''' bert embed '''
    bert_dir = '../bert/chinese_L-12_H-768_A-12'
    bert = BertModel.from_pretrained(bert_dir)
    # bert_dir already points at chinese_L-12_H-768_A-12
    tokenizer = BertTokenizer.from_pretrained(f'{bert_dir}/vocab.txt')
    word_set = np.array(word_set)
    num_seq, num_word = word_set.shape
    echo(0, num_seq, num_word)
    origin_set_one = word_set.reshape(-1)
    id2word = {jj: ii for ii, jj in self.word2id.items()}
    n_gram_dict = self.prepare_n_gram(origin_set_one, seq, n_gram)
    n_gram_word = [
        ' '.join([id2word[jj] for jj, kk in ii.items() if kk])
        for ii in n_gram_dict
    ]
    transformed = []
    for ii in n_gram_word:
        ids = torch.tensor(
            [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(ii))])
        transformed.append(
            bert(ids, output_all_encoded_layers=False)[-1][0].detach().numpy())
    # pad to a full [num_seq, num_word] grid; the feature dimension is the
    # BERT pooled-output size, not the vocabulary size
    num_fea = len(transformed[0])
    transformed += [np.zeros(num_fea)] * (num_seq * num_word - len(n_gram_dict))
    echo(1, 'Bert Over')
    return np.array(transformed).reshape([num_seq, num_word, num_fea])
def test_evaluation():
    echo(3, '------- Begin to test evaluation script -------')
    origin_test_set, _ = processing_pos_data(POS_DATA['Test'])
    # origin_test_set = [origin_test_set[0]]  # test for first row
    seq = [len(ii) for ii in origin_test_set]
    y = sum([[int(CWS_LAB2ID[jj[1]] > 1) for jj in ii]
             for ii in origin_test_set], [])
    y_pre = np.random.randint(4, size=[len(y)])
    y_pre = [int(ii > 1) for ii in y_pre]

    # evaluation for f1 score: force a word boundary at the end of
    # every sentence before scoring
    change_idx, idx = [], -1
    for ii in seq:
        change_idx.append(ii + idx)
        idx += ii
    for ii in change_idx:
        y_pre[ii] = 1
    p, r, macro_f1 = fastF1(y, y_pre)
    echo(1, f"P: {p:.2f}%, R: {r:.2f}%, Macro_f1: {macro_f1:.2f}%")

    idx, test_predict_text = 0, []
    for ii in origin_test_set:
        temp_len = len(ii)
        temp_tag = y_pre[idx:idx + temp_len]
        temp_text = ''.join([
            f'{kk[0]}{" " if temp_tag[jj] else ""}'
            for jj, kk in enumerate(ii)
        ]).strip()
        test_predict_text.append(temp_text)
        idx += temp_len
    with open(RESULT['CWS'], 'w') as f:
        f.write('\n'.join(test_predict_text))
    total_evaluate(RESULT['CWS'], CWS_DATA['Test_POS'], POS_DATA['Train'],
                   POS_DATA['Train'], verbose=True)
def __init__(self, node_num, task, comm_req, sys_exec_profile,
             sys_comm_profile, parent_list):
    """
    node_num: index of the node.
    task: task name (e.g. "A", "B").
    comm_req: dictionary {next task: data tx required}
    sys_exec_profile: dictionary {task: execution time}
    sys_comm_profile: 2D matrix, element [i][j] specifies the comm cost
        between node i and j.
    parent_list: [list of parent tasks]
    """
    self.node_num = node_num
    self.task = task
    self.dest = []
    self.parent_list = parent_list
    # queues: {"A": [4, Lock()], "B": [0, Lock()]}
    self.queues = dict()
    for parent in parent_list:
        self.queues[parent] = [0, threading.Lock()]
    util.echo("Generating node {} with task {}".format(
        self.node_num, self.task))
    self.exec_times = sys_exec_profile[self.task]
    self.sys_comm_profile = sys_comm_profile[self.node_num]
    self.comm_req = comm_req
    # TODO: store the finish times for the jobs
    self.finish_times = []
    self.num_finished_tasks = 0
    self.total_exec_time = 0
    self.set_status(True)
def statistical_data(self,
                     train_set: List,
                     dev_set: List,
                     test_set: List,
                     do_reshape: bool = True):
    ''' statistical data '''
    if embed_type == EMBED_TYPE.FAST_TEXT or embed_type == EMBED_TYPE.BERT:
        pre_set = [*train_set, *test_set, *dev_set]
    else:
        pre_set = train_set
    word_list = sum([[jj[0] for jj in ii] for ii in pre_set], [])
    word_set = ['[OOV]', *list(set(word_list))]
    echo(1, len(word_list))
    word2id = {jj: ii for ii, jj in enumerate(word_set)}
    # store the vocabulary on both branches: the embed methods read it later
    self.word2id = word2id
    if not do_reshape:
        train_set = [[(word2id[jj] if jj in word2id else 0,
                       con.CWS_LAB2ID[kk]) for jj, kk in ii]
                     for ii in train_set]
        dev_set = [[(word2id[jj] if jj in word2id else 0,
                     con.CWS_LAB2ID[kk]) for jj, kk in ii]
                   for ii in dev_set]
        test_set = [[(word2id[jj] if jj in word2id else 0,
                      con.CWS_LAB2ID[kk]) for jj, kk in ii]
                    for ii in test_set]
        self.train_set = train_set
        self.dev_set = dev_set
        self.test_set = test_set
    else:
        # a way to reduce memory usage
        self.train_set = self.reshape_data(train_set)
        self.dev_set = self.reshape_data(dev_set)
        self.test_set = self.reshape_data(test_set)
def prepare_data(mode=None):
    ''' prepare data '''
    if not os.path.exists(param.PKL_DIR):
        echo(3, 'mkdir data/pkl')
        os.makedirs(param.PKL_DIR)
    basic_prepare_data(param.SA_TYPE.CWS, mode)
    basic_prepare_data(param.SA_TYPE.NER, mode)
def one_hot(self, word_set: List):
    ''' one hot embed '''
    word_set = np.array(word_set)
    num_fea = len(self.word2id)
    num_seq, num_word = word_set.shape
    echo(0, num_seq, num_word, num_fea)
    return np.squeeze(np.eye(num_fea)[word_set.reshape(-1)]).reshape(
        [num_seq, num_word, num_fea]).astype(np.int16)
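# --- A toy illustration (hypothetical values) of the np.eye one-hot trick
# used in one_hot() above: indexing an identity matrix by the flattened
# ids yields all the one-hot rows in one shot.
import numpy as np

toy_ids = np.array([[0, 2, 1], [3, 0, 0]])    # [num_seq, num_word]
toy_num_fea = 4                               # vocabulary size
toy_one_hot = np.eye(toy_num_fea)[toy_ids.reshape(-1)].reshape(
    [2, 3, toy_num_fea]).astype(np.int16)
print(toy_one_hot.shape)                      # (2, 3, 4)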
def run(self):
    while self.on_status:  # on: True, off: False
        if self.execute():
            t = threading.Thread(target=self.send_outputs,
                                 args=(self.num_finished_tasks, ))
            t.daemon = True
            t.start()
    # TODO: solve the atomic print issue.
    util.echo("** Turning off node {} **".format(
        self.node_num))  # Why does this not get printed????
def predict(self):
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.latest_checkpoint('./checkpoint')
    saver = tf.train.Saver()
    saver.restore(sess, ckpt)
    _ = self.test_epoch(self.data_test, sess, 'Test')
    echo(1, 'Over')
def load_word_once(self, origin_set: List, MAX_LEN: int) -> List:
    ''' load word once '''
    data_set = [[
        self.word2id[jj[0]] if jj[0] in self.word2id else 0 for jj in ii
    ] + [0] * (MAX_LEN - len(ii)) for ii in origin_set]
    label = [[con.CWS_LAB2ID[jj[1]] for jj in ii] + [0] * (MAX_LEN - len(ii))
             for ii in origin_set]
    seq = [len(ii) for ii in origin_set]
    echo(1, np.array(data_set).shape, np.array(seq).shape)
    return [np.array(data_set), np.array(label), np.array(seq), origin_set]
def char_embed(self, word_set: List):
    ''' char embed '''
    if embed_type == EMBED_TYPE.FAST_TEXT:
        embed_path = 'embedding/gigaword_chn.all.a2b.uni.ite50.vec'
    embed = self.load_embedding(embed_path)
    echo(1, 'len of embed', len(embed))
    word_set = np.array(word_set)
    num_fea = len(list(embed.values())[0])
    num_seq, num_word = word_set.shape
    echo(0, num_seq, num_word, num_fea)
    word_set = word_set.reshape(-1)
    result_set = np.array([
        embed[ii] if ii in embed else np.zeros(num_fea) for ii in word_set
    ])
    return result_set.reshape([num_seq, num_word, num_fea])
def load_word(self, train_set: List, dev_set: List, test_set: List,
              predict_set: List):
    ''' load word '''
    total_set = [*train_set, *dev_set, *test_set, *predict_set]
    word_list = sum([[jj[0] for jj in ii] for ii in total_set], [])
    word_set = ['[PAD]', *list(set(word_list))]
    echo(1, len(word_list))
    word2id = {jj: ii for ii, jj in enumerate(word_set)}
    self.word2id = word2id
    MAX_LEN = max([len(ii) for ii in total_set])
    self.train_set = self.load_word_once(train_set, MAX_LEN)
    self.dev_set = self.load_word_once(dev_set, MAX_LEN)
    self.test_set = self.load_word_once(test_set, MAX_LEN)
    self.predict_set = self.load_word_once(predict_set, MAX_LEN)
    self.MAX_LEN = MAX_LEN
def basic_prepare_data(sa_type: param.SA_TYPE, mode=None):
    ''' basic prepare data function '''
    read_function = read_cws_data if param.SA_TYPE.CWS == sa_type else read_ner_data
    types = 'CWS' if param.SA_TYPE.CWS == sa_type else 'NER'
    echo(3, 'prepare', types, 'data')
    train_set = read_function(param.ORIGIN_SET_PATH('Train')[types])
    test_set = read_function(param.ORIGIN_SET_PATH('Test')[types])
    train_set, dev_set = train_test_split(train_set, test_size=0.3)
    if mode is None:
        dump_bigger(train_set, param.PKL_SET_PATH('Train')[types])
        dump_bigger(dev_set, param.PKL_SET_PATH('Dev')[types])
        dump_bigger(test_set, param.PKL_SET_PATH('Test')[types])
    else:
        write_data(train_set, 'Train', sa_type)
        write_data(dev_set, 'Dev', sa_type)
        write_data(test_set, 'Test', sa_type)
def reshape_data(self, origin_set: List, MAX_LEN: int = 200) -> List:
    ''' reshape data into fixed-length chunks of MAX_LEN tokens '''
    data_set = sum([[
        (self.word2id[jj] if jj in self.word2id else 0, con.CWS_LAB2ID[kk])
        for jj, kk in ii
    ] for ii in origin_set], [])
    data_len = len(data_set)
    # pad up to the next multiple of MAX_LEN (no padding if already aligned)
    pad_len = (MAX_LEN - data_len % MAX_LEN) % MAX_LEN
    echo(2, data_len, pad_len)
    data_set = np.array([*data_set, *[(0, 0)] * pad_len])
    reshape_data = data_set.reshape([len(data_set) // MAX_LEN, MAX_LEN, 2])
    if pad_len:
        # drop the padding from the last chunk
        last_id = reshape_data.shape[0] - 1
        reshape_data = [
            jj if ii != last_id else jj[:MAX_LEN - pad_len]
            for ii, jj in enumerate(reshape_data)
        ]
    return reshape_data
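# --- A toy view (hypothetical values) of the pad-then-chunk reshape in
# reshape_data() above, with MAX_LEN = 3 and four (word_id, tag) pairs:
import numpy as np

toy_pairs = [(5, 1), (7, 2), (9, 0), (4, 3)]
TOY_MAX_LEN = 3
toy_pad = (TOY_MAX_LEN - len(toy_pairs) % TOY_MAX_LEN) % TOY_MAX_LEN   # 2
toy_padded = np.array([*toy_pairs, *[(0, 0)] * toy_pad])
toy_chunks = toy_padded.reshape([len(toy_padded) // TOY_MAX_LEN, TOY_MAX_LEN, 2])
print(toy_chunks.shape)   # (2, 3, 2); the last chunk carries the padding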
def __init__(self, sys_exec_profile, sys_comm_profile, sys_comm_req,
             mapping, arrival_rate):
    """
    sys_exec_profile: dictionary {node number: {task: exec time}}.
    sys_comm_profile: 2D matrix, element [i][j] specifies the comm cost
        between node i and j.
    sys_comm_req: dictionary {task: dict({next task: comm_req})}
    mapping: dictionary {task: [list of node numbers]}
    arrival_rate: input job arrival rate
    """
    self.mapping = mapping
    self.sys_exec_profile = sys_exec_profile
    self.sys_comm_profile = sys_comm_profile
    self.sys_comm_req = sys_comm_req
    # NOTE: nodes are not ordered with respect to their node_num
    self.node_list = []
    self.init_nodes = []  # task nodes (numbers) with inputs
    self.end_nodes = []   # nodes with the final task
    # Construct and initialize nodes and instream destinations
    self.generate_nodes()
    util.echo("Node list indexes {}".format(
        [node.node_num for node in self.node_list]))
    util.echo("Initial task nodes: {}".format(self.init_nodes))
    util.echo("End task nodes: {}".format(
        [node.node_num for node in self.end_nodes]))
    self.instream = InStream(arrival_rate)  # Input job generator
    self.set_input_nodes()
def prepare_n_gram(self, origin_set_one: List, seq: List, n_gram: int = 4):
    ''' prepare n gram '''
    idx, origin_set, n_gram_dict = 0, [], []
    exist_word, no_exist_word = {}, []
    for ii in seq:
        origin_set.append(list(origin_set_one[idx:idx + ii]))
        idx += ii
    echo(1, 'Seq Length Over')
    for ii in origin_set:
        t_seq_len = len(ii)
        for jj, _ in enumerate(ii):
            begin_idx = max(0, jj - n_gram)
            end_idx = min(t_seq_len, jj + n_gram)
            n_gram_word = ii[begin_idx:end_idx]
            n_gram_count = dict(Counter(n_gram_word))
            n_gram_dict.append(n_gram_count)
            for kk, mm in n_gram_count.items():
                exist_word[kk] = mm
    echo(1, 'n_gram Over')
    # make sure every vocabulary id appears at least once, so that the
    # downstream DictVectorizer keeps a column for each id
    for ii in self.word2id.values():
        if ii not in exist_word:
            no_exist_word.append(ii)
    for ii in no_exist_word:
        n_gram_dict[-1][ii] = 0
    echo(1, len(no_exist_word), 'no exist Over')
    return n_gram_dict
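# --- A toy run (hypothetical ids) of the sliding-window count in
# prepare_n_gram() above: for each position jj, the ids inside the window
# [jj - n_gram, jj + n_gram) are counted with Counter.
from collections import Counter

toy_sentence, toy_n_gram = [3, 1, 3, 2, 1], 2
for jj in range(len(toy_sentence)):
    window = toy_sentence[max(0, jj - toy_n_gram):
                          min(len(toy_sentence), jj + toy_n_gram)]
    print(jj, dict(Counter(window)))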
def evaluation(y: List, y_predict: List, seq: List, types: str):
    # print(np.array(y).shape, np.array(y_predict).shape, seq.shape)
    y = sum([list(jj[:seq[ii]]) for ii, jj in enumerate(y)], [])
    y = [int(ii > 1) for ii in y]
    y_predict = sum([list(jj[:seq[ii]]) for ii, jj in enumerate(y_predict)], [])
    y_predict = [int(ii > 1) for ii in y_predict]
    # force a word boundary at the last position of every sentence
    change_idx, idx = [], -1
    for ii in seq:
        change_idx.append(ii + idx)
        idx += ii
    for ii in change_idx:
        try:
            y_predict[ii] = 1
        except IndexError:
            echo(0, ii, len(y_predict))
    # correct_labels = np.sum((y == y_predict) * mask)
    # accuracy = 100.0 * correct_labels / float(total_label)
    p, r, macro_f1 = fastF1(y, y_predict)
    print(f"{types} P: {p:.2f}%, R: {r:.2f}%, Macro_f1: {macro_f1:.2f}%")
    return p, r, macro_f1
def evaluation(y: List, y_predict: List, seq: List, types: str):
    ''' evaluation '''
    y_t, y_p_t = [], []
    for ii, jj in enumerate(y):
        y_t.extend(jj[:seq[ii]])
    for ii, jj in enumerate(y_predict):
        y_p_t.extend(jj[:seq[ii]])
    y = [int(ii > 1) for ii in y_t]
    y_predict = [int(ii > 1) for ii in y_p_t]
    change_idx, idx = [], -1
    for ii in seq:
        change_idx.append(ii + idx)
        idx += ii
    for ii in change_idx:
        try:
            y_predict[ii] = 1
        except IndexError:
            echo(0, ii, len(y_predict))
    p, r, macro_f1 = fastF1(y, y_predict)
    print(f"{types} P: {p:.2f}%, R: {r:.2f}%, Macro_f1: {macro_f1:.2f}%")
    return p, r, macro_f1
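# --- A toy trace (hypothetical values) of the sentence-boundary fix used
# in both evaluation() variants above: with sentence lengths [3, 2], the
# last position of each sentence in the flattened prediction is forced to
# be a word boundary before scoring.
toy_seq, toy_pre = [3, 2], [0, 1, 0, 0, 0]
toy_change_idx, toy_idx = [], -1
for ii in toy_seq:
    toy_change_idx.append(ii + toy_idx)
    toy_idx += ii
for ii in toy_change_idx:
    toy_pre[ii] = 1
print(toy_change_idx, toy_pre)   # [2, 4] [0, 1, 1, 0, 1]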
def main(sa_type: param.SA_TYPE, run_id: str):
    ''' run model '''
    param.change_run_id(run_id)
    echo(1, '....... Load data ......')
    train, dev, test = dataLoad.load_data(sa_type)
    echo(1, '....... Data load Over ......')
    sa_model = Model(train, dev, test, sa_type)
    echo(1, '------ Begin Train -------')
    sa_model.run_model()
def generate_inputs(self, total_jobs):
    """
    total_jobs: total number of jobs to simulate
    """
    self.total_jobs = total_jobs
    self.curr_jobs = 0
    if not self.dest:
        util.echo("No destinations for the input stream")
        return
    while self.curr_jobs < self.total_jobs:
        # util.echo(">>> Generating job {}".format(self.curr_jobs))
        # dispatch the job round-robin within each destination group
        for destination in self.dest:
            partition = len(destination)
            dest_idx = self.curr_jobs % partition
            node = destination[dest_idx]
            node.increase("Input")
        self.curr_jobs += 1
        self.start_times.append(datetime.now())
        time.sleep(self.arrival_diff)
    return
def load_word(self, train_set: List, dev_set: List, test_set: List):
    ''' load word '''
    echo(0, 'begin load word')
    total_set = [*train_set, *dev_set, *test_set]
    echo(0, 'begin load word I ', len(total_set))
    word_list = set(' '.join(
        [' '.join([ii[0] for ii in jj]) for jj in total_set]).split())
    echo(0, 'begin load word II')
    word_set = ['[PAD]', *list(set(word_list))]
    # NOTE: the hand-rolled join/split scan above is immediately
    # overwritten by load_word_list here
    word_set = self.load_word_list(total_set)
    echo(1, len(word_set))
    word2id = {jj: ii for ii, jj in enumerate(word_set)}
    self.word2id = word2id
    MAX_LEN = max([len(ii) for ii in total_set])
    self.train_set = self.load_word_once(train_set, MAX_LEN)
    self.dev_set = self.load_word_once(dev_set, MAX_LEN)
    self.test_set = self.load_word_once(test_set, MAX_LEN)
    self.MAX_LEN = MAX_LEN
def test_epoch(self, dataset, sess, types: str):
    ''' Test one epoch '''
    _batch_size = 128
    _y = dataset[1]
    data_size = _y.shape[0]
    batch_num = int(data_size / _batch_size) + 1
    predict = []
    fetches = [
        self.model.scores, self.model.length, self.model.transition_params
    ]
    echo(1, 'Test Batch Num:', batch_num)
    for i in range(batch_num):
        begin_index = i * _batch_size
        end_index = min((i + 1) * _batch_size, data_size)
        X_batch = dataset[0][begin_index:end_index]
        Y_batch = dataset[1][begin_index:end_index]
        feed_dict = {
            self.model.X_inputs: X_batch,
            self.model.y_inputs: Y_batch,
            self.model.lr: 1e-5,
            self.model.batch_size: _batch_size,
            self.model.keep_prob: 1.0
        }
        test_score, test_length, transition_params = sess.run(
            fetches=fetches, feed_dict=feed_dict)
        # decode each sequence in the batch with Viterbi
        for tf_unary_scores_, y_, sequence_length_ in zip(
                test_score, Y_batch, test_length):
            viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
                tf_unary_scores_, transition_params)
            predict.append(viterbi_sequence)
        if not (i + 1) % 100:
            echo(0, i)
    if types == 'Test':
        pickle.dump(predict,
                    open(f"{param.RESULT_PATH(self.sa_type)}.pkl", 'wb'))
        self.output_result(dataset, predict, types)
    echo(1, 'Predict Result shape:', np.array(predict).shape)
    if self.sa_type == param.SA_TYPE.CWS:
        p, r, macro_f1 = evaluation(_y, predict, dataset[2], types)
        return p, r, macro_f1, predict
    else:
        p, r, macro_f1, log_text = evaluation_ner(_y, predict, dataset[2],
                                                  types)
        return p, r, macro_f1, log_text
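# --- A minimal sketch (toy random inputs) of the per-sequence Viterbi
# decode used in test_epoch() above; tf.contrib.crf.viterbi_decode runs on
# plain numpy arrays, so no session is needed (TensorFlow 1.x assumed).
import numpy as np
import tensorflow as tf

toy_unary = np.random.rand(6, 4).astype(np.float32)       # [seq_len, num_tag]
toy_transition = np.random.rand(4, 4).astype(np.float32)  # [num_tag, num_tag]
toy_tags, toy_score = tf.contrib.crf.viterbi_decode(toy_unary, toy_transition)
print(toy_tags, toy_score)   # best tag sequence and its unnormalized score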
def tf_idf(self, word_set: List, seq: List, n_gram: int = 4):
    ''' tf-idf embed '''
    word_set = np.array(word_set)
    num_fea = len(self.word2id)
    num_seq, num_word = word_set.shape
    echo(0, num_seq, num_word, num_fea)
    origin_set_one = word_set.reshape(-1)
    n_gram_dict = self.prepare_n_gram(origin_set_one, seq, n_gram)
    n_gram_dict += [{}] * (num_seq * num_word - len(n_gram_dict))
    to_pipeline = [DictVectorizer(), TfidfTransformer()]
    data_transformer = make_pipeline(*to_pipeline)
    transformed = np.array(
        data_transformer.fit_transform(n_gram_dict).todense(),
        dtype=np.float16)
    echo(1, 'Tf idf Over')
    for ii in n_gram_dict[0].keys():
        echo(0, transformed[0][ii])
    return transformed.reshape([num_seq, num_word, num_fea])
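# --- A self-contained sketch (toy count dicts) of the DictVectorizer ->
# TfidfTransformer pipeline used in tf_idf() above, with dicts keyed by
# vocabulary id exactly as prepare_n_gram() produces them.
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import make_pipeline

toy_counts = [{0: 2, 3: 1}, {1: 1, 3: 2}, {0: 1}]
toy_pipe = make_pipeline(DictVectorizer(), TfidfTransformer())
toy_dense = np.array(toy_pipe.fit_transform(toy_counts).todense(),
                     dtype=np.float16)
print(toy_dense.shape)   # (3, number of distinct ids)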
def start_simulation(self, total_jobs):
    self.total_jobs = total_jobs
    util.echo("Starting simulation with {} jobs".format(self.total_jobs))
    for node in self.node_list:
        util.echo("Starting node {}".format(node.node_num))
        # Start each node on a separate thread
        t = threading.Thread(target=node.run)
        t.daemon = True
        t.start()
    # Start generating inputs. The factor of 2 avoids some corner effects
    # where the end task runs short of one parent task.
    t = threading.Thread(target=self.instream.generate_inputs,
                         args=(self.total_jobs * 2, ))
    t.daemon = True
    t.start()
    while True:
        if self.check_end_status():  # Check if all jobs executed
            util.echo("=== Ending the simulation ===")
            return self.end_simulation()
def crf_tf(train_x: List, train_y: List, train_seq: List, train_se: List,
           dev_x: List, dev_y: List, dev_seq: List, dev_se: List,
           test_x: List, test_y: List, test_seq: List, test_se: List,
           num_tag: int):
    ''' crf based on tensorflow '''
    with tf.Graph().as_default(), tf.Session() as session:
        train_x_init = tf.placeholder(tf.float32,
                                      shape=np.array(train_x).shape)
        train_x_t = tf.Variable(train_x_init)
        train_y_t = tf.constant(train_y)
        train_seq_t = tf.constant(train_seq)
        dev_x_t, dev_y_t, dev_seq_t = tf_constant(dev_x, dev_y, dev_seq)
        test_x_t, test_y_t, test_seq_t = tf_constant(test_x, test_y, test_seq)
        num_train_seq, num_train_word, num_fea = train_x.shape
        num_dev_seq, num_dev_word, _ = dev_x.shape
        num_test_seq, num_test_word, _ = test_x.shape
        weights = tf.get_variable("weights", [num_fea, num_tag],
                                  dtype=tf.float32)
        train_score = score_matrix(train_x_t, weights, num_train_seq,
                                   num_train_word, num_fea, num_tag)
        dev_score = score_matrix(dev_x_t, weights, num_dev_seq, num_dev_word,
                                 num_fea, num_tag)
        test_score = score_matrix(test_x_t, weights, num_test_seq,
                                  num_test_word, num_fea, num_tag)
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            train_score, train_y_t, train_seq_t)
        # softmaxs_sparse = tf.nn.sparse_softmax_cross_entropy_with_logits(
        #     labels=train_y_t, logits=train_score)
        train_viterbi_seq, _ = tf.contrib.crf.crf_decode(
            train_score, transition_params, train_seq_t)
        dev_viterbi_seq, _ = tf.contrib.crf.crf_decode(
            dev_score, transition_params, dev_seq_t)
        test_viterbi_seq, _ = tf.contrib.crf.crf_decode(
            test_score, transition_params, test_seq_t)

        # loss calculation
        loss = tf.reduce_mean(-log_likelihood)
        train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
        session.run(tf.global_variables_initializer(),
                    feed_dict={train_x_init: train_x})
        train_mask, train_total = mask(num_train_word, train_seq)
        dev_mask, dev_total = mask(num_dev_word, dev_seq)
        test_mask, test_total = mask(num_test_word, test_seq)
        best_dev_acc = -1
        log(f'------- {time_str()} -------')
        for i in range(1000):
            train_predict, _ = session.run([train_viterbi_seq, train_op])
            if i % 20 == 0:
                dev_predict = session.run([dev_viterbi_seq])[0]
                print(f'------ \033[92m{i} epochs \033[0m -------')
                train_p, train_r, train_macro_f1 = evaluation(
                    train_y, train_predict, train_se, train_mask,
                    train_total, 'Train')
                dev_p, dev_r, dev_macro_f1 = evaluation(
                    dev_y, dev_predict, dev_se, dev_mask, dev_total, 'Dev')
                log(f'{i}|{train_p:.2f}|{train_r:.2f}|{train_macro_f1:.2f}|{dev_p:.2f}|{dev_r:.2f}|{dev_macro_f1:.2f}|')
                if dev_macro_f1 > best_dev_acc:
                    best_dev_acc = dev_macro_f1
                    test_predict = session.run([test_viterbi_seq])[0]
                    pickle.dump(test_predict,
                                open(f"{con.RESULT['CWS']}.pkl", 'wb'))
                    test_p, test_r, test_macro_f1 = evaluation(
                        test_y, test_predict, test_se, test_mask,
                        test_total, 'Test')
        log(f"Best Dev Macro_f1: {best_dev_acc:.2f}%")
        log(f"Best Test P: {test_p:.2f}%, R: {test_r:.2f}%, Macro_f1: {test_macro_f1:.2f}%")
        echo(0, f"Best Dev Macro_f1: {best_dev_acc:.2f}%")
        echo(0, f"Best Test P: {test_p:.2f}%, R: {test_r:.2f}%, Macro_f1: {test_macro_f1:.2f}%")
        return test_predict
def train(self,
          max_epoch: int,
          max_max_epoch: int,
          tr_batch_size: int,
          display_num: int = 5,
          do_finetune: bool = False):
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if do_finetune:
        ckpt = tf.train.latest_checkpoint(param.CHECK_DIR)
        saver = tf.train.Saver()
        saver.restore(sess, ckpt)
        print(f'Finetune ckpt: {ckpt} ...')
    echo(0, 'Train shape', self.data_train[0].shape, self.data_train[1].shape)
    echo(0, 'Dev shape', self.data_dev[0].shape, self.data_dev[1].shape)
    echo(0, 'Test shape', self.data_test[0].shape, self.data_test[1].shape)
    tr_batch_num = int(self.data_train[1].shape[0] / tr_batch_size)
    echo(3, tr_batch_num)
    display_batch = int(tr_batch_num / display_num)
    saver = tf.train.Saver(max_to_keep=10)
    log(f'------- {time_str()} -------')
    best_dev_acc = -1  # track the best dev score across all epochs
    for epoch in range(max_max_epoch):
        print(f'------ \033[92m{epoch} epochs \033[0m -------')
        # !!! very important: the learning rates of the CWS model and
        # the NER model are not the same
        _lr = 0.01 if epoch < max_epoch else 0.005
        if self.sa_type == param.SA_TYPE.NER:
            _lr /= 10
        start_time = time.time()
        _losstotal, show_loss = 0.0, 0.0
        for batch in range(tr_batch_num):
            fetches = [self.model.loss, self.model.train_op]
            begin_index = batch * tr_batch_size
            end_index = (batch + 1) * tr_batch_size
            X_batch = self.data_train[0][begin_index:end_index]
            Y_batch = self.data_train[1][begin_index:end_index]
            # echo(0, X_batch[57,0], Y_batch[57,0])
            feed_dict = {
                self.model.X_inputs: X_batch,
                self.model.y_inputs: Y_batch,
                self.model.lr: _lr,
                self.model.batch_size: tr_batch_size,
                self.model.keep_prob: 0.5
            }
            _loss, _ = sess.run(fetches, feed_dict)
            _losstotal += _loss
            show_loss += _loss
            if not (batch + 1) % display_batch:
                train_p, train_r, train_macro_f1, train_log = self.test_epoch(
                    self.data_train, sess, 'Train')
                dev_p, dev_r, dev_macro_f1, dev_log = self.test_epoch(
                    self.data_dev, sess, 'Dev')
                if dev_macro_f1 > best_dev_acc:
                    test_p, test_r, test_macro_f1, predict = self.test_epoch(
                        self.data_test, sess, 'Test')
                    best_dev_acc = dev_macro_f1
                if self.sa_type == param.SA_TYPE.CWS:
                    log(f'{time_str()}|{epoch}-{batch}|{train_p:.2f}|{train_r:.2f}|{train_macro_f1:.2f}|{dev_p:.2f}|{dev_r:.2f}|{dev_macro_f1:.2f}|')
                else:
                    log(f'{time_str()}|{epoch}-{batch}|{train_p:.2f}|{train_r:.2f}|{train_macro_f1:.2f}|{dev_p:.2f}|{dev_r:.2f}|{dev_macro_f1:.2f}| {train_log} | {dev_log}')
                echo(1, f'training loss={show_loss / display_batch}')
                show_loss = 0.0
        mean_loss = _losstotal / tr_batch_num
        save_path = saver.save(sess,
                               self.model.model_save_path,
                               global_step=(epoch + 1))
        print('the save path is ', save_path)
        echo(1, f'Training {self.data_train[1].shape[0]}, loss={mean_loss:g} ')
        echo(2, f'Epoch training {self.data_train[1].shape[0]}, loss={mean_loss:g}, speed={time.time() - start_time:g} s/epoch')
    log(f"Best Dev Macro_f1: {best_dev_acc:.2f}%")
    log(f"Best Test P: {test_p:.2f}%, R: {test_r:.2f}%, Macro_f1: {test_macro_f1:.2f}%")
    sess.close()
    return predict
def debug_bar(self):
    util.clear()
    util.echo("BAR:", self.bar)