def evaluate_batch(self, epoch_id) -> int:
    """Mid-epoch validation: track the criterion metric and reset the optimizer
    on a bad iteration. Returns the current number of resets."""
    self.session.run(self.increment_bad_iter_count_op)
    bad_iter_count, best_f1_score, best_em_score, prev_f1_score, prev_em_score, best_epoch_id, num_resets = \
        self.session.run([self.bad_iter_count, self.best_f1_score,
                          self.best_em_score, self.prev_f1_score,
                          self.prev_em_score, self.best_epoch_id,
                          self.num_resets])
    em_score, f1_score = evaluate(self.valid_dataset,
                                  self.infer(self.valid_dataset))
    print_batch_summary(epoch_id, em_score, f1_score)
    if (f1_score > prev_f1_score and self.conf_lr_reduction_criterion == 'F1') or \
            (em_score > prev_em_score and self.conf_lr_reduction_criterion == 'EM'):
        print('[{} | {}] new good iteration!'.format(now(), epoch_id))
        self.session.run([
            tf.assign(self.bad_iter_count, 0),
            tf.assign(self.best_f1_score, f1_score),
            tf.assign(self.best_em_score, em_score),
            tf.assign(self.best_epoch_id, epoch_id),
        ])
    else:
        num_resets = self.session.run(self.num_resets)
        print('[{} | {}] bad iteration, resetting the {} time'.format(
            now(), epoch_id, ordinal(num_resets + 1)))
        self.session.run([
            tf.assign(self.bad_iter_count, 0),
            tf.assign(self.num_resets, num_resets + 1),
        ])
        self.reset_optimizer(self.conf_opt_lr, False)
    # the previous scores are updated unconditionally, so the next comparison
    # is always against this evaluation
    self.session.run([
        tf.assign(self.prev_f1_score, f1_score),
        tf.assign(self.prev_em_score, em_score),
    ])
    return self.session.run(self.num_resets)
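# `ordinal` is imported from elsewhere in the repo and is assumed to render
# counts such as 1 -> '1st', 2 -> '2nd'. A minimal sketch, in case it needs to
# be (re)implemented; the exact behavior of the repo's helper is an assumption:
def _ordinal_sketch(n: int) -> str:
    # 11th-13th are special cases; otherwise the suffix follows the last digit
    if 11 <= n % 100 <= 13:
        return '{}th'.format(n)
    return '{}{}'.format(n, {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th'))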
def train(self, train_dataset: ProcDataset, valid_dataset: ProcDataset,
          max_epochs=1000, patience=0) -> None:
    for epoch_id in range(self.loaded_epoch_id + 1, max_epochs):
        print("[{} | {}] current best em: {}, current best f1: {}, "
              "current bad iter count {}, current num resets {}".format(
                  now(), epoch_id, *self.session.run([
                      self.best_em_score, self.best_f1_score,
                      self.bad_iter_count, self.num_resets
                  ])))
        self.train_epoch(train_dataset, epoch_id)
        em_score, f1_score = evaluate(valid_dataset, self.infer(valid_dataset))
        print_epoch_summary(epoch_id, em_score, f1_score)
        self.session.run(self.increment_bad_iter_count_op)
        bad_iter_count, best_f1_score, best_em_score, best_epoch_id, num_resets = \
            self.session.run([self.bad_iter_count, self.best_f1_score,
                              self.best_em_score, self.best_epoch_id,
                              self.num_resets])
        if (f1_score > best_f1_score and self.conf_lr_reduction_criterion == 'F1') or \
                (em_score > best_em_score and self.conf_lr_reduction_criterion == 'EM'):
            print('[{} | {}] new best iteration!'.format(now(), epoch_id))
            self.session.run([
                tf.assign(self.bad_iter_count, 0),
                tf.assign(self.best_f1_score, f1_score),
                tf.assign(self.best_em_score, em_score),
                tf.assign(self.best_epoch_id, epoch_id),
            ])
        elif bad_iter_count > patience:
            num_resets = self.session.run(self.num_resets)
            print('[{} | {}] resetting the {} time'.format(
                now(), epoch_id, ordinal(num_resets + 1)))
            self.init_weights()
            self.session.run([
                tf.assign(self.bad_iter_count, 0),
                tf.assign(self.num_resets, num_resets + 1),
            ])
            # halve the learning rate once more after each reset
            self.reset_optimizer(self.conf_opt_lr / 2.0**(num_resets + 1), True)
        else:
            print('[{} | {}] bad iteration, not doing anything yet, '
                  'curr bad iter count {}, patience {}'.format(
                      now(), epoch_id, bad_iter_count, patience))
        self.save(epoch_id)
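# Caveat: calling tf.assign(...) inside session.run, as above, adds a new op to
# the graph on every call, so the graph grows over a long training run. A
# minimal sketch of the usual TF1 pattern that builds the assign op exactly
# once at graph-construction time (all names here are hypothetical):
#
# import tensorflow as tf
#
# best_f1_var = tf.Variable(0.0, trainable=False, name='best_f1_score')
# new_f1_ph = tf.placeholder(tf.float32, shape=[], name='new_f1')
# set_best_f1_op = tf.assign(best_f1_var, new_f1_ph)  # created exactly once
#
# with tf.Session() as session:
#     session.run(tf.global_variables_initializer())
#     # reuse the same op every epoch instead of creating a fresh tf.assign
#     session.run(set_best_f1_op, feed_dict={new_f1_ph: 0.73})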
def train_epoch(self, dataset: ProcDataset, epoch_id: int) -> None:
    batch_iter = self.create_iter(dataset, True)
    num_batches = batch_iter.num_valid_batches()
    info_interval = num_batches // 5
    batch_counter, loss_sum = 0, 0.0
    self.session.run(self.zero_ops)  # clear the gradient accumulators
    for batch_id in range(num_batches):
        _, feed_dict = self.to_feed_dict(next(batch_iter), True)
        iterations = (batch_id + 1) * self.conf_batch_size
        curr_lr, loss_val, _ = self.session.run(
            [self.curr_lr, self.loss, self.accum_ops], feed_dict=feed_dict)
        if (batch_id + 1) % self.conf_apply_grads_interval == 0:
            # apply the accumulated gradients, then start a fresh accumulation
            self.session.run(self.apply_grads)
            self.session.run(self.zero_ops)
        batch_counter += 1
        # noinspection PyTypeChecker
        loss_sum += float(np.sum(loss_val, 0))
        if (batch_id + 1) % info_interval == 0:
            train_loss = loss_sum / batch_counter
            print('[{} | {} | {}] train loss: {}, lr: {}'.format(
                now(), epoch_id, iterations, train_loss, curr_lr))
            batch_counter, loss_sum = 0, 0.0
            # evaluate mid-epoch and stop early once patience is exhausted
            num_resets = self.evaluate_batch(epoch_id)
            if num_resets >= self.conf_patience:
                return
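# The zero_ops / accum_ops / apply_grads trio driven by train_epoch follows the
# standard TF1 gradient-accumulation pattern. A minimal sketch of how such ops
# are typically built; the function and variable names are assumptions, not the
# repo's actual API, and gradients are summed rather than averaged:
def build_grad_accumulation(loss, optimizer):
    tvars = tf.trainable_variables()
    # one non-trainable accumulator per trainable variable
    accum = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
             for v in tvars]
    zero_ops = [a.assign(tf.zeros_like(a)) for a in accum]
    # note: tf.gradients may return None for variables unused by the loss
    grads = tf.gradients(loss, tvars)
    accum_ops = [a.assign_add(g) for a, g in zip(accum, grads)]
    apply_grads = optimizer.apply_gradients(list(zip(accum, tvars)))
    return zero_ops, accum_ops, apply_grads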
def save(self, epoch_id) -> str:
    tf.logging.set_verbosity('DEBUG')
    model_file_stump = os.path.join(self.conf_weights_dir, 'model')
    print('[{}] Saving to {} with epoch_id {}'.format(
        now(), model_file_stump, epoch_id))
    save_path = self.saver.save(self.session, model_file_stump, epoch_id,
                                write_meta_graph=self.conf_save_meta_graph)
    conf_json = os.path.join(self.conf_weights_dir, 'config.json')
    if not os.path.isfile(conf_json):
        with open(conf_json, 'w') as f:
            json.dump(self.config, f)
    print('[{}] finished saving!'.format(now()))
    tf.logging.set_verbosity('WARN')
    return save_path
def print_trainable_params(self) -> None:
    with self.graph.as_default():
        total_params = 0
        for var in tf.trainable_variables():
            shape = var.get_shape()
            var_params = 1
            for dim in shape:
                var_params *= dim.value
            total_params += var_params
        print('[{}] there are {} total trainable parameters in this model'.
              format(now(), total_params))
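# For reference, the same count can be computed in a single expression; this
# helper is an illustration (not part of the repo) and assumes every trainable
# variable has a fully defined static shape:
def count_trainable_params() -> int:
    return sum(int(np.prod(v.get_shape().as_list()))
               for v in tf.trainable_variables())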
def build_graph(self, graph) -> tf.Graph:
    print('[{}] building {} model ...'.format(now(),
                                               self.__class__.__name__))
    with graph.as_default():
        self.build_inputs()
        # building the actual model
        with tf.device('/gpu'):
            self.par_vectors = self.apply_dropout(
                embedding_layer(self.word_embedder, self.char_embedder,
                                self.conf_layer_size, self.par_words,
                                self.par_num_words, self.par_chars,
                                self.par_num_chars, False, self.apply_dropout))
            self.qu_vectors = self.apply_dropout(
                embedding_layer(self.word_embedder, self.char_embedder,
                                self.conf_layer_size, self.qu_words,
                                self.qu_num_words, self.qu_chars,
                                self.qu_num_chars, True, self.apply_dropout))
            print_var('par_vectors', self.par_vectors)
            print_var('qu_vectors', self.qu_vectors)
            self.par_encoded, self.qu_encoded = self.encoding_layers()
            print_var('par_encoded', self.par_encoded)
            print_var('question_encoded', self.qu_encoded)
            self.match_par_qu = self.match_par_qu_layer()
            print_var('match_par_qu', self.match_par_qu)
            self.match_self = self.match_self_layer()
            print_var('match_self', self.match_self)
            self.predictions = self.prediction_layer()
            print_var('predictions', self.predictions)
            self.loss = self.loss_function()
            print_var('loss', self.loss)
            self.build_optimizer(self.conf_opt_lr)
    return graph
def infer(self, dataset: ProcDataset) -> Dict[str, str]:
    print('[{}] starting inference ...'.format(now()))
    dataset_iter = self.create_iter(dataset, False)
    num_samples = dataset_iter.num_samples()
    index_results = {}
    # run one batch past num_samples so the final partial batch is covered
    for first_sample_index in range(0, num_samples + self.conf_batch_size,
                                    self.conf_batch_size):
        qu_ids, feed_dict = self.to_feed_dict(next(dataset_iter), False)
        pred_val = self.session.run(self.predictions, feed_dict=feed_dict)
        for i in range(0, self.conf_batch_size):
            # noinspection PyTypeChecker
            index_results[qu_ids[i]] = (int(np.argmax(pred_val[i, 0])),
                                        int(np.argmax(pred_val[i, 1])))
    # map the predicted token spans back to character spans in the raw context
    text_results = {}
    for doc in dataset.documents:
        for par in doc.paragraphs:
            for qu in par.questions:
                first_token_index, last_token_index = index_results[qu.raw.id]
                # clamp indices to the paragraph length
                first_token_index = min(first_token_index,
                                        len(par.tokens) - 1)
                last_token_index = min(last_token_index, len(par.tokens) - 1)
                char_offset_start = par.tokens[
                    first_token_index].char_offset
                char_offset_end = par.tokens[
                    last_token_index].char_offset_end()
                text_results[qu.raw.id] = par.raw.context[
                    char_offset_start:char_offset_end]
    return text_results
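# `predictions` is indexed above as pred_val[i, 0] and pred_val[i, 1], which
# suggests per-token start/end distributions of shape
# [batch_size, 2, max_par_length] (an assumption). A toy decoding example:
#
# import numpy as np
# pred_val = np.array([[[0.1, 0.7, 0.2],    # start-position distribution
#                       [0.1, 0.2, 0.7]]])  # end-position distribution
# start, end = int(np.argmax(pred_val[0, 0])), int(np.argmax(pred_val[0, 1]))
# assert (start, end) == (1, 2)  # the answer spans tokens 1..2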
def train_epoch(self, dataset: ProcDataset, epoch_id: int) -> None:
    batch_iter = self.create_iter(dataset, True)
    num_batches = batch_iter.num_valid_batches()
    info_interval = num_batches // 10
    batch_counter, loss_sum = 0, 0.0
    for batch_id in range(num_batches):
        _, feed_dict = self.to_feed_dict(next(batch_iter), True)
        iterations = (batch_id + 1) * self.conf_batch_size
        loss_val, _ = self.session.run([self.loss, self.optimizer_op],
                                       feed_dict=feed_dict)
        batch_counter += 1
        # noinspection PyTypeChecker
        loss_sum += float(np.sum(loss_val, 0))
        if (batch_id + 1) % info_interval == 0:
            train_loss = loss_sum / batch_counter
            print('[{} | {} | {}] train loss: {}'.format(
                now(), epoch_id, iterations, train_loss))
            batch_counter, loss_sum = 0, 0.0
def train(self, train_dataset: ProcDataset, valid_dataset: ProcDataset,
          max_epochs=1000, patience=0) -> None:
    self.valid_dataset = valid_dataset
    for epoch_id in range(self.loaded_epoch_id + 1, max_epochs):
        em, f1, bad_iter, num_resets, curr_lr = self.session.run([
            self.best_em_score, self.best_f1_score, self.bad_iter_count,
            self.num_resets, self.curr_lr
        ])
        print("[{} | {}] current best em: {}, current best f1: {}, "
              "current bad iter count {}, current num resets {}, "
              "current lr {}".format(now(), epoch_id, em, f1, bad_iter,
                                     num_resets, curr_lr))
        if num_resets >= self.conf_patience:
            print('patience reached, exiting ...')
            return
        self.train_epoch(train_dataset, epoch_id)
        self.save(epoch_id)
def print_batch_summary(epoch_id: int, em_score: float,
                        f1_score: float) -> None:
    print('[{} | {}] accuracy (EM): {}, f1: {}'.format(
        now(), epoch_id, em_score, f1_score))
def print_epoch_summary(epoch_id: int, em_score: float,
                        f1_score: float) -> None:
    print('=============== [{} | {}] ==============='.format(now(), epoch_id))
    print('accuracy (EM): ', em_score)
    print('f1: ', f1_score)
    print('==================================================================')
import os
import sys
from subprocess import Popen

from common.util.time import now

for i in range(12):
    print('[{}] reproducing experiment {}'.format(now(), i))
    if len(sys.argv) <= 1:
        save_path = '/tmp'
    else:
        save_path = sys.argv[1]
    out_path = os.path.join(save_path, 'model_{}'.format(i))
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    log_file = os.path.join(out_path, 'log_{}.txt'.format(i))
    ARGS = [
        '--model=RnetRep{}'.format(i),
        '--layer_size=75',
        '--att_size=75',
        '--char_embedding_size=75',
        '--char_rnn_size=75',
        '--dropout=0.8',
        '--max_par_length=767',
        '--max_qu_length=60',
        '--max_char_length=37',
        '--batch_size=16',
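# Example invocation (the script filename is hypothetical; with no argument the
# twelve model directories are created under /tmp):
#   python reproduce_experiments.py /data/rnet_runs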