def test_nli(hparam, nli_setting, run_name, data, model_path):
    print("test nli :", run_name)
    task = transformer_nli(hparam, nli_setting.vocab_size, 2, False)
    train_batches, dev_batches = data

    sess = init_session()
    sess.run(tf.global_variables_initializer())
    loader = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    loader.restore(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    def valid_fn():
        # Evaluate on at most 100 dev batches and report mean accuracy.
        loss_list = []
        acc_list = []
        for batch in dev_batches[:100]:
            loss_val, acc = sess.run([task.loss, task.acc],
                                     feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)
        return average(acc_list)

    return valid_fn()
def boring():
    disable_eager_execution()
    seq_length = 512
    batch_size = 3
    # Dummy all-zero inputs; int32 to match the placeholders below.
    virtual_input_ids = np.zeros([batch_size, seq_length], np.int32)
    input_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
    input_mask = tf1.placeholder(tf.int32, [batch_size, seq_length])
    segment_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
    train_op = define_graph(input_ids, input_mask, segment_ids)
    tf.compat.v1.summary.scalar('accuracy', 0)
    merged = tf1.summary.merge_all()

    sess = init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    run_options = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    run_metadata = tf1.RunMetadata()
    train_writer = tf1.summary.FileWriter(
        os.path.join(cpath.output_path, "horizon_summary"), sess.graph)

    # Single traced step so the run metadata and summary can be inspected in TensorBoard.
    _, summary_out = sess.run(
        [train_op, merged],
        feed_dict={
            input_ids: virtual_input_ids,
            input_mask: virtual_input_ids,
            segment_ids: virtual_input_ids,
        },
        options=run_options,
        run_metadata=run_metadata)
    i = 0
    train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
    train_writer.add_summary(summary_out, i)
def __init__(self, model, lookup_augment_fn, sess=None):
    if sess is None:
        self.sess = train_module.init_session()
        self.sess.run(tf.compat.v1.global_variables_initializer())
    else:
        self.sess = sess
    self.model = model
    self.lookup_augment_fn = lookup_augment_fn
def __init__(self, model_path, num_classes, seq_len=None):
    self.voca_size = 30522
    load_names = ['bert', "output_bias", "output_weights"]
    self.hp = hyperparams.HPFAD()
    if seq_len is not None:
        self.hp.seq_max = seq_len
    self.model_dir = cpath.model_path
    self.task = transformer_logit(self.hp, num_classes, self.voca_size, False)
    self.sess = init_session()
    self.sess.run(tf.compat.v1.global_variables_initializer())
    self.load_model_white(model_path, load_names)
    self.batch_size = 64
def debug_names(is_training):
    tf.compat.v1.disable_eager_execution()
    seq_max = 200
    lr = 1e-5
    batch_size = FLAGS.train_batch_size

    tf_logging.debug("Building graph")
    model = DictReaderWrapper(3, seq_max, is_training)

    with tf.compat.v1.variable_scope("optimizer"):
        train_cls, global_step = train_module.get_train_op(model.cls_loss, lr)
        train_lookup, global_step = train_module.get_train_op(
            model.lookup_loss, lr, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    # Print every trainable variable name for inspection.
    tvars = tf.compat.v1.trainable_variables()
    for var in tvars:
        name = var.name
        print(name)
def train_nli_w_dict(run_name,
                     model: DictReaderInterface,
                     model_path,
                     model_config,
                     data_feeder_loader,
                     model_init_fn):
    print("Train nli :", run_name)
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    train_writer, test_writer = setup_summary_writer(run_name)

    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    # Pre-build dev batches, both with and without dictionary lookup info.
    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(batch_size, True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        batches, info = lookup_train_feeder.get_lookup_train_batches(batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)

        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(), model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)

        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1), step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        # Rank dictionary terms by lookup logits, then train classification
        # on the batch augmented with the ranked dictionary info.
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)

        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        return step_i > model_config.lookup_min_step \
               and lookup_loss_window.get_average() < model_config.lookup_threshold

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup(step_i)
        else:
            loss, acc = train_classification(step_i)

        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")
        return loss, acc

    def debug_fn(batch):
        y_lookup, = sess.run([model.y_lookup],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    for step_i in range(init_step, train_steps):
        if valid_fn is not None:
            if (step_i + 1) % valid_freq == 0:
                valid_fn(step_i)

        if save_fn is not None:
            if time.time() - last_save > save_interval:
                save_fn()
                last_save = time.time()

        loss, acc = train_fn(step_i)

    return save_fn()
def train_nli(hparam, nli_setting, run_name, num_epochs, data, model_path):
    print("Train nli :", run_name)
    task = transformer_nli(hparam, nli_setting.vocab_size, 2)
    with variable_scope("optimizer"):
        train_cls, global_step = get_train_op(task.loss, hparam.lr)

    train_batches, dev_batches = data
    log = log_module.train_logger()

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    sess = init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    train_writer, test_writer = setup_summary_writer(run_name)
    if model_path is not None:
        init_model_with_bert(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    g_step_i = 0

    def train_classification(batch, step_i):
        nonlocal g_step_i
        loss_val, acc, _ = sess.run([task.loss, task.acc, train_cls],
                                    feed_dict=batch2feed_dict(batch))
        log.debug("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        g_step_i = step_i
        return loss_val, acc

    def valid_fn():
        loss_list = []
        acc_list = []
        for batch in dev_batches[:100]:
            loss_val, acc = sess.run([task.loss, task.acc],
                                     feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)
        loss_val = average(loss_list)
        acc = average(acc_list)
        test_writer.add_summary(get_summary_obj(loss_val, acc), g_step_i)
        return average(acc_list)

    def save_fn():
        return save_model(sess, run_name, global_step)

    print("{} train batches".format(len(train_batches)))
    valid_freq = 100
    save_interval = 100000
    for i_epoch in range(num_epochs):
        loss, _ = epoch_runner(train_batches, train_classification,
                               valid_fn, valid_freq,
                               save_fn, save_interval)

    return save_fn()
def load_bert_like():
    disable_eager_execution()
    model = BertLike()
    sess = init_session()
    # sess.run(tf.compat.v1.global_variables_initializer())
    load_v2_to_v2(sess, get_bert_full_path(), False)

    attention_prob_list, = sess.run([model.attention_probs_list])

    html = HtmlVisualizer("position.html")

    num_head = 12  # BERT-base attention heads, matching the 12-column tables below
    for layer_no, attention_prob in enumerate(attention_prob_list):
        html.write_headline("Layer {}".format(layer_no))
        acc_dict = {}
        zero_scores = [list() for _ in range(12)]
        # Accumulate attention probabilities by (offset, head); attention to
        # the first token is collected separately in zero_scores.
        for loc in range(2, 40, 2):
            print("Source : ", loc)
            for target_loc in range(20):
                offset = target_loc - loc
                print(offset, end=" ")
                for head_idx in range(num_head):
                    key = offset, head_idx
                    if key not in acc_dict:
                        acc_dict[key] = []
                    e = attention_prob[0, head_idx, loc, target_loc]
                    if target_loc != 0:
                        acc_dict[key].append(e)
                    else:
                        zero_scores[head_idx].append(e)
                    print("{0:.2f}".format(e * 100), end=" ")
                print()

        rows = [[Cell("Loc")] + [Cell("Head{}".format(i)) for i in range(12)]]
        for offset in range(-7, +7):
            print(offset, end=" ")
            scores = []
            for head_idx in range(12):
                key = offset, head_idx
                try:
                    elems = acc_dict[key]
                    if len(elems) < 3:
                        raise KeyError
                    avg = average(elems)
                    scores.append(avg)
                    print("{0:.2f}".format(avg * 100), end=" ")
                except KeyError:
                    print("SKIP")
            print()
            rows.append([Cell(offset)] +
                        [Cell(float(v * 100), v * 1000) for v in scores])
        html.write_table(rows)

        html.write_paragraph("Attention to first token")
        zero_scores = [average(l) for l in zero_scores]
        rows = [[Cell(" ")] + [Cell("Head{}".format(i)) for i in range(12)],
                [Cell(" ")] + [Cell(float(v * 100), v * 1000) for v in zero_scores]]
        html.write_table(rows)
def train_nli(hparam, nli_setting, run_name, num_steps, data, model_path):
    print("Train nli :", run_name)
    task = transformer_nli_pooled(hparam, nli_setting.vocab_size)
    train_cls = get_train_op2(task.loss, hparam.lr, 75000)

    train_batches, dev_batches = data
    print("train:", train_batches[0][0][0])
    print("dev:", dev_batches[0][0][0])

    log = log_module.train_logger()
    log.setLevel(logging.INFO)

    sess = init_session()
    sess.run(tf1.global_variables_initializer())
    if model_path is not None:
        load_v2_to_v2(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    g_step_i = 0

    def train_classification(batch, step_i):
        nonlocal g_step_i
        loss_val, acc, _ = sess.run([task.loss, task.acc, train_cls],
                                    feed_dict=batch2feed_dict(batch))
        log.debug("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        g_step_i = step_i
        return loss_val, acc

    global_step = tf1.train.get_or_create_global_step()

    def valid_fn():
        loss_list = []
        acc_list = []
        for batch in dev_batches:
            loss_val, acc, g_step_val = sess.run(
                [task.loss, task.acc, global_step],
                feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)
        log.info("Step dev step={0} loss={1:.04f} acc={2:.03f}".format(
            g_step_val, average(loss_list), average(acc_list)))
        return average(acc_list)

    def save_fn():
        return save_model(sess, run_name, global_step)

    print("{} train batches".format(len(train_batches)))
    valid_freq = 100
    save_interval = 1000
    loss, _ = sub_step_runner(train_batches, train_classification,
                              valid_fn, valid_freq,
                              save_fn, save_interval,
                              num_steps)

    return save_fn()