Example #1
def test_nli(hparam, nli_setting, run_name, data, model_path):
    print("test nil :", run_name)
    task = transformer_nli(hparam, nli_setting.vocab_size, 2, False)

    train_batches, dev_batches = data

    sess = init_session()
    sess.run(tf.global_variables_initializer())

    loader = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    loader.restore(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    def valid_fn():
        loss_list = []
        acc_list = []
        for batch in dev_batches[:100]:
            loss_val, acc = sess.run([task.loss, task.acc],
                                     feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)

        return average(acc_list)

    return valid_fn()
Example #2
def boring():
    disable_eager_execution()
    seq_length = 512
    batch_size = 3
    virtual_input_ids = np.zeros([batch_size, seq_length], np.int32)  # dummy all-zero inputs

    input_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
    input_mask = tf1.placeholder(tf.int32, [batch_size, seq_length])
    segment_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])

    train_op = define_graph(input_ids, input_mask, segment_ids)
    tf.compat.v1.summary.scalar('accuracy', 0)
    merged = tf1.summary.merge_all()

    sess = init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    run_options = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    run_metadata = tf1.RunMetadata()
    train_writer = tf1.summary.FileWriter(
        os.path.join(cpath.output_path, "horizon_summary"), sess.graph)

    _, summary_out = sess.run(
        [train_op, merged],
        feed_dict={
            input_ids: virtual_input_ids,
            input_mask: virtual_input_ids,
            segment_ids: virtual_input_ids,
        },
        options=run_options,
        run_metadata=run_metadata)
    i = 0
    train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
    train_writer.add_summary(summary_out, i)
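
Note: this example only writes the collected run metadata to the TensorBoard log. The same `run_metadata` (captured with `FULL_TRACE` above) can also be exported as a Chrome trace using TensorFlow's TF1-era `timeline` helper; the snippet below is an optional sketch, not part of the original example.

from tensorflow.python.client import timeline

# Convert the profiled step stats to chrome://tracing format.
tl = timeline.Timeline(run_metadata.step_stats)
with open("timeline.json", "w") as f:
    f.write(tl.generate_chrome_trace_format())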
Example #3
    def __init__(self, model, lookup_augment_fn, sess=None):
        if sess is None:
            self.sess = train_module.init_session()
            self.sess.run(tf.compat.v1.global_variables_initializer())
        else:
            self.sess = sess

        self.model = model
        self.lookup_augment_fn = lookup_augment_fn
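
This constructor appears to match how `WSSDRRunner` is instantiated in Example #6 (`WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)`): passing an existing `sess` reuses an already-initialized session, while omitting it creates and initializes a fresh one.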
Example #4
    def __init__(self, model_path, num_classes, seq_len=None):
        self.voca_size = 30522
        load_names = ['bert', "output_bias", "output_weights"]
        self.hp = hyperparams.HPFAD()
        if seq_len is not None:
            self.hp.seq_max = seq_len
        self.model_dir = cpath.model_path
        self.task = transformer_logit(self.hp, num_classes, self.voca_size,
                                      False)
        self.sess = init_session()
        self.sess.run(tf.compat.v1.global_variables_initializer())
        self.load_model_white(model_path, load_names)
        self.batch_size = 64
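
`load_model_white` is project-specific and not shown in this listing. Judging from the whitelist passed to it (`['bert', "output_bias", "output_weights"]`), it presumably restores only the matching variables from the checkpoint; a minimal sketch of such a selective restore, with assumed name and behavior, could look like:

import tensorflow as tf

def load_model_white_sketch(sess, model_path, load_names):
    # Keep only variables whose top-level scope or bare name is whitelisted.
    selected = [v for v in tf.compat.v1.global_variables()
                if v.name.split('/')[0] in load_names
                or v.name.split(':')[0] in load_names]
    loader = tf.compat.v1.train.Saver(var_list=selected)
    loader.restore(sess, model_path)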
Example #5
def debug_names(is_training):
    tf.compat.v1.disable_eager_execution()

    seq_max = 200
    lr = 1e-5
    batch_size = FLAGS.train_batch_size

    tf_logging.debug("Building graph")
    model = DictReaderWrapper(3, seq_max, is_training)

    with tf.compat.v1.variable_scope("optimizer"):
        train_cls, global_step = train_module.get_train_op(model.cls_loss, lr)
        train_lookup, global_step = train_module.get_train_op(
            model.lookup_loss, lr, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    tvars = tf.compat.v1.trainable_variables()

    for var in tvars:
        name = var.name
        print(name)
Example #6
def train_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                     model_config, data_feeder_loader, model_init_fn):
    print("Train nil :", run_name)
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    train_writer, test_writer = setup_summary_writer(run_name)

    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(batch_size,
                                                         True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        batches, info = lookup_train_feeder.get_lookup_train_batches(
            batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)

        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(),
                 model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)
        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1),
                                 step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)

        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        return (step_i > model_config.lookup_min_step
                and lookup_loss_window.get_average() < model_config.lookup_threshold)

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup(step_i)
        else:
            loss, acc = train_classification(step_i)
        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")

        return loss, acc

    def debug_fn(batch):
        y_lookup, = sess.run([model.y_lookup],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    for step_i in range(init_step, train_steps):
        if valid_fn is not None:
            if (step_i + 1) % valid_freq == 0:
                valid_fn(step_i)

        if save_fn is not None:
            if time.time() - last_save > save_interval:
                save_fn()
                last_save = time.time()

        loss, acc = train_fn(step_i)

    return save_fn()
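
`MovingWindow` is a project utility that is not part of this listing. Based on how it is used above (`append(value, count)` to record a lookup loss and `get_average()` to gate `lookup_enabled`), a minimal compatible sketch might be:

from collections import deque

class MovingWindow:
    # Weighted moving average over the most recent `window_size` entries.
    def __init__(self, window_size):
        self.window_size = window_size
        self.items = deque()

    def append(self, value, count):
        self.items.append((value, count))
        if len(self.items) > self.window_size:
            self.items.popleft()

    def get_average(self):
        n = sum(c for _, c in self.items)
        return sum(v * c for v, c in self.items) / n if n else 0.0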
Example #7
def train_nli(hparam, nli_setting, run_name, num_epochs, data, model_path):
    print("Train nil :", run_name)
    task = transformer_nli(hparam, nli_setting.vocab_size, 2)
    with variable_scope("optimizer"):
        train_cls, global_step = get_train_op(task.loss, hparam.lr)

    train_batches, dev_batches = data

    log = log_module.train_logger()

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    sess = init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    train_writer, test_writer = setup_summary_writer(run_name)
    if model_path is not None:
        init_model_with_bert(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    g_step_i = 0

    def train_classification(batch, step_i):
        loss_val, acc, _ = sess.run([task.loss, task.acc, train_cls],
                                    feed_dict=batch2feed_dict(batch))
        log.debug("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        nonlocal g_step_i
        g_step_i = step_i
        return loss_val, acc

    def valid_fn():
        loss_list = []
        acc_list = []
        for batch in dev_batches[:100]:
            loss_val, acc = sess.run([task.loss, task.acc],
                                     feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)

        loss_val = average(loss_list)
        acc = average(acc_list)
        test_writer.add_summary(get_summary_obj(loss_val, acc), g_step_i)

        return average(acc_list)

    def save_fn():
        return save_model(sess, run_name, global_step)

    print("{} train batches".format(len(train_batches)))
    valid_freq = 100
    save_interval = 100000
    for i_epoch in range(num_epochs):
        loss, _ = epoch_runner(train_batches, train_classification, valid_fn,
                               valid_freq, save_fn, save_interval)

    return save_fn()
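
`epoch_runner` is another project helper that does not appear in this listing. From the call above, it drives one epoch of training: it calls `train_classification(batch, step_i)` per batch, `valid_fn` every `valid_freq` steps, `save_fn` every `save_interval` steps, and returns the last loss/accuracy pair. A minimal sketch under those assumptions:

def epoch_runner_sketch(batches, train_fn, valid_fn,
                        valid_freq, save_fn, save_interval):
    loss, acc = 0.0, 0.0
    for step_i, batch in enumerate(batches):
        loss, acc = train_fn(batch, step_i)
        if (step_i + 1) % valid_freq == 0:
            valid_fn()
        if save_fn is not None and (step_i + 1) % save_interval == 0:
            save_fn()
    return loss, acc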
Example #8
def load_bert_like():
    disable_eager_execution()
    model = BertLike()
    sess = init_session()
    #sess.run(tf.compat.v1.global_variables_initializer())
    load_v2_to_v2(sess, get_bert_full_path(), False)

    attention_prob_list, = sess.run([model.attention_probs_list])
    html = HtmlVisualizer("position.html")

    for layer_no, attention_prob in enumerate(attention_prob_list):
        html.write_headline("Layer {}".format(layer_no))
        acc_dict = {}
        num_head = 12  # BERT-base: 12 attention heads per layer

        zero_scores = [list() for _ in range(num_head)]

        for loc in range(2, 40, 2):
            print("Source : ", loc)
            for target_loc in range(20):
                offset = target_loc - loc

                print(offset, end=" ")
                for head_idx in range(num_head):
                    key = offset, head_idx
                    if key not in acc_dict:
                        acc_dict[key] = []
                    e = attention_prob[0, head_idx, loc, target_loc]
                    if target_loc != 0:
                        acc_dict[key].append(e)
                    else:
                        zero_scores[head_idx].append(e)
                    print("{0:.2f}".format(e * 100), end=" ")
                print()

        rows = [[Cell("Loc")] + [Cell("Head{}".format(i)) for i in range(12)]]
        for offset in range(-7, +7):
            print(offset, end=" ")
            scores = []
            for head_idx in range(12):
                key = offset, head_idx

                try:
                    elems = acc_dict[key]
                    if len(elems) < 3:
                        raise KeyError

                    avg = average(elems)
                    scores.append(avg)
                    print("{0:.2f}".format(avg * 100), end=" ")
                except KeyError:
                    print("SKIP")
            print()
            rows.append([Cell(offset)] +
                        [Cell(float(v * 100), v * 1000) for v in scores])
        html.write_table(rows)

        html.write_paragraph("Attention to first token")
        zero_scores = [average(l) for l in zero_scores]
        rows = [[Cell("   ")] + [Cell("Head{}".format(i)) for i in range(12)],
                [Cell("   ")] +
                [Cell(float(v * 100), v * 1000) for v in zero_scores]]
        html.write_table(rows)
Example #9
def train_nli(hparam, nli_setting, run_name, num_steps, data, model_path):
    print("Train nil :", run_name)

    task = transformer_nli_pooled(hparam, nli_setting.vocab_size)
    train_cls = get_train_op2(task.loss, hparam.lr, 75000)

    train_batches, dev_batches = data
    print("train:", train_batches[0][0][0])
    print("dev:", dev_batches[0][0][0])

    log = log_module.train_logger()
    log.setLevel(logging.INFO)

    sess = init_session()
    sess.run(tf1.global_variables_initializer())
    if model_path is not None:
        load_v2_to_v2(sess, model_path)

    def batch2feed_dict(batch):
        x0, x1, x2, y = batch
        feed_dict = {
            task.x_list[0]: x0,
            task.x_list[1]: x1,
            task.x_list[2]: x2,
            task.y: y,
        }
        return feed_dict

    g_step_i = 0

    def train_classification(batch, step_i):
        loss_val, acc, _ = sess.run([task.loss, task.acc, train_cls],
                                    feed_dict=batch2feed_dict(batch))
        log.debug("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        nonlocal g_step_i
        g_step_i = step_i
        return loss_val, acc

    global_step = tf1.train.get_or_create_global_step()

    def valid_fn():
        loss_list = []
        acc_list = []
        for batch in dev_batches:
            loss_val, acc, g_step_val = sess.run(
                [task.loss, task.acc, global_step],
                feed_dict=batch2feed_dict(batch))
            loss_list.append(loss_val)
            acc_list.append(acc)
        log.info("Step dev step={0} loss={1:.04f} acc={2:.03f}".format(
            g_step_val, average(loss_list), average(acc_list)))

        return average(acc_list)

    def save_fn():
        return save_model(sess, run_name, global_step)

    print("{} train batches".format(len(train_batches)))
    valid_freq = 100
    save_interval = 1000
    loss, _ = sub_step_runner(train_batches, train_classification, valid_fn,
                              valid_freq, save_fn, save_interval, num_steps)

    return save_fn()