Example #1
    def __init__(self, num_classes, ssdr_config, core_model, seq_length, is_training):
        super(WSSDRWrapperInterface, self).__init__()
        placeholder = tf.compat.v1.placeholder
        bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
        def_max_length = FLAGS.max_def_length
        loc_max_length = FLAGS.max_loc_length
        tf_logging.debug("WSSDRWrapper init()")
        tf_logging.debug("seq_length %d" % seq_length)
        tf_logging.debug("def_max_length %d" % def_max_length)
        tf_logging.debug("loc_max_length %d" % loc_max_length)

        self.input_ids = placeholder(tf.int64, [None, seq_length], name="input_ids")
        self.input_mask_ = placeholder(tf.int64, [None, seq_length], name="input_mask")
        self.segment_ids = placeholder(tf.int64, [None, seq_length], name="segment_ids")
        self.d_location_ids = placeholder(tf.int64, [None, loc_max_length], name="d_location_ids")

        self.d_input_ids = placeholder(tf.int64, [None, def_max_length], name="d_input_ids")
        self.d_input_mask = placeholder(tf.int64, [None, def_max_length], name="d_input_mask")
        self.d_segment_ids = placeholder(tf.int64, [None, def_max_length], name="d_segment_ids")
        self.ab_mapping = placeholder(tf.int64, [None, 1], name="ab_mapping")
        if ssdr_config.use_ab_mapping_mask:
            self.ab_mapping_mask = placeholder(tf.int64, [None, FLAGS.def_per_batch], name="ab_mapping_mask")
        else:
            self.ab_mapping_mask = None

        # [batch, seq_len]; 1 where the position appears in d_location_ids
        y_lookup = get_y_lookup_from_location_ids(self.d_location_ids, seq_length)

        self.y_cls = placeholder(tf.int64, [None])

        self.network = core_model(
                config=bert_config,
                ssdr_config=ssdr_config,
                is_training=is_training,
                input_ids=self.input_ids,
                input_mask=self.input_mask_,
                token_type_ids=self.segment_ids,
                d_input_ids=self.d_input_ids,
                d_input_mask=self.d_input_mask,
                d_segment_ids=self.d_segment_ids,
                d_location_ids=self.d_location_ids,
                ab_mapping=self.ab_mapping,
                ab_mapping_mask=self.ab_mapping_mask,
                use_one_hot_embeddings=False,
            )
        self.cls_logits = keras.layers.Dense(num_classes)(self.network.get_pooled_output())
        self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.cls_logits,
            labels=self.y_cls)
        self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

        self.lookup_logits = keras.layers.Dense(2)(self.network.get_sequence_output())
        self.lookup_p_at_1 = tf_module.p_at_1(self.lookup_logits[:, :, 1], y_lookup)
        self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.lookup_logits,
            labels=y_lookup)
        self.y_lookup = y_lookup
        self.lookup_loss_per_example = tf.reduce_sum(self.lookup_loss_arr, axis=-1)
        self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
        self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
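The wrapper above carries two objectives: a sequence-level classification loss (cls_loss) and a per-token dictionary-lookup loss (lookup_loss). A minimal training sketch that combines them, assuming an instance named model and a hand-picked weight alpha (both hypothetical, not from this repo):

import tensorflow as tf

alpha = 0.5  # assumed trade-off between the two losses
total_loss = model.cls_loss + alpha * model.lookup_loss  # joint objective
train_op = tf.compat.v1.train.AdamOptimizer(1e-5).minimize(total_loss)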
Example #2
    def __init__(self, hp, voca_size, is_training=True):
        config = BertConfig(vocab_size=voca_size,
                            hidden_size=hp.hidden_units,
                            num_hidden_layers=hp.num_blocks,
                            num_attention_heads=hp.num_heads,
                            intermediate_size=hp.intermediate_size,
                            type_vocab_size=hp.type_vocab_size)

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()

        task = ClassificationB(is_training, hp.hidden_units, 3)
        task.call(pooled_output, label_ids)
        self.loss = task.loss
        self.logits = task.logits
        self.acc = task.acc
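A TF1-style usage sketch for the class above: feed the x_list placeholders and the label placeholder through a session, then fetch loss and accuracy. The class name Classifier and the dummy int64 arrays are assumptions for illustration:

import numpy as np
import tensorflow as tf

model = Classifier(hp, voca_size)  # hypothetical name for the class above
batch = 8                          # dummy batch size for illustration
ids = np.zeros([batch, hp.seq_max], dtype=np.int64)
mask = np.ones([batch, hp.seq_max], dtype=np.int64)
seg = np.zeros([batch, hp.seq_max], dtype=np.int64)
labels = np.zeros([batch], dtype=np.int64)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    feed = dict(zip(model.x_list, [ids, mask, seg]))
    feed[model.y] = labels
    loss_val, acc_val = sess.run([model.loss, model.acc], feed_dict=feed)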
Example #3
def main(_):
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    input_files = get_input_files_from_flags(FLAGS)
    input_fn = input_fn_builder(input_files, FLAGS, False)
    model_fn = model_fn_lm(bert_config, train_config, BertModel)
    return run_estimator(model_fn, input_fn)
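main(_) functions like this one are driven by TF1's flag-parsing app runner; the usual module-level wiring (flag definitions assumed to live elsewhere in the file) is:

if __name__ == "__main__":
    tf.compat.v1.app.run()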
Example #4
def run_classification_w_second_input():
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    model_fn = model_fn_classification(
        bert_config,
        train_config,
    )
    input_fn = input_fn_builder_use_second_input(FLAGS)
    if FLAGS.do_predict:
        tf_logging.addFilter(MuteEnqueueFilter())
    result = run_estimator(model_fn, input_fn)
    return result
Example #5
    def __init__(self, num_classes, seq_length, is_training):
        super(DictReaderWrapper, self).__init__()
        placeholder = tf.compat.v1.placeholder
        bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
        def_max_length = FLAGS.max_def_length
        loc_max_length = FLAGS.max_loc_length
        tf_logging.debug("DictReaderWrapper init()")
        tf_logging.debug("seq_length %d" % seq_length)
        tf_logging.debug("def_max_length %d" % def_max_length)
        tf_logging.debug("loc_max_length %d" % loc_max_length)

        self.input_ids = placeholder(tf.int64, [None, seq_length])
        self.input_mask_ = placeholder(tf.int64, [None, seq_length])
        self.segment_ids = placeholder(tf.int64, [None, seq_length])

        self.d_input_ids = placeholder(tf.int64, [None, def_max_length])
        self.d_input_mask = placeholder(tf.int64, [None, def_max_length])
        self.d_location_ids = placeholder(tf.int64, [None, loc_max_length])

        self.y_cls = placeholder(tf.int64, [None])
        self.y_lookup = placeholder(tf.int64, [None, seq_length])

        self.network = DictReaderModel(
                config=bert_config,
                d_config=bert_config,
                is_training=is_training,
                input_ids=self.input_ids,
                input_mask=self.input_mask_,
                d_input_ids=self.d_input_ids,
                d_input_mask=self.d_input_mask,
                d_location_ids=self.d_location_ids,
                use_target_pos_emb=True,
                token_type_ids=self.segment_ids,
                use_one_hot_embeddings=False,
            )

        self.cls_logits = keras.layers.Dense(num_classes)(self.network.pooled_output)
        self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.cls_logits,
            labels=self.y_cls)
        self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

        self.lookup_logits = keras.layers.Dense(2)(self.network.sequence_output)
        self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.lookup_logits,
            labels=self.y_lookup)
        self.lookup_loss_per_example = tf.reduce_mean(self.lookup_loss_arr, axis=-1)
        self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
        self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
Example #6
def run_w_data_id():
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    model_fn = model_fn_classification_weighted_loss(
        bert_config,
        train_config,
    )
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())
    input_fn = input_fn_builder_classification_w_data_id(
        input_files=input_files, flags=FLAGS, is_training=FLAGS.do_train)
    result = run_estimator(model_fn, input_fn)
    return result
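The only substantive change from the plain classification runners is model_fn_classification_weighted_loss, whose body is not shown here. A weighted classification loss in TF1 usually scales the per-example cross entropy before reducing; a minimal sketch, with logits, labels, and per-example weights assumed to be produced by the model and the input pipeline:

per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=labels)
loss = tf.reduce_mean(per_example_loss * weights)  # weights: [batch], float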
Example #7
def main(_):
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    special_flags = FLAGS.special_flags.split(",")
    model_fn = model_fn_sensitivity(
        bert_config=bert_config,
        train_config=train_config,
        model_class=BertModel,
        special_flags=special_flags,
    )
    if FLAGS.do_predict:
        tf_logging.addFilter(MuteEnqueueFilter())
    input_fn = input_fn_builder_use_second_input(FLAGS)
    result = run_estimator(model_fn, input_fn)
    return result
Example #8
def run_w_data_id():
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    special_flags = FLAGS.special_flags.split(",")
    model_fn = model_fn_classification(
        bert_config=bert_config,
        train_config=train_config,
        model_class=BertModel,
        special_flags=special_flags,
    )
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())
    input_fn = input_fn_builder_classification_w_data_ids_typo(
        input_files=input_files, flags=FLAGS, is_training=FLAGS.do_train)
    result = run_estimator(model_fn, input_fn)
    return result
Example #9
def main(_):
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    special_flags = FLAGS.special_flags.split(",")
    model_fn = model_fn_classification(
        bert_config=bert_config,
        train_config=train_config,
        model_class=FreezeEmbedding,
        special_flags=special_flags,
    )

    input_fn = input_fn_builder_classification_w_data_id(
        input_files=input_files, flags=FLAGS, is_training=FLAGS.do_train)

    result = run_estimator(model_fn, input_fn)
    return result
Example #10
def main(_):
    input_files = get_input_files_from_flags(FLAGS)
    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    show_input_files(input_files)
    special_flags = FLAGS.special_flags.split(",")

    def override_prediction_fn(predictions, model):
        predictions['vector'] = model.get_output()
        return predictions

    model_fn = model_fn_classification(
        bert_config=bert_config,
        train_config=train_config,
        model_class=MultiEvidenceUseFirst,
        special_flags=special_flags,
        override_prediction_fn=override_prediction_fn)
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())
    input_fn = input_fn_builder_use_second_input(FLAGS)
    result = run_estimator(model_fn, input_fn)
    return result
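The override_prediction_fn hook lets this runner attach extra tensors (here the model's output vector) to what the estimator emits at predict time. A hedged sketch of how a model_fn's PREDICT branch typically consumes such a hook; the surrounding names are assumptions, not this repo's actual code:

predictions = {"logits": logits}
if override_prediction_fn is not None:
    predictions = override_prediction_fn(predictions, model)
output_spec = tf.estimator.EstimatorSpec(
    mode=tf.estimator.ModeKeys.PREDICT, predictions=predictions)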
Example #11
    def __init__(self, hp, voca_size, method, is_training=True):
        config = BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
Example #12
    def __init__(self):
        config = BertConfig.from_json_file(
            os.path.join(data_path, "bert_config.json"))
        self.attention_probs_list = []

        input_ids = tf.constant([[101] + [100] * 511])
        token_type_ids = tf.constant([[0] * 512])
        input_mask = tf.constant([[1] * 512])
        attention_mask = create_attention_mask_from_input_mask(
            input_ids, input_mask)
        initializer = create_initializer(config.initializer_range)

        scope = None
        with tf.compat.v1.variable_scope(scope, default_name="bert"):
            with tf.compat.v1.variable_scope("embeddings"):
                # Perform embedding lookup on the word ids.
                (self.embedding_output,
                 self.embedding_table) = embedding_lookup(
                     input_ids=input_ids,
                     vocab_size=config.vocab_size,
                     embedding_size=config.hidden_size,
                     initializer_range=config.initializer_range,
                     word_embedding_name="word_embeddings",
                     use_one_hot_embeddings=False)

                # Add positional embeddings and token type embeddings, then layer
                # normalize and perform dropout.
                self.embedding_output = embedding_postprocessor(
                    input_tensor=self.embedding_output,
                    use_token_type=True,
                    token_type_ids=token_type_ids,
                    token_type_vocab_size=config.type_vocab_size,
                    token_type_embedding_name="token_type_embeddings",
                    use_position_embeddings=True,
                    position_embedding_name="position_embeddings",
                    initializer_range=config.initializer_range,
                    max_position_embeddings=config.max_position_embeddings,
                    dropout_prob=config.hidden_dropout_prob)
            prev_output = reshape_to_matrix(self.embedding_output)
            with tf.compat.v1.variable_scope("encoder"):

                for layer_idx in range(config.num_hidden_layers):  # 12 for BERT-base
                    with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                        layer_input = prev_output

                        with tf.compat.v1.variable_scope("attention"):
                            attention_heads = []
                            with tf.compat.v1.variable_scope("self"):
                                attention_head = self.attention_fn(layer_input)
                                attention_heads.append(attention_head)

                            attention_output = None
                            if len(attention_heads) == 1:
                                attention_output = attention_heads[0]
                            else:
                                # In the case where we have other sequences, we just concatenate
                                # them to the self-attention head before the projection.
                                attention_output = tf.concat(attention_heads,
                                                             axis=-1)

                            # Run a linear projection of `hidden_size` then add a residual
                            # with `layer_input`.
                            with tf.compat.v1.variable_scope("output"):
                                attention_output = dense(
                                    config.hidden_size, initializer)(attention_output)
                                attention_output = layer_norm(
                                    attention_output + layer_input)

                        # The activation is only applied to the "intermediate" hidden layer.
                        with tf.compat.v1.variable_scope("intermediate"):
                            intermediate_output = dense(
                                config.intermediate_size,
                                initializer,
                                activation=gelu)(attention_output)

                        # Down-project back to `hidden_size` then add the residual.
                        with tf.compat.v1.variable_scope("output"):
                            layer_output = dense(
                                config.hidden_size, initializer)(intermediate_output)
                            layer_output = layer_norm(layer_output +
                                                      attention_output)
                            prev_output = layer_output
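The constructor above calls self.attention_fn, which is not shown. A minimal sketch of what it plausibly does: multi-head scaled-dot-product attention over the flattened [batch * seq, hidden] input, recording the attention probabilities into self.attention_probs_list. All shapes and helper names below are assumptions, not this repo's code, and the attention mask is omitted (the mask built above is all ones anyway):

    def attention_fn(self, layer_input, num_heads=12, size_per_head=64,
                     seq_length=512):
        # layer_input: [batch * seq_length, hidden]
        hidden = num_heads * size_per_head

        def project(name):
            return tf.compat.v1.layers.dense(layer_input, hidden, name=name)

        def to_heads(t):
            t = tf.reshape(t, [-1, seq_length, num_heads, size_per_head])
            return tf.transpose(t, [0, 2, 1, 3])    # [batch, heads, seq, size]

        q, k, v = (to_heads(project(n)) for n in ("query", "key", "value"))
        scores = tf.matmul(q, k, transpose_b=True) / (size_per_head ** 0.5)
        probs = tf.nn.softmax(scores)                # [batch, heads, seq, seq]
        self.attention_probs_list.append(probs)     # kept for later inspection
        context = tf.matmul(probs, v)               # [batch, heads, seq, size]
        context = tf.transpose(context, [0, 2, 1, 3])
        return tf.reshape(context, [-1, hidden])    # back to [batch*seq, hidden]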