def scatter_multiple(input_ids, indice, update_vals):
    """Overwrite selected positions of a 2-D tensor with given values.

    The tensor is flattened, every index in ``indice`` is shifted by
    ``row * seq_length`` so it addresses the flat tensor, and the flattened
    ``update_vals`` are written with ``tf.tensor_scatter_nd_update``.

    Args:
        input_ids: tensor whose first two dimensions (as reported by
            ``get_shape_list2``) are ``batch_size`` and ``seq_length``.
        indice: per-row column indices; must broadcast against a
            ``[batch_size, 1]`` offset column.
        update_vals: replacement values, flattened before scattering.

    Returns:
        The updated tensor reshaped to ``[batch_size, seq_length]``.
    """
    shape = get_shape_list2(input_ids)
    batch_size, seq_length = shape[0], shape[1]

    # Start offset of each row inside the flattened tensor, as a column.
    row_starts = tf.reshape(
        tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
    flat_positions = tf.reshape(indice + row_starts, [-1, 1])
    flat_input = tf.reshape(input_ids, [batch_size * seq_length])
    flat_values = tf.reshape(update_vals, [-1])

    scattered = tf.tensor_scatter_nd_update(flat_input, flat_positions,
                                            flat_values)
    return tf.reshape(scattered, [batch_size, seq_length])
Beispiel #2
0
    def call(self, input_ids, input_mask, segment_ids):
        """Embed the inputs and run them repeatedly through one shared layer.

        ``self.layer`` is applied ``max(1, num_hidden_layers)`` times under
        the variable scope ``"layer"``; every application after the first
        re-enters the scope with ``reuse=True``.  Each step's output,
        reshaped back to ``input_shape``, is appended to
        ``self.all_layer_outputs``.

        Side effects: sets ``self.embedding_layer``, ``self.embedding_output``
        and ``self.attention_mask``.

        Returns:
            The last layer output in the matrix form returned by
            ``self.layer.apply`` (not reshaped).
        """
        cfg = self.config
        with tf.compat.v1.variable_scope("embeddings"):
            self.embedding_layer = Embedding2()
            embedded = self.embedding_layer.apply(
                input_ids, segment_ids, cfg.initializer_range,
                cfg.vocab_size, cfg.embedding_size, cfg.type_vocab_size,
                cfg.max_position_embeddings, cfg.hidden_dropout_prob,
                self.use_one_hot_embeddings)
            embedded = self.embedding_projection(embedded)
            self.embedding_output = embedded
            input_shape = bc.get_shape_list2(embedded)
            batch_size, seq_length, _ = input_shape

        def run_shared_layer(hidden_2d):
            # One pass through the shared layer; records the reshaped output.
            _, hidden_2d = self.layer.apply(hidden_2d, batch_size, seq_length,
                                            self.attention_mask)
            self.all_layer_outputs.append(
                bc.reshape_from_matrix2(hidden_2d, input_shape))
            return hidden_2d

        with tf.compat.v1.variable_scope("encoder"):
            self.attention_mask = bc.create_attention_mask_from_input_mask2(
                embedded, input_mask)
            prev_output = bc.reshape_to_matrix(embedded)

            # First pass creates the variables ...
            with tf.compat.v1.variable_scope("layer"):
                prev_output = run_shared_layer(prev_output)

            # ... the remaining passes reuse them.
            for _ in range(1, cfg.num_hidden_layers):
                with tf.compat.v1.variable_scope("layer", reuse=True):
                    prev_output = run_shared_layer(prev_output)

        return prev_output
Beispiel #3
0
def pooling_modeling(option_name, num_classes, pooled_outputs,
                     sequence_output_3d):
    """Pool encoder outputs into one vector per example and compute logits.

    Supported ``option_name`` values:

    * ``"pooled_max"`` / ``"pooled_avg"``: max / mean of ``pooled_outputs``
      over axis 1.
    * ``"seq_max"`` / ``"seq_avg"``: max / mean of ``sequence_output_3d``
      over axis 2, then over axis 1.
    * ``"seq_max+1"`` / ``"seq_avg+1"``: same reduction, applied after a
      ``Dense(hidden_dim)`` layer on ``sequence_output_3d`` (whose shape must
      unpack into four dimensions).

    Args:
        option_name: one of the strings listed above.
        num_classes: output width of the final dense layer.
        pooled_outputs: tensor used by the ``pooled_*`` options.
        sequence_output_3d: tensor used by the ``seq_*`` options.

    Returns:
        Logits produced by ``Dense(num_classes)`` on the pooled vector.

    Raises:
        ValueError: if ``option_name`` is not a supported option.  (This was
            previously an ``assert False``, which is stripped under ``-O``.)
    """
    def seq_max_pooling(seq_tensor):
        reduced = tf.reduce_max(seq_tensor, axis=2)
        return tf.reduce_max(reduced, axis=1)

    def seq_avg_pooling(seq_tensor):
        reduced = tf.reduce_mean(seq_tensor, axis=2)
        return tf.reduce_mean(reduced, axis=1)

    if option_name == "pooled_max":
        single_rep = tf.reduce_max(pooled_outputs, axis=1)
    elif option_name == "pooled_avg":
        single_rep = tf.reduce_mean(pooled_outputs, axis=1)
    elif option_name in ("seq_max+1", "seq_avg+1"):
        # Unpacking keeps the original requirement of exactly four dims.
        _, _, _, hidden_dim = get_shape_list2(sequence_output_3d)
        # NOTE(review): this layer shares the name "cls_dense" with the final
        # classifier below; kept as-is so existing variable names stay stable.
        sequence_rep = tf.keras.layers.Dense(
            hidden_dim, name="cls_dense")(sequence_output_3d)
        if option_name == "seq_max+1":
            single_rep = seq_max_pooling(sequence_rep)
        else:
            single_rep = seq_avg_pooling(sequence_rep)
    elif option_name == "seq_avg":
        single_rep = seq_avg_pooling(sequence_output_3d)
    elif option_name == "seq_max":
        single_rep = seq_max_pooling(sequence_output_3d)
    else:
        raise ValueError(
            "Unsupported pooling option: {!r}".format(option_name))

    logits = tf.keras.layers.Dense(num_classes, name="cls_dense")(single_rep)
    return logits
Beispiel #4
0
def get_nli_ex_model_segmented(input_ids, input_mask, segment_ids):
    """Run ``transformer_nli`` on three windows of the input and re-join them.

    Each input is cut along axis 1 into three pieces of width ``step`` (200):
    the first ``step`` columns, then two follow-up pieces that are prefixed
    with a copy of column 0.  The last piece is right-padded with ``PAD_ID``
    by ``pad_len`` columns; the padding arithmetic hard-codes a total input
    width of 512.  The pieces are stacked along the batch axis, fed to the
    model, and ``model.conf_logits`` is stitched back along axis 1 with the
    duplicated first column and the padding removed.

    Returns:
        The re-assembled ``conf_logits`` tensor.
    """
    method = 5
    voca_size = 30522
    step = 200
    # Evaluates to 86: columns needed to bring the last window up to `step`.
    pad_len = 200 - 1 - (512 - (step * 2 - 1))

    hp = hyperparams.HPBert()
    batch_size = bert_common.get_shape_list2(input_ids)[0]

    def spread(t):
        # Split columns into three windows and stack them on the batch axis.
        head = t[:, :1]
        padding = tf.ones([batch_size, pad_len], tf.dtypes.int32) * PAD_ID
        first = t[:, :step]
        second = tf.concat([head, t[:, step:step * 2 - 1]], axis=1)
        third = tf.concat([head, t[:, step * 2 - 1:], padding], axis=1)
        return tf.concat([first, second, third], axis=0)

    def collect(t):
        # Inverse of spread(): drop the copied first column and the padding.
        pieces = [
            t[:batch_size],
            t[batch_size:batch_size * 2, 1:],
            t[batch_size * 2:, 1:-pad_len],
        ]
        return tf.concat(pieces, axis=1)

    model = transformer_nli(hp, spread(input_ids), spread(input_mask),
                            spread(segment_ids), voca_size, method, False)
    return collect(model.conf_logits)
Beispiel #5
0
def split_and_append_sep2(input_ids, input_mask, segment_ids, seq_length: int,
                          window_length: int, CLS_ID, EOW_ID):
    """Reshape flat inputs into windows and add one edge token on each side.

    The inputs are reshaped to ``[batch_size, num_window, -1]`` where
    ``num_window = int(seq_length / (window_length - 2))``.  Along the last
    axis each window then receives:

    * ids: ``CLS_ID`` in front and ``EOW_ID`` at the end,
    * mask: ``1`` on both sides,
    * segment ids: a copy of the window's first entry in front and of its
      second-to-last entry (slice ``-2:-1``) at the end.

    Returns:
        ``(stacked_input_ids, stacked_input_mask, stacked_segment_ids)``.
    """
    special_tokens = 2  # CLS, SEP
    src_window_length = window_length - special_tokens
    num_window = int(seq_length / src_window_length)
    batch_size, _ = bc.get_shape_list2(input_ids)

    window_shape = [batch_size, num_window, -1]
    ids_3d = tf.reshape(input_ids, window_shape)
    mask_3d = tf.reshape(input_mask, window_shape)
    segments_3d = tf.reshape(segment_ids, window_shape)

    def surround(left, middle, right):
        # Concatenate along the token axis of a [batch, window, token] tensor.
        return tf.concat([left, middle, right], axis=2)

    edge_shape = [batch_size, num_window, 1]
    cls_column = tf.ones(edge_shape, tf.int32) * CLS_ID
    eow_column = tf.ones(edge_shape, tf.int32) * EOW_ID
    stacked_input_ids = surround(cls_column, ids_3d, eow_column)

    ones_column = tf.ones(edge_shape, tf.int32)
    stacked_input_mask = surround(ones_column, mask_3d, ones_column)

    leading_segment = segments_3d[:, :, 0:1]
    # NOTE(review): this takes the second-to-last entry, not the last one.
    trailing_segment = segments_3d[:, :, -2:-1]
    stacked_segment_ids = surround(leading_segment, segments_3d,
                                   trailing_segment)

    return stacked_input_ids, stacked_input_mask, stacked_segment_ids
Beispiel #6
0
    def call(self, input_vectors, use_context):
        """Apply position embeddings and the mid-level layer stack.

        Args:
            input_vectors: tensor whose shape unpacks into
                ``(batch_size, seq_length, hidden_dim)``.
            use_context: tensor that is expanded on axis 2 and multiplied
                into an all-ones ``[batch_size, seq_length, seq_length]``
                int32 tensor to form the attention mask.

        Side effects: appends each layer's reshaped output to
        ``self.all_layer_outputs``.

        Returns:
            The last layer output in matrix form (not reshaped).
        """
        batch_size, seq_length, hidden_dim = bc.get_shape_list2(input_vectors)

        # Only position embeddings are added; token-type embeddings are off.
        embedded = bc.embedding_postprocessor2(
            input_tensor=input_vectors,
            token_type_table=self.token_type_table,
            full_position_embeddings=self.full_position_embeddings,
            use_token_type=False,
            token_type_ids=None,
            token_type_vocab_size=1,
            use_position_embeddings=True,
            max_position_embeddings=self.config.max_num_window,
            dropout_prob=self.config.hidden_dropout_prob)

        output_shape = [batch_size, seq_length]
        all_ones = tf.ones([batch_size, seq_length, seq_length], tf.int32)
        attention_mask = all_ones * tf.expand_dims(use_context, 2)

        with tf.compat.v1.variable_scope("mid"):
            hidden = bc.reshape_to_matrix(embedded)
            for layer_idx in range(self.n_layers):
                layer = self.layer_list[layer_idx]
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    _, hidden = layer.apply(hidden, batch_size, seq_length,
                                            attention_mask)
                    self.all_layer_outputs.append(
                        bc.reshape_from_matrix2(hidden, output_shape))

        return hidden
Beispiel #7
0
def delete_tokens(input_ids, n_trial, shift):
    """Build ``n_trial`` copies of the batch, each with one position masked.

    Trial ``i`` targets position ``shift + i`` (block size is fixed at 1).
    The batch is tiled ``n_trial`` times along axis 0 and the targeted
    position of every row is handed to ``scatter_with_batch`` together with
    ``MASK_ID``.  The Python list of positions is printed as a side effect.

    Returns:
        The result of ``scatter_with_batch`` on the tiled inputs.
    """
    n_block_size = 1
    positions = [
        list(range(shift + trial * n_block_size,
                   shift + (trial + 1) * n_block_size))
        for trial in range(n_trial)
    ]
    print(positions)
    batch_size, _ = get_shape_list2(input_ids)

    # [n_trial, 1] -> [1, n_trial, 1] -> [batch_size, n_trial, 1]
    location = tf.expand_dims(tf.constant(positions, tf.int32), 0)
    location = tf.tile(location, [batch_size, 1, 1])
    # Trial-major order so rows line up with the tiled inputs below:
    # [n_trial, batch_size, 1] -> [n_trial * batch_size, 1]
    location = tf.transpose(location, [1, 0, 2])
    location = tf.reshape(location, [batch_size * n_trial, -1])

    tiled_input_ids = tf.tile(input_ids, [n_trial, 1])
    return scatter_with_batch(tiled_input_ids, location, MASK_ID)
Beispiel #8
0
def random_masking(input_ids,
                   input_masks,
                   n_sample,
                   mask_token,
                   special_tokens=None):
    """Pick ``n_sample`` positions per row and replace them with ``mask_token``.

    A score vector of length 512 is built with NumPy: zero at position 0,
    uniform random values for the next 459 positions, zeros for the rest.
    It is baked into the graph as a constant and tiled over the batch, so
    every row starts from the same scores.  ``remove_special_mask`` then
    adjusts the scores and ``tf.math.top_k`` selects the positions.
    The shape of the tiled scores is printed as a side effect.

    Returns:
        ``(masked_input_ids, masked_lm_positions, masked_lm_ids,
        masked_lm_weights)`` where the weights are all ones (float32).
    """
    a_seg_len = 459

    scores = numpy.concatenate((
        numpy.zeros([1]),
        numpy.random.random(a_seg_len),
        numpy.zeros([512 - a_seg_len - 1]),
    ))
    batch_size, _ = get_shape_list2(input_ids)
    score_row = tf.expand_dims(tf.constant(scores, tf.float32), 0)
    rand = tf.tile(score_row, [batch_size, 1])
    print(rand.shape)

    excluded_tokens = [] if special_tokens is None else special_tokens
    rand = remove_special_mask(input_ids, input_masks, rand, excluded_tokens)

    _, masked_lm_positions = tf.math.top_k(rand,
                                           k=n_sample,
                                           sorted=False,
                                           name="masking_top_k")
    # masked_lm_positions: [batch, n_sample]
    masked_lm_ids = gather_index2d(input_ids, masked_lm_positions)
    masked_lm_weights = tf.ones_like(masked_lm_positions, dtype=tf.float32)
    masked_input_ids = scatter_with_batch(input_ids, masked_lm_positions,
                                          mask_token)
    return masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights
def select_value(a_size, ab_mapping, b_scores, b_items, method,
                 ab_mapping_mask):
    """For every "a" slot, pick one of the "b" items mapped to it.

    ``ab_mapping`` assigns each of the ``b_size`` items to one of ``a_size``
    slots.  The scores are scattered into an ``[a_size, b_size]`` matrix in
    which entries for items not mapped to a slot are zero.

    Args:
        a_size: number of slots.
        ab_mapping: slot index of every b item (flattened and cast to int32).
        b_scores: score of every b item (flattened).
        b_items: items to select from; its first dimension is ``b_size``.
        method: ``"max"`` takes the argmax per slot; ``"sample"`` adds
            ``-10000`` to the non-mapped entries and draws the index with
            ``categorical_sampling``.
        ab_mapping_mask: optional mask; its transpose is multiplied into the
            ``[b_size, a_size]`` membership matrix.

    Returns:
        ``gather(b_items, selected_idx)``.

    Raises:
        ValueError: if ``method`` is neither ``"max"`` nor ``"sample"``.
            Previously this surfaced as an unbound ``selected_idx`` after
            the graph ops had already been built.
    """
    if method not in ("max", "sample"):
        raise ValueError(
            "Unknown selection method: {!r} (expected 'max' or 'sample')"
            .format(method))

    # [b_size]
    b_scores = tf.reshape(b_scores, [-1])
    b_size = bc.get_shape_list2(b_items)[0]
    slot_of_item = tf.cast(tf.reshape(ab_mapping, [-1]), tf.int32)
    indice = tf.stack([tf.range(b_size), slot_of_item], 1)
    # collect_bin[i, j] == 1 when b item i is mapped to slot j.
    collect_bin = tf.scatter_nd(indice, tf.ones([b_size], tf.float32),
                                [b_size, a_size])
    if ab_mapping_mask is not None:
        collect_bin = collect_bin * tf.cast(tf.transpose(ab_mapping_mask),
                                            tf.float32)
    # scattered_score: [a_size, b_size]; zero where no item corresponds.
    scattered_score = tf.transpose(tf.expand_dims(b_scores, 1) * collect_bin)

    if method == "max":
        # NOTE(review): non-mapped entries are 0, so they outrank any
        # negative score here - confirm scores are expected to be positive.
        selected_idx = tf.argmax(scattered_score, axis=1)
    else:  # method == "sample"
        remover = tf.transpose(tf.ones([b_size, a_size]) -
                               collect_bin) * -10000.00
        scattered_score += remover
        selected_idx = categorical_sampling(scattered_score)

    # [n_items, n_layers, hidden]
    return gather(b_items, selected_idx)
Beispiel #10
0
def cate():
    """Demo: draw ``n_sample`` indices per row from a smoothed distribution.

    A fixed 2x5 probability table is normalised with softmax over its logs,
    mixed with a uniform distribution using weight ``alpha`` (0.5), scaled
    element-wise by uniform random noise and passed to ``tf.math.top_k``.
    The normalised table, the mixed table and the chosen indices are
    printed; nothing is returned.
    """
    n_sample = 3
    alpha = tf.constant(0.5)
    raw_table = tf.constant([[0.5, 0.5, 0.01, 0.3, 0.2], [0.5, 0.5, 0.1, 0.03, 0.2]])
    prob = tf.nn.softmax(tf.math.log(raw_table), axis=1)
    _, seq_length = get_shape_list2(prob)

    noise = tf.random.uniform(prob.shape,
                              minval=0,
                              maxval=1,
                              dtype=tf.dtypes.float32)

    uniform_part = tf.ones_like(prob, dtype=tf.float32) / seq_length * alpha
    model_part = prob * (1-alpha)
    final_p = uniform_part + model_part

    print(prob)
    print(final_p)

    _, indice = tf.math.top_k(noise * final_p, k=n_sample, sorted=False)

    print(indice)
Beispiel #11
0
    def apply_3d(self, input_tensor, batch_size, seq_length, attention_mask):
        """Run ``self.apply`` on a non-matrix tensor and restore its shape.

        The input is flattened with ``bc.reshape_to_matrix``, passed through
        ``self.apply`` and the layer output (second element of the returned
        pair) is reshaped back to the input's original shape.
        """
        original_shape = bc.get_shape_list2(input_tensor)
        as_matrix = bc.reshape_to_matrix(input_tensor)
        _, layer_output = self.apply(as_matrix, batch_size, seq_length,
                                     attention_mask)
        return bc.reshape_from_matrix2(layer_output, original_shape)
Beispiel #12
0
 def extend_input_mask(self, input_mask):
     """Append ``self.topic_emb_len`` columns of ones to a 2-D input mask."""
     batch_size, _ = bc.get_shape_list2(input_mask)
     topic_columns = tf.ones([batch_size, self.topic_emb_len], tf.int32)
     return tf.concat([input_mask, topic_columns], axis=1)
Beispiel #13
0
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        """Encode a long input window by window and combine the results.

        The inputs are split by ``split_input`` into
        ``int(config.max_d_seq_length / config.max_seq_length)`` windows of
        ``config.max_seq_length`` tokens, flattened with ``r3to2`` and fed to
        one ``BertModel``.  Its sequence output is reshaped to
        ``[batch, num_window, unit_length, -1]``, printed, and passed to the
        combiner from ``get_combiner``; the result is stored in
        ``self.pooled_output``.

        ``features`` and ``scope`` are accepted but not used here.
        """
        super(MES, self).__init__()
        combiner = get_combiner(is_training, config)

        unit_length = config.max_seq_length
        d_seq_length = config.max_d_seq_length
        num_window = int(d_seq_length / unit_length)
        batch_size, _ = get_shape_list2(input_ids)

        # [Batch, num_window, unit_seq_length]
        windowed_ids, windowed_mask, windowed_segments = split_input(
            input_ids, input_mask, token_type_ids, d_seq_length, unit_length)

        encoder = BertModel(
            config=config,
            is_training=is_training,
            input_ids=r3to2(windowed_ids),
            input_mask=r3to2(windowed_mask),
            token_type_ids=r3to2(windowed_segments),
            use_one_hot_embeddings=use_one_hot_embeddings,
        )

        # [Batch * num_window, seq_length, hidden_size]
        flat_sequence = encoder.get_sequence_output()
        # [Batch, num_window, window_length, hidden_size]
        seq_output = tf.reshape(
            flat_sequence, [batch_size, num_window, unit_length, -1])
        print(seq_output)

        self.pooled_output = combiner(seq_output)
Beispiel #14
0
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        """Zero-pad the inputs and build two BERT classifiers on them.

        ``input_ids``, ``input_mask`` and ``segment_ids`` from ``features``
        are right-padded with zeros by
        ``config.trained_seq_length - config.data_seq_length`` columns.
        A ``BertModel`` plus a 2-way dense head is built under
        ``dual_model_prefix1`` and again under ``dual_model_prefix2``.
        Only the second head's logits are stored (``self.logits``) and used
        for ``self.loss``, the mean sparse softmax cross-entropy against
        ``features["label_ids"]``.

        ``features`` must be a mapping with the four keys read below;
        ``scope`` is accepted but not used here.
        """
        super(MES_pad, self).__init__()
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        batch_size, _ = get_shape_list2(input_ids)

        pad_width = config.trained_seq_length - config.data_seq_length
        zero_pad = tf.zeros([batch_size, pad_width], tf.int32)
        input_ids = tf.concat([input_ids, zero_pad], axis=1)
        input_mask = tf.concat([input_mask, zero_pad], axis=1)
        segment_ids = tf.concat([segment_ids, zero_pad], axis=1)

        def build_encoder(scope_name):
            # One BertModel over the padded inputs inside its own scope.
            # [Batch, unit_seq_length]
            with tf.compat.v1.variable_scope(scope_name):
                return BertModel(
                    config=config,
                    is_training=is_training,
                    input_ids=input_ids,
                    input_mask=input_mask,
                    token_type_ids=segment_ids,
                    use_one_hot_embeddings=use_one_hot_embeddings,
                )

        # The first tower's logits are built but not read afterwards.
        first_model = build_encoder(dual_model_prefix1)
        pooled = first_model.get_pooled_output()
        logits_2d = tf.keras.layers.Dense(2, name="cls_dense")(pooled)  #

        second_model = build_encoder(dual_model_prefix2)
        second_head = tf.keras.layers.Dense(2, name="cls_dense")
        self.logits = second_head(second_model.get_pooled_output())

        flat_labels = tf.reshape(label_ids, [-1])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=flat_labels)
        self.loss = tf.reduce_mean(loss_arr)
Beispiel #15
0
    def network_stacked(self, stacked_input_ids, stacked_input_mask,
                        stacked_segment_ids, use_context):
        """Run the lower module per window, then the upper modules with context.

        The stacked inputs (shape unpacks into batch_size, num_window,
        seq_length) are flattened with ``r3to2`` and passed to
        ``self.lower_module``.  Its last layer output is combined across
        windows by ``exchange_contexts`` and then processed by every module
        in ``self.upper_module_list``; between modules the output goes
        through ``exchange_return_context``.

        Side effects: sets ``batch_size``, ``num_window``, ``embedding_table``,
        ``sequence_output``, ``all_encoder_layers``, ``embedding_output`` and,
        depending on ``self.pooling``, ``pooled_output``; appends to
        ``self.upper_module_inputs``.

        Returns:
            ``self.sequence_output``: the last upper module's final layer
            output cut to the first ``self.window_size`` positions on axis 1.
        """
        batch_size, num_window, seq_length = bc.get_shape_list2(
            stacked_input_ids)
        self.batch_size = batch_size
        self.num_window = num_window

        # Called for its side effects; results are read from lower_module.
        self.lower_module.call(
            r3to2(stacked_input_ids),
            r3to2(stacked_input_mask),
            r3to2(stacked_segment_ids),
        )

        lower_module_last_layer = self.lower_module.all_layer_outputs[
            -1]  # [ batch_size * num_window, seq_length, hidden_size)
        input_to_upper = exchange_contexts(batch_size, lower_module_last_layer,
                                           num_window, use_context)
        # input_vectors : [batch_size * num_window, window_length + num_window, hidden_size]
        added_tokens = num_window
        # Extend both attention axes by `added_tokens`, filling with 1 so the
        # added positions are attendable.
        attention_mask = tf.pad(self.lower_module.attention_mask,
                                [[0, 0], [0, added_tokens], [0, added_tokens]],
                                'CONSTANT',
                                constant_values=1)
        with tf.compat.v1.variable_scope("upper"):
            for upper_module in self.upper_module_list:
                # Record what each upper module received.
                self.upper_module_inputs.append(input_to_upper)
                upper_module.call(input_to_upper, attention_mask)
                middle_output = upper_module.get_last_layer_output()
                input_to_upper = exchange_return_context(
                    batch_size, middle_output, self.window_size, num_window,
                    use_context)

        self.embedding_table = self.lower_module.embedding_layer.embedding_table
        raw_sequence_output = self.upper_module_list[-1].all_layer_outputs[-1]
        # Keep only the first window_size positions of axis 1.
        self.sequence_output = raw_sequence_output[:, :self.window_size, :]
        # NOTE(review): this aliases lower_module.all_layer_outputs, so the
        # extend() below mutates the lower module's own list in place.
        self.all_encoder_layers = self.lower_module.all_layer_outputs
        for upper_module in self.upper_module_list:
            self.all_encoder_layers.extend(upper_module.all_layer_outputs)

        # NOTE(review): the list assembled above is discarded here and
        # all_encoder_layers ends up empty - confirm whether this reset is
        # intentional.
        self.all_encoder_layers = []
        self.embedding_output = self.lower_module.embedding_output
        if self.pooling == "head":
            self.pooled_output = self.head_pooling()
        elif self.pooling == "all":
            self.pooled_output = self.all_pooling()
        elif self.pooling == "none":
            # pooled_output is left unset in this mode.
            pass

        return self.sequence_output
Beispiel #16
0
    def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        tt_input_ids,
        tt_input_mask,
        tt_segment_ids,
    ):
        """Encode the LM batch and the "tt" batch with one shared BertModel.

        Both batches are concatenated along axis 0 and encoded together.
        The rows after the first ``lm_batch_size`` (the "tt" part) are then
        passed through an extra ``ForwardLayer`` and ``mimic_pooling``; the
        result is stored in ``self.tt_feature``.  The shapes of the attention
        mask and of the sliced sequence output are printed.
        """
        joined_ids = tf.concat([masked_input_ids, tt_input_ids], axis=0)
        joined_mask = tf.concat([input_mask, tt_input_mask], axis=0)
        joined_segments = tf.concat([segment_ids, tt_segment_ids], axis=0)

        self.config = config
        self.lm_batch_size, _ = get_shape_list2(masked_input_ids)
        self.model = BertModel(config, is_training, joined_ids, joined_mask,
                               joined_segments, use_one_hot_embeddings)
        self.tt_layer = ForwardLayer(
            config, base.create_initializer(config.initializer_range))
        self.tt_input_mask = tt_input_mask

        # Rows belonging to the "tt" inputs come after the LM rows.
        tt_sequence = self.model.get_sequence_output()[self.lm_batch_size:]
        tt_batch_size, seq_length = get_shape_list2(tt_input_ids)
        tt_attention_mask = create_attention_mask_from_input_mask2(
            tt_sequence, self.tt_input_mask)

        print('tt_attention_mask', tt_attention_mask.shape)
        print("seq_output", tt_sequence.shape)

        tt_sequence = self.tt_layer.apply_3d(tt_sequence, tt_batch_size,
                                             seq_length, tt_attention_mask)
        self.tt_feature = mimic_pooling(tt_sequence, self.config.hidden_size,
                                        self.config.initializer_range)
Beispiel #17
0
    def call(self, input_vectors, attention_mask):
        """Run ``input_vectors`` through this module's layers in order.

        Layer ``i`` runs under the variable scope
        ``"layer_<i + self.layer_idx_base>"``.  Each layer's output, reshaped
        to the input's shape, is appended to ``self.all_layer_outputs``.

        Returns:
            The last layer output in matrix form (not reshaped).
        """
        input_shape = bc.get_shape_list2(input_vectors)
        batch_size, seq_length, _ = input_shape
        hidden = bc.reshape_to_matrix(input_vectors)

        for layer_idx in range(self.n_layers):
            scope_name = "layer_%d" % (layer_idx + self.layer_idx_base)
            with tf.compat.v1.variable_scope(scope_name):
                _, hidden = self.layer_list[layer_idx].apply(
                    hidden, batch_size, seq_length, attention_mask)
                self.all_layer_outputs.append(
                    bc.reshape_from_matrix2(hidden, input_shape))

        return hidden
Beispiel #18
0
def iterate_over(query, doc, doc_mask, total_doc_len, segment_len, step_size):
    """Pair the query with sliding windows of the document.

    For every window start ``0, step_size, 2 * step_size, ...`` below
    ``total_doc_len`` one input is built as
    ``[CLS] query [SEP] doc[:, st:st + segment_len] [SEP] padding``, where
    zero padding covers the part of the window that reaches past
    ``total_doc_len``.  Segment ids are 0 for the query part and equal to
    the input mask for the document part.  All windows are concatenated
    along the batch axis; the concatenated ids tensor is printed.

    Args:
        query: 2-D tensor of query token ids.
        doc: 2-D tensor of document token ids.
        doc_mask: mask matching ``doc``.
        total_doc_len: document width used for windowing and padding.
        segment_len: number of document columns per window.
        step_size: distance between window starts; must be positive.

    Returns:
        ``(all_input_ids, all_input_mask, all_segment_ids, n_segment)``.

    Raises:
        ValueError: if ``step_size`` is not positive (the window start would
            never advance and the loop would not terminate).
    """
    if step_size <= 0:
        raise ValueError(
            "step_size must be positive, got {!r}".format(step_size))

    query_input_mask = tf.ones_like(query, tf.int32)
    query_segment_ids = tf.zeros_like(query, tf.int32)
    batch_size, _ = get_shape_list2(query)

    edge_shape = [batch_size, 1]
    cls_arr = tf.ones(edge_shape, tf.int32) * CLS_ID
    sep_arr = tf.ones(edge_shape, tf.int32) * SEP_ID
    edge_one = tf.ones(edge_shape, tf.int32)
    edge_zero = tf.zeros(edge_shape, tf.int32)

    input_ids_list = []
    input_masks_list = []
    input_segments_list = []

    for st in range(0, total_doc_len, step_size):
        ed = st + segment_len
        # Only the window(s) overhanging the document end need padding.
        pad_len = max(ed - total_doc_len, 0)
        padding = tf.zeros([batch_size, pad_len], tf.int32)
        doc_seg_input_ids = tf.concat([doc[:, st:ed], sep_arr, padding],
                                      axis=1)
        doc_seg_input_mask = tf.concat([doc_mask[:, st:ed], edge_one, padding],
                                       axis=1)
        doc_seg_segment_ids = tf.ones_like(doc_seg_input_ids,
                                           tf.int32) * doc_seg_input_mask

        input_ids_list.append(
            tf.concat([cls_arr, query, sep_arr, doc_seg_input_ids], axis=1))
        input_masks_list.append(
            tf.concat(
                [edge_one, query_input_mask, edge_one, doc_seg_input_mask],
                axis=1))
        input_segments_list.append(
            tf.concat(
                [edge_zero, query_segment_ids, edge_zero, doc_seg_segment_ids],
                axis=1))

    n_segment = len(input_ids_list)
    all_input_ids = tf.concat(input_ids_list, axis=0)
    all_input_mask = tf.concat(input_masks_list, axis=0)
    all_segment_ids = tf.concat(input_segments_list, axis=0)
    print(all_input_ids)
    return all_input_ids, all_input_mask, all_segment_ids, n_segment
Beispiel #19
0
def split_and_append_sep(input_ids, input_mask, segment_ids, seq_length: int,
                         window_length: int, CLS_ID, EOW_ID):
    """Slice flat inputs into windows and add one edge token on each side.

    The inputs are cut along axis 1 into
    ``int(seq_length / (window_length - 2))`` consecutive slices of
    ``window_length - 2`` columns and stacked on a new axis 1.  Along the
    last axis each window then receives:

    * ids: ``CLS_ID`` in front and ``EOW_ID`` at the end,
    * mask: ``1`` on both sides,
    * segment ids: a copy of the window's first entry in front and of its
      second-to-last entry (slice ``-2:-1``) at the end.

    The CLS column previously used the hard-coded value ``23`` and ignored
    the ``CLS_ID`` argument; it now honours ``CLS_ID`` like
    ``split_and_append_sep2`` does.

    Returns:
        ``(stacked_input_ids, stacked_input_mask, stacked_segment_ids)``.
    """
    special_tokens = 2  # CLS, SEP
    src_window_length = window_length - special_tokens
    num_window = int(seq_length / src_window_length)

    window_bounds = [(idx * src_window_length, (idx + 1) * src_window_length)
                     for idx in range(num_window)]

    def stack_windows(flat):
        # [batch_size, seq] -> [batch_size, num_window, src_window_length]
        return tf.stack([flat[:, st:ed] for st, ed in window_bounds], 1)

    stacked_input_ids = stack_windows(input_ids)
    stacked_input_mask = stack_windows(input_mask)
    stacked_segment_ids = stack_windows(segment_ids)

    batch_size, num_window, _ = bc.get_shape_list2(stacked_input_ids)
    edge_shape = [batch_size, num_window, 1]
    cls_arr = tf.ones(edge_shape, tf.int32) * CLS_ID
    eow_arr = tf.ones(edge_shape, tf.int32) * EOW_ID

    stacked_input_ids = tf.concat([cls_arr, stacked_input_ids, eow_arr],
                                  axis=2)

    mask_edge = tf.ones(edge_shape, tf.int32)
    stacked_input_mask = tf.concat([mask_edge, stacked_input_mask, mask_edge],
                                   axis=2)

    edge1 = stacked_segment_ids[:, :, 0:1]
    # NOTE(review): this takes the second-to-last entry, not the last one.
    edge2 = stacked_segment_ids[:, :, -2:-1]
    stacked_segment_ids = tf.concat([edge1, stacked_segment_ids, edge2],
                                    axis=2)

    return stacked_input_ids, stacked_input_mask, stacked_segment_ids
Beispiel #20
0
def candidate_gen(input_ids, input_mask, segment_ids, n_trial):
    """Create ``n_trial`` variants per row by dropping a random span.

    Start positions come from ``draw_starting_point`` (called with seed 0);
    span lengths are ``1 +`` a sample from ``Geometric(0.5)``.  The same
    starts and lengths are applied to ids, segment ids and mask through
    ``drop_middle``.

    Returns:
        ``(new_input_ids, new_segment_ids, new_input_mask, indice,
        length_arr)`` - note that segment ids come before the mask.
    """
    seed = 0
    batch_size, input_len = get_shape_list2(input_ids)

    # Random start of the span for every (row, trial) pair.
    indice = draw_starting_point(batch_size, input_len, input_mask, n_trial, seed)
    flat_indice = tf.reshape(indice, [batch_size*n_trial])

    # The distribution has batch shape [1], so sampling adds a trailing axis
    # that is squeezed away again.
    geo = tfp.distributions.Geometric([0.5])
    sampled = tf.cast(geo.sample(indice.shape) + 1, tf.int32)
    length_arr = tf.squeeze(sampled, 2)
    length_arr_flat = tf.reshape(length_arr, [-1])

    def drop_span(source):
        return drop_middle(batch_size, flat_indice, source, input_len, length_arr_flat, n_trial)

    new_input_ids = drop_span(input_ids)
    new_segment_ids = drop_span(segment_ids)
    new_input_mask = drop_span(input_mask)

    return new_input_ids, new_segment_ids, new_input_mask, indice, length_arr
Beispiel #21
0
    def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        nli_input_ids,
        nli_input_mask,
        nli_segment_ids,
    ):
        """Encode the LM batch and the NLI batch with one shared BertModel.

        The two batches are concatenated along axis 0 (LM rows first).
        ``self.batch_size`` holds the first dimension of
        ``masked_input_ids``; ``self.model`` is the joint ``BertModel``.
        """
        id_parts = [masked_input_ids, nli_input_ids]
        mask_parts = [input_mask, nli_input_mask]
        segment_parts = [segment_ids, nli_segment_ids]

        joined_ids = tf.concat(id_parts, axis=0)
        joined_mask = tf.concat(mask_parts, axis=0)
        joined_segments = tf.concat(segment_parts, axis=0)

        self.batch_size, _ = get_shape_list2(masked_input_ids)
        self.model = BertModel(config, is_training, joined_ids, joined_mask,
                               joined_segments, use_one_hot_embeddings)
Beispiel #22
0
def tlm2(bert_config, use_one_hot_embeddings, features):
    """Score inputs as `prob2 - prob1` of an IndependentLossModel.

    A BertModel is built in inference mode (`is_training=False`) over the
    tensors in `features`; its sequence output is fed to
    `IndependentLossModel.build_predictions`, and the negated difference
    `-(prob1 - prob2)` is returned.

    Args:
        bert_config: configuration passed to both BertModel and
            IndependentLossModel.
        use_one_hot_embeddings: forwarded to BertModel.
        features: mapping providing "input_ids", "input_mask" and
            "segment_ids".

    Returns:
        The tensor `-(loss_model.prob1 - loss_model.prob2)`.
    """
    input_ids, input_mask, segment_ids = (
        features[key] for key in ("input_ids", "input_mask", "segment_ids"))

    # NOTE(review): neither of these results is read below.  The calls are
    # kept so that whatever they do at construction time still happens.
    _hp = hyperparams.HPBert()
    _sequence_shape = bert_common.get_shape_list2(input_ids)

    encoder = BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )

    loss_model = IndependentLossModel(bert_config)
    loss_model.build_predictions(encoder.get_sequence_output())

    prob_gap = loss_model.prob1 - loss_model.prob2
    return -prob_gap
Beispiel #23
0
    def call(self, stacked_input_ids, stacked_input_mask, stacked_segment_ids,
             use_context):
        """Run lower encoder per window, append a context vector, run upper.

        The stacked inputs are unpacked as (batch_size, num_window,
        seq_length) and converted with `r3to2` before being fed to the lower
        module.  The hidden state at the last position of every window is
        passed through `self.mid_layers`, and the result is appended to the
        lower module's last-layer output as one extra position.  The lower
        module's attention mask is padded with ones so that the extra
        position is visible to, and can see, every other position.

        Side effects: sets `embedding_table`, `sequence_output`,
        `embedding_output` and `all_encoder_layers`.  `all_encoder_layers` is
        always left as an empty list (the original code built the
        lower + upper concatenation and then immediately discarded it).

        Args:
            stacked_input_ids: rank-3 tensor of token ids.
            stacked_input_mask: rank-3 input mask.
            stacked_segment_ids: rank-3 segment ids.
            use_context: forwarded unchanged to `self.mid_layers.call`.

        Returns:
            The last layer output of the upper module.
            NOTE(review): this still contains the appended context position
            (it is not sliced off here) -- confirm callers expect that.
        """
        self.lower_module.call(
            r3to2(stacked_input_ids),
            r3to2(stacked_input_mask),
            r3to2(stacked_segment_ids),
        )

        # [batch_size * num_window, seq_length, hidden_size]
        lower_module_last_layer = self.lower_module.all_layer_outputs[-1]
        # Representative vector of each window: the last position.
        window_vectors = lower_module_last_layer[:, -1, :]

        batch_size, num_window, _ = bc.get_shape_list2(stacked_input_ids)
        window_vectors = tf.reshape(window_vectors,
                                    [batch_size, num_window, -1])
        # [batch_size, num_window, hidden_size]
        context_vectors = self.mid_layers.call(window_vectors, use_context)
        # One context vector per window, shaped as a length-1 sequence.
        context_vectors = tf.reshape(context_vectors,
                                     [batch_size * num_window, 1, -1])
        input_vectors = tf.concat([lower_module_last_layer, context_vectors],
                                  axis=1)

        # Extend both attention axes by the single appended position.
        added_tokens = 1
        attention_mask = tf.pad(self.lower_module.attention_mask,
                                [[0, 0], [0, added_tokens], [0, added_tokens]],
                                'CONSTANT',
                                constant_values=1)

        with tf.compat.v1.variable_scope("upper"):
            self.upper_module.call(input_vectors, attention_mask)

        self.embedding_table = self.lower_module.embedding_layer.embedding_table
        self.sequence_output = self.upper_module.all_layer_outputs[-1]
        # Intentionally left empty; see docstring.
        self.all_encoder_layers = []
        self.embedding_output = self.lower_module.embedding_output
        return self.sequence_output
Beispiel #24
0
    def call(self, input_ids, input_mask, segment_ids, topic_ids):
        """Encode tokens together with appended topic embeddings.

        Token embeddings are produced under the "embeddings" scope, topic
        embeddings are looked up and reshaped to
        [-1, topic_emb_len, hidden_size], and both are concatenated along the
        sequence axis before going through `self.n_layers` layers under
        "encoder/layer_<i>".  The trailing `topic_emb_len` positions are cut
        off the final output again.

        Side effects: sets `embedding_layer`, `embedding_output`,
        `topic_tensor`, `attention_mask`, `embedding_table`,
        `sequence_output`, `pooled_output` and appends every layer's output to
        `all_layer_outputs`.

        Returns:
            `self.sequence_output`, i.e. the final layer output without the
            topic positions.
        """
        with tf.compat.v1.variable_scope("embeddings"):
            self.embedding_layer = Embedding(self.config,
                                             self.use_one_hot_embeddings)
            token_embeddings = self.embedding_layer.apply(
                input_ids, segment_ids)
            self.embedding_output = token_embeddings

        # The mask has to cover the topic positions appended below.
        extended_mask = self.extend_input_mask(input_mask)

        looked_up_topics, _ = bc.embedding_lookup2(
            topic_ids, self.n_topics, self.topic_embedding,
            self.topic_embedding_size, self.use_one_hot_embeddings)
        self.topic_tensor = tf.reshape(
            looked_up_topics, [-1, self.topic_emb_len, self.hidden_size])

        encoder_input = tf.concat([token_embeddings, self.topic_tensor],
                                  axis=1)
        input_shape = bc.get_shape_list2(encoder_input)
        batch_size, seq_length, _ = input_shape

        with tf.compat.v1.variable_scope("encoder"):
            self.attention_mask = bc.create_attention_mask_from_input_mask2(
                encoder_input, extended_mask)
            hidden = bc.reshape_to_matrix(encoder_input)
            for layer_idx in range(self.n_layers):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    # apply() returns (intermediate_output, layer_output);
                    # only the layer output is carried forward.
                    _, hidden = self.layer_list[layer_idx].apply(
                        hidden, batch_size, seq_length, self.attention_mask)
                    final_output = bc.reshape_from_matrix2(
                        hidden, input_shape)
                    self.all_layer_outputs.append(final_output)

        self.embedding_table = self.embedding_layer.embedding_table
        # Drop the topic positions that were appended to the sequence.
        self.sequence_output = final_output[:, :-self.topic_emb_len]
        self.pooled_output = mimic_pooling(self.sequence_output,
                                           self.config.hidden_size,
                                           self.config.initializer_range)
        return self.sequence_output
Beispiel #25
0
    def call(self, stacked_input_ids, stacked_input_mask, stacked_segment_ids,
             use_context):
        """Run lower, mid and upper modules with context exchange in between.

        The stacked inputs are unpacked as (batch_size, num_window,
        seq_length) and converted with `r3to2` for the lower module.
        `exchange_contexts` builds the mid-layer input from the lower output,
        `exchange_return_context` builds the upper-layer input from the mid
        output, and the lower module's attention mask -- padded with ones by
        `num_window` positions on both attention axes -- is used for both the
        mid and the upper module.

        Side effects: sets `embedding_table`, `sequence_output`,
        `embedding_output` and `all_encoder_layers`.  `all_encoder_layers` is
        always left as an empty list (the original code built the
        lower + upper concatenation and then immediately discarded it).

        Args:
            stacked_input_ids: rank-3 tensor of token ids.
            stacked_input_mask: rank-3 input mask.
            stacked_segment_ids: rank-3 segment ids.
            use_context: forwarded to `exchange_contexts` and
                `exchange_return_context`.

        Returns:
            The upper module's last layer output restricted to the first
            `self.window_size` positions.
        """
        batch_size, num_window, _ = bc.get_shape_list2(stacked_input_ids)

        self.lower_module.call(
            r3to2(stacked_input_ids),
            r3to2(stacked_input_mask),
            r3to2(stacked_segment_ids),
        )

        # [batch_size * num_window, seq_length, hidden_size]
        lower_module_last_layer = self.lower_module.all_layer_outputs[-1]
        # input_vectors:
        #   [batch_size * num_window, window_length + num_window, hidden_size]
        input_vectors = exchange_contexts(batch_size, lower_module_last_layer,
                                          num_window, use_context)

        # One extra position per window was appended, so both attention axes
        # grow by num_window; the new positions are fully visible.
        added_tokens = num_window
        attention_mask = tf.pad(self.lower_module.attention_mask,
                                [[0, 0], [0, added_tokens], [0, added_tokens]],
                                'CONSTANT',
                                constant_values=1)

        with tf.compat.v1.variable_scope("mid"):
            self.mid_layers.call(input_vectors, attention_mask)
            middle_output = self.mid_layers.get_last_layer_output()

        input_to_upper = exchange_return_context(batch_size, middle_output,
                                                 self.window_size, num_window,
                                                 use_context)
        with tf.compat.v1.variable_scope("upper"):
            self.upper_module.call(input_to_upper, attention_mask)

        self.embedding_table = self.lower_module.embedding_layer.embedding_table
        raw_sequence_output = self.upper_module.all_layer_outputs[-1]
        # Keep only the original window positions, not the context slots.
        self.sequence_output = raw_sequence_output[:, :self.window_size, :]
        # Intentionally left empty; see docstring.
        self.all_encoder_layers = []
        self.embedding_output = self.lower_module.embedding_output
        return self.sequence_output
Beispiel #26
0
    def call(self, input_ids, input_mask, segment_ids):
        """Encode the real tokens alongside learned column embeddings.

        The last `num_column_tokens * num_columns` positions of every input
        are sliced off before embedding.  The projected token embeddings and
        the tensors from `get_column_embeddings` are wrapped in `Tensor2D` and
        passed through `self.forward` `num_layers` times; all iterations use
        the variable scope "layer", with `reuse` enabled from the second
        iteration on.  The main tensor of the last layer is padded back with
        zeros to cover the sliced-off positions.

        Side effects: sets `embedding_output`, `embedding_table`,
        `sequence_output`, `pooled_output` and appends to `all_raw_layers`
        and `all_main_layers`.

        Returns:
            `self.sequence_output` (last main tensor followed by zero vectors
            for the removed positions).
        """
        n_added_tokens = self.num_column_tokens * self.num_columns

        # Cut the trailing placeholder positions from all three inputs.
        input_ids, input_mask, segment_ids = (
            tensor[:, :-n_added_tokens]
            for tensor in (input_ids, input_mask, segment_ids))

        cfg = self.config
        embedded = self.embedding_layer.apply(
            input_ids, segment_ids, cfg.initializer_range, cfg.vocab_size,
            cfg.embedding_size, cfg.type_vocab_size,
            cfg.max_position_embeddings, cfg.hidden_dropout_prob,
            self.use_one_hot_embeddings)
        self.embedding_output = embedded
        # [batch_size, seq_len, hidden_dim]
        projected = self.embedding_projector(embedded)

        batch_size, _, _ = get_shape_list2(projected)

        # Main sequence first, column tensors after it.
        column_embeddings = self.get_column_embeddings(batch_size)
        tensor_list = [Tensor2D(t) for t in [projected] + column_embeddings]
        to_tensor_mask = self.get_to_tensor_mask(batch_size, input_mask)

        for layer_no in range(self.num_layers):
            with tf.compat.v1.variable_scope("layer", reuse=layer_no > 0):
                tensor_list = self.forward(tensor_list, to_tensor_mask)
                self.all_raw_layers.append(tensor_list)
                self.all_main_layers.append(tensor_list[0])

        self.embedding_table = self.embedding_layer.embedding_table

        main_output = self.all_main_layers[-1].get_3d()
        # Zero vectors stand in for the positions removed at the top.
        zero_tail = tf.zeros(
            [batch_size, n_added_tokens, self.config.hidden_size])
        self.sequence_output = tf.concat([main_output, zero_tail], axis=1)
        self.pooled_output = mimic_pooling(self.sequence_output,
                                           self.config.hidden_size,
                                           self.config.initializer_range)
        return self.sequence_output
Beispiel #27
0
    def call(self, input_ids, input_mask, segment_ids):
        """Embed the inputs and run them through the "lower" layer stack.

        Embeddings are created under the "embeddings" scope; the
        `self.n_layers` layers run under "lower/layer_<i>".  Each layer's
        output, reshaped back to the embedding shape, is appended to
        `self.all_layer_outputs`.

        Side effects: sets `embedding_layer`, `embedding_output` and
        `attention_mask`.

        Returns:
            The last layer's output in matrix form (as produced by
            `layer.apply`, not reshaped back).
        """
        with tf.compat.v1.variable_scope("embeddings"):
            self.embedding_layer = Embedding(self.config,
                                             self.use_one_hot_embeddings)
            embedded = self.embedding_layer.apply(input_ids, segment_ids)
            self.embedding_output = embedded
            input_shape = bc.get_shape_list2(embedded)
            batch_size, seq_length, _ = input_shape

        with tf.compat.v1.variable_scope("lower"):
            self.attention_mask = bc.create_attention_mask_from_input_mask2(
                embedded, input_mask)
            hidden = bc.reshape_to_matrix(embedded)
            for layer_idx in range(self.n_layers):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    # apply() returns (intermediate_output, layer_output);
                    # only the layer output is carried forward.
                    _, hidden = self.layer_list[layer_idx].apply(
                        hidden, batch_size, seq_length, self.attention_mask)
                    self.all_layer_outputs.append(
                        bc.reshape_from_matrix2(hidden, input_shape))

        return hidden
Beispiel #28
0
    def network_stacked(self, stacked_input_ids, stacked_input_mask,
                        stacked_segment_ids, use_context):
        """Encode every window with the lower module and combine the results.

        The stacked inputs are unpacked as (batch_size, num_window,
        seq_length) and converted with `r3to2` for the lower module.  Its last
        layer output is reshaped to
        [batch_size, num_window, seq_length, -1] and passed to
        `self.combine_model.call`, whose result becomes `pooled_output`.

        `use_context` is accepted but not read by this method.

        Side effects: sets `pooled_output`, `embedding_table`,
        `sequence_output`, `all_encoder_layers`, `embedding_output`, and
        prints `pooled_output` to stdout.

        Returns:
            `self.sequence_output`, the 4-D reshaped lower-module output.
        """
        batch_size, num_window, seq_length = bc.get_shape_list2(
            stacked_input_ids)

        lower = self.lower_module
        lower.call(
            r3to2(stacked_input_ids),
            r3to2(stacked_input_mask),
            r3to2(stacked_segment_ids),
        )

        # Lower output arrives as [batch_size * num_window, seq_length,
        # hidden_size]; restore the separate window axis.
        per_window_output = tf.reshape(
            lower.all_layer_outputs[-1],
            [batch_size, num_window, seq_length, -1])

        self.pooled_output = self.combine_model.call(per_window_output)
        # NOTE(review): looks like a leftover debug print -- confirm before
        # removing.
        print(self.pooled_output)

        self.embedding_table = lower.embedding_layer.embedding_table
        self.sequence_output = per_window_output
        self.all_encoder_layers = lower.all_layer_outputs
        self.embedding_output = lower.embedding_output
        return self.sequence_output
Beispiel #29
0
def get_dummy_next_sentence_labels(input_ids):
    """Return an all-zero int64 label tensor of shape [batch_size, 1].

    `batch_size` is the leading dimension reported by
    `bert_common.get_shape_list2(input_ids)`.
    """
    batch_size = bert_common.get_shape_list2(input_ids)[0]
    return tf.zeros([batch_size, 1], tf.int64)
Beispiel #30
0
def attention_layer(from_tensor: Tensor2D,
                    to_tensor_list: List[Tensor2D],
                    query_ff,
                    key_ff,
                    value_ff,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    attention_probs_dropout_prob=0.0):
    """Multi-head attention from one tensor to several concatenated tensors.

    Queries come from `from_tensor`; keys and values are computed for every
    tensor in `to_tensor_list` and concatenated along the sequence axis, so
    the query attends over all of them at once.

    Shape letters used in the comments:
        B = batch size, F = `from_tensor` sequence length,
        T = total sequence length over `to_tensor_list`,
        N = `num_attention_heads`, H = `size_per_head`.

    Args:
        from_tensor: source of the queries; its `matrix`, `batch_size` and
            `seq_length` attributes are read.
        to_tensor_list: sources of keys and values; same attributes are read.
        query_ff, key_ff, value_ff: callables applied to a `matrix` to obtain
            the flat query / key / value projections.
        attention_mask: optional mask, 1 for positions that may be attended
            to and 0 for masked ones; expanded with a head axis at position 1.
        num_attention_heads: number of heads N.
        size_per_head: width H of each head.
        attention_probs_dropout_prob: currently unused -- dropout on the
            attention probabilities is disabled (see TODO below).

    Returns:
        Context tensor of shape [B*F, N*H].

    Raises:
        ValueError: if the rank of any `to_tensor.matrix` differs from the
            rank of `from_tensor.matrix`.
    """
    head_reshape_name = "reshape_transpose_for_scores"

    def split_heads(flat_projection, source, op_name):
        # [B*S, N*H] -> [B, S, N, H] -> [B, N, S, H], with B and S taken from
        # `source`.
        per_head = tf.reshape(flat_projection, [
            source.batch_size, source.seq_length, num_attention_heads,
            size_per_head
        ],
                              name=op_name)
        return tf.transpose(a=per_head, perm=[0, 2, 1, 3])

    from_rank = len(get_shape_list2(from_tensor.matrix))
    for to_tensor in to_tensor_list:
        if len(get_shape_list2(to_tensor.matrix)) != from_rank:
            raise ValueError(
                "The rank of `from_tensor` must match the rank of `to_tensor`."
            )

    # [B*F, N*H] -> [B, N, F, H]
    query_layer = split_heads(query_ff(from_tensor.matrix), from_tensor,
                              head_reshape_name)

    key_layer_list = []
    value_layer_list = []
    for to_tensor in to_tensor_list:
        # Key first, then value, for each target tensor: [B, N, T_i, H].
        key_layer_list.append(
            split_heads(key_ff(to_tensor.matrix), to_tensor,
                        head_reshape_name))
        value_layer_list.append(
            split_heads(value_ff(to_tensor.matrix), to_tensor,
                        "value_reshape"))

    # Join all targets along the sequence axis: [B, N, T, H].
    key_layer_all = tf.concat(key_layer_list, axis=2)
    value_layer_all = tf.concat(value_layer_list, axis=2)

    # Scaled dot product of queries and keys: [B, N, F, T].
    scale = 1.0 / math.sqrt(float(size_per_head))
    attention_scores = tf.multiply(
        tf.matmul(query_layer, key_layer_all, transpose_b=True), scale)

    if attention_mask is not None:
        # [B, F, T] -> [B, 1, F, T] so the mask broadcasts over heads.
        attention_mask = tf.expand_dims(attention_mask, axis=[1])

        # 0.0 where attention is allowed, -10000.0 where it is masked; added
        # before the softmax this effectively removes the masked positions.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
        attention_scores += adder

    # Normalise the scores into probabilities: [B, N, F, T].
    attention_probs = tf.nn.softmax(attention_scores)

    # TODO restore dropout on attention_probs (it drops whole tokens to
    # attend to, as in the original Transformer paper).

    # Weighted sum of values: [B, N, F, H] -> [B, F, N, H] -> [B*F, N*H].
    context_layer = tf.matmul(attention_probs, value_layer_all)
    context_layer = tf.transpose(a=context_layer, perm=[0, 2, 1, 3])
    flat_shape = [
        from_tensor.batch_size * from_tensor.seq_length,
        num_attention_heads * size_per_head
    ]
    return tf.reshape(context_layer, flat_shape)