Example #1
import tensorflow as tf

# project-local modules; in the Jack the Reader code base these live under
# jack.tfutil (import path assumed)
from jack.tfutil import attention, sequence_encoder
def coattention_layer(seq1,
                      seq1_length,
                      seq2,
                      seq2_length,
                      attn_type='diagonal_bilinear',
                      scaled=True,
                      with_sentinel=False,
                      seq2_to_seq1=None,
                      num_layers=1,
                      encoder=None):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        attn_fun = attention.bilinear_attention
    elif attn_type == 'dot':
        attn_fun = attention.dot_attention
    elif attn_type == 'diagonal_bilinear':
        attn_fun = attention.diagonal_bilinear_attention
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    _, _, attn_states1, attn_states2, co_attn_state = attention.coattention(
        seq1, seq1_length, seq2, seq2_length, scaled, with_sentinel, attn_fun,
        seq2_to_seq1=seq2_to_seq1)

    if num_layers < 2:
        out = tf.concat([attn_states1, co_attn_state], 2)
    else:
        seq1 = []
        attn_states1 = [attn_states1]
        attn_states2 = [attn_states2]
        co_attn_state = [co_attn_state]
        for i in range(1, num_layers):
            with tf.variable_scope(str(i)):
                enc_1 = sequence_encoder.encoder(attn_states1[-1],
                                                 seq1_length,
                                                 name='encoder1',
                                                 **encoder)
                enc_2 = sequence_encoder.encoder(attn_states2[-1],
                                                 seq2_length,
                                                 name='encoder2',
                                                 **encoder)
                seq1.append(enc_1)
                _, _, new_attn_states1, new_attn_states2, new_co_attn_state = attention.coattention(
                    enc_1,
                    seq1_length,
                    enc_2,
                    seq2_length,
                    scaled,
                    with_sentinel,
                    attn_fun,
                    seq2_to_seq1=seq2_to_seq1)
                attn_states1.append(new_attn_states1)
                attn_states2.append(new_attn_states2)
                co_attn_state.append(new_co_attn_state)
        out = tf.concat(seq1 + attn_states1 + co_attn_state, 2)
    # out.set_shape([None, None, (3 * num_layers - 1) * sum(s.get_shape()[-1].value for s in seq1) +
    #               seq2.get_shape()[-1].value])
    return out
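A minimal usage sketch (hypothetical placeholder tensors and dimensions; assumes the modules imported above are available):

# two batched sequences and their lengths
seq1 = tf.placeholder(tf.float32, [None, None, 128])   # [batch, len1, repr]
seq1_length = tf.placeholder(tf.int32, [None])
seq2 = tf.placeholder(tf.float32, [None, None, 128])   # [batch, len2, repr]
seq2_length = tf.placeholder(tf.int32, [None])

out = coattention_layer(seq1, seq1_length, seq2, seq2_length,
                        attn_type='diagonal_bilinear', num_layers=1)
# with num_layers > 1, an `encoder` config dict is also required for the
# intermediate sequence_encoder.encoder calls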
Example #2
import tensorflow as tf

# bilinear_answer_layer, mlp_answer_layer, conditional_answer_layer,
# bidaf_answer_layer and sequence_encoder are project-local helpers
# (imports assumed)
def answer_layer(encoded_question, question_length, encoded_support, support_length,
                 support2question, answer2support, is_eval, correct_start=None, beam_size=1, max_span_size=10000,
                 encoder=None, module='bilinear', repr_dim=100, **kwargs):
    if module == 'bilinear':
        return bilinear_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            support2question, answer2support, is_eval, beam_size, max_span_size)
    elif module == 'mlp':
        return mlp_answer_layer(repr_dim, encoded_question, question_length, encoded_support, support_length,
                                support2question, answer2support, is_eval, beam_size, max_span_size)
    elif module == 'conditional':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            correct_start, support2question, answer2support, is_eval, beam_size, max_span_size)
    elif module == 'conditional_bilinear':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            correct_start, support2question, answer2support, is_eval, beam_size, max_span_size, bilinear=True)
    elif module == 'bidaf':
        # the BiDAF answer layer needs a second encoder pass over the support,
        # so an `encoder` config dict must be supplied for this module
        if 'repr_dim' not in encoder:
            encoder['repr_dim'] = repr_dim
        encoded_support_end = sequence_encoder.encoder(
            encoded_support, support_length, name='encoded_support_end', **encoder)
        encoded_support_end = tf.concat([encoded_support, encoded_support_end], 2)
        return bidaf_answer_layer(encoded_support, encoded_support_end, support_length,
                                  support2question, answer2support, is_eval,
                                  beam_size=beam_size, max_span_size=max_span_size)
    else:
        raise ValueError("Unknown answer layer type: %s" % module)
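A hypothetical call (the tensor arguments are assumed to come from an upstream encoder; the five return values follow the answer layers used in Example #4):

start_scores, end_scores, doc_idx, start_pointer, end_pointer = answer_layer(
    encoded_question, question_length, encoded_support, support_length,
    support2question, answer2support, is_eval,
    module='conditional_bilinear', repr_dim=100,
    correct_start=correct_start, beam_size=5)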
Example #3
import logging

import tensorflow as tf

logger = logging.getLogger(__name__)

# `encoder` and `interaction_layer` are project-local helpers (imports assumed)
def modular_encoder(encoder_config, inputs, inputs_length, inputs_mapping, default_repr_dim, is_eval):
    outputs = dict(inputs)
    outputs_length = dict(inputs_length)
    outputs_mapping = dict(inputs_mapping)
    seen_layer = set()
    for i, module in enumerate(encoder_config):
        if 'name' not in module:
            inp_str = module['input'] if isinstance(module['input'], str) else '_'.join(module['input'])
            module['name'] = '_'.join([str(i), inp_str, module['module']])
        reuse = module['name'] in seen_layer
        seen_layer.add(module['name'])
        try:
            key = module['input']
            out_key = module.get('output', key)
            if module['module'] in ['concat', 'add', 'mul', 'weighted_add', 'sub']:
                outputs_length[out_key] = outputs_length[key[0]]
                outputs_mapping[out_key] = outputs_mapping[key[0]]
                if module['module'] == 'concat':
                    outputs[out_key] = tf.concat([outputs[k] for k in key], 2, name=module['name'])
                    continue
                if module['module'] == 'add':
                    outputs[out_key] = tf.add_n([outputs[k] for k in key], name=module['name'])
                    continue
                if module['module'] == 'sub':
                    outputs[out_key] = tf.subtract(outputs[key[0]], outputs[key[1]], name=module['name'])
                    continue
                if module['module'] == 'mul':
                    o = outputs[key[0]]
                    for k in key[1:-1]:
                        o *= outputs[k]
                    outputs[out_key] = tf.multiply(o, outputs[key[-1]], name=module['name'])
                    continue
                if module['module'] == 'weighted_add':
                    bias = module.get('bias', 0.0)
                    g = tf.layers.dense(
                        tf.concat([outputs[k] for k in key], 2),
                        outputs[key[0]].get_shape()[-1].value,
                        tf.sigmoid, bias_initializer=tf.constant_initializer(bias))
                    # convex combination of the two inputs, gated element-wise by g
                    outputs[out_key] = tf.identity(
                        g * outputs[key[0]] + (1.0 - g) * outputs[key[1]],
                        name=module['name'])
                    continue
            if 'repr_dim' not in module:
                module['repr_dim'] = default_repr_dim
            if 'dependent' in module:
                dep_key = module['dependent']
                outputs[out_key] = interaction_layer(
                    outputs[key], outputs_length[key],
                    outputs[dep_key], outputs_length[dep_key],
                    outputs_mapping[key], reuse=reuse, **module)
            else:
                outputs[out_key] = encoder(outputs[key], outputs_length[key], reuse=reuse, **module)
            outputs_length[out_key] = outputs_length[key]
            outputs_mapping[out_key] = outputs_mapping[key]
            dropout = module.get('dropout', 0.0)
            if dropout > 0.0:
                outputs[out_key] = tf.cond(
                    is_eval,
                    lambda: outputs[out_key],
                    lambda: tf.nn.dropout(
                        outputs[out_key], 1.0 - dropout, noise_shape=[1, 1, outputs[out_key].get_shape()[-1].value]))
        except Exception:
            logger.error('Creating module %s failed.', module['name'])
            raise
    return outputs, outputs_length, outputs_mapping
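A sketch of the kind of `encoder_config` this loop consumes. The combination module names ('concat', 'add', 'mul', 'weighted_add', 'sub') and the keys ('input', 'output', 'name', 'bias', 'dropout') come from the code above; the 'lstm' module is an assumption about what the project-local `encoder` helper supports. Note that list-valued inputs need an explicit 'output' key, since the output key otherwise defaults to the (unhashable) input list:

encoder_config = [
    {'module': 'lstm', 'input': 'support', 'output': 'support_enc'},
    {'module': 'lstm', 'input': 'support', 'output': 'support_enc2', 'dropout': 0.2},
    {'module': 'weighted_add', 'input': ['support_enc', 'support_enc2'],
     'output': 'support_gated', 'bias': 1.0},
]

outputs, lengths, mappings = modular_encoder(
    encoder_config,
    inputs={'support': emb_support, 'question': emb_question},
    inputs_length={'support': support_length, 'question': question_length},
    inputs_mapping={'support': support2question, 'question': None},
    default_repr_dim=100, is_eval=is_eval)
final_states = outputs['support_gated']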
Example #4
# Method of a TF-1.x model class; `np` (numpy), `tf` (tensorflow), `misc`,
# `conv_char_embedding`, `highway_network`, `encoder` and the answer-layer
# helpers come from the enclosing module (imports assumed).
    def create_output(self, shared_resources, input_tensors):
        tensors = TensorPortTensors(input_tensors)
        with tf.variable_scope(
                "fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            batch_size = tf.shape(tensors.question_length)[0]
            max_question_length = tf.reduce_max(tensors.question_length)
            support_mask = misc.mask_for_lengths(tensors.support_length)

            input_size = shared_resources.config["repr_dim_input"]
            size = shared_resources.config["repr_dim"]
            with_char_embeddings = shared_resources.config.get(
                "with_char_embeddings", False)

            # set shapes for inputs
            tensors.emb_question.set_shape([None, None, input_size])
            tensors.emb_support.set_shape([None, None, input_size])

            emb_question = tensors.emb_question
            emb_support = tensors.emb_support
            if with_char_embeddings:
                # compute combined embeddings
                [char_emb_question, char_emb_support] = conv_char_embedding(
                    len(shared_resources.char_vocab), size, tensors.word_chars,
                    tensors.word_char_length,
                    [tensors.question_words, tensors.support_words])

                emb_question = tf.concat([emb_question, char_emb_question], 2)
                emb_support = tf.concat([emb_support, char_emb_support], 2)
                input_size += size

                # set shapes for inputs
                emb_question.set_shape([None, None, input_size])
                emb_support.set_shape([None, None, input_size])

            # compute encoder features
            question_features = tf.ones(
                tf.stack([batch_size, max_question_length, 2]))

            v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                     initializer=tf.constant_initializer(1.0))

            wiq_w = tf.matmul(tf.gather(emb_question * v_wiqw,
                                        tensors.support2question),
                              emb_support,
                              adjoint_b=True)
            wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

            question_binary_mask = tf.gather(
                tf.sequence_mask(tensors.question_length, dtype=tf.float32),
                tensors.support2question)
            wiq_w = tf.reduce_sum(
                tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2),
                [1])

            # [B, L , 2]
            support_features = tf.stack([tensors.word_in_question, wiq_w], 2)

            # highway layer to allow for interaction between concatenated embeddings
            if with_char_embeddings:
                with tf.variable_scope("char_embeddings") as vs:
                    emb_question = tf.layers.dense(
                        emb_question, size, name="embeddings_projection")
                    emb_question = highway_network(emb_question, 1)
                    vs.reuse_variables()
                    emb_support = tf.layers.dense(emb_support,
                                                  size,
                                                  name="embeddings_projection")
                    emb_support = highway_network(emb_support, 1)

            keep_prob = 1.0 - shared_resources.config.get("dropout", 0.0)

            def apply_word_dropout():
                # one dropout mask per feature dimension, shared across time steps
                q = tf.nn.dropout(
                    emb_question, keep_prob,
                    noise_shape=[1, 1, emb_question.get_shape()[-1].value])
                s = tf.nn.dropout(
                    emb_support, keep_prob,
                    noise_shape=[1, 1, emb_support.get_shape()[-1].value])
                return q, s

            emb_question, emb_support = tf.cond(
                tensors.is_eval, lambda: (emb_question, emb_support),
                apply_word_dropout)

            # extend embeddings with features
            emb_question_ext = tf.concat([emb_question, question_features], 2)
            emb_support_ext = tf.concat([emb_support, support_features], 2)

            # encode question and support
            encoder_type = shared_resources.config.get('encoder',
                                                       'lstm').lower()
            if encoder_type in ['lstm', 'sru', 'gru']:
                size = size + 2 if encoder_type == 'sru' else size  # to allow for use of residual in SRU
                encoded_question = encoder(emb_question_ext,
                                           tensors.question_length,
                                           size,
                                           module=encoder_type)
                encoded_support = encoder(emb_support_ext,
                                          tensors.support_length,
                                          size,
                                          module=encoder_type,
                                          reuse=True)
                projection_initializer = tf.constant_initializer(
                    np.concatenate([np.eye(size), np.eye(size)]))
                encoded_question = tf.layers.dense(
                    encoded_question,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_q')
                encoded_support = tf.layers.dense(
                    encoded_support,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_s')
            else:
                raise ValueError(
                    "Only rnn ('lstm', 'sru', 'gru') encoder allowed for FastQA!"
                )

            answer_layer = shared_resources.config.get('answer_layer',
                                                       'conditional').lower()

            if answer_layer == 'conditional':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             beam_size=shared_resources.config.get("beam_size", 1),
                                             max_span_size=shared_resources.config.get("max_span_size", 10000))
            elif answer_layer == 'conditional_bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             beam_size=shared_resources.config.get("beam_size", 1),
                                             max_span_size=shared_resources.config.get("max_span_size", 10000),
                                             bilinear=True)
            elif answer_layer == 'bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    bilinear_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                          tensors.support_length,
                                          tensors.support2question, tensors.answer2support, tensors.is_eval,
                                          beam_size=shared_resources.config.get("beam_size", 1),
                                          max_span_size=shared_resources.config.get("max_span_size", 10000))
            else:
                raise ValueError("Unknown answer layer type: %s" % answer_layer)

            span = tf.stack(
                [doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

            return TensorPort.to_mapping(self.output_ports,
                                         (start_scores, end_scores, span))
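The similarity-weighted word-in-question feature (`wiq_w`) is the distinctive part of this encoder and can be isolated in a self-contained sketch. The shapes and the softmax-over-support-then-sum-over-question reduction follow the code above; the random inputs are illustrative, and for simplicity one support per question is assumed (so the `support2question` gather and the question padding mask are dropped):

import tensorflow as tf

B, Lq, Ls, D = 2, 4, 6, 8   # batch, question length, support length, repr dim

emb_question = tf.random_normal([B, Lq, D])
emb_support = tf.random_normal([B, Ls, D])
support_length = tf.constant([6, 5])

# learned per-dimension weighting of the question embeddings
v_wiqw = tf.get_variable("v_wiq_w", [1, 1, D],
                         initializer=tf.constant_initializer(1.0))

# similarity of every question word with every support word: [B, Lq, Ls]
scores = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)

# push padded support positions toward -inf before the softmax
support_mask = (1.0 - tf.sequence_mask(support_length, Ls, dtype=tf.float32)) * -1e9
scores += tf.expand_dims(support_mask, 1)

# softmax over support words, then total attention mass per support word: [B, Ls]
wiq_w = tf.reduce_sum(tf.nn.softmax(scores), axis=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(wiq_w))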