Example #1
    def create_output(self, shared_resources, input_tensors):
        tensors = TensorPortTensors(input_tensors)

        # Convolutional character-level embeddings for question and support tokens.
        [char_emb_question, char_emb_support] = conv_char_embedding(
            len(shared_resources.char_vocab), shared_resources.config['repr_dim'], tensors.word_chars,
            tensors.word_char_length, [tensors.question_words, tensors.support_words])

        model = shared_resources.config['model']
        repr_dim = shared_resources.config['repr_dim']
        input_size = shared_resources.config["repr_dim_input"]
        dropout = shared_resources.config.get("dropout", 0.0)
        tensors.emb_question.set_shape([None, None, input_size])
        tensors.emb_support.set_shape([None, None, input_size])

        inputs = {'question': tensors.emb_question, 'support': tensors.emb_support,
                  'char_question': char_emb_question, 'char_support': char_emb_support,
                  'word_in_question': tf.expand_dims(tensors.word_in_question, 2)}
        inputs_length = {'question': tensors.question_length, 'support': tensors.support_length,
                         'char_question': tensors.question_length, 'char_support': tensors.support_length,
                         'word_in_question': tensors.support_length}
        inputs_mapping = {'question': None, 'support': tensors.support2question,
                          'char_support': tensors.support2question}

        encoder_config = model['encoder_layer']

        encoded, lengths, mappings = modular_encoder(
            encoder_config, inputs, inputs_length, inputs_mapping, repr_dim, dropout, tensors.is_eval)

        with tf.variable_scope('answer_layer'):
            answer_layer_config = model['answer_layer']
            encoded_question = encoded[answer_layer_config.get('question', 'question')]
            encoded_support = encoded[answer_layer_config.get('support', 'support')]

            if 'repr_dim' not in answer_layer_config:
                answer_layer_config['repr_dim'] = repr_dim
            if 'max_span_size' not in answer_layer_config:
                answer_layer_config['max_span_size'] = shared_resources.config.get('max_span_size', 16)
            # Store top-k in a non-trainable variable so the beam size can be
            # changed after graph construction via the assign op below.
            topk = tf.get_variable(
                'topk', initializer=shared_resources.config.get('topk', 1), dtype=tf.int32, trainable=False)
            topk_p = tf.placeholder(tf.int32, [], 'beam_size_setter')
            topk_assign = topk.assign(topk_p)
            self._topk_assign = lambda k: self.tf_session.run(topk_assign, {topk_p: k})

            start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                answer_layer(encoded_question, lengths[answer_layer_config.get('question', 'question')],
                             encoded_support, lengths[answer_layer_config.get('support', 'support')],
                             mappings[answer_layer_config.get('support', 'support')],
                             tensors.answer2support, tensors.is_eval,
                             tensors.correct_start, topk=topk, **answer_layer_config)

        # Predicted answer spans as (support doc index, start, end) triples.
        span = tf.stack([doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

        return TensorPort.to_mapping(self.output_ports, (start_scores, end_scores, span))
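The snippet above keeps the beam size in a non-trainable `topk` variable and wires a placeholder into an assign op, so `self._topk_assign(k)` can change it after the graph is built. Below is a minimal stand-alone sketch of that variable/placeholder/assign pattern, assuming TensorFlow 1.x; the session handling and printed values are illustrative and not part of the class above.

import tensorflow as tf  # TensorFlow 1.x style graph code

# Non-trainable integer variable holding the current beam size.
topk = tf.get_variable('topk', initializer=1, dtype=tf.int32, trainable=False)
# Placeholder + assign op used to overwrite the variable at run time.
topk_p = tf.placeholder(tf.int32, [], 'topk_setter')
topk_assign = topk.assign(topk_p)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(topk))               # 1, the initializer value
    sess.run(topk_assign, {topk_p: 5})  # what self._topk_assign(5) does above
    print(sess.run(topk))               # 5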
Example #2
    def create_output(self, shared_resources, input_tensors):
        tensors = TensorPortTensors(input_tensors)

        # Unpack the input tensors into local aliases.
        question_length = tensors.question_length
        support_length = tensors.support_length
        support2question = tensors.support2question
        word_chars = tensors.word_chars
        word_char_length = tensors.word_char_length
        question = tensors.question
        support = tensors.support
        is_eval = tensors.is_eval
        word_embeddings = tensors.word_embeddings
        assertion_lengths = tensors.assertion_lengths
        assertion2question = tensors.assertion2question
        assertions = tensors.assertions
        definition_lengths = tensors.definition_lengths
        definition2question = tensors.definition2question
        definitions = tensors.definitions
        word2lemma = tensors.word2lemma

        model = shared_resources.config['model']
        repr_dim = shared_resources.config['repr_dim']
        input_size = shared_resources.config["repr_dim_input"]
        dropout = shared_resources.config.get("dropout", 0.0)
        size = shared_resources.config["repr_dim"]
        with_char_embeddings = shared_resources.config.get(
            "with_char_embeddings", False)

        word_embeddings.set_shape([None, input_size])

        if shared_resources.config.get('no_reading', False):
            # No reading module: simply project the pre-trained word embeddings.
            new_word_embeddings = tf.layers.dense(word_embeddings,
                                                  size,
                                                  activation=tf.nn.relu,
                                                  name="embeddings_projection")
            if with_char_embeddings:
                new_word_embeddings = word_with_char_embed(
                    size, new_word_embeddings, tensors.word_chars,
                    tensors.word_char_length, len(shared_resources.char_vocab))
            keep_prob = 1.0 - dropout
            if keep_prob < 1.0:
                new_word_embeddings = tf.cond(
                    is_eval,
                    lambda: new_word_embeddings, lambda: tf.nn.dropout(
                        new_word_embeddings, keep_prob, [1, size]))
            reading_sequence_offset = [support, question, assertions]
        else:
            if shared_resources.config.get("assertion_limit", 0) > 0:
                reading_sequence = [support, question, assertions, definitions]
                reading_sequence_lengths = [
                    support_length, question_length, assertion_lengths,
                    definition_lengths
                ]
                reading_sequence_to_batch = [
                    support2question, None, assertion2question,
                    definition2question
                ]
            else:
                reading_sequence = [support, question, definitions]
                reading_sequence_lengths = [
                    support_length, question_length, definition_lengths
                ]
                reading_sequence_to_batch = [
                    support2question, None, definition2question
                ]

            reading_encoder_config = shared_resources.config['reading_module']
            # Refine the word embeddings by jointly reading support, question and
            # the retrieved assertions/definitions with the configured reading module.
            new_word_embeddings, reading_sequence_offset, _ = embedding_refinement(
                size,
                word_embeddings,
                reading_encoder_config,
                reading_sequence,
                reading_sequence_to_batch,
                reading_sequence_lengths,
                word2lemma,
                word_chars,
                word_char_length,
                is_eval,
                keep_prob=1.0 - shared_resources.config.get('dropout', 0.0),
                with_char_embeddings=with_char_embeddings,
                num_chars=len(shared_resources.char_vocab))

        # Look up the refined embeddings for the question and support token sequences.
        emb_question = tf.nn.embedding_lookup(new_word_embeddings,
                                              reading_sequence_offset[1],
                                              name='embedded_question')
        emb_support = tf.nn.embedding_lookup(new_word_embeddings,
                                             reading_sequence_offset[0],
                                             name='embedded_support')

        inputs = {
            'question': emb_question,
            'support': emb_support,
            'word_in_question': tf.expand_dims(tensors.word_in_question, 2)
        }
        inputs_length = {
            'question': question_length,
            'support': support_length,
            'word_in_question': support_length
        }
        inputs_mapping = {'question': None, 'support': support2question}

        encoder_config = model['encoder_layer']

        encoded, lengths, mappings = modular_encoder(encoder_config, inputs,
                                                     inputs_length,
                                                     inputs_mapping, repr_dim,
                                                     dropout, tensors.is_eval)

        with tf.variable_scope('answer_layer'):
            answer_layer_config = model['answer_layer']
            encoded_question = encoded[answer_layer_config.get(
                'question', 'question')]
            encoded_support = encoded[answer_layer_config.get(
                'support', 'support')]

            if 'repr_dim' not in answer_layer_config:
                answer_layer_config['repr_dim'] = repr_dim
            if 'max_span_size' not in answer_layer_config:
                answer_layer_config[
                    'max_span_size'] = shared_resources.config.get(
                        'max_span_size', 16)
            topk = tf.get_variable('topk',
                                   initializer=shared_resources.config.get(
                                       'topk', 1),
                                   dtype=tf.int32,
                                   trainable=False)
            topk_p = tf.placeholder(tf.int32, [], 'topk_setter')
            topk_assign = topk.assign(topk_p)
            self._topk_assign = lambda k: self.tf_session.run(
                topk_assign, {topk_p: k})

            start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                answer_layer(encoded_question, lengths[answer_layer_config.get('question', 'question')],
                             encoded_support, lengths[answer_layer_config.get('support', 'support')],
                             mappings[answer_layer_config.get('support', 'support')],
                             tensors.answer2support, tensors.is_eval,
                             tensors.correct_start, topk=topk, **answer_layer_config)

        # Predicted answer spans as (support doc index, start, end) triples.
        span = tf.stack(
            [doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

        return TensorPort.to_mapping(self.output_ports,
                                     (start_scores, end_scores, span))
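Example #2 applies word-level dropout only during training by branching on the `is_eval` flag with `tf.cond`, using a `noise_shape` of `[1, size]` so a single mask is broadcast across all rows of the embedding matrix. Below is a minimal stand-alone sketch of that pattern, assuming TensorFlow 1.x; the placeholder names, shapes and keep probability are illustrative and not taken from the snippet.

import tensorflow as tf  # TensorFlow 1.x style graph code

is_eval = tf.placeholder(tf.bool, [], 'is_eval')                   # True at evaluation time
embeddings = tf.placeholder(tf.float32, [None, 64], 'embeddings')  # hypothetical [num_words, dim]
keep_prob = 0.8                                                    # i.e. dropout = 0.2

# Dropout only while training; noise_shape [1, 64] samples a single mask of
# shape [dim] and broadcasts it over all rows, so the same embedding
# dimensions are zeroed for every word.
dropped_embeddings = tf.cond(
    is_eval,
    lambda: embeddings,
    lambda: tf.nn.dropout(embeddings, keep_prob, noise_shape=[1, 64]))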