Esempio n. 1
0
    def decode_train(self, init_state, dec_input_tokens, dec_input_lengths,
                     dec_output_lengths):
        """Build the teacher-forced decoding graph and return its logits.

        Args:
            init_state: Initial decoder state; the size of its last dimension
                is used as the RNN cell size.
            dec_input_tokens: Token ids fed to the decoder; they are looked up
                in `self.embeddings`.
            dec_input_lengths: Passed to `TrainingHelper` as `sequence_length`.
            dec_output_lengths: Its maximum bounds the number of decode steps.

        Returns:
            The `rnn_output` of the decode, i.e. the decoder outputs after the
            dense projection (one score per row of `self.embeddings`).

        Side effects:
            Assigns `self.cell` and `self.projection`.
        """
        decoder_scope_name = self.shared_scope or "RNNDecoder"
        with tf.variable_scope(decoder_scope_name, reuse=tf.AUTO_REUSE):
            self.cell = setup_cell(self.cell_type,
                                   shape(init_state, -1),
                                   self.num_layers,
                                   keep_prob=self.keep_prob)

            # NOTE(review): the scope captured here (the 'projection' scope,
            # not the enclosing decoder scope) is the one handed to
            # dynamic_decode below -- confirm this is intended.
            with tf.variable_scope('projection') as decode_scope:
                num_embeddings = shape(self.embeddings, 0)
                self.projection = tf.layers.Dense(num_embeddings,
                                                  use_bias=True,
                                                  trainable=True)

            with tf.name_scope('Train'):
                input_embeddings = tf.nn.embedding_lookup(
                    self.embeddings, dec_input_tokens)
                # Batch-major inputs (time_major=False).
                teacher_forcing = tf.contrib.seq2seq.TrainingHelper(
                    input_embeddings,
                    sequence_length=dec_input_lengths,
                    time_major=False)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.cell,
                    helper=teacher_forcing,
                    initial_state=init_state,
                    output_layer=self.projection)
                decoded = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    impute_finished=True,
                    maximum_iterations=tf.reduce_max(dec_output_lengths),
                    scope=decode_scope)
                final_outputs = decoded[0]
                logits = final_outputs.rnn_output
        return logits
Esempio n. 2
0
    def __init__(self, sess, config, encoder, activation=tf.nn.relu):
        """Build the category-classification graph on top of a shared encoder.

        Args:
            sess: Session; forwarded to the base class and stored on
                `self.sess`.
            config: Task configuration; `config.dropout_rate` is read here.
            encoder: Encoder object supplying `is_training`, `vocab`,
                `wbase`/`cbase`, `word_encoder`, `encode` and
                `get_batched_mention_emb`.
            activation: Activation function stored on `self.activation`.

        Attributes set here include the placeholders (`self.ph`),
        `self.adv_outputs`, `self.outputs` (logits), `self.predictions`,
        `self.losses` and `self.loss`.
        """
        super(CategoryClassification, self).__init__(sess, config)
        self.sess = sess
        self.encoder = encoder
        self.activation = activation
        self.is_training = encoder.is_training
        # Dropout is only applied while training; keep_prob is 1.0 otherwise.
        self.keep_prob = 1.0 - tf.to_float(
            self.is_training) * config.dropout_rate
        self.vocab = encoder.vocab

        with tf.name_scope('Placeholder'):
            self.ph = recDotDefaultDict()
            # [batch_size, max_num_context, max_num_words]
            self.ph.text.word = tf.placeholder(
                tf.int32, name='contexts.word',
                shape=[None, None, None]) if self.encoder.wbase else None
            self.ph.text.char = tf.placeholder(
                tf.int32, name='contexts.char',
                shape=[None, None, None, None]) if self.encoder.cbase else None

            # Last axis holds the (start, end) pair that is unstacked below.
            self.ph.link = tf.placeholder(tf.int32,
                                          name='link',
                                          shape=[None, None, 2])
            # Previously this placeholder was also named 'link' (copy-paste),
            # which made TensorFlow silently rename it to 'link_1'.
            self.ph.target = tf.placeholder(tf.int32,
                                            name='target',
                                            shape=[None])

            # Lengths are derived by counting non-zero ids -- presumably id 0
            # is padding; verify against the vocabulary.
            self.sentence_length = tf.count_nonzero(self.ph.text.word, axis=-1)
            self.num_contexts = tf.cast(
                tf.count_nonzero(self.sentence_length, axis=-1), tf.float32)

        with tf.name_scope('Encoder'):
            word_repls = encoder.word_encoder.word_encode(self.ph.text.word)
            char_repls = encoder.word_encoder.char_encode(self.ph.text.char)

            text_emb, text_outputs, state = encoder.encode(
                [word_repls, char_repls], self.sentence_length)
            mention_starts, mention_ends = tf.unstack(self.ph.link, axis=-1)

            mention_repls, head_scores = encoder.get_batched_mention_emb(
                text_emb, text_outputs, mention_starts,
                mention_ends)  # [batch_size, max_n_contexts, mention_size]
            # Merge the batch and context axes of the encoder outputs.
            self.adv_outputs = tf.reshape(
                text_outputs, [
                    shape(text_outputs, 0) * shape(text_outputs, 1),
                    shape(text_outputs, 2),
                    shape(text_outputs, 3)
                ]
            )  # [batch_size * max_n_contexts, max_sentence_length, output_size]

        with tf.variable_scope('Inference'):
            self.outputs = self.inference(mention_repls)
            self.predictions = tf.argmax(self.outputs, axis=-1)

        with tf.name_scope("Loss"):
            self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.outputs, labels=self.ph.target)

            self.loss = tf.reduce_mean(self.losses)
Esempio n. 3
0
    def predict_relation(self, query_emb, mention_emb, mention_scores,
                         is_query_subjective):
        '''
        Score every relation label for each (query, mention) pair.

        Args:
        - query_emb: rank-2 tensor that is tiled n_mentions times along axis 0
          (presumably [1, emb] -- confirm against the caller).
        - mention_emb: [n_mentions, emb]
        - mention_scores: [n_mentions]; added to every real-relation logit.
        - is_query_subjective: A boolean. If true, the pair is built as
          (query, mention), otherwise as (mention, query), so the same
          network scores both directions.
        Return:
        - [n_mentions, self.vocab.rel.size] logits; column 0 is the constant
          zero "no relation" logit.

        After building the network this marks the current variable scope as
        reusing, so later calls share the variables created here.
        '''
        with tf.variable_scope('pair_emb'):
            n_mentions = shape(mention_emb, -2)
            tiled_query = tf.tile(query_emb,
                                  [n_mentions, 1])  # [n_mentions, emb]
            # The concatenation order encodes which side the query is on.
            if is_query_subjective:
                pair_parts = [tiled_query, mention_emb]
            else:
                pair_parts = [mention_emb, tiled_query]
            hidden = tf.concat(pair_parts, -1)  # [n_mentions, 2 * emb]

            for layer_idx in range(self.ffnn_depth):
                with tf.variable_scope('Forward%d' % layer_idx):
                    projected = linear(hidden,
                                       output_size=self.ffnn_size,
                                       activation=self.activation)
                    hidden = tf.nn.dropout(projected,
                                           keep_prob=self.keep_prob)

            with tf.variable_scope('Output'):
                weights = self.rel_w
                # One output per relation label except "no relation".
                num_real_relations = self.vocab.rel.size - 1
                biases = tf.get_variable('biases', [num_real_relations])
                relation_logits = tf.nn.xw_plus_b(hidden, weights, biases)
                no_relation = tf.zeros([shape(mention_scores, 0), 1],
                                       tf.float32)
                logits = tf.concat([no_relation, relation_logits], axis=-1)

                # "Type A" penalty: the mention score is added to every real
                # relation and nothing to "no relation". (A "type B" variant
                # that instead subtracted the score from the "no relation"
                # column was left disabled by the original author.)
                tiled_scores = tf.tile(tf.expand_dims(mention_scores, 1),
                                       [1, num_real_relations])
                mention_unconfidence_penalty = tf.concat(
                    [no_relation, tiled_scores], axis=-1)

        tf.get_variable_scope().reuse_variables()
        return logits + mention_unconfidence_penalty
Esempio n. 4
0
    def decode_test(self, init_state, start_token=PAD_ID, end_token=PAD_ID):
        """Build the beam-search decoding graph and return the predicted ids.

        Reads `self.cell` and `self.projection`, which this method does not
        create, so they must already be set when it is called.

        Args:
            init_state: Initial decoder state; it is tiled `self.beam_width`
                times for the beam search.
            start_token: Token id every sequence starts from.
            end_token: Token id that terminates a hypothesis.

        Returns:
            Predicted ids transposed to [batch_size, beam_width, T].
        """
        decoder_scope_name = self.shared_scope or "RNNDecoder"
        with tf.variable_scope(decoder_scope_name,
                               reuse=tf.AUTO_REUSE) as scope:
            with tf.name_scope('Test'):
                beam_init_state = tf.contrib.seq2seq.tile_batch(
                    init_state, multiplier=self.beam_width)
                num_examples = shape(init_state, 0)
                first_tokens = tf.tile(
                    tf.constant([start_token], dtype=tf.int32),
                    [num_examples])
                beam_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=self.cell,
                    embedding=self.embeddings,
                    start_tokens=first_tokens,
                    end_token=end_token,
                    initial_state=beam_init_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection,
                    length_penalty_weight=self.length_penalty_weight)

                decoded = tf.contrib.seq2seq.dynamic_decode(
                    beam_decoder,
                    impute_finished=False,
                    maximum_iterations=self.max_output_len,
                    scope=scope)
                # [batch_size, T, beam_width]
                beam_ids = decoded[0].predicted_ids
                # [batch_size, beam_width, T]
                predictions = tf.transpose(beam_ids, perm=[0, 2, 1])
        return predictions
Esempio n. 5
0
    def batch_inference(self, text_emb, text_outputs, sentence_length):
        """Run `self.inference` on each batch element and stack the results.

        The per-example graph is applied inside a `tf.while_loop`; each
        iteration appends one row to the accumulated tensors.

        Args:
            text_emb: Batched input embeddings; indexed along axis 0.
            text_outputs: Batched encoder outputs; indexed along axis 0.
            sentence_length: Batched sentence lengths; summed over the last
                axis to obtain each document's length.

        Returns:
            A pair `(predictions, loss)` where `predictions` is
            `[relations, mentions]` (each [batch, max_num_mentions, 2]) and
            `loss` is the mean of the per-example losses.
        """
        doc_lengths = tf.reduce_sum(sentence_length, axis=-1)
        batch_size = shape(text_emb, 0)
        # Every example is padded up to a number of mentions proportional to
        # the longest document in the batch.
        longest_doc = tf.to_float(tf.reduce_max(doc_lengths))
        max_num_mentions = tf.to_int32(
            tf.floor(longest_doc * self.mention_ratio))

        def append_row(stacked, row):
            # Add `row` as a new leading entry of `stacked`.
            return tf.concat([stacked, tf.expand_dims(row, axis=0)], axis=0)

        def keep_going(i, *unused_accumulators):
            return i < batch_size

        def step(i, relation_rows, mention_rows, loss_rows):
            example_relations, example_mentions, example_loss = self.inference(
                text_emb[i],
                text_outputs[i],
                sentence_length[i],
                self.ph.query[i],
                self.ph.mentions[i],
                self.ph.num_mentions[i],
                self.ph.target.subjective[i],
                self.ph.target.objective[i],
                self.ph.loss_weights_by_label[i],
                max_num_mentions=max_num_mentions)
            return (i + 1,
                    append_row(relation_rows, example_relations),
                    append_row(mention_rows, example_mentions),
                    append_row(loss_rows, example_loss))

        counter = tf.zeros((), dtype=tf.int32)
        # Accumulators start empty along axis 0 and grow by one row per step.
        initial_vars = [
            counter,
            tf.zeros((0, max_num_mentions, 2), dtype=tf.int32),
            tf.zeros((0, max_num_mentions, 2), dtype=tf.int32),
            tf.zeros((0,), dtype=tf.float32),
        ]
        growing_shapes = [
            counter.get_shape(),
            tf.TensorShape([None, None, 2]),
            tf.TensorShape([None, None, 2]),
            tf.TensorShape([None]),
        ]
        _, relations, mentions, losses = tf.while_loop(
            keep_going,
            step,
            initial_vars,
            shape_invariants=growing_shapes,
            parallel_iterations=self.max_batch_size,
        )
        return [relations, mentions], tf.reduce_mean(losses, axis=-1)
Esempio n. 6
0
def extract_span(encoder_outputs, spans):
    '''
    Average the encoder outputs inside each example's span.

    Args:
    - encoder_outputs: [batch_size, max_num_word, hidden_size]
    - spans: unstacked along axis 1 into per-example (begin, end) positions;
      the end position is inclusive.
    Return:
    - One averaged vector per example, stacked along axis 0
      ([batch_size, hidden_size]).
    '''
    with tf.name_scope('ExtractSpan'):
        span_begins, span_ends = tf.unstack(spans, axis=1)
        batch_size = shape(encoder_outputs, 0)
        hidden_size = shape(encoder_outputs, -1)

        def span_mean(i):
            # Mean over the tokens begin..end (inclusive) of example i,
            # returned with a leading axis of size 1 so it can be appended.
            tokens = encoder_outputs[i][span_begins[i]:span_ends[i] + 1]
            return tf.expand_dims(tf.reduce_mean(tokens, axis=0), axis=0)

        def keep_going(i, pooled):
            return i < batch_size

        def step(i, pooled):
            return i + 1, tf.concat([pooled, span_mean(i)], axis=0)

        counter = tf.zeros((), dtype=tf.int32)
        # Starts with zero rows; one row is appended per example.
        pooled = tf.zeros((0, hidden_size))
        _, pooled = tf.while_loop(
            keep_going,
            step,
            [counter, pooled],
            shape_invariants=[
                counter.get_shape(),
                tf.TensorShape([None, hidden_size]),
            ])
        return pooled
Esempio n. 7
0
    def __init__(self, sess, config, encoder, tasks):
        """Build the adversarial task-discriminator graph and its loss.

        For every task the shared representation is collected together with
        the task's index; a linear classifier behind a gradient-reversal op
        (`flip_gradient`) is trained to predict the task index from it. For
        tasks whose encoder is a `MultiEncoderWrapper`, a difference loss
        between the shared and private halves is added.

        Args:
            sess: Session; forwarded to the base class and stored on
                `self.sess`.
            config: Configuration; `adv_weight` and `diff_weight` are read.
            encoder: Stored on `self.encoder`.
            tasks: Iterable of task models exposing `encoder` and
                `adv_outputs`.

        Attributes set here:
            self.outputs: Softmax distribution over tasks.
            self.loss: `adv_weight * l_adv + diff_weight * l_diff`.
        """
        super().__init__(sess, config)

        self.sess = sess
        self.encoder = encoder
        adv_outputs = []
        task_ids = []
        # Accumulated over all tasks. Previously this was overwritten on each
        # iteration, so only the last task contributed to the loss.
        l_diff = 0.0
        for i, t in enumerate(tasks):
            print('adv_outputs', t, t.adv_outputs)
            if isinstance(t.encoder, MultiEncoderWrapper):
                # Split the encoders' representations into the task-shared
                # and the task-private halves.
                assert len(t.adv_outputs.get_shape()
                           ) == 3  # [*, max_sentence_length, hidden_size]
                shared_repls, private_repls = tf.split(t.adv_outputs,
                                                       2,
                                                       axis=2)

                # Take the average of the representations across time steps.
                shared_repls = tf.reduce_mean(shared_repls, axis=1)
                private_repls = tf.reduce_mean(private_repls, axis=1)

                # This follows the paper, but is it really necessary to also
                # push apart vectors that come from different sentences?
                similarities = tf.matmul(tf.transpose(shared_repls),
                                         private_repls)
                l_diff += squared_frobenius_norm(similarities)

            else:
                # NOTE(review): unlike the branch above, no averaging over
                # time is applied here -- confirm the ranks agree when the
                # two kinds of task are mixed.
                shared_repls = t.adv_outputs

            task_id = tf.tile([i], [shape(shared_repls, 0)])
            adv_outputs.append(shared_repls)
            task_ids.append(task_id)
        adv_outputs = flip_gradient(tf.concat(adv_outputs, axis=0))
        task_ids = tf.concat(task_ids, axis=0)
        task_ids = tf.one_hot(task_ids, len(tasks))
        # softmax_cross_entropy_with_logits applies softmax internally, so it
        # must receive the raw scores rather than `self.outputs`.
        logits = linear(adv_outputs, len(tasks))
        self.outputs = tf.nn.softmax(logits)
        l_adv = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=task_ids)
        l_adv = tf.reduce_sum(l_adv)
        self.loss = config.adv_weight * l_adv + config.diff_weight * l_diff
Esempio n. 8
0
    def flatten_emb_by_sentence(self, emb, text_len_mask):
        """Merge the first two axes of `emb` and drop the masked positions.

        Args:
            emb: Rank-2 or rank-3 tensor whose first two axes are
                (sentence, position).
            text_len_mask: Boolean mask applied to the flattened first axis
                (presumably of length num_sentences * max_sentence_length --
                confirm against the caller).

        Returns:
            The flattened tensor with only the entries where the mask is true.

        Raises:
            ValueError: If `emb` is neither rank 2 nor rank 3.
        """
        flat_length = tf.shape(emb)[0] * tf.shape(emb)[1]

        rank = len(emb.get_shape())
        if rank == 2:
            target_shape = [flat_length]
        elif rank == 3:
            # Keep the trailing feature axis untouched.
            target_shape = [flat_length, tf_utils.shape(emb, 2)]
        else:
            raise ValueError("Unsupported rank: {}".format(rank))

        flattened = tf.reshape(emb, target_shape)
        return tf.boolean_mask(flattened, text_len_mask)
0
    def get_q_values(self, state, action, next_state, next_candidates):
        """Compute Q-values for the current step and the expected next value.

        Args:
            state: Current states; `self.calc_q_values` maps them to
                [batch_size, vocab_size] Q-values.
            action: Indices of the chosen actions, gathered per example.
            next_state: Next states, scored with the same Q-function.
            next_candidates: Candidate action ids per sample
                ([batch_size, num_next_candidates_samples, NUM_CANDIDATES]).

        Returns:
            A triple `(q_values, q_values_of_selected_action,
            expected_next_q_value)`; the last two have shape [batch_size].
        """
        q_values = self.calc_q_values(state)  # [batch_size, vocab_size]

        with tf.name_scope('dynamic_batch_size'):
            batch_size = shape(state, 0)

        # Keep only the Q-value of the action taken in the current step.
        with tf.name_scope('q_values_of_selected_action'):
            gathered = batch_gather(q_values, action)
            q_values_of_selected_action = tf.reshape(gathered, [batch_size])

        with tf.name_scope('next_q_values'):
            # [batch_size, vocab_size]
            next_q_values = self.calc_q_values(next_state)

            with tf.name_scope('mask_by_next_candidates'):
                # [batch_size, num_samples, NUM_CANDIDATES, vocab_size]
                candidate_one_hot = tf.one_hot(next_candidates,
                                               self.config.vocab_size.card)
                # [batch_size, num_samples, vocab_size]
                candidate_mask = tf.reduce_sum(candidate_one_hot, axis=2)

                # Repeat the next-step Q-values once per candidate sample.
                expanded_next_q = tf.expand_dims(next_q_values, 1)
                multiples = tf.constant([
                    1,
                    self.config.num_next_candidates_samples,
                    1,
                ])
                # [batch_size, num_samples, vocab_size]
                tiled_q_values = tf.tile(expanded_next_q, multiples)

                # NOTE(review): masking by multiplication turns excluded
                # entries into 0, which wins the max below whenever every
                # candidate Q-value is negative -- confirm this is acceptable.
                masked_next_q_values = candidate_mask * tiled_q_values

            # Best Q-value within each candidate sample, averaged over samples.
            with tf.name_scope('expected_next_q_value'):
                best_per_sample = tf.reduce_max(masked_next_q_values, axis=-1)
                expected_next_q_value = tf.reduce_mean(best_per_sample,
                                                       axis=-1)

        return q_values, q_values_of_selected_action, expected_next_q_value
Esempio n. 10
0
    def __init__(self, sess, config, manager, encoder, activation=tf.nn.relu):
        """Build the description-generation (encoder -> RNN decoder) graph.

        Args:
            sess: Session forwarded to the base class.
            config: Task configuration; `dataset`, `train_shared`,
                `dropout_rate` and `decoder` are read here.
            manager: Provides the other tasks (`manager.tasks`) and the
                vocabulary (`manager.vocab`).
            encoder: Shared encoder used to embed and encode the input text.
            activation: Activation function stored on `self.activation`.

        Attributes set here include `self.ph`, `self.adv_outputs`,
        `self.decoder`, `self.logits`, `self.predictions` and `self.loss`.
        """
        super(DescriptionGeneration, self).__init__(sess, config)
        self.config = config
        self.activation = activation
        self.encoder = encoder
        self.other_tasks = manager.tasks
        self.vocab = manager.vocab
        self.is_training = encoder.is_training
        self.dataset = config.dataset
        self.train_shared = config.train_shared
        # Dropout is only applied while training.
        self.keep_prob = 1.0 - tf.to_float(
            self.is_training) * config.dropout_rate

        self.ph = self.setup_placeholders()

        # Lengths are obtained by counting non-zero entries.
        sentence_lengths = tf.count_nonzero(self.ph.text.word,
                                            axis=-1,
                                            dtype=tf.int32)
        num_contexts = tf.count_nonzero(sentence_lengths,
                                        axis=-1,
                                        dtype=tf.float32)

        # Encode the input text.
        word_repls = encoder.word_encoder.word_encode(self.ph.text.word)
        char_repls = encoder.word_encoder.char_encode(self.ph.text.char)
        text_emb, text_outputs, _ = self.encoder.encode(
            [word_repls, char_repls],
            sentence_lengths,
            prop_gradients=self.train_shared)
        self.adv_outputs = text_outputs

        link_starts, link_ends = tf.unstack(self.ph.link, axis=-1)
        # [batch_size, max_n_contexts, mention_size]
        mention_repls, head_scores = encoder.get_batched_mention_emb(
            text_emb, text_outputs, link_starts, link_ends)
        if not self.train_shared:
            # Keep this task's gradients out of the shared encoder.
            mention_repls = tf.stop_gradient(mention_repls)
            head_scores = tf.stop_gradient(head_scores)

        # Average the mention representations over the non-empty contexts to
        # obtain the decoder's initial state.
        summed_mentions = tf.reduce_sum(mention_repls, axis=1)
        init_state = summed_mentions / tf.expand_dims(num_contexts, -1)

        # Add BOS and EOS to the decoder's inputs and outputs.
        batch_size = shape(self.ph.target, 0)
        with tf.name_scope('start_tokens'):
            bos_column = tf.tile(tf.constant([START_TOKEN], dtype=tf.int32),
                                 [batch_size])
        with tf.name_scope('end_tokens'):
            eos_column = tf.tile(tf.constant([END_TOKEN], dtype=tf.int32),
                                 [batch_size])
        dec_input_tokens = tf.concat(
            [tf.expand_dims(bos_column, 1), self.ph.target], axis=1)
        dec_output_tokens = tf.concat(
            [self.ph.target, tf.expand_dims(eos_column, 1)], axis=1)

        # Length of the description plus one for the added start/end token;
        # the same tensor serves as both input and output length.
        dec_lengths = tf.count_nonzero(
            self.ph.target, axis=1, dtype=tf.int32) + 1

        with tf.variable_scope('Decoder') as decoder_scope:
            self.decoder = RNNDecoder(config.decoder,
                                      self.is_training,
                                      self.vocab.decoder,
                                      shared_scope=decoder_scope)
            self.logits = self.decoder.decode_train(init_state,
                                                    dec_input_tokens,
                                                    dec_lengths,
                                                    dec_lengths)
            self.predictions = self.decoder.decode_test(init_state)

        # Binary weights that zero out positions past each sequence's length.
        loss_weights = tf.sequence_mask(dec_lengths, dtype=tf.float32)

        self.loss = tf.contrib.seq2seq.sequence_loss(
            self.logits,
            dec_output_tokens,
            loss_weights,
            average_across_timesteps=True,
            average_across_batch=True)

        # Combined tests with the coref task: hand the decoder to the first
        # coreference model among the other tasks, if there is one.
        coref_tasks = [
            task for task in self.other_tasks.values()
            if isinstance(task, CorefModelBase)
        ]
        if coref_tasks:
            coref_tasks[0].generate_mention_desc(self.decoder)
Esempio n. 11
0
    def inference(self,
                  text_emb,
                  text_outputs,
                  sentence_length,
                  query,
                  gold_mentions,
                  num_gold_mentions,
                  subj_targets,
                  obj_targets,
                  loss_weights_by_label,
                  max_num_mentions=None):
        '''
        Predict mentions and their relation to the query for one document.

        Args:
        - text_emb: input embeddings of the document's sentences.
        - text_outputs: encoder outputs of the document's sentences.
        - sentence_length: length of each sentence.
        - query: (start, end) position of the query mention.
        - gold_mentions: (start, end) positions of the gold mentions; only
          the first num_gold_mentions rows are used.
        - num_gold_mentions: number of valid rows in gold_mentions.
        - subj_targets, obj_targets:  [max_sequence_len, max_mention_width]
          label ids indexed by (mention start, mention end - mention start).
        - loss_weights_by_label: [num_relations]
        - max_num_mentions: None or an integer tensor. If not None, the
          first dimensions of predicted_relations and predicted_mentions are
          padded up to this value for batching.
        Return:
        - predicted_relations: [num_mentions, 2 (= subj/obj)]; the obj column
          is currently always zero.
        - predicted_mentions: [num_mentions, 2 (= start/end)]
        - loss: scalar, the mean of the weighted subjective losses.

        Sets self.reuse to True so later calls share the variables.
        '''
        if self.reuse:
            tf.get_variable_scope().reuse_variables()

        with tf.name_scope('flatten_text'):
            flattened_text_emb, flattened_text_outputs, flattened_sentence_indices = self.flatten_doc_to_sent(
                text_emb, text_outputs, sentence_length)

        with tf.name_scope('get_query_emb'):
            # A leading axis is added so the query is handled as a batch of
            # one mention.
            query_starts, query_ends = tf.unstack(tf.expand_dims(query, 0),
                                                  axis=-1)
            query_emb = self.get_mention_emb(flattened_text_emb,
                                             flattened_text_outputs,
                                             query_starts, query_ends)

        with tf.name_scope('get_mentions'):
            _, _, _, pred_mention_starts, pred_mention_ends, pred_mention_scores, pred_mention_emb = self.get_mentions(
                flattened_text_emb, flattened_text_outputs,
                flattened_sentence_indices)

        # Concatenated [subjective, objective] relations with each mention.
        with tf.name_scope('calc_logits'):
            pred_subj_logits = self.predict_relation(query_emb,
                                                     pred_mention_emb,
                                                     pred_mention_scores, True)
            pred_obj_logits = self.predict_relation(query_emb,
                                                    pred_mention_emb,
                                                    pred_mention_scores, False)

        with tf.name_scope('predict_mention_and_relation'):
            predicted_relations = tf.concat(
                [
                    tf.expand_dims(tf.argmax(pred_subj_logits, axis=-1), -1),
                    tf.zeros(
                        [shape(pred_subj_logits, 0), 1],
                        dtype=tf.int64)  # no prediction as for obj for now.
                ],
                axis=-1)  # [num_mentions, 2]
            predicted_relations = tf.cast(predicted_relations, tf.int32)
            predicted_mentions = tf.concat([
                tf.expand_dims(pred_mention_starts, -1),
                tf.expand_dims(pred_mention_ends, -1)
            ],
                                           axis=-1)  # [num_mentions, 2]

            if max_num_mentions is not None:
                # Pad the mention axis so results of different documents can
                # be stacked.
                num_pads = max_num_mentions - shape(predicted_relations, 0)
                pad_shape = [[0, num_pads], [0, 0]]
                predicted_relations = tf.pad(predicted_relations, pad_shape)
                predicted_mentions = tf.pad(predicted_mentions, pad_shape)

        with tf.name_scope('merge_logits'):
            # The loss is computed over predicted and/or gold mentions,
            # depending on the two flags.
            mention_starts = []
            mention_ends = []
            subj_logits = []
            obj_logits = []

            if self.use_predicted_mentions:
                mention_starts.append(pred_mention_starts)
                mention_ends.append(pred_mention_ends)
                subj_logits.append(pred_subj_logits)
                obj_logits.append(pred_obj_logits)

            if self.use_gold_mentions:
                # Drop the padding rows of the gold mentions.
                gold_mentions = tf.slice(gold_mentions, [0, 0],
                                         [num_gold_mentions, 2])
                gold_mentions = tf.reshape(gold_mentions,
                                           [shape(gold_mentions, 0), 2])
                gold_mention_starts, gold_mention_ends = tf.unstack(
                    gold_mentions, axis=-1)
                gold_mention_emb = self.get_mention_emb(
                    flattened_text_emb, flattened_text_outputs,
                    gold_mention_starts, gold_mention_ends)
                gold_mention_scores = self.get_mention_scores(gold_mention_emb)
                gold_subj_logits = self.predict_relation(
                    query_emb, gold_mention_emb, gold_mention_scores, True)
                gold_obj_logits = self.predict_relation(
                    query_emb, gold_mention_emb, gold_mention_scores, False)
                mention_starts.append(gold_mention_starts)
                mention_ends.append(gold_mention_ends)
                subj_logits.append(gold_subj_logits)
                obj_logits.append(gold_obj_logits)

            assert self.use_gold_mentions or self.use_predicted_mentions
            mention_starts = tf.concat(mention_starts, axis=0)
            mention_ends = tf.concat(mention_ends, axis=0)
            subj_logits = tf.concat(subj_logits, axis=0)
            obj_logits = tf.concat(obj_logits, axis=0)

        with tf.name_scope('loss'):
            # Gold mentions longer than self.max_mention_width should be cut.
            # Only the width index is clipped to the width axis; clipping the
            # stacked (start, width) pair with the width bound, as was done
            # before, also clamped the start positions and made the lookup
            # read the wrong rows of the target tables.
            mention_widths = tf.clip_by_value(mention_ends - mention_starts,
                                              0,
                                              shape(subj_targets, -1) - 1)
            # Start positions are kept inside the sequence axis.
            clipped_starts = tf.clip_by_value(mention_starts, 0,
                                              shape(subj_targets, 0) - 1)
            mention_indices = tf.stack([clipped_starts, mention_widths],
                                       axis=-1)  # [num_mentions, 2]

            subj_targets = tf.gather_nd(subj_targets,
                                        mention_indices)  # [num_mentions]
            obj_targets = tf.gather_nd(obj_targets,
                                       mention_indices)  # [num_mentions]

            subj_loss_weights = tf.gather(loss_weights_by_label, subj_targets)
            obj_loss_weights = tf.gather(loss_weights_by_label, obj_targets)

            subj_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=subj_logits, labels=subj_targets)
            obj_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=obj_logits, labels=obj_targets)
            # Only the subjective direction contributes to the loss for now;
            # the objective losses and weights are built but not used.
            losses = subj_losses * subj_loss_weights
            loss = tf.reduce_mean(losses, axis=-1)
        self.reuse = True
        return predicted_relations, predicted_mentions, loss
Esempio n. 12
0
    def __init__(self, sess, config, encoder, activation=tf.nn.relu):
        """Build the graph-link-prediction graph on top of a shared encoder.

        Subject and object mentions are embedded from the encoded text, the
        relation label words are composed with a CNN, and `self.inference`
        scores the (subject, relation, object) triple.

        Args:
            sess: Session forwarded to the base class.
            config: Task configuration; `dropout_rate`, `ffnn_size`,
                `cnn.filter_widths` and `cnn.filter_size` are read here.
            encoder: Shared encoder object.
            activation: Activation function stored on `self.activation`.
        """
        super(GraphLinkPrediction, self).__init__(sess, config)
        self.sess = sess
        self.encoder = encoder
        self.activation = activation

        self.is_training = encoder.is_training
        # Dropout is only applied while training.
        self.keep_prob = 1.0 - tf.to_float(
            self.is_training) * config.dropout_rate
        self.ffnn_size = config.ffnn_size
        self.cnn_filter_widths = config.cnn.filter_widths
        self.cnn_filter_size = config.cnn.filter_size

        def optional_int_placeholder(enabled, name, dims):
            # An int32 placeholder, or None when the input kind is disabled.
            if not enabled:
                return None
            return tf.placeholder(tf.int32, name=name, shape=dims)

        with tf.name_scope('Placeholder'):
            self.ph = recDotDefaultDict()
            self.ph.text.word = optional_int_placeholder(
                self.encoder.wbase, 'text.word', [None, None])
            self.ph.text.char = optional_int_placeholder(
                self.encoder.cbase, 'text.char', [None, None, None])

            # (start, end) positions of the subject and object mentions.
            self.ph.subj = tf.placeholder(tf.int32,
                                          name='subj.position',
                                          shape=[None, 2])
            self.ph.obj = tf.placeholder(tf.int32,
                                         name='obj.position',
                                         shape=[None, 2])

            self.ph.rel = dotDict()
            self.ph.rel.word = optional_int_placeholder(
                self.encoder.wbase, 'rel.word', [None, None])
            self.ph.rel.char = optional_int_placeholder(
                self.encoder.cbase, 'rel.char', [None, None, None])
            self.ph.target = tf.placeholder(tf.int32,
                                            name='target',
                                            shape=[None])
            # Sentence lengths are obtained by counting non-zero word ids.
            self.sentence_length = tf.count_nonzero(self.ph.text.word, axis=1)

        with tf.name_scope('Encoder'):
            text_inputs = [self.ph.text.word, self.ph.text.char]
            text_emb, encoder_outputs, _ = self.encoder.encode(
                text_inputs, self.sentence_length)
            self.encoder_outputs = encoder_outputs

        with tf.variable_scope('Subject'):
            subj_starts, subj_ends = tf.unstack(self.ph.subj, axis=1)
            subj_outputs, _ = self.encoder.get_batched_mention_emb(
                text_emb, encoder_outputs, subj_starts, subj_ends)

        with tf.variable_scope('Object'):
            obj_starts, obj_ends = tf.unstack(self.ph.obj, axis=1)
            obj_outputs, _ = self.encoder.get_batched_mention_emb(
                text_emb, encoder_outputs, obj_starts, obj_ends)

        with tf.variable_scope('Relation'):
            # Stop the gradient to prevent biased learning towards the words
            # used as relation labels.
            rel_inputs = [self.ph.rel.word, self.ph.rel.char]
            rel_words_emb = tf.stop_gradient(
                self.encoder.word_encoder.encode(rel_inputs))
            with tf.name_scope("compose_words"):
                rel_outputs = cnn(rel_words_emb, self.cnn_filter_widths,
                                  self.cnn_filter_size)

        with tf.variable_scope('Inference'):
            # [batch_size, 1]
            score_outputs = self.inference(subj_outputs, rel_outputs,
                                           obj_outputs)
            # Flatten to [batch_size] and round the scores.
            flat_scores = tf.reshape(score_outputs,
                                     [shape(score_outputs, 0)])
            self.outputs = tf.round(flat_scores)
        with tf.name_scope("Loss"):
            self.losses = self.cross_entropy(score_outputs, self.ph.target)
            self.loss = tf.reduce_mean(self.losses)
Esempio n. 13
0
    def get_antecedent_scores(self, mention_emb, mention_scores, antecedents,
                              antecedents_len, mention_starts, mention_ends,
                              mention_speaker_ids, genre_emb):
        """Score every (mention, candidate antecedent) pair.

        Each pair is represented by the concatenation of the target mention
        embedding, the antecedent embedding, their element-wise product and
        (when enabled) pairwise feature embeddings. The pair representation
        is scored by a feed-forward network, padded antecedent slots are
        masked out, both unary mention scores are added, and a zero-score
        "no antecedent" column is prepended.

        Args:
            mention_emb: Mention embeddings, [num_mentions, emb].
            mention_scores: Unary mention scores, [num_mentions].
            antecedents: Indices into the mention axis of the candidate
                antecedents of each mention, [num_mentions, max_ant].
            antecedents_len: Number of valid entries per row of
                `antecedents`, [num_mentions].
            mention_starts: Unused by this method; kept for interface
                compatibility.
            mention_ends: Unused by this method; kept for interface
                compatibility.
            mention_speaker_ids: Speaker id per mention, [num_mentions].
                Only read when `self.use_metadata` is set.
            genre_emb: Genre embedding vector, [emb]. Only read when
                `self.use_metadata` is set.

        Returns:
            Antecedent scores of shape [num_mentions, max_ant + 1]; column 0
            is the constant-zero "no antecedent" score. Padded antecedent
            slots receive -inf from the log-mask.
        """
        num_mentions = tf_utils.shape(mention_emb, 0)
        max_antecedents = tf_utils.shape(antecedents, 1)

        feature_emb_list = []

        if self.use_metadata:
            antecedent_speaker_ids = tf.gather(
                mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
            same_speaker = tf.equal(
                tf.expand_dims(mention_speaker_ids, 1),
                antecedent_speaker_ids)  # [num_mentions, max_ant]
            # The boolean is cast to {0, 1} and used as a row index into the
            # same-speaker embedding table.
            speaker_pair_emb = tf.gather(
                self.same_speaker_emb,
                tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
            feature_emb_list.append(speaker_pair_emb)

            # The same genre vector is broadcast to every pair.
            tiled_genre_emb = tf.tile(
                tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                [num_mentions, max_antecedents, 1
                 ])  # [num_mentions, max_ant, emb]
            feature_emb_list.append(tiled_genre_emb)

        if self.use_distance_feature:
            target_indices = tf.range(num_mentions)  # [num_mentions]
            mention_distance = tf.expand_dims(
                target_indices, 1) - antecedents  # [num_mentions, max_ant]
            mention_distance_bins = coref_ops.distance_bins(
                mention_distance)  # [num_mentions, max_ant]
            # NOTE(review): presumably the op from coref_ops does not carry
            # static shape information, hence the explicit rank-2 hint.
            mention_distance_bins.set_shape([None, None])
            mention_distance_emb = tf.gather(
                self.mention_distance_emb,
                mention_distance_bins)  # [num_mentions, max_ant, emb]
            feature_emb_list.append(mention_distance_emb)

        # tf.concat rejects an empty list, so the feature block is only
        # built when at least one pairwise feature is enabled.
        if feature_emb_list:
            feature_emb = tf.concat(feature_emb_list,
                                    2)  # [num_mentions, max_ant, emb]
            feature_emb = tf.nn.dropout(
                feature_emb, self.keep_prob)  # [num_mentions, max_ant, emb]
        else:
            feature_emb = None

        antecedent_emb = tf.gather(mention_emb,
                                   antecedents)  # [num_mentions, max_ant, emb]
        target_emb_tiled = tf.tile(
            tf.expand_dims(mention_emb, 1),
            [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
        similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]

        pair_emb_parts = [target_emb_tiled, antecedent_emb, similarity_emb]
        if feature_emb is not None:
            pair_emb_parts.append(feature_emb)
        pair_emb = tf.concat(pair_emb_parts,
                             2)  # [num_mentions, max_ant, emb]

        with tf.variable_scope("iteration"):
            with tf.variable_scope("antecedent_scoring"):
                antecedent_scores = tf_utils.ffnn(
                    pair_emb, self.ffnn_depth, self.ffnn_size, 1,
                    self.keep_prob)  # [num_mentions, max_ant, 1]
        antecedent_scores = tf.squeeze(antecedent_scores,
                                       2)  # [num_mentions, max_ant]

        # log(1) = 0 keeps valid slots unchanged; log(0) = -inf removes
        # padded slots from any downstream softmax / max.
        antecedent_mask = tf.log(
            tf.sequence_mask(antecedents_len,
                             max_antecedents,
                             dtype=tf.float32))  # [num_mentions, max_ant]
        antecedent_scores += antecedent_mask  # [num_mentions, max_ant]

        # Add the unary scores of both the target mention and the antecedent.
        antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(
            mention_scores, antecedents)  # [num_mentions, max_ant]

        # Column 0 is the fixed zero score of the "no antecedent" choice.
        no_antecedent = tf.zeros([tf_utils.shape(mention_scores, 0),
                                  1])  # [num_mentions, 1]
        antecedent_scores = tf.concat([no_antecedent, antecedent_scores],
                                      1)  # [num_mentions, max_ant + 1]
        return antecedent_scores  # [num_mentions, max_ant + 1]
Esempio n. 14
0
 def get_mention_scores(self, mention_emb):
     """Compute one unary score per mention.

     Runs `mention_emb` through `tf_utils.ffnn` (configured with
     `self.ffnn_depth`, `self.ffnn_size`, output size 1 and
     `self.keep_prob`) inside the "mention_scores" variable scope, then
     flattens the result to a vector whose length is the size of the
     network output's first axis.
     """
     with tf.variable_scope("mention_scores"):
         raw_scores = tf_utils.ffnn(
             mention_emb,
             self.ffnn_depth,
             self.ffnn_size,
             1,
             self.keep_prob,
         )  # [num_mentions, 1]
         # Drop the trailing singleton axis by reshaping to the batch size.
         num_mentions = tf_utils.shape(raw_scores, 0)
         flat_scores = tf.reshape(raw_scores, [num_mentions])
         return flat_scores