Example #1
 def apply(self, inputs):
     '''
     inputs: shape = [batch_size, max_num_causes], the word ids of the input causes
     sequence_length: shape = [batch_size]
     returns: shape = [batch_size, max_num_causes, embedding_size]
     '''
     max_num_causes = int(inputs.shape[1])
     max_word_num = int(self._cause_id_table.shape[1])
     inputs_word_ids = gen_array_ops.gather_v2(self._cause_id_table,
                                               inputs,
                                               axis=0)
     # shape = [batch_size, max_num_causes, max_word_num]
     embedded_inputs = gen_array_ops.gather_v2(self._word_embeddings,
                                               inputs_word_ids,
                                               axis=0)
     # shape = [batch_size, max_num_causes, max_word_num, embedding_size]; flattened below
     embedded_inputs = tf.reshape(embedded_inputs,
                                  [-1, max_word_num, self._embedding_size])
     inputs_word_length = gen_array_ops.gather_v2(
         self._cause_id_table_length, inputs, axis=0)
     # shape = [batch_size, max_num_causes]
     inputs_word_length = tf.reshape(inputs_word_length, [-1])
     lstm_zero_state = self._lstm_cell.zero_state(
         tf.shape(inputs_word_length)[0], tf.float32)
     outputs, state = tf.nn.dynamic_rnn(self._lstm_cell,
                                        embedded_inputs,
                                        sequence_length=inputs_word_length,
                                        initial_state=lstm_zero_state,
                                        scope='lstm_cause_encoder')
     # state is an LSTMStateTuple (c, h); each part has shape [batch_size * max_num_causes, embedding_size]
     state = tf.reshape(state[0],
                        [-1, max_num_causes, self._embedding_size])
     return state
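To make the two-level lookup above concrete, here is a minimal self-contained sketch (TensorFlow 1.x assumed; the tables and sizes are hypothetical, not from the source):

import tensorflow as tf
from tensorflow.python.ops import gen_array_ops

# hypothetical tables: 3 causes, each padded to 4 word ids; vocab of 10, embedding size 2
cause_id_table = tf.constant([[1, 2, 0, 0], [3, 4, 5, 0], [6, 0, 0, 0]])
word_embeddings = tf.random_normal([10, 2])

inputs = tf.constant([[0, 2], [1, 1]])  # [batch_size=2, max_num_causes=2]
# first gather: cause ids -> padded word ids, shape [2, 2, 4]
word_ids = gen_array_ops.gather_v2(cause_id_table, inputs, axis=0)
# second gather: word ids -> embeddings, shape [2, 2, 4, 2]
embedded = gen_array_ops.gather_v2(word_embeddings, word_ids, axis=0)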
Example #2
 def __call__(self, input):
     '''
     Transform word ids into embeddings.
     :param input: shape = [-1, sen_len]
     :return: shape = [-1, sen_len, embedding_dimension]
     '''
     return gen_array_ops.gather_v2(self._embedding_matrix, input, axis=0)
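Along axis 0, gen_array_ops.gather_v2 behaves like tf.nn.embedding_lookup; a quick equivalence check (TensorFlow 1.x assumed, toy values):

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import gen_array_ops

embedding_matrix = tf.constant([[0.0, 0.1], [1.0, 1.1], [2.0, 2.1]])
word_ids = tf.constant([[0, 2], [1, 1]])  # [-1, sen_len]

gathered = gen_array_ops.gather_v2(embedding_matrix, word_ids, axis=0)
looked_up = tf.nn.embedding_lookup(embedding_matrix, word_ids)

with tf.Session() as sess:
    a, b = sess.run([gathered, looked_up])
    assert np.array_equal(a, b)  # both have shape [2, 2, 2]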
Example #3
    def _mask_outputs_by_lable(self, outputs, last_choice):
        """outputs shape=[batch_size, num_classes]"""

        vocab_size = array_ops.shape(outputs)[1]
        next_choices = gen_array_ops.gather_v2(params=self.lookup_table,
                                               indices=last_choice,
                                               axis=0)
        # build the [batch_size, vocab_size] mask of allowed next classes
        mask = math_ops.reduce_sum(array_ops.one_hot(indices=next_choices,
                                                     depth=vocab_size,
                                                     dtype=dtypes.int32),
                                   axis=1)
        mask = math_ops.cast(mask, dtype=dtypes.bool)
        # shape = [batch_size, vocab_size]; disallowed entries are filled with dtype.min
        finished_probs = array_ops.fill(dims=array_ops.shape(outputs),
                                        value=outputs.dtype.min)
        return array_ops.where(mask, outputs, finished_probs)
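A tiny sketch of what this mask does (TensorFlow 1.x assumed; the lookup table is a hypothetical hierarchy where row i lists the classes allowed after choosing class i):

import tensorflow as tf
from tensorflow.python.ops import array_ops, math_ops, gen_array_ops
from tensorflow.python.framework import dtypes

lookup_table = tf.constant([[1, 2], [3, 3], [3, 3], [3, 3]])  # 4 classes, padded children
last_choice = tf.constant([0, 2])  # batch of 2 previous choices
outputs = tf.zeros([2, 4])         # [batch_size, num_classes] logits

next_choices = gen_array_ops.gather_v2(lookup_table, last_choice, axis=0)
mask = math_ops.reduce_sum(array_ops.one_hot(next_choices, depth=4,
                                             dtype=dtypes.int32), axis=1)
mask = math_ops.cast(mask, dtypes.bool)
masked = array_ops.where(mask, outputs,
                         array_ops.fill(array_ops.shape(outputs), outputs.dtype.min))

with tf.Session() as sess:
    print(sess.run(masked))
    # row 0 keeps classes {1, 2}; row 1 keeps class {3}; all others become float32 min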
Example #4
def _match_model_fn_v6(features, labels, mode, params):
    '''
    This version uses the original seq2seq model, but adds an LSTM that merges
    the cause and word embedding tables, and it uses the input embedding as
    the attention query.
    '''
    '''set parameters'''
    with tf.device('/gpu:0'), tf.variable_scope('model',
                                                reuse=tf.AUTO_REUSE) as scope:
        # set hyper parameters
        embedding_size = params['embedding_size']
        num_units = params['num_units']
        if mode == tf.estimator.ModeKeys.TRAIN:
            dropout_keep_prob = params['dropout_keep_prob']
        else:
            dropout_keep_prob = 1
        beam_width = params['beam_width']
        EOS = params['EOS']
        SOS = params['SOS']
        # set training parameters
        max_sequence_length = params['max_sequence_length']
        max_cause_length = params['max_cause_length']
        vocab_size = params['vocab_size']
        num_causes = EOS + 1
        '''process input and target'''
        # input layer
        input = tf.reshape(features['content'], [-1, max_sequence_length])
        batch_size = tf.shape(input)[0]
        input_length = tf.reshape(features['content_length'], [batch_size])
        cause_label = tf.reshape(labels['cause_label'],
                                 [batch_size, max_cause_length])
        cause_length = tf.reshape(labels['cause_length'], [batch_size])

        # necessary cast
        input = tf.cast(input, dtype=tf.int32)
        input_length = tf.cast(input_length, dtype=tf.int32)
        cause_label = tf.cast(cause_label, dtype=tf.int32)
        cause_length = tf.cast(cause_length, dtype=tf.int32)

        # word embedding layer
        embeddings_word = load_embedding(params['word2vec_model'], vocab_size,
                                         embedding_size)

        embedded_input = gen_array_ops.gather_v2(embeddings_word,
                                                 input,
                                                 axis=0)
        # cause-label embedding layer
        cause_encoder = CauseEncoder(word_embeddings=embeddings_word,
                                     params=params)
        embedded_cause = cause_encoder.apply(cause_label)

        # cause lookup table
        cause_table = tf.constant(params['cause_table'], dtype=tf.int32)
        encoder_output = encoders(embedded_input, input_length, params, mode)
        '''hierarchical multilabel decoder'''
        # build lstm cell with attention
        lstm = rnn.LayerNormBasicLSTMCell(num_units=num_units,
                                          reuse=tf.AUTO_REUSE,
                                          dropout_keep_prob=dropout_keep_prob)
        # lstm = rnn.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)

        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=encoder_output.attention_values,
            memory_sequence_length=encoder_output.attention_values_length)
        initial_state = rnn.LSTMStateTuple(encoder_output.initial_state,
                                           encoder_output.initial_state)
        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32, batch_size=batch_size)
        cell_state = cell_state.clone(cell_state=initial_state,
                                      attention=encoder_output.final_state)

        # extra dense layer to project a rnn output into a classification
        project_dense = Dense(num_causes,
                              _reuse=tf.AUTO_REUSE,
                              _scope='project_dense_scope',
                              name='project_dense')

        # train_decoder
        train_helper = MyTrainingHelper(embedded_cause, cause_label,
                                        cause_length)
        train_decoder = MyBasicDecoder(cell,
                                       train_helper,
                                       cell_state,
                                       lookup_table=cause_table,
                                       output_layer=project_dense,
                                       hie=params['hie'])

        decoder_output_train, decoder_state_train, decoder_len_train = dynamic_decode(
            train_decoder,
            maximum_iterations=max_cause_length - 1,
            parallel_iterations=64,
            scope='decoder')

        # beam_width = 1
        tiled_memory_sequence_length = tile_batch(
            encoder_output.attention_values_length, multiplier=beam_width)
        tiled_memory = tile_batch(encoder_output.attention_values,
                                  multiplier=beam_width)
        tiled_encoder_output_initial_state = tile_batch(
            encoder_output.initial_state, multiplier=beam_width)
        tiled_initial_state = rnn.LSTMStateTuple(
            tiled_encoder_output_initial_state,
            tiled_encoder_output_initial_state)
        tiled_first_attention = tile_batch(encoder_output.final_state,
                                           multiplier=beam_width)

        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=tiled_memory,
            memory_sequence_length=tiled_memory_sequence_length)

        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=tiled_initial_state,
                                      attention=tiled_first_attention)
        infer_decoder = MyBeamSearchDecoder(cell,
                                            embedding=cause_encoder,
                                            sots=tf.fill([batch_size], SOS),
                                            start_tokens=tf.fill([batch_size],
                                                                 SOS),
                                            end_token=EOS,
                                            initial_state=cell_state,
                                            beam_width=beam_width,
                                            output_layer=project_dense,
                                            lookup_table=cause_table,
                                            length_penalty_weight=0.7,
                                            hie=params['hie'])

        cause_output_infer, cause_state_infer, cause_length_infer = dynamic_decode(
            infer_decoder,
            parallel_iterations=64,
            maximum_iterations=max_cause_length - 1,
            scope='decoder')

        # loss
        mask_for_cause = tf.sequence_mask(cause_length - 1,
                                          max_cause_length - 1,
                                          dtype=tf.float32)
        # loss = sequence_loss(logits=padded_train_output, targets=cause_label, weights=mask_for_cause, name='loss')
        tmp_padding = tf.pad(decoder_output_train.rnn_output,
                             [[0, 0],
                              [
                                  0, max_cause_length - 1 -
                                  tf.shape(decoder_output_train.rnn_output)[1]
                              ], [0, 0]],
                             constant_values=0)
        loss = _compute_loss(tmp_padding, cause_label, mask_for_cause,
                             batch_size)
        # predicted_ids: [batch_size, max_cause_length, beam_width]

        predicted_and_cause_ids = tf.transpose(
            cause_output_infer.predicted_ids,
            perm=[0, 2, 1],
            name='predicted_cause_ids')

        # for monitoring
        cause_label_expanded = tf.reshape(cause_label[:, 1:],
                                          [-1, 1, max_cause_length - 1])
        predicted_and_cause_ids = tf.pad(
            predicted_and_cause_ids,
            [[0, 0], [0, 0],
             [0, max_cause_length - 1 - tf.shape(predicted_and_cause_ids)[2]]],
            constant_values=EOS)
        predicted_and_cause_ids = tf.concat(
            [predicted_and_cause_ids, cause_label_expanded],
            axis=1,
            name='predicted_and_cause_ids')
        predicted_and_cause_ids = tf.reshape(
            predicted_and_cause_ids,
            [-1, beam_width + 1, max_cause_length - 1])
        predicted_and_cause_ids_train = tf.concat(
            [decoder_output_train.sample_id, cause_label[:, 1:]],
            axis=1,
            name='predicted_and_cause_ids_train')

        predictions = {
            'predicted_and_cause_ids': predicted_and_cause_ids,
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # warm_up_constant = params['warm_up_steps'] ** (-1.5)
            # embedding_constant = embedding_size ** (-0.5)
            # global_step = tf.to_float(tf.train.get_global_step())
            # learning_rate = tf.minimum(1 / tf.sqrt(global_step),
            #                            warm_up_constant * global_step) * embedding_constant
            # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98, epsilon=1e-9)
            optimizer = tf.train.AdamOptimizer()
            # # train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
            # '''using gradient clipping'''
            # loss = tf.Print(loss, [loss, 'to be clear, this is the loss'])
            grads_and_vars = optimizer.compute_gradients(loss)
            clipped_gvs = [
                ele if ele[0] is None else
                (tf.clip_by_value(ele[0], -0.1, 0.1), ele[1])
                for ele in grads_and_vars
            ]
            train_op = optimizer.apply_gradients(
                clipped_gvs, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # predicted_cause_ids shape = [batch_size, cause_length]
        # cause_label shape = [batch_size, cause_length]
        # select the predicted cause with the highest probability
        # todo: evaluation
        # bi_predicted_cause_ids = binarizer(predicted_cause_ids[:, 0, :], num_causes)
        # bi_cause_label = binarizer(cause_label, num_causes)

        # todo: for now, evaluation has to be done outside the estimator
        eval_metric_ops = {
            'predicted_and_cause_ids':
            tf.contrib.metrics.streaming_concat(predicted_and_cause_ids),
            # 'precision': tf.metrics.precision(bi_cause_label, bi_predicted_cause_ids),
            # 'recall': tf.metrics.recall(bi_cause_label, bi_predicted_cause_ids),
            # 'f1-score': f_score(bi_cause_label, bi_predicted_cause_ids),
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
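The training branch above clips each gradient by value before applying it; here is a minimal self-contained sketch of that pattern (TensorFlow 1.x assumed; my_loss is an illustrative stand-in, not from the source):

import tensorflow as tf

x = tf.Variable(3.0)
my_loss = tf.square(x)
optimizer = tf.train.AdamOptimizer()
grads_and_vars = optimizer.compute_gradients(my_loss)
# keep None gradients as-is; clip the rest elementwise to [-0.1, 0.1]
clipped_gvs = [gv if gv[0] is None else (tf.clip_by_value(gv[0], -0.1, 0.1), gv[1])
               for gv in grads_and_vars]
train_op = optimizer.apply_gradients(
    clipped_gvs, global_step=tf.train.get_or_create_global_step())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # each gradient component is limited to [-0.1, 0.1]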
Example #5
 def apply(self, inputs):
     embedding_state = gen_array_ops.gather_v2(self._cause_embeddings, inputs, axis=0)
     return embedding_state
Example #6
 def apply(self, inputs):
     lstm_state = super(CauseEncoder_v2, self).apply(inputs)
     embedding_state = gen_array_ops.gather_v2(self._cause_embeddings, inputs, axis=0)
     state = tf.concat([lstm_state, embedding_state], axis=2)
     return state
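The concat along axis 2 simply widens the per-cause feature vector; a quick shape check (TensorFlow 1.x assumed, toy sizes):

import tensorflow as tf

lstm_state = tf.zeros([2, 4, 8])        # [batch_size, max_num_causes, lstm_size]
embedding_state = tf.zeros([2, 4, 16])  # [batch_size, max_num_causes, embedding_size]
state = tf.concat([lstm_state, embedding_state], axis=2)
print(state.shape)  # (2, 4, 24)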
Example #7
    def step(self, time, inputs, state, name=None):
        """Perform a decoding step.

        Args:
          time: scalar `int32` tensor.
          inputs: A (structure of) input tensors.
          state: A (structure of) state tensors and TensorArrays.
          name: Name scope for any created operations.

        Returns:
          `(outputs, next_state, next_inputs, finished)`.
        """
        batch_size = self._batch_size
        beam_width = self._beam_width
        end_token = self._end_token
        length_penalty_weight = self._length_penalty_weight

        with ops.name_scope(name, "BeamSearchDecoderStep",
                            (time, inputs, state)):
            cell_state = state.cell_state
            inputs = nest.map_structure(
                lambda inp: self._merge_batch_beams(inp, s=inp.shape[2:]),
                inputs)
            cell_state = nest.map_structure(self._maybe_merge_batch_beams,
                                            cell_state, self._cell.state_size)
            cell_outputs, next_cell_state = self._cell(inputs, cell_state)

            # finished = tf.Print(state.finished, [state.finished, 'finished', time], summarize=100)
            # not_finished = tf.Print(not_finished, [not_finished, 'not_finished', time], summarize=100)
            # cell_state.last_choice shape = [batch_size * beam_width]
            next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                                   cell_state.last_choice,
                                                   axis=0)
            not_finished = tf.not_equal(next_choices[:, 0], end_token)
            next_next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                                        next_choices[:, 0],
                                                        axis=0)
            will_finish = tf.logical_and(
                not_finished, tf.equal(next_next_choices[:, 0], end_token))

            def move(will_finish, last_choice, cell_outputs):
                # cell_outputs = tf.Print(cell_outputs, [cell_outputs, 'cell_outputs', time], summarize=1000)
                # will_finish = tf.Print(will_finish, [will_finish, 'will_finish', time], summarize=100)
                attention_score = self._step_method(last_choice)
                attention_score = attention_score + cell_outputs
                # final = tf.Print(final, [final, 'finalll', time], summarize=1000)
                return tf.where(will_finish, attention_score, cell_outputs)

            if self._output_layer is not None:
                cell_outputs = self._output_layer(cell_outputs)
                # will_finish = tf.Print(will_finish, [will_finish, 'will_finish, beam_search', time], summarize=100)
                cell_outputs = tf.cond(
                    tf.reduce_any(will_finish),
                    false_fn=lambda: cell_outputs,
                    true_fn=lambda: move(will_finish, cell_state.last_choice,
                                         cell_outputs))

            if self.hie:
                cell_outputs = self._mask_outputs_by_lable(
                    cell_outputs, cell_state.last_choice)

                # cell_state.last_choice shape = [batch_size*beam_width,]

            cell_outputs = nest.map_structure(
                lambda out: self._split_batch_beams(out, out.shape[1:]),
                cell_outputs)

            next_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                 next_cell_state,
                                                 self._cell.state_size)

            beam_search_output, beam_search_state = _beam_search_step(
                time=time,
                logits=cell_outputs,
                next_cell_state=next_cell_state,
                beam_state=state,
                batch_size=batch_size,
                beam_width=beam_width,
                end_token=end_token,
                length_penalty_weight=length_penalty_weight)

            finished = beam_search_state.finished

            # replace the father ids
            sample_ids = beam_search_output.predicted_ids
            next_cell_state = beam_search_state.cell_state
            next_cell_state = next_cell_state._replace(last_choice=sample_ids)
            beam_search_state = beam_search_state._replace(
                cell_state=next_cell_state)

            # sample_ids shape = [batch_size, beam_width]
            next_inputs = control_flow_ops.cond(
                math_ops.reduce_all(finished), lambda: self._start_inputs,
                lambda: self._embedding_fn(sample_ids))

        return (beam_search_output, beam_search_state, next_inputs, finished)
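The step above relies on _merge_batch_beams and _split_batch_beams to move between the per-beam and flattened layouts; roughly, both are reshapes, as this sketch shows (TensorFlow 1.x assumed, toy sizes):

import tensorflow as tf

batch_size, beam_width, depth = 2, 3, 5
x = tf.zeros([batch_size, beam_width, depth])

# roughly what _merge_batch_beams does: [batch, beam, ...] -> [batch * beam, ...]
merged = tf.reshape(x, [batch_size * beam_width, depth])
# roughly what _split_batch_beams undoes: [batch * beam, ...] -> [batch, beam, ...]
split = tf.reshape(merged, [batch_size, beam_width, depth])
print(merged.shape, split.shape)  # (6, 5) (2, 3, 5)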