Example #1
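A TF 1.x BiLSTM-CRF tagger in which a bi-LSTM over dictionary features produces a sigmoid gate that re-weights the character window embeddings before the main bi-LSTM and CRF layer. All four snippets assume import tensorflow as tf, the contrib modules tensorflow.contrib.layers, .rnn, and .crf bound as layers, rnn, and crf, and a project-local model_utils; the class definitions wrapping these __init__ methods are not shown.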
    def __init__(self, config: AttendedInputConfig, is_training, features, init_embedding=None):

        super(AttendedInputModel, self).__init__()
        input_ids = features["input_ids"]
        input_dicts = features["input_dicts"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.dict = input_dicts
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]
        dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
        self.dict_dim = dict_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(shape=[config.vocab_size, config.embedding_size],
                                             dtype=tf.float32,
                                             name='embedding',
                                             initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding')

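        # Embedding lookup gives shape [batch, max_length, window_size, embedding_size].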
        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)

        def lstm_cell(dim):
            cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
            cell = rnn.DropoutWrapper(cell, output_keep_prob=1.0 - config.hidden_dropout_prob)
            return cell

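        # Encode the per-character dictionary features with their own bi-LSTM.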
        with tf.variable_scope('dict'):
            self.dict = tf.cast(self.dict, dtype=tf.float32)
            (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell(config.dict_hidden_size),
                cell_bw=lstm_cell(config.dict_hidden_size),
                inputs=self.dict,
                sequence_length=self.seq_length,
                dtype=tf.float32
            )
            dict_output = tf.concat([forward_output, backward_output], axis=2)

        with tf.variable_scope('input_attention'):
            feat_size = self.window_size
            input_attention = layers.fully_connected(
                inputs=dict_output,
                num_outputs=feat_size,
                activation_fn=tf.sigmoid
            )
            # Gate: [B, L, F, 1] broadcasts against x of shape [B, L, F, E].
            input_attention = tf.expand_dims(input_attention, -1)
            attend_input = tf.multiply(x, input_attention)
            attend_input = tf.reshape(attend_input, [self.batch_size, -1, feat_size * config.embedding_size])
            attend_input = model_utils.dropout(attend_input, config.embedding_dropout_prob)

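        # Run the gated window embeddings through the main character-level bi-LSTM.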
        with tf.variable_scope('character'):
            (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell(config.hidden_size),
                cell_bw=lstm_cell(config.hidden_size),
                inputs=attend_input,
                sequence_length=self.seq_length,
                dtype=tf.float32
            )
            output = tf.concat([forward_output, backward_output], axis=2)

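        # Project to per-class scores and decode the best path with a linear-chain CRF.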
        with tf.variable_scope('output'):
            output = tf.concat([dict_output, output], axis=2)
            scores = layers.fully_connected(
                inputs=output,
                num_outputs=config.num_classes,
                activation_fn=None
            )
            transition_param = tf.get_variable("transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param, self.seq_length)

        with tf.variable_scope('loss'):
            # Mean negative CRF log-likelihood over the batch.
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
            self.loss = tf.reduce_mean(-self.log_likelihood)
Example #2
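A plain stacked BiLSTM-CRF baseline over character window embeddings, without the dictionary inputs used in Examples #1 and #4.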
    def __init__(self,
                 config: BaselineConfig,
                 is_training,
                 features,
                 init_embedding=None):
        """Constructor for BertModel.

        Args:
          config: `BertConfig` instance.
          is_training: bool. rue for training model, false for eval model. Controls
            whether dropout will be applied.
          input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size].
          label_ids: (optional) int64 Tensor of shape [batch_size, seq_length].
          seq_length: (optional) int64 Tensor of shape [batch_size].
          init_embedding: (optional)

        Raises:
          ValueError: The config is invalid or one of the input tensor shapes
            is invalid.
        """

        super(BaselineModel, self).__init__()
        input_ids = features["input_ids"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
            feat_size = self.window_size
            x = tf.reshape(
                x, [self.batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        def lstm_cell(dim):
            def single_cell():
                cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
                return rnn.DropoutWrapper(
                    cell, output_keep_prob=1.0 - config.hidden_dropout_prob)

            # Build a fresh cell per layer: reusing one cell object across
            # layers makes them share weights and fails on TF >= 1.2.
            return tf.nn.rnn_cell.MultiRNNCell(
                [single_cell() for _ in range(config.num_hidden_layers)])

        with tf.variable_scope('rnn'):
            (forward_output,
             backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(config.hidden_size),
                 cell_bw=lstm_cell(config.hidden_size),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backward_output], axis=2)

        with tf.variable_scope('output'):
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # Mean negative CRF log-likelihood over the batch.
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)

            self.loss = tf.reduce_mean(-self.log_likelihood)
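For orientation, a minimal sketch of how the constructor above might be driven in TF 1.x. The feature shapes, the window size of 5, the learning rate, and the BaselineModel/config names are illustrative assumptions; only the constructor body appears in Example #2.

    # Hypothetical driver for the constructor in Example #2.
    import tensorflow as tf

    features = {
        # [batch, seq_len, window]: character ids in a context window
        "input_ids": tf.placeholder(tf.int64, [None, None, 5]),
        # [batch, seq_len]: gold tag ids
        "label_ids": tf.placeholder(tf.int64, [None, None]),
        # [batch]: true (unpadded) sequence lengths
        "seq_length": tf.placeholder(tf.int64, [None]),
    }
    model = BaselineModel(config, is_training=True, features=features)

    # model.loss is the mean negative CRF log-likelihood; model.prediction
    # holds the Viterbi-decoded tag sequence.
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(model.loss)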
Example #3
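A BiLSTM (or GRU) tagger without a CRF: cell type and bidirectionality are configurable, prediction is a per-step argmax, and the loss is length-masked cross-entropy.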
    def __init__(self,
                 config: BiLSTMConfig,
                 is_training,
                 input_ids,
                 label_ids,
                 seq_length,
                 init_embedding=None):
        """Constructor for BertModel.

        Args:
          config: `BertConfig` instance.
          is_training: bool. rue for training model, false for eval model. Controls
            whether dropout will be applied.
          input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size].
          label_ids: (optional) int64 Tensor of shape [batch_size, seq_length].
          seq_length: (optional) int64 Tensor of shape [batch_size].
          init_embedding: (optional)

        Raises:
          ValueError: The config is invalid or one of the input tensor shapes
            is invalid.
        """
        self.input_ids = input_ids
        self.label_ids = label_ids
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        batch_size = input_shape[0]
        max_length = input_shape[1]
        window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            embedding = tf.Variable(init_embedding,
                                    dtype=tf.float32,
                                    name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(embedding, input_ids)
            feat_size = window_size
            x = tf.reshape(x,
                           [batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        with tf.variable_scope('rnn_cell'):
            def make_cell():
                if config.rnn_cell == 'lstm':
                    cell = tf.nn.rnn_cell.LSTMCell(config.hidden_size,
                                                   name='basic_lstm_cell')
                else:
                    cell = rnn.GRUCell(config.hidden_size)
                return rnn.DropoutWrapper(
                    cell, output_keep_prob=1.0 - config.hidden_dropout_prob)

            # Build a fresh cell per layer: reusing one cell object across
            # layers makes them share weights and fails on TF >= 1.2.
            fw_multi_cell = rnn.MultiRNNCell(
                [make_cell() for _ in range(config.num_hidden_layers)])
            bw_multi_cell = rnn.MultiRNNCell(
                [make_cell() for _ in range(config.num_hidden_layers)])

        with tf.variable_scope('rnn'):
            if config.bi_direction:
                (forward_output,
                 backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                     cell_fw=fw_multi_cell,
                     cell_bw=bw_multi_cell,
                     inputs=x,
                     sequence_length=seq_length,
                     dtype=tf.float32)
                output = tf.concat([forward_output, backward_output], axis=2)
            else:
                forward_output, _ = tf.nn.dynamic_rnn(
                    cell=fw_multi_cell,
                    inputs=x,
                    sequence_length=seq_length,
                    dtype=tf.float32)
                output = forward_output

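        # Per-timestep classification: no CRF, just an argmax over class logits.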
        with tf.variable_scope('output'):
            logits = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            self.prediction = tf.argmax(logits, axis=-1)

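        # Length-masked cross-entropy, averaged over timesteps and batch.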
        with tf.variable_scope('loss'):
            weight = tf.sequence_mask(seq_length, dtype=tf.float32)
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=logits,
                targets=self.label_ids,
                weights=weight,
                average_across_timesteps=True,
                average_across_batch=True)
Example #4
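A BiLSTM-CRF tagger built from HyperLSTM cells: the character embeddings serve as the main input and the dictionary features drive the hypernetwork (see input_main_dim and input_hyper_dim below).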
    def __init__(self,
                 config: DictHyperConfig,
                 is_training,
                 features,
                 init_embedding=None):

        super(DictHyperModel, self).__init__()
        input_ids = features["input_ids"]
        input_dicts = features["input_dicts"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.dict = input_dicts
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
            feat_size = self.window_size
            x = tf.reshape(
                x, [self.batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        def hyperlstm_cell(dim, input_main_dim, input_hyper_dim):
            cell = HyperLSTMCell(
                num_units=dim,
                input_main_dim=input_main_dim,
                input_hyper_dim=input_hyper_dim,
                forget_bias=1.0,
                use_recurrent_dropout=False,
                dropout_keep_prob=1.0,
                use_layer_norm=False,
                hyper_num_units=config.dict_hidden_size,
                hyper_embedding_size=config.hyper_embedding_size,
                hyper_use_recurrent_dropout=False)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=1.0 - config.hidden_dropout_prob)
            return cell

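        # Main input (embeddings) and hyper input (dict features) are concatenated
        # here; the cell presumably splits them again via input_main_dim / input_hyper_dim.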
        with tf.variable_scope('hyper'):
            self.dict = tf.cast(self.dict, dtype=tf.float32)
            input_main_dim = model_utils.get_shape_list(x, expected_rank=3)[2]
            input_hyper_dim = model_utils.get_shape_list(self.dict,
                                                         expected_rank=3)[2]
            x = tf.concat([x, self.dict], axis=2)
            (forward_output,
             backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=hyperlstm_cell(config.hidden_size, input_main_dim,
                                        input_hyper_dim),
                 cell_bw=hyperlstm_cell(config.hidden_size, input_main_dim,
                                        input_hyper_dim),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backward_output], axis=2)

        with tf.variable_scope('output'):
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # Mean negative CRF log-likelihood over the batch.
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
            self.loss = tf.reduce_mean(-self.log_likelihood)