Beispiel #1
0
    def testLayerBasic(self):
        """Checks that repeated CudnnLSTM calls share a single opaque kernel.

        The layer is applied twice through one instance and a third time
        through a second instance built under a reusing variable scope.  The
        test then asserts that exactly one trainable variable and one saveable
        object exist, and that every output sum evaluates to zero.
        """
        num_layers = 4
        num_units = 2
        batch_size = 8
        direction = CUDNN_RNN_UNIDIRECTION
        dir_count = 1

        with vs.variable_scope("main"):
            kernel_initializer = init_ops.constant_initializer(0.)
            bias_initializer = init_ops.constant_initializer(0.)
            input_shape = [num_layers * dir_count, batch_size, num_units]
            inputs = random_ops.random_uniform(input_shape,
                                               dtype=dtypes.float32)

            # Keyword arguments shared by both layer instances below.
            layer_kwargs = dict(direction=direction,
                                kernel_initializer=kernel_initializer,
                                bias_initializer=bias_initializer,
                                name="awesome_lstm")
            first_layer = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                              **layer_kwargs)

            # First call builds the layer; second call reuses it.
            built_outputs, _ = first_layer(inputs)
            reused_outputs, _ = first_layer(inputs)

            total_sum1 = math_ops.reduce_sum(built_outputs)
            total_sum2 = math_ops.reduce_sum(reused_outputs)

        with vs.variable_scope("main", reuse=True):
            second_layer = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                               **layer_kwargs)

            # A fresh instance under the reusing scope.
            scope_reused_outputs, _ = second_layer(inputs)
            total_sum3 = math_ops.reduce_sum(scope_reused_outputs)

        self.assertEqual(1, len(variables.trainable_variables()))
        saveables = ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)
        self.assertEqual(1, len(saveables))
        self.assertEqual("main/awesome_lstm/opaque_kernel",
                         variables.trainable_variables()[0].op.name)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            sum_values = sess.run([total_sum1, total_sum2, total_sum3])
            for sum_value in sum_values:
                self.assertEqual(0, sum_value)
Beispiel #2
0
  def __init__(self,
               vocab_size,
               embedding_dim,
               hidden_dim,
               num_layers,
               dropout_ratio,
               use_cudnn_rnn=True):
    """Creates the embedding, recurrent and output layers of the model.

    Args:
      vocab_size: passed to `Embedding` and used as the `Dense` output size.
      embedding_dim: passed to `Embedding`; also stored in `_output_shape`.
      hidden_dim: number of units of the recurrent layer.
      num_layers: number of stacked recurrent layers.
      dropout_ratio: dropout given to `CudnnLSTM`; `1 - dropout_ratio` is
        kept as `keep_ratio` and handed to the non-cuDNN `RNN`.
      use_cudnn_rnn: selects `cudnn_rnn.CudnnLSTM` when true, else `RNN`.
    """
    super(PTBModel, self).__init__()

    self.keep_ratio = 1 - dropout_ratio
    self.use_cudnn_rnn = use_cudnn_rnn

    embedding_layer = Embedding(vocab_size, embedding_dim)
    self.embedding = self.track_layer(embedding_layer)

    # Pick the recurrent implementation, then register it for tracking.
    if not self.use_cudnn_rnn:
      self.rnn = RNN(hidden_dim, num_layers, self.keep_ratio)
    else:
      self.rnn = cudnn_rnn.CudnnLSTM(
          num_layers, hidden_dim, dropout=dropout_ratio)
    self.track_layer(self.rnn)

    projection = tf.layers.Dense(
        vocab_size,
        kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
    self.linear = self.track_layer(projection)
    self._output_shape = [-1, embedding_dim]
Beispiel #3
0
  def _TestOptimizerSupportHelper(self, opt):
    """Runs one training step of a CudnnLSTM layer with the given optimizer.

    Args:
      opt: optimizer identifier forwarded to `self._GetOptimizer`.
    """
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    with ops.Graph().as_default() as g:
      kernel_initializer = init_ops.constant_initializer(0.)
      bias_initializer = init_ops.constant_initializer(0.)
      input_shape = [num_layers * dir_count, batch_size, num_units]
      inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

      layer = cudnn_rnn.CudnnLSTM(
          num_layers,
          num_units,
          direction=direction,
          kernel_initializer=kernel_initializer,
          bias_initializer=bias_initializer,
          name="awesome_lstm")
      layer_outputs, _ = layer(inputs)

      # Minimise the plain sum of the outputs; only the step itself matters.
      loss = math_ops.reduce_sum(layer_outputs)
      train_op = self._GetOptimizer(opt).minimize(loss)

    with self.test_session(use_gpu=True, graph=g) as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(train_op)
Beispiel #4
0
def cudnn_lstm_layer(layer_sizes, dropout_keep_prob, name_or_scope='rnn'):
    """Builds a CudnnLSTM Layer based on the given parameters.

    Args:
        layer_sizes: Sequence with the number of units of every layer. All
            entries must be equal.
        dropout_keep_prob: Keep probability; the layer is created with
            `dropout=1.0 - dropout_keep_prob`.
        name_or_scope: Name given to the CudnnLSTM layer.

    Returns:
        A `cudnn_rnn.CudnnLSTM` layer whose `_saveable_cls` is replaced by a
        subclass that emits backward-compatible variable names.

    Raises:
        ValueError: If `layer_sizes` contains differing sizes.
    """
    if any(ls != layer_sizes[0] for ls in layer_sizes):
        # Wrap in a 1-tuple so the message is formatted correctly whether
        # `layer_sizes` is a list or a tuple.
        raise ValueError(
            'CudnnLSTM does not support layers with differing sizes. Got: %s'
            % (layer_sizes,))
    lstm = cudnn_rnn.CudnnLSTM(num_layers=len(layer_sizes),
                               num_units=layer_sizes[0],
                               direction='unidirectional',
                               dropout=1.0 - dropout_keep_prob,
                               name=name_or_scope)

    class BackwardCompatibleCudnnLSTMSaveable(
            tf.contrib.cudnn_rnn.CudnnLSTMSaveable):
        """Overrides CudnnLSTMSaveable for backward-compatible var names."""
        def _TFCanonicalNamePrefix(self, layer, is_fwd=True):
            """Returns the canonical variable-name prefix for `layer`."""
            if self._direction == 'unidirectional':
                return 'multi_rnn_cell/cell_%d/lstm_cell' % layer
            else:
                return (
                    'cell_%d/bidirectional_rnn/%s/multi_rnn_cell/cell_0/lstm_cell'
                    % (layer, 'fw' if is_fwd else 'bw'))

    lstm._saveable_cls = BackwardCompatibleCudnnLSTMSaveable  # pylint:disable=protected-access
    return lstm
def test():
    """Runs a 2-layer bidirectional CudnnLSTM on fixed data and prints it.

    A float32 array holding 0..179, shaped [6, 3, 10], is fed through the
    layer in training mode; only the evaluated outputs are printed.
    """
    inputs = tf.placeholder(tf.float32, shape=[None, None, 10], name='inputs')
    # Placeholder kept from the original graph; nothing below consumes it.
    num = tf.placeholder(tf.float32, name='num')

    layer = cudnn_rnn.CudnnLSTM(num_layers=2,
                                num_units=16,
                                direction='bidirectional',
                                dropout=0.0,
                                name='cudnn_lstm')
    # Build explicitly before the first call.
    layer.build([None, None, 10])

    output_tensor, state_tensors = layer(inputs, training=True)

    with tf.Session() as sess:
        sess.run(variables.global_variables_initializer())
        feed_values = np.arange(180, dtype="float32").reshape([6, 3, 10])
        output_values, state_values = sess.run(
            [output_tensor, state_tensors],
            feed_dict={inputs: feed_values})
        print(output_values)
Beispiel #6
0
def cudnn_lstm_layer(layer_sizes,
                     dropout_keep_prob,
                     is_training=True,
                     name_or_scope='rnn'):
    """Builds a CudnnLSTM Layer based on the given parameters.

    Args:
        layer_sizes: Sequence with the number of units of every layer. All
            entries must be equal.
        dropout_keep_prob: Keep probability used when `is_training` is true;
            otherwise it is replaced by 1.0 (no dropout).
        is_training: Whether the layer is built for training.
        name_or_scope: Name given to the CudnnLSTM layer.

    Returns:
        A `cudnn_rnn.CudnnLSTM` layer whose `_saveable_cls` is replaced by a
        backward-compatible subclass (variable names and forget-bias offset).

    Raises:
        ValueError: If `layer_sizes` contains differing sizes.
    """
    dropout_keep_prob = dropout_keep_prob if is_training else 1.0
    if any(ls != layer_sizes[0] for ls in layer_sizes):
        # Wrap in a 1-tuple: `'%s' % some_tuple` would treat the tuple as the
        # argument list and raise TypeError instead of this ValueError.
        raise ValueError(
            'CudnnLSTM does not support layers with differing sizes. Got: %s'
            % (layer_sizes,))
    lstm = cudnn_rnn.CudnnLSTM(num_layers=len(layer_sizes),
                               num_units=layer_sizes[0],
                               direction='unidirectional',
                               dropout=1.0 - dropout_keep_prob,
                               name=name_or_scope)

    class BackwardCompatibleCudnnParamsFormatConverterLSTM(
            contrib_cudnn_rnn.CudnnParamsFormatConverterLSTM):
        """Overrides CudnnParamsFormatConverterLSTM for backward-compatibility."""
        def _cudnn_to_tf_biases(self, *cu_biases):
            """Overrides to subtract 1.0 from `forget_bias` (see BasicLSTMCell)."""
            (tf_bias, ) = (super(
                BackwardCompatibleCudnnParamsFormatConverterLSTM,
                self)._cudnn_to_tf_biases(*cu_biases))
            i, c, f, o = tf.split(tf_bias, 4)
            # Non-Cudnn LSTM cells add 1.0 to the forget bias variable.
            return (tf.concat([i, c, f - 1.0, o], axis=0), )

        def _tf_to_cudnn_biases(self, *tf_biases):
            """Overrides to add 1.0 to `forget_bias` (see BasicLSTMCell)."""
            (tf_bias, ) = tf_biases
            i, c, f, o = tf.split(tf_bias, 4)
            # Non-Cudnn LSTM cells add 1.0 to the forget bias variable.
            return (super(BackwardCompatibleCudnnParamsFormatConverterLSTM,
                          self)._tf_to_cudnn_biases(
                              tf.concat([i, c, f + 1.0, o], axis=0)))

    class BackwardCompatibleCudnnLSTMSaveable(
            contrib_cudnn_rnn.CudnnLSTMSaveable):
        """Overrides CudnnLSTMSaveable for backward-compatibility."""

        _format_converter_cls = BackwardCompatibleCudnnParamsFormatConverterLSTM

        def _tf_canonical_name_prefix(self, layer, is_fwd=True):
            """Overrides for backward-compatible variable names."""
            if self._direction == 'unidirectional':
                return 'multi_rnn_cell/cell_%d/lstm_cell' % layer
            else:
                return (
                    'cell_%d/bidirectional_rnn/%s/multi_rnn_cell/cell_0/lstm_cell'
                    % (layer, 'fw' if is_fwd else 'bw'))

    lstm._saveable_cls = BackwardCompatibleCudnnLSTMSaveable  # pylint:disable=protected-access
    return lstm
Beispiel #7
0
def get_cell(rnn_type, hidden_size, layer_num=1, direction=cudnn_rnn.CUDNN_RNN_UNIDIRECTION):
    """Returns the cuDNN recurrent layer selected by the suffix of `rnn_type`.

    Args:
        rnn_type: string ending in 'lstm', 'gru' or 'rnn'.
        hidden_size: number of units per layer.
        layer_num: number of stacked layers.
        direction: direction constant forwarded to the layer.

    Raises:
        NotImplementedError: if `rnn_type` has none of the known suffixes.
    """
    # Every variant is built with the same arguments and no dropout.
    shared_kwargs = dict(num_layers=layer_num, num_units=hidden_size,
                         direction=direction, dropout=0)
    if rnn_type.endswith('lstm'):
        return cudnn_rnn.CudnnLSTM(**shared_kwargs)
    if rnn_type.endswith('gru'):
        return cudnn_rnn.CudnnGRU(**shared_kwargs)
    if rnn_type.endswith('rnn'):
        return cudnn_rnn.CudnnRNNTanh(**shared_kwargs)
    raise NotImplementedError('Unsuported rnn type: {}'.format(rnn_type))
Beispiel #8
0
    def __init__(self,
                 hidden_size,
                 keep_prob,
                 num_layers,
                 use_cudnn_lstm=False,
                 batch_size=None,
                 cudnn_dropout=None):
        """
        Creates either one bidirectional CudnnLSTM layer or per-layer
        forward/backward LSTM cells wrapped with input dropout.

        Inputs:
          hidden_size: int. Hidden size of the RNN
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
          num_layers: int. Number of stacked layers
          use_cudnn_lstm: bool. Selects the CudnnLSTM implementation
          batch_size: accepted but not used by this constructor
          cudnn_dropout: dropout value forwarded to CudnnLSTM
        """
        self.use_cudnn_lstm = use_cudnn_lstm
        self.hidden_size = hidden_size
        self.keep_prob = keep_prob
        self.num_layers = num_layers
        self.cudnn_dropout = cudnn_dropout

        if self.use_cudnn_lstm:
            print('Using cudnn lstm')
            self.direction = 'bidirectional'

            self.cudnn_cell = cudnn_rnn.CudnnLSTM(self.num_layers,
                                                  self.hidden_size,
                                                  direction=self.direction,
                                                  dropout=cudnn_dropout)
            return

        layer_ids = range(num_layers)

        # Forward direction: create all cells first, then wrap each one.
        self.rnn_cell_fw = [
            tf.contrib.rnn.LSTMCell(self.hidden_size, name='lstmf' + str(idx))
            for idx in layer_ids
        ]
        self.rnn_cell_fw = [
            DropoutWrapper(fw_cell, input_keep_prob=self.keep_prob)
            for fw_cell in self.rnn_cell_fw
        ]

        # Backward direction, same two steps.
        self.rnn_cell_bw = [
            tf.contrib.rnn.LSTMCell(self.hidden_size, name='lstmb' + str(idx))
            for idx in layer_ids
        ]
        self.rnn_cell_bw = [
            DropoutWrapper(bw_cell, input_keep_prob=self.keep_prob)
            for bw_cell in self.rnn_cell_bw
        ]
Beispiel #9
0
  def testSaveableGraphDeviceAssignment(self):
    """Saves and restores a CudnnLSTM graph built under a device function.

    The device function places variable ops on the CPU and every other op on
    the GPU; the saver is created inside that device scope.
    """
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    def place_op(op):
      # Variables live on the CPU, all remaining ops on the GPU.
      return "/cpu:0" if op.type in ("Variable", "VariableV2") else "/gpu:0"

    with ops.Graph().as_default() as g:
      with ops.device(place_op):
        with vs.variable_scope("main"):
          kernel_initializer = init_ops.constant_initializer(3.14)
          bias_initializer = init_ops.constant_initializer(1.59)
          input_shape = [num_layers * dir_count, batch_size, num_units]
          inputs = random_ops.random_uniform(input_shape,
                                             dtype=dtypes.float32)

          layer = cudnn_rnn.CudnnLSTM(
              num_layers,
              num_units,
              direction=direction,
              kernel_initializer=kernel_initializer,
              bias_initializer=bias_initializer,
              name="awesome_lstm")
          outputs = layer(inputs)

        # saver is created in the scope of the device function.
        saver = saver_lib.Saver()

    with self.test_session(use_gpu=True, graph=g) as sess:
      checkpoint_path = os.path.join(self.get_temp_dir(),
                                     "test-saveable-device-assignment")
      sess.run(variables.global_variables_initializer())

      # Round-trip the checkpoint, then make sure the graph still runs.
      saver.save(sess, checkpoint_path)
      saver.restore(sess, checkpoint_path)
      sess.run(outputs)
Beispiel #10
0
  def __init__(self,
               vocab_size,
               embedding_dim,
               hidden_dim,
               num_layers,
               dropout_ratio,
               use_cudnn_rnn=True,
               forget_bias=0.2):
    """Creates the embedding, recurrent and output layers of the model.

    Args:
      vocab_size: passed to `Embedding` and used as the `Dense` output size.
      embedding_dim: passed to `Embedding`; also stored in `_output_shape`.
      hidden_dim: number of units of the recurrent layer.
      num_layers: number of stacked recurrent layers.
      dropout_ratio: dropout given to `CudnnLSTM`; `1 - dropout_ratio` is
        kept as `keep_ratio` and handed to the non-cuDNN `RNN`.
      use_cudnn_rnn: selects `cudnn_rnn.CudnnLSTM` when true, else `RNN`.
      forget_bias: forwarded to `RNN` only; the cuDNN path does not use it.
    """
    super(LSTMModel, self).__init__()

    self.keep_ratio = 1 - dropout_ratio
    self.use_cudnn_rnn = use_cudnn_rnn
    self.embedding = Embedding(vocab_size, embedding_dim)

    if not self.use_cudnn_rnn:
      self.rnn = RNN(hidden_dim, num_layers, self.keep_ratio,forget_bias)
    else:
      self.rnn = cudnn_rnn.CudnnLSTM(
          num_layers, hidden_dim, dropout=dropout_ratio)

    # Output projection with uniform initialisation in [-0.1, 0.1].
    uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
    self.linear = layers.Dense(vocab_size, kernel_initializer=uniform_init)
    self._output_shape = [-1, embedding_dim]
    def build_graph(self):
        """Builds the full model graph in a fresh `tf.Graph` stored on `self`.

        In order, the method creates: input placeholders (features, sparse
        labels, sequence lengths, learning rate), a bidirectional CudnnLSTM
        encoder, a fully connected projection to `self.num_classes`, the mean
        CTC loss and an RMSProp optimizer.  `self.train_op` is only created
        when `self.is_training` is true.

        Reads: `num_feature`, `use_cudnn`, `num_layers`, `num_units`,
        `keep_prob`, `is_training`, `num_classes`, `time_major`.
        Sets: `graph`, `inputs`, `labels`, `seq_lens`, `learning_rate`,
        `lstm`, `outputs`, `states`, `encoder_outputs`, `logits`, `ctc_loss`,
        `optimizer` and (when training) `train_op`.
        """
        # Start building inputs
        # inputs: [time_len, batch_size, input_size]
        self.graph = tf.Graph()

        with self.graph.as_default():
            with tf.name_scope("Inputs"):
                self.inputs = tf.placeholder(
                    tf.float32,
                    shape=[None, None, self.num_feature],
                    name='inputs')

                # Sparse labels are fed through three separate placeholders
                # (indices, values, dense shape).
                self.labels = tf.SparseTensor(
                    tf.placeholder(tf.int64, name='indices'),
                    tf.placeholder(tf.int32, name='values'),
                    tf.placeholder(tf.int64, name='shape'))

                self.seq_lens = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='seq_lens')

                self.learning_rate = tf.placeholder(tf.float32,
                                                    name='learning_rate')

                # use __init__ variables instead
                # self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

            # Convolution Preprocessing
            # if self.using_conv:
            #     with tf.name_scope("Convolution"):
            #         self.conv = tf.nn.conv2d()

            # Start building RNN
            with tf.name_scope("RNN"):
                # TODO: add other LSTM categories
                # Only use cudnnLSTM for now
                # NOTE(review): when `self.use_cudnn` is false nothing in this
                # method assigns `self.outputs`, so the read below relies on
                # it being set elsewhere -- confirm.
                if self.use_cudnn:
                    self.lstm = cudnn_rnn.CudnnLSTM(num_layers=self.num_layers,
                                                    num_units=self.num_units,
                                                    direction='bidirectional',
                                                    dropout=1.0 -
                                                    self.keep_prob,
                                                    name='cudnn_lstm')

                    # build first(optional)
                    self.lstm.build([None, None, self.num_feature])
                    self.outputs, self.states = self.lstm(
                        self.inputs, training=self.is_training)
                # else:
                #     # CudnnCompatibleLSTMCell
                #     self.lstm = cud

                # input: [time_len, batch_size, input_size]
                # outputs: [time_len, batch_size, num_dirs * num_units]
                # states: a tuple of tensor(s) [num_layers * num_dirs, batch_size, num_units]

                self.encoder_outputs = self.outputs

            # Start building fully connected layers, with bottleneck and FC
            with tf.name_scope("Fully_Connected"):
                # Axis 0 is read as time and axis 1 as batch, regardless of
                # `self.time_major`.
                batch_size = tf.shape(self.inputs)[1]
                max_time = tf.shape(self.inputs)[0]
                output_dim = self.encoder_outputs.shape.as_list()[-1]

                # Flatten the two leading axes so the dense layer is applied
                # to every step independently.
                outputs_2d = tf.reshape(
                    self.encoder_outputs,
                    shape=[batch_size * max_time, output_dim])

                # if self.bottleneck_dim is not None and self.bottleneck_dim != 0:
                #     with tf.variable_scope('bottleneck') as scope:
                #         outputs_2d = tf.contrib.layers.fully_connected(
                #             outputs_2d,
                #             num_outputs=self.bottleneck_dim,
                #             activation_fn=tf.nn.relu)

                #     # Dropout for the hidden-output connections
                #     outputs_2d = tf.nn.dropout(
                #         outputs_2d, keep_prob, name='dropout_bottleneck')

                with tf.variable_scope('output') as scope:
                    logits_2d = tf.contrib.layers.fully_connected(
                        outputs_2d,
                        num_outputs=self.num_classes,
                        activation_fn=None)

                    # NOTE(review): the non-time-major branch reshapes rows
                    # that were flattened in [axis0, axis1] order into
                    # [batch_size, max_time, ...]; verify this matches the
                    # layout the caller feeds when `time_major` is false.
                    if self.time_major:
                        # Reshape back to the original shape
                        logits = tf.reshape(
                            logits_2d,
                            shape=[max_time, batch_size, self.num_classes])
                    else:
                        # Reshape back to the original shape
                        logits = tf.reshape(
                            logits_2d,
                            shape=[batch_size, max_time, self.num_classes])

                        # Convert to time-major: `[T, B, num_classes]'
                        logits = tf.transpose(logits, [1, 0, 2])

                self.logits = logits
                #self.logits =tf.Print(self.logits, [tf.shape(self.logits)])

            # Start building ctc loss
            # TODO: Could add weight decay policy here
            with tf.name_scope("CTC_Loss"):
                # TODO: dig into all variables
                # labels: int32 SparseTensor.
                #         labels.indices[i, :] == [b, t] means labels.values[i] stores the id for (batch b, time t).
                #         labels.values[i] must take on values in [0, num_labels)
                # logits: 3-D float Tensor [max_time, batch_size, num_classes]
                # inputs_seq_len: 1-D int32 vector, [batch_size]

                # return 1-D float tensor: [batch], neg-log prob
                ctc_losses = tf.nn.ctc_loss(
                    self.labels,
                    self.logits,
                    #tf.cast(inputs_seq_len, tf.int32),
                    self.seq_lens,
                    preprocess_collapse_repeated=False,
                    ctc_merge_repeated=True,
                    ignore_longer_outputs_than_inputs=True,
                    time_major=True)
                # Average the per-example losses into one scalar.
                self.ctc_loss = tf.reduce_mean(ctc_losses,
                                               name='ctc_loss_mean')

            # TODO: add more optimizers
            with tf.name_scope("Optimizer"):
                self.optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=self.learning_rate,
                    decay=0.9,
                    momentum=0.0,
                    epsilon=1e-10,
                    use_locking=False,
                    centered=False)

            # The training op is only added to the graph in training mode.
            if self.is_training:
                self.train_op = self.optimizer.minimize(self.ctc_loss)
Beispiel #12
0
                                          dtype=tf.float32,
                                          initial_state=initial_state,
                                          time_major=False)

    outputs, states = tf.nn.dynamic_rnn(lstm_cell,
                                        X_timemajor,
                                        dtype=tf.float32,
                                        initial_state=initial_state,
                                        time_major=True)

    cudnn_cell = cudnn_rnn.CudnnLSTM(
        num_layers=1,
        num_units=num_input,
        direction=cudnn_rnn.CUDNN_RNN_UNIDIRECTION,
        input_mode=cudnn_rnn.CUDNN_INPUT_LINEAR_MODE,
        name="CudnnLSTM",
        dropout=0.0,
        seed=0.0,
        kernel_initializer=tf.initializers.ones(),
        bias_initializer=tf.initializers.zeros(),
        dtype=tf.float32)

    cudnn_outputs, cudnn_states = cudnn_cell(
        inputs=X_timemajor,  # 3-D tensor [time_len, batch_size, input_size]
        training=True)

# Print the shape of the batch-major input and its first three axes.
print('X_batchmajor', X_batchmajor.shape)
# NOTE(review): the original tag here was "NHWC", which names four axes, but
# only axes 0..2 are printed below -- confirm the intended layout and labels.
print('...batch:', X_batchmajor.shape[0])
print('...in_width:', X_batchmajor.shape[1])
print('...in_channels:', X_batchmajor.shape[2])
Beispiel #13
0
def cudnn_stack_bidirectional_dynamic_rnn(
        inputs,
        layer_sizes,
        sequence_length,
        initial_state=None,
        dropout_keep_prob=1.0,
        cell_wrapper=None,
        variational_recurrent=True,
        base_cell=tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell,
        is_training=False):
    """Runs a stacked bidirectional LSTM over batch-major `inputs`.

    When `is_training` is false the stack is built from `base_cell` cells via
    `stack_bidirectional_dynamic_rnn`; otherwise a bidirectional `CudnnLSTM`
    layer is used (inputs are transposed to time-major and back).

    Args:
        inputs: batch-major input tensor.
        layer_sizes: sequence of layer sizes; its length gives the number of
            layers and only its first entry is used as the unit count.
        sequence_length: not used by this function.
        initial_state: optional tensor split in two halves of `num_units`
            along the last axis (read as `c` then `h`) and applied to every
            layer and direction.
        dropout_keep_prob: keep probability for the CudnnLSTM path; `None`
            means no dropout.
        cell_wrapper: not used by this function.
        variational_recurrent: not used by this function.
        base_cell: cell constructor for the non-training path.
        is_training: selects the CudnnLSTM path when true.

    Returns:
        A tuple `(outputs, last_h_state)`, where `last_h_state` concatenates
        the last layer's forward and backward hidden states.
    """
    num_layers = len(layer_sizes)
    num_units = layer_sizes[0]
    num_dirs = 2  # bidirectional
    # Unused below; kept so the graph contains the same ops as before.
    batch_size = tf.shape(inputs)[0]

    if not is_training:
        # for cpu restoring Cudnn-trained checkpoints
        cells_fw = [base_cell(num_units) for _ in range(num_layers)]
        cells_bw = [base_cell(num_units) for _ in range(num_layers)]

        initial_states_fw = initial_states_bw = None
        if initial_state is not None:
            c, h = tf.split(initial_state, [num_units, num_units], -1)
            per_layer_states = [rnn_cell.LSTMStateTuple(c, h)] * num_layers
            initial_states_fw = initial_states_bw = per_layer_states

        stacked = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            cells_fw,
            cells_bw,
            inputs,
            dtype=tf.float32,
            initial_states_fw=initial_states_fw,
            initial_states_bw=initial_states_bw,
            time_major=False,
            scope='cudnn_lstm/stack_bidirectional_rnn')
        outputs, output_state_fw, output_state_bw = stacked
        top_fw, top_bw = output_state_fw[-1], output_state_bw[-1]
        # The cell-state concat is computed but not returned.
        last_c_state = tf.concat([top_fw.c, top_bw.c], 1)
        last_h_state = tf.concat([top_fw.h, top_bw.h], 1)
        return outputs, last_h_state

    dropout_prob = 0. if dropout_keep_prob is None else 1. - dropout_keep_prob

    if initial_state is not None:
        # Add a leading axis and repeat the state once per layer/direction.
        copies = num_layers * num_dirs
        initial_state = tf.expand_dims(initial_state, 0)
        c, h = tf.split(initial_state, [num_units, num_units], -1)
        h = tf.concat([h] * copies, 0)
        c = tf.concat([c] * copies, 0)
        initial_state = (h, c)

    lstm = cudnn_rnn.CudnnLSTM(num_layers=num_layers,
                               num_units=num_units,
                               direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION,
                               dropout=dropout_prob)

    # Swap the two leading axes for the layer, then swap them back.
    time_major_inputs = tf.transpose(inputs, [1, 0, 2])
    time_major_outputs, (output_h, output_c) = lstm(
        time_major_inputs,
        initial_state=initial_state,
        training=is_training)
    outputs = tf.transpose(time_major_outputs, [1, 0, 2])
    # The cell-state concat is computed but not returned.
    last_c_state = tf.concat([output_c[-2], output_c[-1]], 1)
    last_h_state = tf.concat([output_h[-2], output_h[-1]], 1)
    return outputs, last_h_state
Beispiel #14
0
    def create_model(self, share_dense=True, concat_sub=True):
        """Builds the classification graph with one recurrent tower per sub-task.

        For each of the `n_sub` sub-tasks a bidirectional CudnnLSTM followed
        by a bidirectional CudnnGRU encodes the (dropped-out) input encoding.
        Max, attention and mean pooling are concatenated and projected to 4
        logits.  The per-sub-task logits are stacked into `self.logits`,
        from which `self.prob`, `self.prediction` and `self.loss` are derived.

        Args:
            share_dense: not used by this method.
            concat_sub: not used by this method.

        Returns:
            `self`, to allow call chaining.
        """
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y2')

        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

        # Normalise once so the outer and inner feature checks agree on case;
        # otherwise a mixed-case feature name could pass the outer test and
        # leave the ELMo file variables unbound.
        feature = self.main_feature.lower()

        if feature in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len], name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding, name='word_embedding')
            # NOTE(review): the lookup reads `self.embedding`, not the
            # `self.word_embedding` variable created above -- confirm that
            # bypassing the variable is intended.
            self.word_encoding = tf.nn.embedding_lookup(self.embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        elif feature in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len+2], name='input_x')
            if feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file
            self.bilm = BidirectionalLanguageModel(options_file,
                                                    weight_file,
                                                    use_character_inputs=False,
                                                    embedding_weight_file=embed_file,
                                                    max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output', bilm_embedding_op,l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        else:
            exit('wrong feature')

        c_outputs = []
        for c in range(n_sub):
            with tf.variable_scope('lstm-{}'.format(c)):
                #### cudnn lstm ####
                # The cuDNN layers are fed with the two leading axes swapped;
                # the result is swapped back afterwards.
                self.forward_lstm = cudnn_rnn.CudnnLSTM(num_layers=1, num_units=self.hidden_dim, direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
                self.forward_gru = cudnn_rnn.CudnnGRU(num_layers=1, num_units=self.hidden_dim, direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
                x, _ = self.forward_lstm(tf.transpose(self.word_encoding, [1, 0, 2]))
                x, _ = self.forward_gru(x)
                x = tf.transpose(x, [1, 0, 2])

            with tf.variable_scope('pooling-{}'.format(c)):
                max_pooled = tf.reshape(tf.reduce_max(x, 1), [-1, 2*self.hidden_dim])
                avg_pooled = tf.reshape(tf.reduce_mean(x, 1), [-1, 2*self.hidden_dim])

                att_w = tf.get_variable(shape=[2*self.hidden_dim,self.hidden_dim], name='att_w')
                att_b = tf.get_variable(shape=[self.hidden_dim],name='att_b')
                att_v = tf.get_variable(shape=[self.hidden_dim,1],name='att_v')

                # Attention pooling: score every step, softmax over steps,
                # then take the weighted sum of the encoder outputs.
                x_reshape = tf.reshape(x, [-1, 2*self.hidden_dim])
                score = tf.reshape(tf.matmul(tf.nn.tanh(tf.matmul(x_reshape, att_w)) + att_b, att_v), [-1, 1, self.max_len])
                alpha = tf.nn.softmax(score, axis=-1)
                att_pooled = tf.reshape(tf.matmul(alpha, x), [-1, 2*self.hidden_dim])

                concat_pooled = tf.concat((max_pooled, att_pooled, avg_pooled), -1)

                concat_pooled = tf.nn.dropout(concat_pooled, self.dropout_keep_prob)
                dense = tf.layers.dense(concat_pooled, 4, activation=None)
                c_outputs.append(dense)

        # Use n_sub (not a literal 10) so the logits always match the label
        # placeholders and the `self.prob` reshape below.
        self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y, [-1,4])))
        else:
            class0_weight = 1  # weight coefficient of class 0
            class1_weight = 3  # weight coefficient of class 1
            class2_weight = 3  # weight coefficient of class 2
            class3_weight = 3  # weight coefficient of class 3

            # Flatten labels AND probabilities to [-1, 4] so that column k of
            # both refers to class k; indexing the 3-D `y_` with [:, k] would
            # select a sub-task instead of a class.
            y = tf.reshape(self.input_y, [-1, 4])
            flat_prob = tf.reshape(y_, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight * (y[:, 0]*tf.log(flat_prob[:, 0]))
                                        -class1_weight * (y[:, 1]*tf.log(flat_prob[:, 1]))
                                        -class2_weight * (y[:, 2]*tf.log(flat_prob[:, 2]))
                                        -class3_weight * (y[:, 3]*tf.log(flat_prob[:, 3])))

        return self
    def create_model(self, share_dense=True, concat_sub=True):
        """Build the multi-head BiLSTM + CNN classification graph.

        Creates the input placeholders, encodes the tokens (trainable lookup
        table for 'word'/'char', ELMo for the 'elmo_*' features) and then, for
        each of the ``n_sub`` sub-tasks, stacks an independent bidirectional
        cuDNN LSTM, a width-3 convolution, max+avg pooling over time and a
        4-way dense layer.

        Attributes set on ``self``: ``input_x``, ``input_y``, ``input_y2``,
        ``dropout_keep_prob``, ``output_keep_prob``, ``word_encoding``,
        ``forward`` (the LSTM of the last sub-task), ``logits``, ``prob``,
        ``prediction`` and ``loss``; additionally ``word_embedding`` or
        ``bilm`` depending on the feature type.

        Args:
            share_dense: Currently unused; kept for interface compatibility.
            concat_sub: Currently unused; kept for interface compatibility.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``self.main_feature`` is not one of the supported
                feature names (compared case-insensitively).
        """
        # Normalise once so that every comparison below agrees on the case.
        feature = self.main_feature.lower()
        plain_features = ('word', 'char')
        elmo_features = ('elmo_word', 'elmo_char', 'elmo_qiuqiu')
        # Validate before any graph op is created so a bad configuration
        # fails fast with a catchable, descriptive error.
        if feature not in plain_features + elmo_features:
            raise ValueError('wrong feature: {!r}'.format(self.main_feature))

        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, n_sub, 4],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        # Not consumed anywhere in this method; kept because it is part of
        # the attributes this method has always created.
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        if feature in plain_features:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            # Look up in the variable (not the raw initial matrix) so that
            # the embedding table created above is actually used and trained.
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
        else:
            # Two extra positions compared to the plain features;
            # presumably the sentence begin/end tokens expected by the
            # language model -- TODO confirm against the data pipeline.
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            # The config exposes '<feature>_options_file', '<feature>_weight_file'
            # and '<feature>_embed_file' for every ELMo feature.
            options_file = getattr(self.config, feature + '_options_file')
            weight_file = getattr(self.config, feature + '_weight_file')
            embed_file = getattr(self.config, feature + '_embed_file')
            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']

        self.word_encoding = tf.nn.dropout(self.word_encoding,
                                           self.dropout_keep_prob)

        kernel_size = 3
        # After a VALID convolution the time axis has this many positions;
        # pooling over all of them yields one vector per example.
        # NOTE(review): assumes the encoded sequence length equals
        # self.max_len for both feature families -- confirm for ELMo.
        pool_window = [1, self.max_len - kernel_size + 1, 1, 1]

        c_outputs = []
        for c in range(n_sub):
            with tf.variable_scope('lstm-{}'.format(c)):
                # self.forward is overwritten on every iteration; after the
                # loop it refers to the LSTM of the last sub-task.
                self.forward = cudnn_rnn.CudnnLSTM(
                    num_layers=1,
                    num_units=self.hidden_dim,
                    direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION,
                    dtype=tf.float32)
                # CudnnLSTM consumes time-major input, so swap the first two
                # axes on the way in and back again on the way out.
                x, _ = self.forward(tf.transpose(self.word_encoding,
                                                 [1, 0, 2]))
                x = tf.transpose(x, [1, 0, 2])

            with tf.variable_scope('conv-{}'.format(c)):
                inputs_expanded = tf.expand_dims(x, -1)
                # The filter spans the full 2 * hidden_dim feature axis
                # (forward + backward LSTM outputs).
                filter_shape = [kernel_size, 2 * self.hidden_dim, 1, n_filters]
                W = tf.get_variable(initializer=tf.truncated_normal(
                    filter_shape, stddev=0.1),
                                    name='W')
                b = tf.get_variable('b',
                                    initializer=tf.constant(0.1,
                                                            shape=[n_filters]))
                conv = tf.nn.conv2d(inputs_expanded,
                                    W,
                                    strides=[1] * 4,
                                    padding='VALID',
                                    name='conv2d')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                max_pooled = tf.nn.max_pool(h,
                                            ksize=pool_window,
                                            strides=[1, 1, 1, 1],
                                            padding='VALID',
                                            name='max_pool')
                avg_pooled = tf.nn.avg_pool(h,
                                            ksize=pool_window,
                                            strides=[1, 1, 1, 1],
                                            padding='VALID',
                                            name='avg_pool')
                concat_pooled = tf.reshape(
                    tf.concat((max_pooled, avg_pooled), -1),
                    [-1, 2 * n_filters])

                concat_pooled = tf.nn.dropout(concat_pooled,
                                              self.dropout_keep_prob)
                dense = tf.layers.dense(concat_pooled, 4, activation=None)
                c_outputs.append(dense)

        # Use n_sub (not a literal) so the reshape always matches the number
        # of heads built in the loop above.
        self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        # Flatten (batch, n_sub) into one axis so that logits and labels
        # line up row by row in the loss computations.
        flat_logits = tf.reshape(self.logits, [-1, 4])
        flat_labels = tf.reshape(self.input_y, [-1, 4])

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits,
                                                        labels=flat_labels))
            self.loss += tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=flat_logits,
                    labels=tf.reshape(self.input_y2, [-1, 4])))
        else:
            # Per-class weights for classes 0..3: class 0 counts once, the
            # other three classes count three times as much.
            class_weights = tf.constant([1., 3., 3., 3.], dtype=tf.float32)
            # log_softmax on the flattened logits keeps the class axis last
            # (so the weights apply per class) and avoids log(0).
            flat_log_prob = tf.nn.log_softmax(flat_logits)
            # NOTE(review): unlike the unbalanced branch, input_y2 does not
            # contribute to this loss -- confirm that this is intended.
            self.loss = -tf.reduce_mean(
                tf.reduce_sum(class_weights * flat_labels * flat_log_prob,
                              axis=1))

        return self