def testLayerBasic(self):
    """Checks that same-named CudnnLSTM layers in one scope share a kernel.

    The layer is called twice directly and once more through a second
    instance created under ``reuse=True``. Exactly one trainable variable
    and one saveable object must exist, and because both initializers are
    constant zero every summed output is expected to be zero.
    """
    layer_count = 4
    unit_count = 2
    batch = 8
    directions = 1

    def make_lstm(kernel_init, bias_init):
        # Both instances use the same configuration and name so that the
        # second one can pick up the variable created by the first.
        return cudnn_rnn.CudnnLSTM(
            layer_count,
            unit_count,
            direction=CUDNN_RNN_UNIDIRECTION,
            kernel_initializer=kernel_init,
            bias_initializer=bias_init,
            name="awesome_lstm")

    with vs.variable_scope("main"):
        zero_kernel = init_ops.constant_initializer(0.)
        zero_bias = init_ops.constant_initializer(0.)
        inputs = random_ops.random_uniform(
            [layer_count * directions, batch, unit_count],
            dtype=dtypes.float32)
        first_lstm = make_lstm(zero_kernel, zero_bias)

        # The first call builds the layer, the second call reuses it.
        built_out, _ = first_lstm(inputs)
        reused_out, _ = first_lstm(inputs)

        sums = [math_ops.reduce_sum(built_out),
                math_ops.reduce_sum(reused_out)]

    with vs.variable_scope("main", reuse=True):
        second_lstm = make_lstm(zero_kernel, zero_bias)
        # A fresh instance under reuse=True must also reuse the variable.
        scoped_out, _ = second_lstm(inputs)
        sums.append(math_ops.reduce_sum(scoped_out))

    trainable = variables.trainable_variables()
    self.assertEqual(1, len(trainable))
    saveables = ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)
    self.assertEqual(1, len(saveables))
    self.assertEqual("main/awesome_lstm/opaque_kernel", trainable[0].op.name)

    with self.test_session(use_gpu=True) as sess:
        sess.run(variables.global_variables_initializer())
        for sum_value in sess.run(sums):
            self.assertEqual(0, sum_value)
def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers,
             dropout_ratio, use_cudnn_rnn=True):
    """Creates and tracks the embedding, recurrent and output layers.

    Args:
        vocab_size: Passed to `Embedding` and used as the `Dense` output size.
        embedding_dim: Passed to `Embedding`; also the last entry of
            `self._output_shape`.
        hidden_dim: Unit count handed to the recurrent layer.
        num_layers: Layer count handed to the recurrent layer.
        dropout_ratio: Given to `CudnnLSTM` as `dropout`; `1 - dropout_ratio`
            is stored as `self.keep_ratio` and given to `RNN` otherwise.
        use_cudnn_rnn: Selects `cudnn_rnn.CudnnLSTM` when truthy, `RNN`
            otherwise.
    """
    super(PTBModel, self).__init__()

    self.keep_ratio = 1 - dropout_ratio
    self.use_cudnn_rnn = use_cudnn_rnn

    embedding_layer = Embedding(vocab_size, embedding_dim)
    self.embedding = self.track_layer(embedding_layer)

    self.rnn = (
        cudnn_rnn.CudnnLSTM(num_layers, hidden_dim, dropout=dropout_ratio)
        if self.use_cudnn_rnn
        else RNN(hidden_dim, num_layers, self.keep_ratio))
    self.track_layer(self.rnn)

    projection = tf.layers.Dense(
        vocab_size,
        kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
    self.linear = self.track_layer(projection)

    self._output_shape = [-1, embedding_dim]
def _TestOptimizerSupportHelper(self, opt):
    """Runs a single minimize step on a CudnnLSTM with the given optimizer.

    Args:
        opt: Forwarded to `self._GetOptimizer` to obtain the optimizer.
    """
    layer_count, unit_count, batch, directions = 4, 2, 8, 1

    with ops.Graph().as_default() as graph:
        zero_kernel = init_ops.constant_initializer(0.)
        zero_bias = init_ops.constant_initializer(0.)
        input_shape = [layer_count * directions, batch, unit_count]
        inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

        lstm = cudnn_rnn.CudnnLSTM(
            layer_count,
            unit_count,
            direction=CUDNN_RNN_UNIDIRECTION,
            kernel_initializer=zero_kernel,
            bias_initializer=zero_bias,
            name="awesome_lstm")
        lstm_out, _ = lstm(inputs)

        # The summed output is the scalar the optimizer minimizes.
        loss = math_ops.reduce_sum(lstm_out)
        train_op = self._GetOptimizer(opt).minimize(loss)

    with self.test_session(use_gpu=True, graph=graph) as sess:
        sess.run(variables.global_variables_initializer())
        sess.run(train_op)
def cudnn_lstm_layer(layer_sizes, dropout_keep_prob, name_or_scope='rnn'):
    """Builds a CudnnLSTM Layer based on the given parameters.

    Args:
        layer_sizes: Sequence of per-layer unit counts. All entries must be
            equal; the first entry is used as `num_units` and the length as
            `num_layers`.
        dropout_keep_prob: Keep probability; `1.0 - dropout_keep_prob` is
            passed to `CudnnLSTM` as `dropout`.
        name_or_scope: Passed to `CudnnLSTM` as `name`.

    Returns:
        The `CudnnLSTM` layer, with its `_saveable_cls` replaced by a
        subclass that emits backward-compatible variable names.

    Raises:
        ValueError: If the entries of `layer_sizes` are not all equal.
    """
    if any(size != layer_sizes[0] for size in layer_sizes):
        # Wrap the operand in a 1-tuple so lists and tuples both format as a
        # single value and the exception carries one fully formatted message.
        raise ValueError(
            'CudnnLSTM does not support layers with differing sizes. Got: %s'
            % (layer_sizes,))

    lstm = cudnn_rnn.CudnnLSTM(
        num_layers=len(layer_sizes),
        num_units=layer_sizes[0],
        direction='unidirectional',
        dropout=1.0 - dropout_keep_prob,
        name=name_or_scope)

    class BackwardCompatibleCudnnLSTMSaveable(
            tf.contrib.cudnn_rnn.CudnnLSTMSaveable):
        """Overrides CudnnLSTMSaveable for backward-compatible var names."""

        def _TFCanonicalNamePrefix(self, layer, is_fwd=True):
            if self._direction == 'unidirectional':
                return 'multi_rnn_cell/cell_%d/lstm_cell' % layer
            else:
                return (
                    'cell_%d/bidirectional_rnn/%s/multi_rnn_cell/cell_0/lstm_cell'
                    % (layer, 'fw' if is_fwd else 'bw'))

    lstm._saveable_cls = BackwardCompatibleCudnnLSTMSaveable  # pylint:disable=protected-access
    return lstm
def test():
    """Runs a 2-layer bidirectional CudnnLSTM on a fixed input and prints it.

    The fed array holds the values 0..179 as float32 reshaped to [6, 3, 10].
    """
    feature_dim = 10
    inputs = tf.placeholder(
        tf.float32, shape=[None, None, feature_dim], name='inputs')
    # Kept so the graph still contains the 'num' placeholder; it is not fed.
    unused_num = tf.placeholder(tf.float32, name='num')

    lstm = cudnn_rnn.CudnnLSTM(
        num_layers=2,
        num_units=16,
        direction='bidirectional',
        dropout=0.0,
        name='cudnn_lstm')
    lstm.build([None, None, feature_dim])
    output_op, state_op = lstm(inputs, training=True)

    with tf.Session() as sess:
        sess.run(variables.global_variables_initializer())
        feed_values = np.asarray(range(180), dtype="float32")
        feed_values = feed_values.reshape([6, 3, 10])
        output_values, _ = sess.run(
            [output_op, state_op], feed_dict={inputs: feed_values})
        print(output_values)
def cudnn_lstm_layer(layer_sizes, dropout_keep_prob, is_training=True,
                     name_or_scope='rnn'):
    """Builds a CudnnLSTM Layer based on the given parameters.

    Args:
        layer_sizes: Sequence of per-layer unit counts. All entries must be
            equal; the first entry is used as `num_units` and the length as
            `num_layers`.
        dropout_keep_prob: Keep probability used when `is_training` is
            truthy; `1.0 - dropout_keep_prob` is passed as `dropout`.
        is_training: When falsy the keep probability is forced to 1.0.
        name_or_scope: Passed to `CudnnLSTM` as `name`.

    Returns:
        The `CudnnLSTM` layer, with its `_saveable_cls` replaced by a
        backward-compatible subclass.

    Raises:
        ValueError: If the entries of `layer_sizes` are not all equal.
    """
    dropout_keep_prob = dropout_keep_prob if is_training else 1.0

    if any(size != layer_sizes[0] for size in layer_sizes):
        # `'%s' % layer_sizes` raises TypeError for a multi-element tuple;
        # wrapping the operand in a 1-tuple formats any sequence as one value.
        raise ValueError(
            'CudnnLSTM does not support layers with differing sizes. Got: %s'
            % (layer_sizes,))

    lstm = cudnn_rnn.CudnnLSTM(
        num_layers=len(layer_sizes),
        num_units=layer_sizes[0],
        direction='unidirectional',
        dropout=1.0 - dropout_keep_prob,
        name=name_or_scope)

    class BackwardCompatibleCudnnParamsFormatConverterLSTM(
            contrib_cudnn_rnn.CudnnParamsFormatConverterLSTM):
        """Overrides CudnnParamsFormatConverterLSTM for backward-compatibility."""

        def _cudnn_to_tf_biases(self, *cu_biases):
            """Overrides to subtract 1.0 from `forget_bias` (see BasicLSTMCell)."""
            (tf_bias,) = (
                super(BackwardCompatibleCudnnParamsFormatConverterLSTM,
                      self)._cudnn_to_tf_biases(*cu_biases))
            i, c, f, o = tf.split(tf_bias, 4)
            # Non-Cudnn LSTM cells add 1.0 to the forget bias variable.
            return (tf.concat([i, c, f - 1.0, o], axis=0),)

        def _tf_to_cudnn_biases(self, *tf_biases):
            """Overrides to add 1.0 to `forget_bias` (see BasicLSTMCell)."""
            (tf_bias,) = tf_biases
            i, c, f, o = tf.split(tf_bias, 4)
            # Non-Cudnn LSTM cells add 1.0 to the forget bias variable.
            return (
                super(BackwardCompatibleCudnnParamsFormatConverterLSTM,
                      self)._tf_to_cudnn_biases(
                          tf.concat([i, c, f + 1.0, o], axis=0)))

    class BackwardCompatibleCudnnLSTMSaveable(
            contrib_cudnn_rnn.CudnnLSTMSaveable):
        """Overrides CudnnLSTMSaveable for backward-compatibility."""

        _format_converter_cls = BackwardCompatibleCudnnParamsFormatConverterLSTM

        def _tf_canonical_name_prefix(self, layer, is_fwd=True):
            """Overrides for backward-compatible variable names."""
            if self._direction == 'unidirectional':
                return 'multi_rnn_cell/cell_%d/lstm_cell' % layer
            else:
                return (
                    'cell_%d/bidirectional_rnn/%s/multi_rnn_cell/cell_0/lstm_cell'
                    % (layer, 'fw' if is_fwd else 'bw'))

    lstm._saveable_cls = BackwardCompatibleCudnnLSTMSaveable  # pylint:disable=protected-access
    return lstm
def get_cell(rnn_type, hidden_size, layer_num=1,
             direction=cudnn_rnn.CUDNN_RNN_UNIDIRECTION):
    """Returns the cuDNN RNN layer selected by the suffix of `rnn_type`.

    Suffixes are tested in the order 'lstm', 'gru', 'rnn'.

    Args:
        rnn_type: String whose ending picks the layer class.
        hidden_size: Passed to the layer as `num_units`.
        layer_num: Passed to the layer as `num_layers`.
        direction: Passed to the layer as `direction`.

    Returns:
        A `CudnnLSTM`, `CudnnGRU` or `CudnnRNNTanh` built with `dropout=0`.

    Raises:
        NotImplementedError: If `rnn_type` has none of the known suffixes.
    """
    layer_kwargs = dict(
        num_layers=layer_num,
        num_units=hidden_size,
        direction=direction,
        dropout=0)

    if rnn_type.endswith('lstm'):
        return cudnn_rnn.CudnnLSTM(**layer_kwargs)
    if rnn_type.endswith('gru'):
        return cudnn_rnn.CudnnGRU(**layer_kwargs)
    if rnn_type.endswith('rnn'):
        return cudnn_rnn.CudnnRNNTanh(**layer_kwargs)
    raise NotImplementedError('Unsuported rnn type: {}'.format(rnn_type))
def __init__(self, hidden_size, keep_prob, num_layers, use_cudnn_lstm=False,
             batch_size=None, cudnn_dropout=None):
    """Stores the configuration and creates the recurrent cells.

    Inputs:
      hidden_size: int. Hidden size of the RNN
      keep_prob: Tensor containing a single scalar that is the keep
        probability (for dropout); used as `input_keep_prob` of the
        `DropoutWrapper`s on the non-cuDNN path
      num_layers: number of stacked layers
      use_cudnn_lstm: when truthy a single bidirectional `CudnnLSTM` is
        created instead of per-layer forward/backward `LSTMCell` lists
      batch_size: not used by this constructor
      cudnn_dropout: passed to `CudnnLSTM` as `dropout`
    """
    self.use_cudnn_lstm = use_cudnn_lstm
    self.hidden_size = hidden_size
    self.keep_prob = keep_prob
    self.num_layers = num_layers
    self.cudnn_dropout = cudnn_dropout

    if not self.use_cudnn_lstm:
        # Plain cells are created first, then each is wrapped with dropout.
        forward_cells = [
            tf.contrib.rnn.LSTMCell(self.hidden_size, name='lstmf' + str(idx))
            for idx in range(num_layers)
        ]
        self.rnn_cell_fw = [
            DropoutWrapper(cell, input_keep_prob=self.keep_prob)
            for cell in forward_cells
        ]
        backward_cells = [
            tf.contrib.rnn.LSTMCell(self.hidden_size, name='lstmb' + str(idx))
            for idx in range(num_layers)
        ]
        self.rnn_cell_bw = [
            DropoutWrapper(cell, input_keep_prob=self.keep_prob)
            for cell in backward_cells
        ]
        return

    print('Using cudnn lstm')
    self.direction = 'bidirectional'
    self.cudnn_cell = cudnn_rnn.CudnnLSTM(
        self.num_layers,
        self.hidden_size,
        direction=self.direction,
        dropout=cudnn_dropout)
def testSaveableGraphDeviceAssignment(self):
    """Saves, restores and runs a CudnnLSTM built under a device function.

    The device function places variable ops on the CPU and all other ops on
    the GPU; the saver is created while that device function is active.
    """
    layer_count, unit_count, batch, directions = 4, 2, 8, 1

    def place_op(op):
        # Variables live on the CPU, everything else on the GPU.
        return "/cpu:0" if op.type in ("Variable", "VariableV2") else "/gpu:0"

    with ops.Graph().as_default() as graph, ops.device(place_op):
        with vs.variable_scope("main"):
            kernel_init = init_ops.constant_initializer(3.14)
            bias_init = init_ops.constant_initializer(1.59)
            inputs = random_ops.random_uniform(
                [layer_count * directions, batch, unit_count],
                dtype=dtypes.float32)

            lstm = cudnn_rnn.CudnnLSTM(
                layer_count,
                unit_count,
                direction=CUDNN_RNN_UNIDIRECTION,
                kernel_initializer=kernel_init,
                bias_initializer=bias_init,
                name="awesome_lstm")
            layer_result = lstm(inputs)

        # saver is created in the scope of the device function.
        saver = saver_lib.Saver()

    with self.test_session(use_gpu=True, graph=graph) as sess:
        checkpoint_path = os.path.join(
            self.get_temp_dir(), "test-saveable-device-assignment")
        sess.run(variables.global_variables_initializer())
        saver.save(sess, checkpoint_path)
        saver.restore(sess, checkpoint_path)
        sess.run(layer_result)
def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers,
             dropout_ratio, use_cudnn_rnn=True, forget_bias=0.2):
    """Creates the embedding, recurrent and output projection layers.

    Args:
        vocab_size: Passed to `Embedding` and used as the `Dense` output size.
        embedding_dim: Passed to `Embedding`; also the last entry of
            `self._output_shape`.
        hidden_dim: Unit count handed to the recurrent layer.
        num_layers: Layer count handed to the recurrent layer.
        dropout_ratio: Given to `CudnnLSTM` as `dropout`; `1 - dropout_ratio`
            is stored as `self.keep_ratio` and given to `RNN` otherwise.
        use_cudnn_rnn: Selects `cudnn_rnn.CudnnLSTM` when truthy, `RNN`
            otherwise.
        forget_bias: Forwarded to `RNN` only; the cuDNN path does not use it.
    """
    super(LSTMModel, self).__init__()

    self.keep_ratio = 1 - dropout_ratio
    self.use_cudnn_rnn = use_cudnn_rnn
    self.embedding = Embedding(vocab_size, embedding_dim)

    self.rnn = (
        cudnn_rnn.CudnnLSTM(num_layers, hidden_dim, dropout=dropout_ratio)
        if self.use_cudnn_rnn
        else RNN(hidden_dim, num_layers, self.keep_ratio, forget_bias))

    # Output weights are drawn uniformly from [-0.1, 0.1].
    uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
    self.linear = layers.Dense(vocab_size, kernel_initializer=uniform_init)

    self._output_shape = [-1, embedding_dim]
def build_graph(self):
    """Builds the model graph: inputs, CudnnLSTM encoder, FC layer, CTC loss.

    Creates a fresh `tf.Graph` in `self.graph` and populates it with
    placeholders (`self.inputs`, `self.labels`, `self.seq_lens`,
    `self.learning_rate`), a bidirectional `CudnnLSTM`, a fully connected
    projection to `self.num_classes`, the mean CTC loss (`self.ctc_loss`)
    and an RMSProp optimizer. `self.train_op` is only created when
    `self.is_training` is truthy.
    """
    # Start building inputs
    # inputs: [time_len, batch_size, input_size]
    self.graph = tf.Graph()
    with self.graph.as_default():
        with tf.name_scope("Inputs"):
            self.inputs = tf.placeholder(
                tf.float32, shape=[None, None, self.num_feature], name='inputs')
            # Sparse labels are fed through three separate placeholders.
            self.labels = tf.SparseTensor(
                tf.placeholder(tf.int64, name='indices'),
                tf.placeholder(tf.int32, name='values'),
                tf.placeholder(tf.int64, name='shape'))
            self.seq_lens = tf.placeholder(tf.int32, shape=[None], name='seq_lens')
            self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')
            # use __init__ variables instead
            # self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Convolution Preprocessing
        # if self.using_conv:
        #     with tf.name_scope("Convolution"):
        #         self.conv = tf.nn.conv2d()

        # Start building RNN
        with tf.name_scope("RNN"):
            # TODO: add other LSTM categories
            # Only use cudnnLSTM for now
            # NOTE(review): self.outputs / self.states are assigned only when
            # self.use_cudnn is truthy; otherwise the read of self.outputs
            # below has nothing to read. Confirm callers always enable cuDNN.
            if self.use_cudnn:
                self.lstm = cudnn_rnn.CudnnLSTM(num_layers=self.num_layers,
                                                num_units=self.num_units,
                                                direction='bidirectional',
                                                dropout=1.0 - self.keep_prob,
                                                name='cudnn_lstm')
                # build first(optional)
                self.lstm.build([None, None, self.num_feature])
                self.outputs, self.states = self.lstm(
                    self.inputs, training=self.is_training)
            # else:
            #     # CudnnCompatibleLSTMCell
            #     self.lstm = cud

            # input: [time_len, batch_size, input_size]
            # outputs: [time_len, batch_size, num_dirs * num_units]
            # states: a tuple of tensor(s) [num_layers * num_dirs, batch_size, num_units]
            self.encoder_outputs = self.outputs

        # Start building fully connected layers, with bottleneck and FC
        with tf.name_scope("Fully_Connected"):
            # Axis 0 is read as time and axis 1 as batch, matching the
            # time-major layout noted above.
            batch_size = tf.shape(self.inputs)[1]
            max_time = tf.shape(self.inputs)[0]
            output_dim = self.encoder_outputs.shape.as_list()[-1]
            # Flatten the two leading axes so the FC layer sees a 2-D input.
            outputs_2d = tf.reshape(
                self.encoder_outputs, shape=[batch_size * max_time, output_dim])

            # if self.bottleneck_dim is not None and self.bottleneck_dim != 0:
            #     with tf.variable_scope('bottleneck') as scope:
            #         outputs_2d = tf.contrib.layers.fully_connected(
            #             outputs_2d,
            #             num_outputs=self.bottleneck_dim,
            #             activation_fn=tf.nn.relu)
            #         # Dropout for the hidden-output connections
            #         outputs_2d = tf.nn.dropout(
            #             outputs_2d, keep_prob, name='dropout_bottleneck')

            with tf.variable_scope('output') as scope:
                logits_2d = tf.contrib.layers.fully_connected(
                    outputs_2d, num_outputs=self.num_classes, activation_fn=None)

                if self.time_major:
                    # Reshape back to the original shape
                    logits = tf.reshape(
                        logits_2d, shape=[max_time, batch_size, self.num_classes])
                else:
                    # NOTE(review): batch_size/max_time are taken from axes
                    # 1/0 regardless of self.time_major, so this branch looks
                    # inconsistent with batch-major inputs -- verify.
                    # Reshape back to the original shape
                    logits = tf.reshape(
                        logits_2d, shape=[batch_size, max_time, self.num_classes])

                    # Convert to time-major: `[T, B, num_classes]'
                    logits = tf.transpose(logits, [1, 0, 2])
                self.logits = logits
                #self.logits =tf.Print(self.logits, [tf.shape(self.logits)])

        # Start building ctc loss
        # TODO: Could add weight decay policy here
        with tf.name_scope("CTC_Loss"):
            # TODO: dig into all variables
            # labels: int32 SparseTensor.
            # labels.indices[i, :] == [b, t] means labels.values[i] stores the id for (batch b, time t).
            # labels.values[i] must take on values in [0, num_labels)
            # logits: 3-D float Tensor [max_time, batch_size, num_classes]
            # inputs_seq_len: 1-D int32 vector, [batch_size]
            # return 1-D float tensor: [batch], neg-log prob
            ctc_losses = tf.nn.ctc_loss(
                self.labels,
                self.logits,
                #tf.cast(inputs_seq_len, tf.int32),
                self.seq_lens,
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=True,
                time_major=True)
            # Average the per-example losses into a single scalar.
            self.ctc_loss = tf.reduce_mean(ctc_losses, name='ctc_loss_mean')

        # TODO: add more optimizers
        with tf.name_scope("Optimizer"):
            self.optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.learning_rate,
                decay=0.9,
                momentum=0.0,
                epsilon=1e-10,
                use_locking=False,
                centered=False)
            # The training op is only needed (and only created) for training.
            if self.is_training:
                self.train_op = self.optimizer.minimize(self.ctc_loss)
dtype=tf.float32, initial_state=initial_state, time_major=False) outputs, states = tf.nn.dynamic_rnn(lstm_cell, X_timemajor, dtype=tf.float32, initial_state=initial_state, time_major=True) cudnn_cell = cudnn_rnn.CudnnLSTM( num_layers=1, num_units=num_input, direction=cudnn_rnn.CUDNN_RNN_UNIDIRECTION, input_mode=cudnn_rnn.CUDNN_INPUT_LINEAR_MODE, name="CudnnLSTM", dropout=0.0, seed=0.0, kernel_initializer=tf.initializers.ones(), bias_initializer=tf.initializers.zeros(), dtype=tf.float32) cudnn_outputs, cudnn_states = cudnn_cell( inputs=X_timemajor, # 3-D tensor [time_len, batch_size, input_size] training=True) print('X_batchmajor', X_batchmajor.shape) #NHWC print('...batch:', X_batchmajor.shape[0]) print('...in_width:', X_batchmajor.shape[1]) print('...in_channels:', X_batchmajor.shape[2])
def cudnn_stack_bidirectional_dynamic_rnn(
        inputs,
        layer_sizes,
        sequence_length,
        initial_state=None,
        dropout_keep_prob=1.0,
        cell_wrapper=None,
        variational_recurrent=True,
        base_cell=tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell,
        is_training=False):
    """Runs a stacked bidirectional LSTM, using CudnnLSTM when training.

    When `is_training` is falsy the stack is built from `base_cell` instances
    via `stack_bidirectional_dynamic_rnn` (for restoring cuDNN-trained
    checkpoints on CPU). Otherwise a bidirectional `CudnnLSTM` is used; the
    inputs are transposed before and the outputs after the call because that
    layer is fed time-major tensors here.

    Args:
        inputs: Batch-major input tensor (axis 0 batch, axis 1 time).
        layer_sizes: Sequence of layer sizes. Only its length and its first
            entry are used; every layer gets `layer_sizes[0]` units.
        sequence_length: Accepted for interface compatibility; not used.
        initial_state: Optional tensor that is split along its last axis into
            two `num_units`-wide halves (c, then h) and applied to every
            layer and direction.
        dropout_keep_prob: Keep probability for the cuDNN path; `None` means
            no dropout. Not used on the non-training path.
        cell_wrapper: Accepted for interface compatibility; not used.
        variational_recurrent: Accepted for interface compatibility; not used.
        base_cell: Cell class instantiated as `base_cell(num_units)` on the
            non-training path.
        is_training: Selects the cuDNN path and is passed on as `training`.

    Returns:
        A tuple `(outputs, last_h_state)` where `outputs` is batch-major and
        `last_h_state` concatenates the last layer's forward and backward
        `h` states along axis 1.
    """
    num_layers = len(layer_sizes)
    num_units = layer_sizes[0]
    num_dirs = 2  # bidirectional

    if not is_training:
        # for cpu restoring Cudnn-trained checkpoints
        cells_fw = [base_cell(num_units) for _ in range(num_layers)]
        cells_bw = [base_cell(num_units) for _ in range(num_layers)]

        if initial_state is not None:
            c, h = tf.split(initial_state, [num_units, num_units], -1)
            state_tuple = rnn_cell.LSTMStateTuple(c, h)
            initial_states_fw = initial_states_bw = [state_tuple] * num_layers
        else:
            initial_states_fw = initial_states_bw = None

        (outputs, output_state_fw,
         output_state_bw) = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
             cells_fw,
             cells_bw,
             inputs,
             dtype=tf.float32,
             initial_states_fw=initial_states_fw,
             initial_states_bw=initial_states_bw,
             time_major=False,
             scope='cudnn_lstm/stack_bidirectional_rnn')
        # Only the h state is returned, so the c state is not assembled.
        last_h_state = tf.concat(
            [output_state_fw[-1].h, output_state_bw[-1].h], 1)
        return outputs, last_h_state

    dropout_prob = 0.
    if dropout_keep_prob is not None:
        dropout_prob = 1. - dropout_keep_prob

    if initial_state is not None:
        # Add a leading axis, then repeat the state once per layer and
        # direction along that axis.
        initial_state = tf.expand_dims(initial_state, 0)
        c, h = tf.split(initial_state, [num_units, num_units], -1)
        h = tf.concat([h] * (num_layers * num_dirs), 0)
        c = tf.concat([c] * (num_layers * num_dirs), 0)
        initial_state = (h, c)

    lstm = cudnn_rnn.CudnnLSTM(
        num_layers=num_layers,
        num_units=num_units,
        direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION,
        dropout=dropout_prob)

    # Batch-major -> time-major for the cuDNN layer, and back afterwards.
    inputs = tf.transpose(inputs, [1, 0, 2])
    outputs, (output_h, _) = lstm(
        inputs, initial_state=initial_state, training=is_training)
    outputs = tf.transpose(outputs, [1, 0, 2])

    # The last two entries are the final layer's two directions.
    last_h_state = tf.concat([output_h[-2], output_h[-1]], 1)
    return outputs, last_h_state
def create_model(self, share_dense=True, concat_sub=True):
    """Builds placeholders, one LSTM+GRU attention head per sub-task, and loss.

    For each of the `n_sub` sub-tasks the word encoding is run through a
    bidirectional `CudnnLSTM` followed by a bidirectional `CudnnGRU`; max,
    attention and mean pooling over time are concatenated, dropped out and
    projected to 4 logits. The heads are stacked into `self.logits`.

    Args:
        share_dense: Not used by this implementation.
        concat_sub: Not used by this implementation.

    Returns:
        `self`, with `input_x`, `input_y`, `input_y2`, `dropout_keep_prob`,
        `output_keep_prob`, `word_encoding`, `logits`, `prob`, `prediction`
        and `loss` set.
    """
    self.input_y = tf.placeholder(
        dtype=tf.float32, shape=[None, n_sub, 4], name='input_y')
    self.input_y2 = tf.placeholder(
        dtype=tf.float32, shape=[None, n_sub, 4], name='input_y2')
    self.dropout_keep_prob = tf.placeholder(
        dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(
        dtype=tf.float32, name='output_keep_prob')

    # Lower-case once so the outer and inner feature tests agree; previously
    # a mixed-case name passed the outer test but matched no inner branch.
    feature = self.main_feature.lower()
    if feature in ['word', 'char']:
        self.input_x = tf.placeholder(
            dtype=tf.int32, shape=[None, self.max_len], name='input_x')
        self.word_embedding = tf.get_variable(
            initializer=self.embedding, name='word_embedding')
        # NOTE(review): the lookup reads self.embedding rather than the
        # word_embedding variable created above -- confirm this is intended.
        self.word_encoding = tf.nn.embedding_lookup(
            self.embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(
            self.word_encoding, self.dropout_keep_prob)  # new
    elif feature in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(
            dtype=tf.int32, shape=[None, self.max_len + 2], name='input_x')
        # The config attributes are named '<feature>_options_file' etc.
        options_file = getattr(self.config, feature + '_options_file')
        weight_file = getattr(self.config, feature + '_weight_file')
        embed_file = getattr(self.config, feature + '_embed_file')
        self.bilm = BidirectionalLanguageModel(
            options_file,
            weight_file,
            use_character_inputs=False,
            embedding_weight_file=embed_file,
            max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers(
            'output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(
            self.word_encoding, self.dropout_keep_prob)  # new
    else:
        exit('wrong feature')

    c_outputs = []
    for c in range(n_sub):
        with tf.variable_scope('lstm-{}'.format(c)):
            #### cudnn lstm ####
            self.forward_lstm = cudnn_rnn.CudnnLSTM(
                num_layers=1,
                num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION,
                dtype=tf.float32)
            self.forward_gru = cudnn_rnn.CudnnGRU(
                num_layers=1,
                num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION,
                dtype=tf.float32)
            # The first two axes are swapped going in and swapped back coming
            # out of the cuDNN layers.
            x, _ = self.forward_lstm(
                tf.transpose(self.word_encoding, [1, 0, 2]))
            x, _ = self.forward_gru(x)
            x = tf.transpose(x, [1, 0, 2])

        with tf.variable_scope('pooling-{}'.format(c)):
            max_pooled = tf.reshape(
                tf.reduce_max(x, 1), [-1, 2 * self.hidden_dim])
            avg_pooled = tf.reshape(
                tf.reduce_mean(x, 1), [-1, 2 * self.hidden_dim])

            att_w = tf.get_variable(
                shape=[2 * self.hidden_dim, self.hidden_dim], name='att_w')
            att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
            att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')
            x_reshape = tf.reshape(x, [-1, 2 * self.hidden_dim])
            # One score per position, normalized over the last axis.
            score = tf.reshape(
                tf.matmul(tf.nn.tanh(tf.matmul(x_reshape, att_w)) + att_b,
                          att_v),
                [-1, 1, self.max_len])
            alpha = tf.nn.softmax(score, axis=-1)
            att_pooled = tf.reshape(
                tf.matmul(alpha, x), [-1, 2 * self.hidden_dim])

            concat_pooled = tf.concat(
                (max_pooled, att_pooled, avg_pooled), -1)
            concat_pooled = tf.nn.dropout(
                concat_pooled, self.dropout_keep_prob)
            dense = tf.layers.dense(concat_pooled, 4, activation=None)
            c_outputs.append(dense)

    # One row of 4 logits per sub-task; n_sub replaces a hard-coded 10 so the
    # shape always matches the input_y placeholder.
    self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.reshape(self.input_y, [-1, 4])))
        # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y2, [-1,4])))
    else:
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3

        y = tf.reshape(self.input_y, [-1, 4])
        # Flatten the probabilities the same way as the labels so column k is
        # class k for every (example, sub-task) row. Indexing the 3-D tensor
        # with [:, k] would select sub-task k instead.
        y_flat = tf.reshape(y_, [-1, 4])
        self.loss = tf.reduce_mean(
            -class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
            - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
            - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
            - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))

    return self
def create_model(self, share_dense=True, concat_sub=True):
    """Builds placeholders, one LSTM+conv head per sub-task, and the loss.

    For each of the `n_sub` sub-tasks the word encoding is run through a
    bidirectional `CudnnLSTM`, convolved with `n_filters` filters of height
    3, max- and average-pooled over the remaining positions, dropped out and
    projected to 4 logits. The heads are stacked into `self.logits`.

    Args:
        share_dense: Not used by this implementation.
        concat_sub: Not used by this implementation.

    Returns:
        `self`, with `input_x`, `input_y`, `input_y2`, `dropout_keep_prob`,
        `output_keep_prob`, `word_encoding`, `logits`, `prob`, `prediction`
        and `loss` set.
    """
    self.input_y = tf.placeholder(
        dtype=tf.float32, shape=[None, n_sub, 4], name='input_y')
    self.input_y2 = tf.placeholder(
        dtype=tf.float32, shape=[None, n_sub, 4], name='input_y2')
    self.dropout_keep_prob = tf.placeholder(
        dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(
        dtype=tf.float32, name='output_keep_prob')

    # Lower-case once so the outer and inner feature tests agree; previously
    # a mixed-case name passed the outer test but matched no inner branch.
    feature = self.main_feature.lower()
    if feature in ['word', 'char']:
        self.input_x = tf.placeholder(
            dtype=tf.int32, shape=[None, self.max_len], name='input_x')
        self.word_embedding = tf.get_variable(
            initializer=self.embedding, name='word_embedding')
        # NOTE(review): the lookup reads self.embedding rather than the
        # word_embedding variable created above -- confirm this is intended.
        self.word_encoding = tf.nn.embedding_lookup(
            self.embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(
            self.word_encoding, self.dropout_keep_prob)  # new
    elif feature in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(
            dtype=tf.int32, shape=[None, self.max_len + 2], name='input_x')
        # The config attributes are named '<feature>_options_file' etc.
        options_file = getattr(self.config, feature + '_options_file')
        weight_file = getattr(self.config, feature + '_weight_file')
        embed_file = getattr(self.config, feature + '_embed_file')
        self.bilm = BidirectionalLanguageModel(
            options_file,
            weight_file,
            use_character_inputs=False,
            embedding_weight_file=embed_file,
            max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers(
            'output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(
            self.word_encoding, self.dropout_keep_prob)  # new
    else:
        exit('wrong feature')

    c_outputs = []
    for c in range(n_sub):
        with tf.variable_scope('lstm-{}'.format(c)):
            #### cudnn lstm ####
            self.forward = cudnn_rnn.CudnnLSTM(
                num_layers=1,
                num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION,
                dtype=tf.float32)
            # The first two axes are swapped going in and swapped back coming
            # out of the cuDNN layer.
            x, _ = self.forward(tf.transpose(self.word_encoding, [1, 0, 2]))
            x = tf.transpose(x, [1, 0, 2])

        with tf.variable_scope('conv-{}'.format(c)):
            # A trailing channel axis makes the encoder output a 4-D conv
            # input; the filter spans 3 positions and the full feature width.
            inputs_expanded = tf.expand_dims(x, -1)
            filter_shape = [3, 2 * self.hidden_dim, 1, n_filters]
            W = tf.get_variable(
                initializer=tf.truncated_normal(filter_shape, stddev=0.1),
                name='W')
            b = tf.get_variable(
                'b', initializer=tf.constant(0.1, shape=[n_filters]))
            conv = tf.nn.conv2d(
                inputs_expanded,
                W,
                strides=[1] * 4,
                padding='VALID',
                name='conv2d')
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')

            # Pool over all max_len - 3 + 1 positions the VALID conv produces.
            pool_window = [1, self.max_len - 3 + 1, 1, 1]
            max_pooled = tf.nn.max_pool(
                h,
                ksize=pool_window,
                strides=[1, 1, 1, 1],
                padding='VALID',
                name='max_pool')
            avg_pooled = tf.nn.avg_pool(
                h,
                ksize=pool_window,
                strides=[1, 1, 1, 1],
                padding='VALID',
                name='avg_pool')

            concat_pooled = tf.reshape(
                tf.concat((max_pooled, avg_pooled), -1), [-1, 2 * n_filters])
            concat_pooled = tf.nn.dropout(
                concat_pooled, self.dropout_keep_prob)
            dense = tf.layers.dense(concat_pooled, 4, activation=None)
            c_outputs.append(dense)

    # One row of 4 logits per sub-task; n_sub replaces a hard-coded 10 so the
    # shape always matches the input_y placeholder.
    self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        # The loss is the sum of the cross-entropies against both label sets.
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.reshape(self.input_y, [-1, 4])))
        self.loss += tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3

        y = tf.reshape(self.input_y, [-1, 4])
        # Flatten the probabilities the same way as the labels so column k is
        # class k for every (example, sub-task) row. Indexing the 3-D tensor
        # with [:, k] would select sub-task k instead.
        y_flat = tf.reshape(y_, [-1, 4])
        self.loss = tf.reduce_mean(
            -class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
            - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
            - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
            - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))

    return self