X, Y, _ = readLabeledPoems("poems", "*.txt") chars = sorted(list(set(text))) char_indices = dict((c, i) for i, c in enumerate(chars, 1)) indices_char = dict((i, c) for i, c in enumerate(chars, 1)) phone_markers = sorted(list(set(phoneList))) phone_indices = dict((c, i) for i, c in enumerate(phone_markers, 2)) indices_phone = dict((i, c) for i, c in enumerate(phone_markers, 2)) # Load char2phone model layer_size = 512 layers = 3 dropout = 0.3 keep_prob = tf.placeholder(tf.float32) encoder_cell = rnn.DropoutWrapper(rnn.LSTMCell(layer_size), output_keep_prob=keep_prob) decoder_cell = rnn.DropoutWrapper(rnn.LSTMCell(layer_size * 2), output_keep_prob=keep_prob) if layers > 1: encoder_cell = rnn.MultiRNNCell([encoder_cell] * layers) decoder_cell = rnn.MultiRNNCell([decoder_cell] * layers) with tf.Session() as session: phoneModel = Seq2SeqModel(encoder_cell=encoder_cell, decoder_cell=decoder_cell, vocab_size=60, embedding_size=16, layers=layers, keep_prob=keep_prob, attention=True,
def __init__(self, train_config: TrainConfig):
    """Build the LSTM regression graph and open a session bound to it.

    The graph is a stack of dropout-wrapped LSTM layers, then one ReLU
    fully connected layer (with dropout) per entry of
    ``self.config.FC_LAYERS``, then a single-unit linear prediction layer.
    The cost is the mask-weighted sum of squared errors divided by the sum
    of the mask.

    Args:
        train_config: Training configuration. It is handed to
            ``self.update_config`` and the length of its
            ``FEATURE_FUNCTIONS`` fixes the size of the input feature axis.
    """
    self.update_config(train_config)
    print('creating neural network...')
    with tf.Graph().as_default() as graph:
        # Feed points of the graph.
        self.seq_len = tf.placeholder(tf.int32, [None], name='seq_len')
        self.labels = tf.placeholder(tf.float32, [None, None], name='labels')
        self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
        self.input = tf.placeholder(
            tf.float32,
            [None, None, len(train_config.FEATURE_FUNCTIONS)],
            name='input')
        self.keep_prob = tf.placeholder(tf.float32)

        # Recurrent stack: dropout is applied on the input of every layer.
        lstm_layers = [
            rnn.DropoutWrapper(rnn.LSTMCell(self.config.LSTM_LAYER_SIZE),
                               input_keep_prob=self.keep_prob)
            for _ in range(self.config.LSTM_LAYERS)
        ]
        self.rnn_cell = rnn.MultiRNNCell(lstm_layers)

        # One (c, h) placeholder pair per layer so that callers can feed
        # the recurrent state explicitly.
        self.state = tuple(
            tf.contrib.rnn.LSTMStateTuple(
                tf.placeholder(tf.float32, [None, layer_size.c]),
                tf.placeholder(tf.float32, [None, layer_size.h]))
            for layer_size in self.rnn_cell.state_size)

        # Batch size x time steps x features.
        hidden, self.new_state = tf.nn.dynamic_rnn(
            self.rnn_cell,
            self.input,
            initial_state=self.state,
            sequence_length=self.seq_len)

        for layer_idx, num_units in enumerate(self.config.FC_LAYERS):
            with tf.name_scope('fc_layer_%d' % layer_idx):
                dense = tf.contrib.layers.fully_connected(
                    hidden,
                    num_units,
                    activation_fn=tf.nn.relu,
                    scope='dense_%d' % layer_idx)
                hidden = tf.nn.dropout(dense, self.keep_prob)

        # Final layer to make the prediction (no activation).
        with tf.name_scope('prediction_layer'):
            self.returns = tf.contrib.layers.fully_connected(
                hidden, 1, activation_fn=None)

        with tf.name_scope('loss'):
            squared_error = tf.square(
                self.returns - tf.expand_dims(self.labels, 2))
            step_weights = tf.expand_dims(self.mask, 2)
            self.sse = tf.reduce_sum(
                tf.multiply(squared_error, step_weights))
            self.cost = self.sse / tf.reduce_sum(self.mask)

        # Gradients are exposed separately from the update op.
        self.optimizer = tf.train.AdamOptimizer()
        self.vars = tf.trainable_variables()
        self.grads_and_vars = self.optimizer.compute_gradients(
            self.cost, var_list=self.vars)
        self.train = self.optimizer.apply_gradients(self.grads_and_vars)

        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
        self.sess = tf.Session(graph=graph)
def lstm_cell():
    """Return a new LSTM cell wrapped with output dropout.

    Reads ``hidden_size`` and ``keep_prob`` from the enclosing scope and
    inherits the ``reuse`` flag of the current variable scope.
    """
    return rnn.DropoutWrapper(
        rnn.LSTMCell(hidden_size, reuse=tf.get_variable_scope().reuse),
        output_keep_prob=keep_prob)
filters=256, kernel_size=ngram_size, strides=1, padding='same', dilation_rate=1, activation='relu', name='Text_Conv_1D_N{}'.format(ngram_size), kernel_regularizer=tf.contrib.layers.l2_regularizer( scale=0.01)) text_conv1d = tf.reduce_max(text_conv1d, axis=1, keepdims=False) result_tensors.append(text_conv1d) text_embeddings = tf.concat(result_tensors, axis=1) text_embeddings = tf.nn.dropout(text_embeddings, keep_prob=dropout_keep_prob) rnn_cell = rnn.LSTMCell(num_units=256) rnn_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, X, time_major=False, dtype=tf.float32) #mean_rnn_outputs = tf.math.reduce_mean(rnn_outputs, axis=1, keepdims=False) mean_rnn_outputs = rnn_outputs[:, -1, :] if model_name == 'baseline': logit_X = mean_rnn_outputs elif model_name == 'text_only': logit_X = text_embeddings else: logit_X = tf.concat([text_embeddings, mean_rnn_outputs], axis=1) logits_regularizer = tf.contrib.layers.l2_regularizer(scale=0.01)
def __init__(self, dtype, *param, fn):
    """Build a stacked-LSTM encoder followed by an MLP head.

    ``param`` is a flat sequence read as follows:

    * ``param[0]``: name of an attribute of ``tf.nn`` used as the hidden
      layer non-linearity.
    * ``param[1]``: bound of the uniform initialiser of the output layer.
    * ``param[2:]`` up to the ``'/'`` marker: recurrent spec. The first two
      entries are the last-axis sizes of the two inputs, the remaining
      entries are the LSTM layer sizes.
    * entries after ``'/'``: MLP spec. All but the last are hidden layer
      widths, the last is the output width.

    Args:
        dtype: Tensor dtype used for placeholders and variables.
        *param: Flat architecture description, see above.
        fn: Object exposing ``input_names`` (two entries) and
            ``output_names`` (one entry); the input names become the
            placeholder names.
    """
    super(LSTMMLP2, self).__init__(dtype)
    # Kept as an assert (not a raise) so callers that catch AssertionError
    # keep working; only the message was corrected to name this class.
    assert len(fn.input_names) == 2 and len(fn.output_names) == 1, \
        "The function is not compatible with LSTMMLP2"

    nonlin = getattr(tf.nn, param[0])
    weight = float(param[1])

    # Position of the (last) '/' separator inside param[2:]; 0 if absent.
    check = 0
    for i, val in enumerate(param[2:]):
        if val == '/':
            check = i
    rnnDim = [int(v) for v in param[2:check + 2]]
    mlpDim = [int(v) for v in param[check + 3:]]

    self.input1 = tf.placeholder(dtype, shape=[None, None, rnnDim[0]],
                                 name=fn.input_names[0])  # [batch, time, statedim]
    self.input2 = tf.placeholder(dtype, shape=[None, None, rnnDim[1]],
                                 name=fn.input_names[1])  # [batch, time, actiondim]
    inputconcat = tf.concat([self.input1, self.input2], axis=2,
                            name="inputconcat")

    # The length is fed with the network dtype and cast to int32 here.
    length_ = tf.placeholder(dtype, name='length')  # [batch]
    length_ = tf.cast(length_, dtype=tf.int32)
    self.seq_length = tf.reshape(length_, [-1])

    # LSTM stack
    cells = []
    state_size = []
    recurrent_state_size = 0
    for size in rnnDim[2:]:
        cell = rnn.LSTMCell(
            size,
            state_is_tuple=True,
            initializer=tf.contrib.layers.xavier_initializer())
        cells.append(cell)
        recurrent_state_size += cell.state_size.c + cell.state_size.h
        state_size.append(cell.state_size.c)
        state_size.append(cell.state_size.h)
    cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    # Not referenced again in Python; built for its named graph op 'h_dim'.
    hiddenStateDim = tf.identity(
        tf.constant(value=[recurrent_state_size], dtype=tf.int32),
        name='h_dim')

    # The whole recurrent state is fed as one flat tensor laid out as
    # [c_0, h_0, c_1, h_1, ...] and split back into per-layer tuples.
    h_in = tf.placeholder(dtype=dtype, shape=[None, recurrent_state_size],
                          name='h_init')
    init_states = tf.split(h_in, num_or_size_splits=state_size, axis=1)
    print(init_states)
    init_state_tuple = tuple(
        rnn.LSTMStateTuple(init_states[2 * i], init_states[2 * i + 1])
        for i in range(len(cells)))

    # LSTM output
    LSTMOutput, final_state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=inputconcat,
        sequence_length=self.seq_length,
        dtype=dtype,
        initial_state=init_state_tuple)

    # FCN: time steps are folded into the batch axis for the dense layers.
    top = tf.reshape(LSTMOutput, shape=[-1, rnnDim[-1]], name='fcIn')
    for layer_n, dim in enumerate(mlpDim[:-1]):
        with tf.name_scope('hidden_layer' + repr(layer_n)):
            top = fully_connected(
                activation_fn=nonlin,
                inputs=top,
                num_outputs=dim,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                trainable=True)

    # Width of the tensor entering the output layer. Without hidden layers
    # it is the last LSTM size; the former unconditional mlpDim[-2] raised
    # IndexError for a single-entry MLP spec.
    out_in_dim = mlpDim[-2] if len(mlpDim) > 1 else rnnDim[-1]
    with tf.name_scope('output_layer'):
        wo = tf.Variable(
            tf.random_uniform(dtype=dtype,
                              shape=[out_in_dim, mlpDim[-1]],
                              minval=-weight,
                              maxval=weight))
        bo = tf.Variable(
            tf.random_uniform(dtype=dtype,
                              shape=[mlpDim[-1]],
                              minval=-weight,
                              maxval=weight))
        top = tf.matmul(top, wo) + bo

    # Restore the [batch, time, output] layout.
    self.output = tf.reshape(
        top, [-1, tf.shape(self.input1)[1], mlpDim[-1]])

    # Flatten the final state with the same layout as 'h_init'.
    final_state_list = []
    for state_tuple in final_state:
        final_state_list.append(state_tuple.c)
        final_state_list.append(state_tuple.h)
    # Not referenced again in Python; built for its named graph op 'h_state'.
    hiddenState = tf.concat(final_state_list, axis=1, name='h_state')

    self.l_param_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    self.a_param_list = self.l_param_list
    self.net = None
def create_decoder_cell(agenda,
                        extended_base_words,
                        oov,
                        base_sent_hiddens,
                        mev_st,
                        mev_ts,
                        base_length,
                        iw_length,
                        dw_length,
                        vocab_size,
                        attn_dim,
                        hidden_dim,
                        num_layer,
                        enable_alignment_history=False,
                        enable_dropout=False,
                        dropout_keep=1.,
                        no_insert_delete_attn=False,
                        beam_width=None):
    """Assemble the attention-augmented pointer-generator decoder cell.

    The bottom LSTM layer is wrapped with Bahdanau attention over the base
    sentence and over ``mev_st`` (and over ``mev_ts`` unless
    ``no_insert_delete_attn`` is set). ``num_layer - 1`` plain LSTM layers
    are stacked on top, optionally with output dropout. ``oov`` is accepted
    but not used here, and ``vocab`` is read from module scope.

    Returns:
        A ``(pg_cell, zero_state)`` pair: the pointer-generator wrapped
        cell and the trainable zero state created for the inner decoder
        cell.
    """
    attention_over_base = seq2seq.BahdanauAttention(
        attn_dim, base_sent_hiddens, base_length, name='base_attn')

    # mev_st / mev_ts are (memory, values) pairs: the mechanism is built on
    # the memory and its values are then overridden.
    st_memory, st_values = mev_st
    attention_over_st = seq2seq.BahdanauAttention(
        attn_dim, st_memory, iw_length, name='mev_st_attn')
    attention_over_st._values = st_values

    mechanisms = [attention_over_base, attention_over_st]
    if not no_insert_delete_attn:
        ts_memory, ts_values = mev_ts
        attention_over_ts = seq2seq.BahdanauAttention(
            attn_dim, ts_memory, dw_length, name='mev_ts_attn')
        attention_over_ts._values = ts_values
        mechanisms.append(attention_over_ts)

    layers = [
        seq2seq.AttentionWrapper(
            tf_rnn.LSTMCell(hidden_dim, name='bottom_cell'),
            tuple(mechanisms),
            alignment_history=enable_alignment_history,
            output_attention=False,
            name='att_bottom_cell')
    ]
    # Upper layers are numbered from 1; the attention layer is layer 0.
    for layer_no in range(1, num_layer):
        upper = tf_rnn.LSTMCell(hidden_dim, name='layer_%s' % layer_no)
        if enable_dropout and dropout_keep < 1.:
            upper = tf_rnn.DropoutWrapper(upper,
                                          output_keep_prob=dropout_keep)
        layers.append(upper)

    decoder_cell = AttentionAugmentRNNCell(layers)
    decoder_cell.set_agenda(agenda)
    decoder_cell.set_source_attn_index(0)

    pg_cell = PointerGeneratorWrapper(
        decoder_cell,
        extended_base_words,
        50,
        DecoderOutputLayer(vocab.get_embeddings()),
        vocab_size,
        decoder_cell.get_source_attention,
        name='PointerGeneratorWrapper')

    # With beam search the leading axis is divided by beam_width to obtain
    # the batch size used for the zero state.
    tiled_batch = tf.shape(base_sent_hiddens)[0]
    if beam_width:
        true_batch_size = tf.cast(tiled_batch / beam_width, tf.int32)
    else:
        true_batch_size = tiled_batch

    zero_state = create_trainable_zero_state(decoder_cell,
                                             true_batch_size,
                                             beam_width=beam_width)
    return pg_cell, zero_state
def buildModel(self):
    """Build the generator graph and return samples plus logits.

    Three ``while_loop`` passes share one LSTM cell and one projection:

    1. "keep": for ``time < self.keepNumber`` the tokens of ``self.ta_seq``
       are copied to the output while the cell state is advanced.
    2. "generate": from there up to ``self.sequence_length`` tokens are
       sampled from the softmax of the projected cell output.
    3. "score": a separate pass from the start token that feeds
       ``self.ta_emb_seq`` and records the logits of every step.

    Returns:
        ``(sequence, sequence_logits)`` with layouts
        batch_size x seq_length and batch_size x seq_length x vocab_size.
    """
    std = 0.1
    self.cell = rnn.LSTMCell(self.units,
                             initializer=tf.initializers.random_normal(0, std),
                             name="LSTMCell")
    self.W = tf.Variable(
        tf.random_normal([self.units, self.vocab_size], stddev=std),
        name="W")
    self.b = tf.Variable(tf.random_normal([self.vocab_size], stddev=std),
                         name="b")

    token_buffer = tensor_array_ops.TensorArray(
        dtype=tf.int32,
        size=self.sequence_length,
        dynamic_size=False,
        infer_shape=True)
    logit_buffer = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length,
        dynamic_size=False,
        infer_shape=True)

    def copy_step(step, step_input, state, tokens, keep_until):
        # Advance the state only; the emitted token is the given one.
        _, new_state = self.cell(step_input, state)
        following_input = self.ta_emb_seq.read(step)
        tokens = tokens.write(step, self.ta_seq.read(step))
        return step + 1, following_input, new_state, tokens, keep_until

    def sample_step(step, step_input, state, tokens):
        cell_out, new_state = self.cell(step_input, state)
        logits = tf.add(tf.matmul(cell_out, self.W), self.b)
        prob = tf.nn.softmax(logits)
        drawn = tf.multinomial(tf.log(prob), 1, output_dtype=tf.int32)
        sample_ids = tf.reshape(drawn, [self.batch_size])
        tokens = tokens.write(step, sample_ids)
        # The sampled token is fed back as the next input.
        following_input = self.embedding.getEmbedding(sample_ids)
        return step + 1, following_input, new_state, tokens

    def score_step(step, step_input, state, logit_log):
        cell_out, new_state = self.cell(step_input, state)
        logits = tf.add(tf.matmul(cell_out, self.W), self.b)
        logit_log = logit_log.write(step, logits)
        following_input = self.ta_emb_seq.read(step)
        return step + 1, following_input, new_state, logit_log

    # Pass 1: copy the kept prefix.
    step, step_input, state, token_buffer, _ = control_flow_ops.while_loop(
        cond=lambda time, _1, _2, _3, keepNumber: time < keepNumber,
        body=copy_step,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   self.embedded_start_token,
                   self.cell.zero_state(self.batch_size, dtype=tf.float32),
                   token_buffer,
                   self.keepNumber))

    # Pass 2: sample the remainder, continuing from pass 1.
    _, _, _, token_buffer = control_flow_ops.while_loop(
        cond=lambda time, _1, _2, _3: time < self.sequence_length,
        body=sample_step,
        loop_vars=(step, step_input, state, token_buffer))

    # Pass 3: score from scratch.
    _, _, _, logit_buffer = control_flow_ops.while_loop(
        cond=lambda time, _1, _2, _3: time < self.sequence_length,
        body=score_step,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   self.embedded_start_token,
                   self.cell.zero_state(self.batch_size, dtype=tf.float32),
                   logit_buffer))

    # The arrays are stacked time-major; move the batch axis to the front.
    sequence = tf.transpose(token_buffer.stack(),
                            perm=[1, 0])  # batch_size x seq_length
    sequence_logits = tf.transpose(
        logit_buffer.stack(),
        perm=[1, 0, 2])  # batch_size x seq_length x vocab_size
    return sequence, sequence_logits
def add_word_embeddings_op(self):
    """Build ``self.word_embeddings`` from word and character features.

    Word vectors come from ``self.config.embeddings`` when given, otherwise
    from a randomly initialised trainable matrix. Character features are
    produced by a bidirectional LSTM (``config.use_char_lstm``) and/or a
    convolution with max-pooling (``config.use_char_cnn``) and concatenated
    to the word vectors on the last axis. Dropout with ``self.dropout`` is
    applied to the result.

    The character features are concatenated only when they exist. The
    former unconditional concat read ``self.char_embeddings`` and raised
    AttributeError when both character options were disabled.
    """
    with tf.variable_scope("words"):
        print("word embedding...........")
        if self.config.embeddings is None:
            self.logger.info("WARNING: randomly initializing word vectors")
            _word_embeddings = tf.get_variable(
                name="_word_embeddings",
                initializer=tf.variance_scaling_initializer(
                    distribution="uniform"),
                dtype=tf.float32,
                shape=[self.config.nwords, self.config.dim_word])
        else:
            _word_embeddings = tf.Variable(
                self.config.embeddings,
                name="_word_embeddings",
                dtype=tf.float32,
                trainable=self.config.train_embeddings)
        word_embeddings = tf.nn.embedding_lookup(_word_embeddings,
                                                 self.word_ids,
                                                 name="word_embeddings")

    with tf.variable_scope("chars"):
        # NOTE(review): enabling both character options makes the second
        # tf.get_variable("_char_embeddings") call collide with the first
        # in this scope; confirm they are meant to be mutually exclusive.
        if self.config.use_char_lstm:
            print("char lstm..........")
            # get char embeddings matrix
            _char_embeddings = tf.get_variable(
                name="_char_embeddings",
                initializer=tf.variance_scaling_initializer(
                    distribution="uniform"),
                dtype=tf.float32,
                shape=[self.config.nchars, self.config.dim_char])
            char_embeddings = tf.nn.embedding_lookup(
                _char_embeddings, self.char_ids, name="char_embeddings")

            # put the time dimension on axis=1: every word becomes one
            # sequence of characters
            s = tf.shape(char_embeddings)
            char_embeddings = tf.reshape(
                char_embeddings,
                shape=[s[0] * s[1], s[-2], self.config.dim_char])
            word_lengths = tf.reshape(self.word_lengths,
                                      shape=[s[0] * s[1]])

            # bi lstm on chars
            cell_fw = rnn.LSTMCell(
                self.config.hidden_size_char,
                initializer=tf.glorot_uniform_initializer())
            cell_bw = rnn.LSTMCell(
                self.config.hidden_size_char,
                initializer=tf.glorot_uniform_initializer())
            _output = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                char_embeddings,
                sequence_length=word_lengths,
                dtype=tf.float32)

            # read and concat the final hidden state of both directions
            _, ((_, output_fw), (_, output_bw)) = _output
            output = tf.concat([output_fw, output_bw], axis=-1)

            # shape = (batch size, max sentence length, char hidden size)
            output = tf.reshape(
                output,
                shape=[s[0], s[1], 2 * self.config.hidden_size_char])
            self.char_embeddings = output

        if self.config.use_char_cnn:
            print("char_cnn............")
            _char_embeddings = tf.get_variable(
                name="_char_embeddings",
                initializer=tf.variance_scaling_initializer(
                    distribution="uniform"),
                shape=[self.config.nchars, self.config.dim_char],
                dtype=tf.float32)
            char_embeddings = tf.nn.embedding_lookup(
                _char_embeddings, self.char_ids, name="char_embeddings")

            # One single-channel "image" per word for conv2d.
            s = tf.shape(char_embeddings)
            char_embeddings = tf.reshape(
                char_embeddings,
                [s[0] * s[1], s[2], self.config.dim_char, 1])

            with tf.variable_scope("conv"):
                weights = tf.get_variable(
                    name="weights",
                    shape=[
                        self.config.filter_size, self.config.dim_char, 1,
                        self.config.filter_deep
                    ],
                    initializer=tf.glorot_uniform_initializer())
                biases = tf.get_variable(
                    name="biases",
                    shape=[self.config.filter_deep],
                    initializer=tf.constant_initializer(0))
                conv = tf.nn.conv2d(char_embeddings,
                                    weights,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name="conv")
                relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
                # The pooling window spans every valid convolution
                # position, leaving one value per filter.
                pool = tf.nn.max_pool(
                    relu,
                    ksize=[
                        1,
                        self.max_word_lengths - self.config.filter_size + 1,
                        1, 1
                    ],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")
                pool_flatten = tf.reshape(
                    pool, [s[0], s[1], self.config.filter_deep])
            self.char_embeddings = pool_flatten

    # getattr keeps the old result whenever self.char_embeddings exists
    # (set above or elsewhere) and skips the concat when it does not.
    char_features = getattr(self, "char_embeddings", None)
    if char_features is not None:
        word_embeddings = tf.concat([word_embeddings, char_features], -1)
    print(word_embeddings.get_shape())
    self.word_embeddings = tf.nn.dropout(word_embeddings, self.dropout)
# valid_batch_size = 1000 rnn_size = 512 max_n_token_sentence = 100 max_n_token_dict = 10000 + 3 learning_rate = 0.001 pre = 5 suf = 3 # data_reader = DataReader(shuffled_train_set, shuffled_valid_set, # 7, tokens_dict, 7, 10000) # tt = data_reader.iterate_mini_batch(batch_size) # t = next(tt) # GRAPH x = tf.placeholder('float32', shape=[None, None, max_n_token_dict]) y = tf.placeholder('float32', shape=[None, max_n_token_dict]) cell = rnn.LSTMCell(rnn_size, state_is_tuple=True, forget_bias=0.0, reuse=False) initial_rnn_state = cell.zero_state(batch_size, dtype='float32') outputs, final_rnn_state = tf.nn.dynamic_rnn(cell, x, initial_state=initial_rnn_state, dtype='float32') outputs = tf.transpose(outputs, [1, 0, 2]) last = outputs[-1] outputs_reshape = tf.reshape(last, shape=[-1, rnn_size]) w = tf.get_variable("w", [rnn_size, max_n_token_dict], dtype='float32') b = tf.get_variable("b", [max_n_token_dict], dtype='float32') preds = tf.nn.softmax(tf.matmul(outputs_reshape, w) + b) preds_argmax = tf.argmax(preds, axis=1) # preds_reshaped = tf.reshape(preds, shape=[-1, max_n_token_sentence, max_n_token_dict]) cost = - tf.reduce_sum(y * tf.log(tf.clip_by_value(preds, 1e-10, 1.0)), axis=1) cost = tf.reduce_mean(cost, axis=0) predictions = tf.cast(tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1)), dtype='float32')
def _region_classification(self, fc7, is_training, initializer, initializer_bbox):
    """Recognise the plate text with an LSTM + CTC head on top of fc7.

    The softmax classification and bbox regression heads are disabled
    (kept below as comments). ``is_training``, ``initializer`` and
    ``initializer_bbox`` are therefore unused.

    Returns:
        The time-major logits fed to the CTC beam search decoder.
    """
    # Disabled softmax head:
    # cls_score = slim.fully_connected(fc7, self._num_classes,
    #                                  weights_initializer=initializer,
    #                                  trainable=is_training,
    #                                  activation_fn=None, scope='cls_score')
    # cls_prob = self._softmax_layer(cls_score, "cls_prob")
    # cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")

    # Use an LSTM instead of softmax for licence plate recognition. Note:
    # training images are read with cv, so their shape is
    # (height, width, channel).
    # TODO 1: try fc7 first; whatever the result, also try pool5 directly,
    #         remembering to convert the shape.
    # TODO 2: try NUM_HIDDEN 256 and 128, and NUM_LAYERS 1 and 2.

    # 1-D array of size batch_size holding the length of every sequence to
    # predict.
    self.seq_len = tf.placeholder(tf.int32, [None])
    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
    # targets = tf.sparse_placeholder(tf.int32)

    # Option 1: use fc7 with shape (batch_size, -1, 1): max time step -1,
    # one feature.
    # fc7_shape = tf.shape(fc7)
    static_shape = fc7.get_shape()
    # feature = tf.reshape(fc7, [fc7_shape[0], cfg.MY.MAX_TIMESTEP, 1])
    feature = tf.reshape(fc7, [static_shape[0], static_shape[1] * 4, -1])

    lstm_layers = [
        rnn.LSTMCell(cfg.MY.NUM_HIDDEN) for _ in range(cfg.MY.NUM_LAYERS)
    ]
    rnn_out, _ = tf.nn.dynamic_rnn(rnn.MultiRNNCell(lstm_layers),
                                   feature,
                                   self.seq_len,
                                   dtype=tf.float32)

    dynamic_shape = tf.shape(feature)
    n_batch = dynamic_shape[0]
    n_steps = dynamic_shape[1]

    # (batch_size * max_timesteps, num_hidden)
    flat_out = tf.reshape(rnn_out, [-1, cfg.MY.NUM_HIDDEN])

    # NOTE(review): name='lstm_w' is passed to truncated_normal, so it
    # names the initial-value op rather than the Variable; left as is
    # because renaming the variable would change checkpoint keys.
    proj_w = tf.Variable(
        tf.truncated_normal([cfg.MY.NUM_HIDDEN, cfg.MY.NUM_CLASSES],
                            name='lstm_w'))
    proj_b = tf.Variable(tf.constant(0., shape=[cfg.MY.NUM_CLASSES]),
                         name='lstm_b')
    flat_logits = tf.matmul(flat_out, proj_w) + proj_b

    # Reshaping back to the original shape
    batch_major_logits = tf.reshape(flat_logits,
                                    [n_batch, n_steps, cfg.MY.NUM_CLASSES])
    # CTC expects (max_timesteps, batch_size, num_classes).
    logits = tf.transpose(batch_major_logits, (1, 0, 2))
    self.ctc_decoded, ctc_cls_prob = tf.nn.ctc_beam_search_decoder(
        logits, self.seq_len, merge_repeated=False)

    # Option 2: use pool5, converted to (batch_size, width, height * channel).

    # Disabled bbox head:
    # bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
    #                                  weights_initializer=initializer_bbox,
    #                                  trainable=is_training,
    #                                  activation_fn=None, scope='bbox_pred')
    # self._predict_layers["cls_score"] = cls_score
    # self._predict_layers["cls_pred"] = cls_pred
    # self._predict_layers["cls_prob"] = cls_prob
    self._predict_layers["cls_logits"] = logits
    self._predict_layers['ctc_cls_prob'] = ctc_cls_prob
    # self._predict_layers["bbox_pred"] = bbox_pred

    # Feed values for self.seq_len: every sequence uses the full length.
    self.seq_len_value = np.asarray(
        [cfg.MY.MAX_TIMESTEP] * cfg.MY.IMG_BATCH, dtype=np.int32)
    self.seq_len_test_value = np.asarray(
        [cfg.MY.MAX_TIMESTEP] * cfg.MY.IMG_BATCH, dtype=np.int32)

    # return cls_prob, bbox_pred
    # return logits, bbox_pred
    return logits
def BuildRnn():
    """Build a stacked bidirectional LSTM regressor.

    Two sub-graphs share the same LSTM cells: a training path whose cells
    are wrapped with output dropout (keep probability 0.6) and a prediction
    path that uses the bare cells. Both take the output at the last time
    index and apply one linear layer. Sizes are read from the module-level
    names ``batchSize``, ``numSteps``, ``inputDim``, ``lableDim``,
    ``numHidden`` and ``numLayers``.

    Returns:
        dict with the placeholders ``'seqLen'``, ``'X'``, ``'Y'`` and
        ``'learningRate'`` and the ops ``'loss_op'``, ``'train_op'`` and
        ``'test_loss'``.
    """
    # No initial forward state is supplied; the else branch below is kept
    # for the case where one is wired in.
    states = None

    seqLen = tf.placeholder(tf.int32, [None], name='seqLen')
    X = tf.placeholder(dtype=tf.float32,
                       shape=(batchSize, numSteps, inputDim),
                       name='X')
    Y = tf.placeholder(dtype=tf.float32,
                       shape=(batchSize, lableDim),
                       name='Y')
    #testY = tf.placeholder(dtype=tf.float32, shape=(batchSize, lableDim), name='testY')
    learningRate = tf.placeholder(dtype=tf.float32, name='learn_rate')

    # The bidirectional output concatenates both directions, hence 2x.
    weights = tf.Variable(tf.random_normal(shape=[numHidden * 2, lableDim]))
    bais = tf.Variable(tf.random_normal(shape=[lableDim]))

    # two states in one layer in forward net, backward net do not need
    #numStateTensor = 2 * numLayers
    #state_tensor = tf.placeholder(dtype=tf.float32, shape=(numStateTensor, batchSize, numHidden), name='input_state')
    #print('state_tensor', state_tensor)

    result = {}
    result['seqLen'] = seqLen
    result['X'] = X
    result['Y'] = Y
    result['learningRate'] = learningRate

    # First numLayers cells are the forward stack, the rest the backward
    # stack. Each dropout wrapper shares its cell with the bare list.
    cells = []
    dropoutCell = []
    for _ in range(numLayers * 2):
        bare = rnn.LSTMCell(numHidden, use_peepholes=True, forget_bias=1.0)
        cells.append(bare)
        dropoutCell.append(
            tf.nn.rnn_cell.DropoutWrapper(bare, output_keep_prob=0.6))

    if states is None:
        init_state_fw = None
    else:
        # assert states.shape() == 2 * numStates
        states = tf.unstack(states)
        print(len(states), states[0])
        # Consecutive pairs are the (c, h) state of one forward layer.
        init_state_fw = [
            rnn.LSTMStateTuple(states[i * 2 + 0], states[i * 2 + 1])
            for i in range(numLayers)
        ]

    # Training path (with dropout).
    output, final_state_fw, final_state_bw = rnn.stack_bidirectional_dynamic_rnn(
        dropoutCell[0:numLayers],
        dropoutCell[numLayers:],
        inputs=X,
        initial_states_fw=init_state_fw,
        initial_states_bw=None,
        sequence_length=seqLen,
        dtype=tf.float32)
    # Unused in Python; kept so the ops created here stay the same.
    outlayerDim = tf.shape(output)[2]
    output = output[:, -1, :]
    print('output', output)
    prediction = tf.matmul(output, weights) + bais
    print('prediction', prediction)
    loss_op = tf.losses.mean_squared_error(labels=Y, predictions=prediction)
    print(loss_op)

    # Define loss and optimizer
    result['loss_op'] = loss_op
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRate)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    result['train_op'] = train_op

    # prediction without dropout
    output, final_state_fw, final_state_bw = rnn.stack_bidirectional_dynamic_rnn(
        cells[0:numLayers],
        cells[numLayers:],
        X,
        initial_states_fw=init_state_fw,
        initial_states_bw=None,
        sequence_length=seqLen,
        dtype=tf.float32)
    output = output[:, -1, :]
    predictions = tf.add(tf.matmul(output, weights), bais, name='predict_op')
    test_loss = tf.losses.mean_squared_error(labels=Y,
                                             predictions=predictions)
    result['test_loss'] = test_loss
    return result
def add_multilayer_rnn_op(self):
    """Build the recurrent encoder and store per-step logits on ``self``.

    ``self.num_layers`` bidirectional layers of the cell type named by
    ``self.rnn_unit`` are stacked, each fed with the concatenated outputs
    of the previous one. When ``self.is_attention`` is set an
    attention-wrapped LSTM decoder is run over the encoder outputs. The
    result is projected to ``self.num_class`` scores per step and stored in
    ``self.logits``.

    Raises:
        ValueError: if ``self.rnn_unit`` is not 'lstm', 'gru' or 'rnn'.
    """

    def make_cell():
        # One fresh cell of the configured type.
        if self.rnn_unit == 'lstm':
            return rnn.LSTMCell(self.hidden_dim,
                                forget_bias=1.,
                                state_is_tuple=True)
        if self.rnn_unit == 'gru':
            return rnn.GRUCell(self.hidden_dim)
        if self.rnn_unit == 'rnn':
            return rnn.BasicRNNCell(self.hidden_dim)
        raise ValueError('rnn_unit must in (lstm, gru, rnn)!')

    def dynamic_batch_size():
        # Built on demand because the batch size is only known at run time.
        return tf.shape(self.input_feature_embeddings)[0]

    with tf.variable_scope("bi-lstm"):
        layer_input = self.input_feature_embeddings
        for _ in range(self.num_layers):
            # default_name gives every layer its own uniquified scope.
            with tf.variable_scope(None, default_name="bidirectional-rnn"):
                forward_cell = make_cell()
                backward_cell = make_cell()
                forward_zero = forward_cell.zero_state(dynamic_batch_size(),
                                                       dtype=tf.float32)
                backward_zero = backward_cell.zero_state(dynamic_batch_size(),
                                                         dtype=tf.float32)
                both_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    forward_cell,
                    backward_cell,
                    layer_input,
                    self.sequence_lengths,
                    forward_zero,
                    backward_zero,
                    dtype=tf.float32)
                # Forward and backward outputs side by side on the last axis.
                layer_input = tf.concat(both_outputs, 2)
        self.output = tf.nn.dropout(layer_input, self.dropout_pl)

    if self.is_attention:
        with tf.variable_scope('attention'):
            memory_dim = self.hidden_dim * 2
            mechanism = BahdanauAttention(memory_dim, layer_input,
                                          self.sequence_lengths)
            attentive_cell = AttentionWrapper(
                rnn.LSTMCell(self.hidden_dim, state_is_tuple=True),
                mechanism, memory_dim)
            start_state = attentive_cell.zero_state(dynamic_batch_size(),
                                                    dtype=tf.float32)
            # The encoder outputs are also the decoder inputs.
            feeder = TrainingHelper(inputs=layer_input,
                                    sequence_length=self.sequence_lengths)
            decoder = BasicDecoder(cell=attentive_cell,
                                   helper=feeder,
                                   initial_state=start_state)
            decoded, _, _ = dynamic_decode(decoder)
            self.output = tf.nn.dropout(decoded.rnn_output, self.dropout_pl)

    with tf.variable_scope("proj"):
        W = tf.get_variable("W",
                            shape=[2 * self.hidden_dim, self.num_class],
                            dtype=tf.float32)
        b = tf.get_variable("b",
                            shape=[self.num_class],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer())
        # Fold the time axis into the batch axis for the matmul, then
        # restore it.
        out_shape = tf.shape(self.output)
        flat = tf.reshape(self.output, [-1, 2 * self.hidden_dim])
        scores = tf.matmul(flat, W) + b
        self.logits = tf.reshape(scores, [-1, out_shape[1], self.num_class])
def lstm_cell(config):
    """Return an LSTM cell of ``config.n_hidden`` units with output dropout.

    The dropout wrapper is created under the 'lstm_dropout' name scope and
    keeps outputs with probability ``config.output_keep_prob``.
    """
    base = rnn.LSTMCell(config.n_hidden)
    with tf.name_scope('lstm_dropout'):
        wrapped = rnn.DropoutWrapper(
            base, output_keep_prob=config.output_keep_prob)
    return wrapped
def lstm(self):
    """Create an LSTM cell of ``self.rnn_units`` units.

    The cell is constructed inside the "lstm_Cell" variable scope and takes
    its ``reuse`` flag from that scope.
    """
    with tf.variable_scope("lstm_Cell"):
        return rnn.LSTMCell(self.rnn_units,
                            reuse=tf.get_variable_scope().reuse)
# Script fragment: input projection -> stacked LSTM -> output projection,
# trained with a mean squared error. `x`, `time_step`, `n_input`,
# `n_hidden`, `n_outputs`, `n_layers`, `learning_rate`, `train_x`,
# `train_y` and `test_x` are defined outside this fragment.
y = tf.placeholder(tf.float32, [None, time_step, n_outputs])
weights = {
    'in': tf.Variable(tf.random_normal([n_input, n_hidden])),
    'out': tf.Variable(tf.random_normal([n_hidden, n_outputs])),
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_outputs, ])),
}
X = train_x
Y = train_y
test = test_x

# Input projection: time steps are folded into the batch axis for the
# matmul and restored afterwards.
w_in = weights['in']
b_in = biases['in']
inputs = tf.reshape(x, [-1, n_input])
input_rnn = tf.matmul(inputs, w_in) + b_in
input_rnn = tf.reshape(input_rnn, [-1, time_step, n_hidden])

lstm_cells = [rnn.LSTMCell(n_hidden, forget_bias=1.0) for _ in range(n_layers)]
lstm = rnn.MultiRNNCell(lstm_cells)
# Feed the projected input. Passing the raw `x` here left the projection
# above (and its variables) disconnected from the loss.
outputs, states = tf.nn.dynamic_rnn(lstm, inputs=input_rnn, dtype=tf.float32,
                                    time_major=False)

# Output projection applied to every time step.
outputs = tf.reshape(outputs, [-1, n_hidden])
w_out = weights['out']
b_out = biases['out']
pred = tf.matmul(outputs, w_out) + b_out

# Loss function: mean squared error over all flattened predictions.
loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(y, [-1])))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Repeat the training loop 1000 times.
    for i in range(1000):
        step = 0
        start = 0
def create_rnn_cell(size):
    """Return a new ``rnn.LSTMCell`` with ``size`` units.

    The cell returned here is meant to be reused by the caller.
    """
    return rnn.LSTMCell(size)
from tensorflow.contrib import nn

# Script fragment: a two-layer LSTM followed by a tanh output layer.
# `tf` and `rnn` are not imported in this fragment.
numHidden = 1000
numHidden2 = 1000
numInputs = 2
numOutputs = 1
timesteps = 1
resultSet = []

# Output layer parameters; the input width matches the second LSTM layer.
w = tf.Variable(tf.truncated_normal([numHidden2, numOutputs]))
b = tf.Variable(tf.random_normal([numOutputs]))

lstm = rnn.LSTMCell(numHidden, state_is_tuple=True)
lstm2 = rnn.LSTMCell(numHidden2, state_is_tuple=True)
# lstm3 is created but is not part of the stacked cell below.
lstm3 = rnn.LSTMCell(numHidden2, state_is_tuple=True)
cell = rnn.MultiRNNCell([lstm, lstm2])


def LSTM(X):
    """Run the stacked cell over ``X`` and map the last step through tanh.

    Returns ``tanh(last_output @ w + b)`` where ``last_output`` is the
    output at the final index of the time axis.
    """
    output, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    # Swap the first two axes so that output[-1] is the last time step.
    output = tf.transpose(output, (1, 0, 2))
    out = tf.tanh(tf.matmul(output[-1], w) + b)
    return out
def BuildNetwork(self, learningRate):
    """Build the hierarchical conv + attention + BLSTM regression graph.

    For every kernel size in ``self.convSize`` a conv / max-pool / conv
    branch is run over the input and summarised by ``self.firstAttention``.
    The per-branch summaries (plus a dense projection of the sentence input
    when ``self.sentenceFlag`` is set) are concatenated, passed through a
    bidirectional multi-layer LSTM and summarised by
    ``self.secondAttention``. A linear layer produces the prediction, which
    is trained with a Huber loss and Adam. Every intermediate tensor is
    stored in ``self.parameters``.

    Args:
        learningRate: Learning rate handed to the Adam optimizer.
    """
    self.dataInput = tensorflow.placeholder(
        dtype=tensorflow.float32, shape=[None, 1000, 40], name='DataInput')
    self.labelInput = tensorflow.placeholder(
        dtype=tensorflow.float32, shape=[1, 1], name='LabelInput')
    self.seqInput = tensorflow.placeholder(
        dtype=tensorflow.int32, shape=[None], name='SeqInput')
    self.sentenceInput = tensorflow.placeholder(
        dtype=tensorflow.float32, shape=[None, 3200], name='sentenceInput')
    self.interviewInput = tensorflow.placeholder(
        dtype=tensorflow.float32, shape=[1, 12800], name='interviewInput')

    store = self.parameters
    store['AttentionResultCurrent'] = []

    for sample in self.convSize:
        conv1_key = 'Layer1st_Conv_%d' % sample
        pool_key = 'Layer2nd_MaxPooling_%d' % sample
        conv2_key = 'Layer3rd_Conv_%d' % sample
        reshape_key = 'Layer4th_Reshape_%d' % sample
        attention_key = 'AttentionMechanism_%d' % sample
        result_key = 'AttentionResult_%d' % sample

        # A trailing channel axis is added for conv2d.
        store[conv1_key] = tensorflow.layers.conv2d(
            inputs=self.dataInput[:, :, :, tensorflow.newaxis],
            filters=8,
            kernel_size=[sample, sample],
            strides=[1, 1],
            padding='SAME',
            activation=tensorflow.nn.relu,
            name=conv1_key)
        store[pool_key] = tensorflow.layers.max_pooling2d(
            inputs=store[conv1_key],
            pool_size=[3, 3],
            strides=[2, 2],
            padding='SAME',
            name=pool_key)
        store[conv2_key] = tensorflow.layers.conv2d(
            inputs=store[pool_key],
            filters=16,
            kernel_size=[sample, sample],
            strides=[1, 1],
            padding='SAME',
            activation=tensorflow.nn.relu,
            name=conv2_key)
        # Merge the last two axes (20 x 16 channels) into one feature axis.
        store[reshape_key] = tensorflow.reshape(
            tensor=store[conv2_key],
            shape=[-1, 500, 20 * 16],
            name=reshape_key)
        store[attention_key] = self.firstAttention(
            dataInput=store[reshape_key],
            seqInput=self.seqInput,
            scopeName=self.firstAttentionName + '_Frame_%d' % sample,
            hiddenNoduleNumber=16 * 20,
            attentionScope=self.firstAttentionScope,
            blstmFlag=False)
        store[result_key] = store[attention_key]['FinalResult']
        store['AttentionResultCurrent'].append(store[result_key])

    if self.sentenceFlag:
        store['SentenceTreatment'] = tensorflow.layers.dense(
            inputs=self.sentenceInput,
            units=128,
            activation=tensorflow.nn.relu,
            name='SentenceTreatment')
        store['AttentionResultCurrent'].append(store['SentenceTreatment'])

    store['AttentionResultConcat'] = tensorflow.concat(
        store['AttentionResultCurrent'], axis=1)

    forward_layers = [
        rnn.LSTMCell(num_units=self.hiddenNodules)
        for _ in range(self.rnnLayers)
    ]
    store['BLSTM_FW_Cell'] = tensorflow.nn.rnn_cell.MultiRNNCell(
        cells=forward_layers, state_is_tuple=True)
    backward_layers = [
        rnn.LSTMCell(num_units=self.hiddenNodules)
        for _ in range(self.rnnLayers)
    ]
    store['BLSTM_BW_Cell'] = tensorflow.nn.rnn_cell.MultiRNNCell(
        cells=backward_layers, state_is_tuple=True)

    # A leading axis of size 1 is added so that the rows of the
    # concatenated summaries form one sequence.
    blstm_output, blstm_state = tensorflow.nn.bidirectional_dynamic_rnn(
        cell_fw=store['BLSTM_FW_Cell'],
        cell_bw=store['BLSTM_BW_Cell'],
        inputs=store['AttentionResultConcat'][tensorflow.newaxis, :, :],
        dtype=tensorflow.float32)
    store['BLSTM_Output'] = blstm_output
    store['BLSTM_FinalState'] = blstm_state

    store['BLSTM_AttentionMechanism'] = self.secondAttention(
        dataInput=store['BLSTM_Output'],
        seqInput=None,
        scopeName=self.secondAttentionName + '_Sentence',
        hiddenNoduleNumber=2 * self.hiddenNodules,
        attentionScope=self.secondAttentionScope,
        blstmFlag=True)

    if not self.interviewFlag:
        store['BLSTM_Result'] = store['BLSTM_AttentionMechanism'][
            'FinalResult']
    else:
        store['InterviewTreatment'] = tensorflow.layers.dense(
            inputs=self.interviewInput,
            units=256,
            activation=tensorflow.nn.relu,
            name='InterviewTreatment')
        store['BLSTM_Result'] = tensorflow.concat(
            [
                store['BLSTM_AttentionMechanism']['FinalResult'],
                store['InterviewTreatment']
            ],
            axis=1,
            name='BLSTM_Result')

    store['Predict'] = tensorflow.layers.dense(
        inputs=store['BLSTM_Result'], units=1, activation=None)
    store['Loss'] = tensorflow.losses.huber_loss(
        labels=self.labelInput, predictions=store['Predict'])
    self.train = tensorflow.train.AdamOptimizer(
        learning_rate=learningRate).minimize(store['Loss'])
def __init__(self, params):
    """Build the recurrent classification graph described by ``params``.

    :param params: dict of hyper-parameters. Keys read here are
        ``n_steps``, ``n_input``, ``n_units``, ``n_classes``,
        ``batch_size``, ``n_layers``, ``cell_type``, ``learning_rate``
        and, optionally, ``n_weights``.
    """
    self.params = params
    n_steps = params["n_steps"]
    n_input = params["n_input"]
    n_units = params["n_units"]
    n_classes = params["n_classes"]
    batch_size = params["batch_size"]

    # Example configuration kept from the original notes:
    # "n_steps": 128,
    # "n_input": 128,
    # "n_units": 128,
    # "n_classes": 6,
    # "batch_size": 100,
    # "n_epochs": 50,
    # "learning_rate": 0.0003,
    # "display_step": 1,
    # "run_mode": "/cpu:0",
    # "split_png_data": "/Users/jw/Desktop/audio_data/1484131952_256_0.5/split_png_data/CASIA"

    tf.reset_default_graph()
    with tf.get_default_graph().as_default():
        with tf.name_scope("placeholder"):
            # Flat input rows are reshaped to (batch, n_steps, n_input).
            self.x = tf.placeholder("float", [None, n_steps * n_input], name="x")
            self.input = tf.reshape(self.x, [-1, n_steps, n_input])
            self.y = tf.placeholder("float", [None, n_classes], name="y")
            self.keep_prob = tf.placeholder(tf.float32)

        with tf.variable_scope("softmax"):
            weights = tf.Variable(tf.random_normal([n_units, n_classes]), name='weights')
            biases = tf.Variable(tf.random_normal([n_classes]), name='biases')

        # x = tf.transpose(self.x, [1, 0, 2])
        # x = tf.reshape(x, [-1, n_input])
        # x = tf.split(0, n_steps, x)

        # Every sequence in the batch is declared to be n_steps long.
        sequence_length = np.full([batch_size], n_steps, dtype=int)

        state_size = self.params["n_units"]
        num_layers = self.params["n_layers"]
        cell_type = self.params["cell_type"]
        num_weights_for_custom_cell = self.params.get("n_weights")

        def stacked_dropout_lstm():
            # num_layers LSTM cells, each with dropout applied to its input.
            return rnn.MultiRNNCell([
                rnn.DropoutWrapper(
                    rnn.LSTMCell(state_size, state_is_tuple=True),
                    input_keep_prob=self.keep_prob)
                for _ in range(num_layers)
            ])

        if cell_type == 'Custom':
            # NOTE(review): the CustomCell is constructed and then immediately
            # replaced by the stacked LSTM below, so it never reaches the
            # graph -- confirm whether that is intended.
            cell = CustomCell(state_size, num_weights_for_custom_cell)
            cell = stacked_dropout_lstm()
        elif cell_type == 'GRU':
            cell = rnn.GRUCell(state_size)
        elif cell_type == 'LSTM':
            cell = stacked_dropout_lstm()
        elif cell_type == 'LN_LSTM':
            cell = LayerNormalizedLSTMCell(state_size)
        else:
            cell = rnn.BasicRNNCell(state_size)

        # Output dropout is layered on top of whichever cell was selected.
        cell = rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
        self.init_state = cell.zero_state(batch_size, dtype=tf.float32)

        outputs, self.final_state = tf.nn.dynamic_rnn(
            cell,
            self.input,
            dtype=tf.float32,
            initial_state=self.init_state,
            sequence_length=sequence_length)

        # dynamic_rnn returns [batch_size, time_step, state_size]; go
        # time-major so that index -1 selects the last time step.
        time_major_outputs = tf.transpose(outputs, [1, 0, 2])
        last_step = time_major_outputs[-1]
        pred = tf.matmul(last_step, weights) + biases

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=self.y)
        self.cost = tf.reduce_mean(cross_entropy)
        adam = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
        self.optimizer = adam.minimize(self.cost)

        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        tf.summary.scalar("cost", self.cost)
        tf.summary.scalar("accuracy", self.accuracy)
        self.merge_summary_op = tf.summary.merge_all()
        logger.info("模型构建完毕")
def _lstm_branches(feature_map, num_branches, time_steps, step_features,
                   num_units, scope_prefix):
    """Split ``feature_map`` on its channel axis and run one LSTM per slice.

    The map is split into ``num_branches`` pieces along axis 3, each piece is
    reshaped to ``[-1, time_steps, step_features]`` and fed to its own
    ``LSTMCell(num_units)`` inside the variable scope
    ``'<scope_prefix>_<k>'`` (k starting at 1).

    :return: list with the per-step output tensor of every branch, in order.
    """
    slices = tf.split(feature_map, num_or_size_splits=num_branches, axis=3)
    sequences = [
        tf.reshape(piece, [-1, time_steps, step_features]) for piece in slices
    ]
    cells = [
        rnn.LSTMCell(num_units, state_is_tuple=True)
        for _ in range(num_branches)
    ]
    branch_outputs = []
    for index, (cell, sequence) in enumerate(zip(cells, sequences), 1):
        # Only the outputs are used downstream; final states are discarded.
        outputs, _ = tf.nn.dynamic_rnn(
            cell, sequence, dtype=tf.float32,
            scope='%s_%d' % (scope_prefix, index))
        branch_outputs.append(outputs)
    return branch_outputs


def _conv_block(inputs, weight_key, bias_key):
    """Apply conv2d + bias, batch norm, leaky ReLU and 2x2 max pooling."""
    conv = tf.nn.conv2d(
        inputs, weights[weight_key], strides=[1, 1, 1, 1],
        padding='SAME') + biases[bias_key]
    conv = tf.contrib.layers.batch_norm(conv)
    conv = leakyrelu(conv)
    return tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')


def recurrent_neural_network(x, current_batch_size):
    """Build the combined CNN/LSTM classifier and return its output layer.

    The raw input goes through one 128-unit LSTM. In parallel it is treated
    as a single-channel image and passed through three convolution blocks;
    the outputs of the first and second blocks are additionally split on the
    channel axis (4 and 8 ways) and every slice is fed to its own LSTM. All
    convolution maps and LSTM outputs are flattened and merged by
    ``flatten_and_merge`` before two linear layers.

    :param x: input tensor accepted by ``tf.nn.dynamic_rnn`` (rank 3); its
        last two dimensions become the image height and width.
    :param current_batch_size: forwarded unchanged to ``flatten_and_merge``.
    :return: tensor produced by the second linear layer, with
        ``NUM_CLASSES`` columns.
    """
    lstm_cell_1_1 = rnn.LSTMCell(128, state_is_tuple=True)
    lstm_layer_1_1, _ = tf.nn.dynamic_rnn(lstm_cell_1_1, x, dtype=tf.float32)

    # Add a trailing channel dimension so the input can be convolved.
    x = tf.reshape(x, [-1, tf.shape(x)[-2], tf.shape(x)[-1], 1])

    conv1 = _conv_block(x, 'w_conv1', 'b_conv1')
    # refer docs for tf.split here: https://www.tensorflow.org/api_docs/python/tf/split
    lstm_layers_2 = _lstm_branches(conv1, 4, 81, 50 * 4, 32, 'lstm_layer_2')

    conv2 = _conv_block(conv1, 'w_conv2', 'b_conv2')
    lstm_layers_3 = _lstm_branches(conv2, 8, 41, 100, 16, 'lstm_layer_3')

    conv3 = _conv_block(conv2, 'w_conv3', 'b_conv3')

    list_to_be_flattened_and_merged = (
        [conv1, conv2, conv3, lstm_layer_1_1] + lstm_layers_2 + lstm_layers_3)
    merged = flatten_and_merge(list_to_be_flattened_and_merged,
                               current_batch_size)

    # fully connected layers
    # NOTE(review): 273248 is hard-coded and presumably equals the flattened
    # width of `merged` for the expected input size -- confirm before
    # changing any layer shape above.
    num_features = 273248
    w_fc1 = tf.get_variable('w_fc1', shape=[num_features, 128], dtype=tf.float32)
    w_fc2 = tf.get_variable('w_fc2', shape=[128, NUM_CLASSES], dtype=tf.float32)
    b_fc1 = tf.get_variable('b_fc1', shape=[128], dtype=tf.float32)
    b_fc2 = tf.get_variable('b_fc2', shape=[NUM_CLASSES], dtype=tf.float32)

    fully_connected_1 = tf.matmul(merged, w_fc1) + b_fc1
    fully_connected_2 = tf.matmul(fully_connected_1, w_fc2) + b_fc2
    return fully_connected_2
#'rnnsb': tf.Variable(weights['rnnsb'], name='embed_state_b'), #'rnnoutbeta': tf.Variable(tf.zeros([2048])), #'rnnoutscale': tf.Variable(tf.ones([2048])), #'cnnoutbeta': tf.Variable(tf.zeros([2048])), #'cnnoutscale': tf.Variable(tf.ones([2048])), #'featbeta': tf.Variable(tf.zeros([4096])), #'featscale': tf.Variable(tf.ones([4096])), #'gbeta': tf.Variable(tf.zeros([1000])), #'gscale': tf.Variable(tf.ones([1000])) } # question-embedding #embed_ques_W = tf.Variable(tf.random_uniform([vocabulary_size, input_embedding_size], -0.08, 0.08), name='embed_ques_W') # encoder: RNN body lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True, state_is_tuple=False) lstm_dropout_1 = rnn_cell.DropoutWrapper(lstm_1, output_keep_prob = 1 - dropout_rate) lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True, state_is_tuple=False) lstm_dropout_2 = rnn_cell.DropoutWrapper(lstm_2, output_keep_prob = 1 - dropout_rate) stacked_lstm = rnn_cell.MultiRNNCell([lstm_dropout_1, lstm_dropout_2], state_is_tuple=False) image = tf.placeholder(tf.float32, [batch_size, 2048]) question = tf.placeholder(tf.int32, [batch_size, max_words_q]) #answers_true = tf.placeholder(tf.float32, (batch_size, 1000)) #noise = tf.placeholder(tf.float32, [batch_size, 4096]) #answers_false = tf.placeholder(tf.float32, (None, 1000)) #image_false = tf.placeholder(tf.float32, (None, 2048)) #question_false = tf.placeholder(tf.int32, [batch_size, max_words_q])
def model(data, variables, constants, op='train'):
    """Assemble the LSTM + linear projection graph.

    :param data: mapping holding the tensors 'traX', 'tstX' and 'traY'.
    :param variables: mapping holding the projection weight 'w_1' and
        bias 'b_1'.
    :param constants: mapping holding 'INPUT_TIME_STEPS', 'INPUT_DIM',
        'HIDDEN_DIM', 'OUTPUT_TIME_STEPS' and 'OUTPUT_DIM'.
    :param op: 'train' feeds ``data['traX']``; any other value feeds
        ``data['tstX']``.
    :return: ``(logits, labels)``, both flattened to one dimension of
        length ``OUTPUT_TIME_STEPS * OUTPUT_DIM``.
    """
    # TODO: reconcile against this article
    # https://medium.com/@erikhallstrm/using-the-tensorflow-lstm-api-3-7-5f2b97ca6b73
    training = (op == 'train')
    # NOTE(review): dropout_pkeep is assigned but never consumed below, so
    # no dropout is actually applied -- confirm whether that is intended.
    if training:
        feed_data = data['traX']
        dropout_pkeep = 0.5
        print('model set to train')
    else:
        feed_data = data['tstX']
        dropout_pkeep = 1.0
        print('model set to test')

    hidden_dim = constants['HIDDEN_DIM']
    flat_output_size = constants['OUTPUT_TIME_STEPS'] * constants['OUTPUT_DIM']

    # PREP DATA FOR INPUT
    input_reshape_op = tf.reshape(
        feed_data,
        [-1, constants['INPUT_TIME_STEPS'], constants['INPUT_DIM']],
        name='input_reshape_op')

    # CREATE CELLS LSTMCell
    basic_cell = rnn.LSTMCell(num_units=hidden_dim, activation=tf.nn.tanh)
    rnn_output, states = tf.nn.dynamic_rnn(
        basic_cell, input_reshape_op, time_major=False, dtype=tf.float32)

    # Collapse batch and time so one matmul projects every step.
    stacked_rnn_output = tf.reshape(rnn_output, [-1, hidden_dim],
                                    name='stacked_rnn')
    # stacked_outputs = tf.layers.dense(stacked_rnn_output,
    #     constants['OUTPUT_TIME_STEPS'], name='dense_output')

    # FULLY CONNECTED LAYER --1-- REDUCTION
    # HIDDEN_DIM * MAX_BACKPROPOGATION => MAX_BACKPROPOGATION
    mult_op = tf.matmul(stacked_rnn_output, variables['w_1'], name='fc_1_mult')
    add_op = tf.add(mult_op, variables['b_1'], name='fc_1_add')
    reshape_out = tf.reshape(
        add_op,
        [-1, constants['OUTPUT_TIME_STEPS'], constants['OUTPUT_DIM']],
        name='reshout_2')

    print('***********************')
    for tensor in (input_reshape_op, stacked_rnn_output, mult_op, add_op,
                   reshape_out):
        shape_log(tensor)

    logits = tf.reshape(reshape_out, [flat_output_size])
    # [-1, constants['OUTPUT_TIME_STEPS'], constants['OUTPUT_DIM']])
    # NOTE(review): labels are always taken from data['traY'], even when the
    # inputs come from data['tstX'] -- confirm this is intended.
    labels = tf.reshape(data['traY'], [flat_output_size])
    # [-1, constants['OUTPUT_TIME_STEPS'], constants['OUTPUT_DIM']])

    # logits = tf.clip_by_value(logits, 0., 1.)
    logits = tf.identity(logits, name='logits')
    labels = tf.identity(labels, name='labels')
    shape_log(logits)
    shape_log(labels)

    return logits, labels
zero_targets_ph = targets_ph - tf.reduce_mean(targets_ph) # zero mean locs_list = tf.split(value=locs_ph, num_or_size_splits=6, axis=1) locs_list = [tf.squeeze(l) for l in locs_list] # Build the aux nets. with tf.variable_scope('glimpse_net'): gl = GlimpseNet(config, images_ph) with tf.variable_scope('loc_net'): loc_net = LocNet(config) # number of examples N = tf.shape(images_ph)[0] init_loc = tf.random_uniform((N, 2), minval=-1, maxval=1) init_glimpse = gl(init_loc) # Core network. lstm_cell = rnn_cell.LSTMCell(config.cell_size, state_is_tuple=True) init_state = lstm_cell.zero_state(N, tf.float32) inputs = [init_glimpse] inputs.extend([0] * (config.num_glimpses)) outputs, _ = seq2seq.rnn_decoder(inputs, init_state, lstm_cell, loop_function=get_next_input) # Time independent baselines with tf.variable_scope('baseline'): w_baseline = weight_variable((config.cell_output_size, 1)) b_baseline = bias_variable((1, )) baselines = [] for t, output in enumerate(outputs[1:]): baseline_t = tf.nn.xw_plus_b(output, w_baseline, b_baseline)
def __init__(self, num_words, num_chars, num_classes, num_steps, word_len, embedding_matrix=None):
    """Build the character + word bidirectional LSTM feature extractor.

    Hyper-parameters are read from ``Config().model_para``. The graph
    produces ``self.final_outputs`` (tanh-projected word-level BiLSTM
    outputs) and ``self.length`` (per-sample sequence lengths).

    :param num_words: number of rows of the word embedding table.
    :param num_chars: number of rows of the character embedding table.
    :param num_classes: number of target classes.
    :param num_steps: number of word positions per sample.
    :param word_len: number of character positions per word.
    :param embedding_matrix: optional initial value for the word embedding
        variable; when ``None`` a fresh ``[num_words, input_dim]`` variable
        is created instead.
    """
    # Parameter
    self.config = Config()
    model_para = self.config.model_para
    self.dropout_rate = model_para['dropout_rate']
    self.batch_size = model_para['batch_size']
    self.num_layers = model_para['lstm_layer_num']
    self.input_dim = model_para['input_dim']
    self.hidden_dim = model_para['hidden_dim']
    self.char_input_dim = model_para['char_input_dim']
    self.char_hidden_dim = model_para['char_hidden_dim']
    self.use_pa_learning = model_para['use_pa_learning']
    self.embedding_matrix = embedding_matrix

    self.word_len = word_len
    self.num_steps = num_steps
    self.num_words = num_words
    self.num_chars = num_chars
    self.num_classes = num_classes

    # One row of character ids per word.
    self.char_inputs = tf.placeholder(tf.int32, [None, self.word_len])

    with tf.variable_scope("character-based-emb"):
        # char embedding
        self.char_embedding = tf.get_variable("char_emb", [self.num_chars, self.char_input_dim])
        self.char_inputs_emb = tf.nn.embedding_lookup(self.char_embedding, self.char_inputs)
        # static_bidirectional_rnn expects a list with one tensor per step,
        # so go time-major, flatten and split into word_len pieces.
        self.char_inputs_emb = tf.transpose(self.char_inputs_emb, [1, 0, 2])
        self.char_inputs_emb = tf.reshape(self.char_inputs_emb, [-1, self.char_input_dim])
        self.char_inputs_emb = tf.split(self.char_inputs_emb, self.word_len, 0)

    # char forward and backward
    with tf.variable_scope("char-bi-lstm"):
        # char lstm cell
        char_lstm_cell_fw = rnn.LSTMCell(self.char_hidden_dim)
        char_lstm_cell_bw = rnn.LSTMCell(self.char_hidden_dim)

        # get the length of each word
        # (sum of sign(id); presumably id 0 is padding -- TODO confirm)
        self.word_length = tf.reduce_sum(tf.sign(self.char_inputs), axis=1)
        self.word_length = tf.cast(self.word_length, tf.int32)

        _, f_output, r_output = tf.contrib.rnn.static_bidirectional_rnn(
            char_lstm_cell_fw,
            char_lstm_cell_bw,
            self.char_inputs_emb,
            dtype=tf.float32,
            sequence_length=self.word_length
        )
        # Final hidden states of both directions form the word representation.
        final_word_output = tf.concat([f_output.h, r_output.h], -1)

        self.word_lstm_last_output = tf.reshape(
            final_word_output, [-1, self.num_steps, self.char_hidden_dim*2])

    # word input
    with tf.variable_scope("word-based-emb"):
        self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
        # self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
        if self.use_pa_learning:
            self.targets = tf.placeholder(tf.float32, [None, self.num_steps+2, self.num_classes+1])
        else:
            self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets_transition = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)

        if embedding_matrix is not None:
            self.embedding = tf.Variable(embedding_matrix, trainable=True, name="word_emb", dtype=tf.float32)
        else:
            self.embedding = tf.get_variable("word_emb", [self.num_words, self.input_dim])
        self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)

    # Append the character-level word representation to each word embedding,
    # apply dropout, then convert to the per-step list format.
    self.inputs_emb = tf.concat([self.inputs_emb, self.word_lstm_last_output], -1)
    self.inputs_emb = tf.nn.dropout(self.inputs_emb, self.keep_prob)
    self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])
    self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.input_dim+self.char_hidden_dim*2])
    self.inputs_emb = tf.split(self.inputs_emb, self.num_steps, 0)

    # word lstm cell
    lstm_cell_fw = rnn.LSTMCell(self.hidden_dim)
    lstm_cell_bw = rnn.LSTMCell(self.hidden_dim)

    # get the length of each sample
    self.length = tf.reduce_sum(tf.sign(self.inputs), axis=1)
    self.length = tf.cast(self.length, tf.int32)

    # forward and backward
    with tf.variable_scope("word-bi-lstm"):
        self.outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
            lstm_cell_fw,
            lstm_cell_bw,
            self.inputs_emb,
            dtype=tf.float32,
            sequence_length=self.length
        )

    # bidirect concat
    # NOTE(review): the projection variables are created outside the
    # "word-bi-lstm" scope here -- confirm against existing checkpoints.
    final_outputs = tf.reshape(tf.concat(self.outputs, 1), [-1, self.hidden_dim * 2])
    tanh_layer_w = tf.get_variable("tanh_layer_w", [self.hidden_dim * 2, self.hidden_dim])
    tanh_layer_b = tf.get_variable("tanh_layer_b", [self.hidden_dim])
    self.final_outputs = tf.tanh(tf.matmul(final_outputs, tanh_layer_w) + tanh_layer_b)
def _create_rnn_cell(self):
    """Return one peephole LSTM cell wrapped with output dropout.

    The cell size comes from ``config.hiddenSize`` and the wrapper's
    ``output_keep_prob`` from ``config.dropout``.
    """
    lstm = rnn.LSTMCell(config.hiddenSize,
                        use_peepholes=True,
                        state_is_tuple=True)
    return rnn.DropoutWrapper(lstm, output_keep_prob=config.dropout)
def add_graph(self, noyear=False, feedforward=False):
    """Create the tagging graph: placeholders, model, metrics and train op.

    Generates placeholders for X_word, X_year, Y_label and the embedding
    matrix, creates the year embedding, builds the model body, and then
    computes accuracy, log perplexity and loss. The network is optimized
    with Adam on the loss.

    parameters:
        noyear: a boolean, indicates whether the year embedding is replaced
            by zeros, i.e. year information is withheld from the model
        feedforward: a boolean, indicates whether the model body is a single
            dense sigmoid layer instead of the stacked LSTM
    """
    # Creates placeholders for LSTM
    self.X_word = tf.placeholder(tf.int32, [None, MAX_SENT_LENGTH])
    self.X_year = tf.placeholder(tf.int32, [None])
    self.Y_label = tf.placeholder(tf.int32, [None, MAX_SENT_LENGTH])
    self.embedding_matrix = tf.placeholder(tf.float32,
                                           [MAX_THRESHOLD, EMBED_DIM])

    # Looks up embeddings for each word
    X_word = tf.nn.embedding_lookup(self.embedding_matrix, self.X_word)

    # Creates year embedding; the year index is repeated for every position
    # of the sentence so it can be concatenated with the word embeddings.
    new_years = tf.subtract(self.X_year, START_YEAR)
    unembedded_year = tf.tile(tf.expand_dims(new_years, axis=1),
                              [1, MAX_SENT_LENGTH])
    self.year_embed_mat = tf.get_variable(
        name="year_embed_mat",
        shape=(NUM_YEAR, EMBED_DIM),
        initializer=tf.contrib.layers.xavier_initializer())
    embedded_year = tf.nn.embedding_lookup(self.year_embed_mat,
                                           unembedded_year)
    if noyear:
        embedded_year = tf.zeros_like(embedded_year)

    # Concatenates X_word and year embedding to get single combined input
    X = tf.concat([X_word, embedded_year], axis=2)

    if feedforward:
        # Implements Feed-Forward
        H = tf.layers.dense(inputs=X, units=LAYERS[0],
                            activation=tf.nn.sigmoid)
    else:
        # Implements LSTM
        rnn_layers = [rnn.LSTMCell(size) for size in LAYERS]
        multi_rnn_cell = rnn.MultiRNNCell(rnn_layers)
        H, _ = tf.nn.dynamic_rnn(cell=multi_rnn_cell, inputs=X,
                                 dtype=tf.float32)

    # POS tag logits. fully_connected applies ReLU unless told otherwise;
    # logits must stay linear because softmax / cross-entropy are applied
    # to them below.
    self.Y = tf.contrib.layers.fully_connected(
        inputs=H,
        num_outputs=N_POS,
        activation_fn=None,
    )

    # Calculates accuracy
    predicted = tf.cast(tf.argmax(self.Y, axis=2), tf.int32)
    equal = tf.equal(predicted, self.Y_label)
    self.acc = tf.reduce_mean(tf.cast(equal, tf.float32))
    self.vec_acc = tf.reduce_mean(tf.cast(equal, tf.float32), axis=1)

    # Calculates perplexity. log_softmax is used instead of log(softmax) so
    # that very small probabilities do not underflow to log(0).
    mask = tf.cast(tf.one_hot(self.Y_label, N_POS), tf.float32)
    log_p = tf.reduce_sum(tf.nn.log_softmax(self.Y) * mask, axis=2)
    self.log_perp = -tf.reduce_sum(log_p, axis=1) / MAX_SENT_LENGTH
    self.perp = tf.exp(self.log_perp)

    # Calculates loss
    self.loss = tf.losses.sparse_softmax_cross_entropy(
        labels=self.Y_label,
        logits=self.Y,
    )

    # Sets train_step that uses AdamOptimizer to minimize loss
    self.train_step = tf.train.AdamOptimizer(LR).minimize(self.loss)
sess = tf.InteractiveSession()

batch_size = tf.placeholder(tf.int32)
_X = tf.placeholder(
    tf.float32,
    [None, timestep_size, 36])  # TODO change this to the divided ver
y = tf.placeholder(tf.float32, [None, 3])
keep_prob = tf.placeholder(tf.float32)

# --------------------------------------------
# Construct LSTM cells
# --------------------------------------------


def _make_lstm_cell():
    """Return a fresh LSTM cell with dropout applied to its outputs."""
    cell = rnn.LSTMCell(num_units=hidden_size,
                        forget_bias=1.0,
                        state_is_tuple=True)
    return rnn.DropoutWrapper(cell=cell,
                              input_keep_prob=1.0,
                              output_keep_prob=keep_prob)


# Every layer needs its own cell object: `[cell] * layer_num` would put the
# same instance in every layer, which shares one set of weights across the
# stack (or raises, depending on the TensorFlow version).
mlstm_cell = rnn.MultiRNNCell(
    [_make_lstm_cell() for _ in range(layer_num)],
    state_is_tuple=True)

init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)

outputs, state = tf.nn.dynamic_rnn(mlstm_cell,
                                   inputs=_X,
                                   initial_state=init_state)
# Output at the last time step.
h_state = outputs[:, -1, :]  # alternatively: h_state = state[-1][1]
def lstm_context_embedding(features, labels, mode, params):
    '''
    Model function that embeds sentences with one shared LSTM and trains it
    with a margin loss over cosine similarities.

    :param features: dict of sentence features with shape
                     (batch_size, max_words, dim_of_word); looked up by
                     CONTEXT_KEY (query sentence), POS_RESP_KEY (positive
                     response) and NEG_RESP_KEY (negative response)
    :param labels: unused
    :param mode: one of the ModeKeys values
    :param params: dict providing 'M' (loss margin), 'input_keep_prob',
                   'output_keep_prob' and, when training, 'learning_rate'
    :return: a ModelFnOps holding predictions, loss and train_op
    '''
    print('CURRENT MODE: %s' % mode.upper())

    margin = params['M']  # constant used when computing the loss
    input_keep_prob = params['input_keep_prob']
    output_keep_prob = params['output_keep_prob']
    n_lstm_units = 100  # number of hidden units

    # One LSTM cell embeds every sentence; dropout only while training.
    with tf.variable_scope("emb_cell1_context"):
        cell1 = rnn.LSTMCell(num_units=n_lstm_units, activation=tf.nn.tanh)
        if mode == ModeKeys.TRAIN:
            cell1 = rnn.DropoutWrapper(cell=cell1,
                                       input_keep_prob=input_keep_prob,
                                       output_keep_prob=output_keep_prob)

    def lstm_embed_context(x):
        # Run the shared cell and keep the output of the final time step,
        # which is treated as the sentence embedding.
        step_outputs, _ = tf.nn.dynamic_rnn(cell=cell1,
                                            inputs=x,
                                            time_major=False,
                                            dtype=tf.float32)
        time_major = tf.transpose(step_outputs, [1, 0, 2])
        return time_major[-1]

    def cosine_similarity(vec1, vec2):
        '''
        Per-sample cosine similarity: A.B / (norm(A) * norm(B))
        :param vec1: batch of vectors, shape [n_sample, emb_dim]
        :param vec2: batch of vectors, shape [n_sample, emb_dim]
        :return: tensor of shape [n_sample, ]
        '''
        # norm(A) * norm(B), shape [n_sample, ]
        norm_product = tf.norm(vec1, axis=1) * tf.norm(vec2, axis=1)
        # element-wise product summed over emb_dim, shape [n_sample, ]
        dot_product = tf.reduce_sum(tf.multiply(vec1, vec2), axis=1)
        return dot_product / norm_product

    loss = None
    train_op = None
    predictions = {}

    if mode != ModeKeys.INFER:
        # Loss is needed for both TRAIN and EVAL.
        context_vec = lstm_embed_context(features[CONTEXT_KEY])
        pos_vec = lstm_embed_context(features[POS_RESP_KEY])
        neg_vec = lstm_embed_context(features[NEG_RESP_KEY])

        cosine_sim_pos = cosine_similarity(context_vec, pos_vec)  # should be large
        cosine_sim_neg = cosine_similarity(context_vec, neg_vec)  # should be small

        # Hinge loss per (positive, negative) pair, then averaged to a scalar.
        losses = tf.maximum(0., margin - cosine_sim_pos + cosine_sim_neg)
        loss = tf.reduce_mean(losses)

    if mode == ModeKeys.TRAIN:
        # Training op is only configured in TRAIN mode.
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            optimizer=tf.train.AdamOptimizer,
            learning_rate=params['learning_rate'],
            summaries=[
                'learning_rate',
                'loss',
                "gradients",
                "gradient_norm",
            ])

    if mode == ModeKeys.INFER:
        # Prediction is the embedding of whichever sentence was supplied;
        # the context sentence takes precedence over the positive response.
        if CONTEXT_KEY in features:
            predictions = {'emb_vec': lstm_embed_context(features[CONTEXT_KEY])}
        elif POS_RESP_KEY in features:
            predictions = {'emb_vec': lstm_embed_context(features[POS_RESP_KEY])}

    # Return a ModelFnOps object
    return ModelFnOps(predictions=predictions,
                      loss=loss,
                      train_op=train_op,
                      eval_metric_ops=None,
                      mode=mode)
# name.close()

# Placeholders: one input value per step and a one-hot target word.
x = tf.placeholder("float", [None, n_input, 1])
wordy = tf.placeholder("float", [None, words_size])

# static_rnn expects a list with one [batch, 1] tensor per input position.
x1 = tf.reshape(x, [-1, n_input])
x2 = tf.split(x1, n_input, 1)

################### Model training and optimisation #####################
# Three stacked LSTM layers (n_hidden1, n_hidden2 and n_hidden3 units),
# followed by one fully connected layer with words_size outputs that feed
# the softmax below.
stacked_layers = [
    rnn.LSTMCell(units) for units in (n_hidden1, n_hidden2, n_hidden3)
]
rnn_cell = rnn.MultiRNNCell(stacked_layers)

# Run the RNN over the input list.
outputs, states = rnn.static_rnn(rnn_cell, x2, dtype=tf.float32)

# Project the last step's output to words_size logits (no activation).
pred = tf.contrib.layers.fully_connected(outputs[-1], words_size,
                                         activation_fn=None)

# Loss is the softmax cross-entropy, minimised with AdamOptimizer.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=pred,
                                                        labels=wordy)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Evaluation: accuracy is the fraction of samples whose arg-max prediction
# matches the arg-max of the one-hot target.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(wordy, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
def __init__(self, vocab_size, hidden_size, dropout, num_layers,
             max_gradient_norm, batch_size, learning_rate, lr_decay_factor,
             max_target_length, max_source_length, decoder_mode=False):
    '''
    Build the encoder/decoder graph.

    vocab_size: number of vocab tokens
    hidden_size: dimension of hidden layers (also the embedding width)
    dropout: value passed as input_keep_prob to every DropoutWrapper
    num_layers: number of hidden layers
    max_gradient_norm: gradients are clipped element-wise to
        [-max_gradient_norm, max_gradient_norm]
    batch_size: number of training examples fed to network at once
    learning_rate: stored on the instance
    lr_decay_factor: not used while building the graph
    max_target_length: length the logits are padded to for the loss; also
        the decoding step limit in decoder mode
    max_source_length: stored on the instance
    decoder_mode: when True build a beam-search decoder and skip the
        loss / backward-pass nodes
    '''
    GO_ID = config.GO_ID
    EOS_ID = config.EOS_ID
    self.max_source_length = max_source_length
    self.max_target_length = max_target_length
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate = learning_rate

    self.encoder_inputs = tf.placeholder(shape=(None, None),
                                         dtype=tf.int32, name='encoder_inputs')
    self.source_lengths = tf.placeholder(shape=(None,),
                                         dtype=tf.int32, name='source_lengths')
    self.decoder_targets = tf.placeholder(shape=(None, None),
                                          dtype=tf.int32, name='decoder_targets')
    self.target_lengths = tf.placeholder(shape=(None,),
                                         dtype=tf.int32, name="target_lengths")

    def build_stacked_cell():
        # A new cell object per layer: reusing one instance
        # (`[cell] * num_layers`) ties the layers to a single set of weights
        # or raises, depending on the TensorFlow version.
        return rnn.MultiRNNCell([
            rnn.DropoutWrapper(rnn.LSTMCell(hidden_size),
                               input_keep_prob=dropout)
            for _ in range(num_layers)
        ])

    with tf.variable_scope('embeddings'):
        embeddings = tf.Variable(
            tf.random_uniform([vocab_size, hidden_size], -1.0, 1.0),
            dtype=tf.float32)
        encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                         self.encoder_inputs)
        targets_embedding = tf.nn.embedding_lookup(embeddings,
                                                   self.decoder_targets)

    with tf.variable_scope('encoder'):
        # Separate cell objects for the forward and backward directions.
        _, encoder_state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=build_stacked_cell(),
            cell_bw=build_stacked_cell(),
            sequence_length=self.source_lengths,
            inputs=encoder_inputs_embedded,
            dtype=tf.float32,
            time_major=False)

    with tf.variable_scope('decoder'):
        decoder_cell = build_stacked_cell()
        # Projects decoder outputs onto the vocabulary in both modes.
        output_layer = Dense(vocab_size)

        #TODO add attention
        #seq2seq.BahdanauAttention(num_units=,memory=encoder_output)
        #decoder_cell = seq2seq.AttentionWrapper(cell=decoder_cell,
        #                                        attention_mechanism=)

        if decoder_mode:
            beam_width = 2
            # Beam search keeps beam_width hypotheses per example, so the
            # initial state is tiled accordingly. The same encoder state as
            # in training (encoder_state[-1]) seeds the decoder.
            decoder = seq2seq.BeamSearchDecoder(
                cell=decoder_cell,
                embedding=embeddings,
                start_tokens=tf.tile([GO_ID], [batch_size]),
                end_token=EOS_ID,
                initial_state=seq2seq.tile_batch(encoder_state[-1],
                                                 multiplier=beam_width),
                beam_width=beam_width,
                output_layer=output_layer)
        else:
            helper = seq2seq.TrainingHelper(
                inputs=targets_embedding,
                sequence_length=self.target_lengths)
            decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                           helper=helper,
                                           initial_state=encoder_state[-1],
                                           output_layer=output_layer)

        final_outputs, final_state, final_sequence_lengths = \
            seq2seq.dynamic_decode(
                decoder=decoder,
                # Bound free-running decoding; training is bounded by the
                # target lengths given to the helper.
                maximum_iterations=max_target_length if decoder_mode else None)

        if decoder_mode:
            # Beam search yields token ids rather than per-step logits.
            self.predicted_ids = final_outputs.predicted_ids
            self.logits = None
        else:
            self.logits = final_outputs.rnn_output

    if not decoder_mode:
        with tf.variable_scope("loss"):
            #have to pad logits, dynamic decode produces results not consistent
            #in shape with targets
            pad_size = self.max_target_length - tf.reduce_max(final_sequence_lengths)
            self.logits = tf.pad(self.logits, [[0, 0], [0, pad_size], [0, 0]])

            weights = tf.sequence_mask(lengths=final_sequence_lengths,
                                       maxlen=self.max_target_length,
                                       dtype=tf.float32,
                                       name='weights')

            x_entropy_loss = seq2seq.sequence_loss(logits=self.logits,
                                                   targets=self.decoder_targets,
                                                   weights=weights)

            self.loss = tf.reduce_mean(x_entropy_loss)

        # NOTE(review): the optimizer is created with its default learning
        # rate; the `learning_rate` argument is only stored on the instance
        # -- confirm whether it should be passed here.
        optimizer = tf.train.AdamOptimizer()
        gradients = optimizer.compute_gradients(x_entropy_loss)
        # Variables that do not influence the loss (the forward encoder,
        # since only encoder_state[-1] seeds the decoder) come back with a
        # None gradient, which clip_by_value cannot handle; skip them.
        capped_grads = [
            (tf.clip_by_value(grad, -max_gradient_norm, max_gradient_norm), var)
            for grad, var in gradients
            if grad is not None
        ]
        self.train_op = optimizer.apply_gradients(capped_grads,
                                                  global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())