def LSTM(x, weights, biases):
    """Run a single-layer LSTM over `x` and project the last step's output.

    `x` is unstacked along axis 1 into `timesteps` tensors, which are fed
    through a `BasicLSTMCell` with `num_hidden` units using `static_rnn`.
    The output of the final step is multiplied by `weights['out']` and
    shifted by `biases['out']`.

    `timesteps` and `num_hidden` are read from the enclosing module.
    """
    step_inputs = tf.unstack(x, num=timesteps, axis=1)
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Only the per-step outputs are used; the final states are discarded.
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def __init__(self, args, infer=False):
    '''
    Build the TensorFlow graph of the SocialModel.

    params:
    args : Arguments required for the model creation. This method reads
           rnn_size, grid_size, maxNumPeds, seq_length, learning_rate,
           lambda_param and grad_clip from it.
    infer : When True the graph is built for sampling new trajectories.
            NOTE: in that case args.batch_size and args.seq_length are
            overwritten with 1 on the object passed in by the caller.
    '''
    if infer:
        # Sampling mode processes one position at a time.
        args.batch_size = 1
        args.seq_length = 1

    # Keep the configuration around for the other methods.
    self.args = args
    self.infer = infer
    self.rnn_size = args.rnn_size
    self.grid_size = args.grid_size
    # Maximum number of pedestrians handled per frame.
    self.maxNumPeds = args.maxNumPeds

    def frames_of(tensor):
        # Cut `tensor` along the time axis into args.seq_length pieces and
        # drop the (now size-1) time axis from every piece.
        return [
            tf.squeeze(piece, [0])
            for piece in tf.split(tensor, args.seq_length, 0)
        ]

    # NOTE: batch_size is assumed to be 1, i.e. the model input is always a
    # single sequence of frames.
    # The recurrent unit shared by all pedestrians.
    with tf.name_scope("LSTM_cell"):
        cell = rnn_cell.BasicLSTMCell(args.rnn_size, state_is_tuple=False)

    # Input frames: for every frame, up to maxNumPeds rows of (pedID, x, y).
    self.input_data = tf.placeholder(
        tf.float32, [args.seq_length, args.maxNumPeds, 3],
        name="input_data")
    # Targets use the same layout as the input, one time-step ahead.
    self.target_data = tf.placeholder(
        tf.float32, [args.seq_length, args.maxNumPeds, 3],
        name="target_data")
    # Per frame and per pedestrian: a mask over the grid cells around that
    # pedestrian, for every other pedestrian.
    self.grid_data = tf.placeholder(
        tf.float32,
        [args.seq_length, args.maxNumPeds, args.maxNumPeds,
         args.grid_size * args.grid_size],
        name="grid_data")

    # Learning rate lives in a non-trainable variable.
    self.lr = tf.Variable(args.learning_rate, trainable=False,
                          name="learning_rate")

    # Number of values produced by the output layer per pedestrian.
    self.output_size = 5

    # Creates the embedding and output layer parameters used below
    # (self.embedding_w, self.embedding_t_w, self.output_w, ...).
    self.define_embedding_and_output_layers(args)

    # One LSTM state row per pedestrian, all zeros to start with.
    with tf.variable_scope("LSTM_states"):
        self.LSTM_states = tf.zeros(
            [args.maxNumPeds, cell.state_size], name="LSTM_states")
        self.initial_states = tf.split(
            self.LSTM_states, args.maxNumPeds, 0)

    # One hidden output row per pedestrian, all zeros to start with.
    with tf.variable_scope("Hidden_states"):
        zero_hidden = tf.zeros([args.maxNumPeds, cell.output_size])
        self.output_states = tf.split(zero_hidden, args.maxNumPeds, 0)

    # Per-frame views of the three placeholders.
    with tf.name_scope("frame_data_tensors"):
        frame_data = frames_of(self.input_data)          # each MNP x 3

    with tf.name_scope("frame_target_data_tensors"):
        frame_target_data = frames_of(self.target_data)  # each MNP x 3

    with tf.name_scope("grid_frame_data_tensors"):
        # each MNP x MNP x (GS**2)
        grid_frame_data = frames_of(self.grid_data)

    # Running cost, number of contributing pedestrians, and the step used to
    # advance that counter.
    with tf.name_scope("Cost_related_stuff"):
        self.cost = tf.constant(0.0, name="cost")
        self.counter = tf.constant(0.0, name="counter")
        self.increment = tf.constant(1.0, name="increment")

    # Per-pedestrian containers for the output distribution parameters.
    with tf.name_scope("Distribution_parameters_stuff"):
        zero_output = tf.zeros([args.maxNumPeds, self.output_size])
        self.initial_output = tf.split(zero_output, args.maxNumPeds, 0)

    # A pedID equal to this constant marks an empty pedestrian slot.
    with tf.name_scope("Non_existent_ped_stuff"):
        nonexistent_ped = tf.constant(0.0, name="zero_ped")

    # Unroll the model over every frame and every pedestrian slot.
    for seq, frame in enumerate(frame_data):
        print("Frame number", seq)
        current_frame_data = frame                      # MNP x 3
        current_grid_frame_data = grid_frame_data[seq]  # MNP x MNP x (GS**2)
        # Built from the hidden states as they are before this frame's
        # pedestrians are processed.
        social_tensor = self.getSocialTensor(
            current_grid_frame_data, self.output_states)

        for ped in range(args.maxNumPeds):
            print("Pedestrian Number", ped)
            # pedID of the current pedestrian
            pedID = current_frame_data[ped, 0]

            with tf.name_scope("extract_input_ped"):
                # (1, 2) tensor holding x and y of the current pedestrian
                self.spatial_input = tf.slice(
                    current_frame_data, [ped, 1], [1, 2])
                # (1, g*g*r) social tensor row of the current pedestrian
                social_width = (args.grid_size * args.grid_size
                                * args.rnn_size)
                self.tensor_input = tf.slice(
                    social_tensor, [ped, 0], [1, social_width])

            with tf.name_scope("embeddings_operations"):
                spatial_linear = tf.nn.xw_plus_b(
                    self.spatial_input, self.embedding_w, self.embedding_b)
                embedded_spatial_input = tf.nn.relu(spatial_linear)
                social_linear = tf.nn.xw_plus_b(
                    self.tensor_input, self.embedding_t_w,
                    self.embedding_t_b)
                embedded_tensor_input = tf.nn.relu(social_linear)

            with tf.name_scope("concatenate_embeddings"):
                complete_input = tf.concat(
                    [embedded_spatial_input, embedded_tensor_input], 1)

            # One LSTM step; every call after the very first one reuses the
            # cell variables created by that first call.
            with tf.variable_scope("LSTM") as scope:
                if not (seq == 0 and ped == 0):
                    scope.reuse_variables()
                hidden, state = cell(complete_input,
                                     self.initial_states[ped])
                self.output_states[ped] = hidden
                self.initial_states[ped] = state

            # Linear layer producing a (1, output_size) tensor.
            with tf.name_scope("output_linear_layer"):
                self.initial_output[ped] = tf.nn.xw_plus_b(
                    self.output_states[ped], self.output_w, self.output_b)

            with tf.name_scope("extract_target_ped"):
                # x_data and y_data are (1, 1) tensors
                target_xy = tf.slice(
                    frame_target_data[seq], [ped, 1], [1, 2])
                [x_data, y_data] = tf.split(target_xy, 2, 1)
                target_pedID = frame_target_data[seq][ped, 0]

            with tf.name_scope("get_coef"):
                [o_mux, o_muy, o_sx, o_sy, o_corr] = self.get_coef(
                    self.initial_output[ped])

            with tf.name_scope("calculate_loss"):
                lossfunc = self.get_lossfunc(
                    o_mux, o_muy, o_sx, o_sy, o_corr, x_data, y_data)

            with tf.name_scope("increment_cost"):
                # A slot that is empty now, or empty in the target frame,
                # must not contribute to the cost or to the counter.
                ped_is_missing = tf.logical_or(
                    tf.equal(pedID, nonexistent_ped),
                    tf.equal(target_pedID, nonexistent_ped))
                self.cost = tf.where(
                    ped_is_missing,
                    self.cost,
                    tf.add(self.cost, lossfunc))
                self.counter = tf.where(
                    ped_is_missing,
                    self.counter,
                    tf.add(self.counter, self.increment))

    with tf.name_scope("mean_cost"):
        # Average over the pedestrians that contributed.
        self.cost = tf.div(self.cost, self.counter)

    # L2 penalty over every trainable variable.
    tvars = tf.trainable_variables()
    l2 = args.lambda_param * sum(tf.nn.l2_loss(tvar) for tvar in tvars)
    self.cost = self.cost + l2

    # Final LSTM states and final distribution parameters.
    self.final_states = tf.concat(self.initial_states, 0)
    self.final_output = self.initial_output

    # Gradients of the cost, clipped by global norm.
    self.gradients = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(self.gradients, args.grad_clip)

    # RMSProp update using the clipped gradients.
    optimizer = tf.train.RMSPropOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
# Reshape both splits to (samples, TIME_STEPS, 1), matching the shape of the
# input placeholder defined below.
X_train = np.reshape(X_train, (-1, TIME_STEPS, 1))
X_test = np.reshape(X_test, (-1, TIME_STEPS, 1))

# Plot column 0 of the first 1000 test targets as red stars.
plt.plot(range(1000), y_test[:1000, 0], 'r*')

graph = tf.Graph()
with graph.as_default():
    # Inputs and regression targets.
    X_p = tf.placeholder(tf.float32, shape=(None, TIME_STEPS, 1),
                         name='input_placeholder')
    y_p = tf.placeholder(tf.float32, shape=(None, 1),
                         name='pred_placeholder')

    # Single LSTM layer; its zero state is built for BATCH_SIZE rows.
    lstm_cell = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    init_state = lstm_cell.zero_state(batch_size=BATCH_SIZE,
                                      dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=X_p,
        initial_state=init_state,
        dtype=tf.float32,
    )

    # The output of the last time step is used as the prediction.
    h = outputs[:, -1, :]
    mse = tf.losses.mean_squared_error(labels=y_p, predictions=h)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss=mse)
    init = tf.global_variables_initializer()

with tf.Session(graph=graph) as sess:
    sess.run(init)
def __init__(self, num_units, *args, **kwargs):
    """Initialise the parent, remember `num_units` and create the cell.

    Extra positional and keyword arguments are forwarded unchanged to the
    parent constructor. A `BasicLSTMCell` sized by `self.num_units` is then
    handed to `self.l` under the key 'cell'.
    """
    super(BasicLSTM, self).__init__(*args, **kwargs)
    self.num_units = num_units
    cell = rnn.BasicLSTMCell(self.num_units)
    self.l('cell', cell)
def network(inputs, shapes, num_tags, lstm_dim=100,
            initializer=tf.truncated_normal_initializer()):
    """
    Compute the per-token tag scores of the network for one padded batch.

    Every feature in `shapes` is embedded, the embeddings are concatenated
    and passed through two stacked bidirectional LSTM layers followed by two
    projection layers.

    :param inputs: mapping from feature name to a tensor of ids. The first
        feature (in `shapes` order) is also used to derive the sentence
        lengths: ids are counted as real tokens when non-zero, so id 0 is
        presumably the PAD id -- confirm against the data pipeline.
    :param shapes: mapping from feature name to the shape of that feature's
        embedding table.
    :param num_tags: number of output tags (size of the last projection).
    :param lstm_dim: number of units of every LSTM cell.
    :param initializer: initializer for the embedding tables and projection
        weights. NOTE: the default object is created once, when the function
        is defined.
    :return: tuple `(output, lengths)`; `output` is reshaped to
        [-1, time, num_tags] and `lengths` holds the number of non-zero ids
        per row of the first feature.
    """
    keys = list(shapes.keys())

    # ---- feature embedding: look up every feature and concatenate ----------
    embedding = []
    for key in keys:
        with tf.variable_scope(key + '_embedding'):
            lookup = tf.get_variable(
                name=key + '_embedding',
                shape=shapes[key],
                initializer=initializer,
            )
            embedding.append(tf.nn.embedding_lookup(lookup, inputs[key]))
    # Concatenate on the last axis so each token is one feature vector.
    embed = tf.concat(embedding, axis=-1)

    # sign(|id|) is 1 for every non-zero id and 0 otherwise; summing along
    # axis 1 gives the un-padded length of every sentence.
    sign = tf.sign(tf.abs(inputs[keys[0]]))
    lengths = tf.reduce_sum(sign, axis=1)
    # Padded sequence length of the batch.
    num_time = tf.shape(inputs[keys[0]])[1]

    # ---- two stacked bidirectional LSTM layers -----------------------------
    layer_input = embed
    for layer_scope in ('BiLstm_layer1', 'BiLstm_layer2'):
        with tf.variable_scope(layer_scope):
            lstm_cell = {}
            for name in ('forward', 'backward'):
                with tf.variable_scope(name):
                    lstm_cell[name] = rnn.BasicLSTMCell(lstm_dim)
            # The final states are not used, only the per-step outputs.
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                lstm_cell['forward'],
                lstm_cell['backward'],
                layer_input,
                dtype=tf.float32,
                sequence_length=lengths,
            )
        # Forward and backward outputs side by side: [..., 2 * lstm_dim].
        layer_input = tf.concat(outputs, axis=-1)

    # ---- projection layers -------------------------------------------------
    # Flatten batch and time so the projections are plain matrix products.
    output = tf.reshape(layer_input, [-1, 2 * lstm_dim])

    with tf.variable_scope('project_layer1'):
        w = tf.get_variable(
            name='w',
            shape=[2 * lstm_dim, lstm_dim],
            initializer=initializer,
        )
        b = tf.get_variable(
            name='b',
            shape=[lstm_dim],
            initializer=tf.zeros_initializer
        )
        output = tf.nn.relu(tf.matmul(output, w) + b)

    with tf.variable_scope('project_layer2'):
        w = tf.get_variable(
            name='w',
            shape=[lstm_dim, num_tags],
            initializer=initializer,
        )
        b = tf.get_variable(
            name='b',
            shape=[num_tags],
            initializer=tf.zeros_initializer
        )
        output = tf.matmul(output, w) + b

    # Restore the time axis: [-1, num_time, num_tags].
    output = tf.reshape(output, [-1, num_time, num_tags])
    return output, lengths
x_data = np.array([[h, e, l, l, o]], dtype=np.float32) print(x_data.shape) pp.pprint(x_data) outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32) sess.run(tf.global_variables_initializer()) pp.pprint(outputs.eval()) with tf.variable_scope('3_batches') as scope: # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3 # 3 batches 'hello', 'eolll', 'lleel' x_data = np.array([[h, e, l, l, o], [e, o, l, l, l], [l, l, e, e, l]], dtype=np.float32) pp.pprint(x_data) hidden_size = 2 cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True) outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32) sess.run(tf.global_variables_initializer()) pp.pprint(outputs.eval()) with tf.variable_scope('3_batches_dynamic_length') as scope: # One cell RNN input_dim (4) -> output_dim (5). sequence: 5, batch 3 # 3 batches 'hello', 'eolll', 'lleel' x_data = np.array([[h, e, l, l, o], [e, o, l, l, l], [l, l, e, e, l]], dtype=np.float32) pp.pprint(x_data) hidden_size = 2 cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True) outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
} with slim.arg_scope([slim.fully_connected], activation_fn=None, # normalizer_fn=slim.batch_norm, weights_initializer=\ tf.truncated_normal_initializer(stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005)): feat = slim.fully_connected(tf.reshape(x,[batch_size*n_steps,n_input]), 128) feat = tf.reshape(feat,[batch_size,n_steps,128]) # x is B x n_steps x 6 t = tf.unstack(feat, n_steps, axis=1) # t is n_steps different features, that are each B x 6 # Define a lstm cell with tensorflow lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0) # lstm_cell = rnn.DropoutWrapper(lstm_cell, input_keep_prob=dropout) # Get lstm cell output outputs, states = rnn.static_rnn(lstm_cell, t, dtype=tf.float32) # Linear activation, using rnn inner loop last output #with slim.arg_scope([slim.fully_connected], # activation_fn=None, # normalizer_fn=slim.batch_norm, # weights_initializer=\ # tf.truncated_normal_initializer(stddev=0.01), # weights_regularizer=slim.l2_regularizer(0.0005)): # pred = slim.fully_connected(outputs[-1], n_output) pred= tf.matmul(outputs[-1], weights['out']) + biases['out'] (d_roll, d_pitch, d_yaw)=quat_to_euler(pred[:,3:7]) d_x=pred[:,0]
def fit(self, X_train, y_train, len_train, pos_train, length_train, position_train,
        X_validation, y_validation, len_validation, pos_validation, length_validation,
        position_validation, name, print_log=True):
    """Build the two-level (PW -> PPH) BiLSTM tagging graph and train it.

    The PW level tags every token from the word embedding plus the one-hot
    pos / length / position features; the PPH level gets the same features
    plus the one-hot PW prediction. The summed loss of both levels is
    minimised with Adam under an exponentially decayed learning rate.

    After every epoch the validation set is evaluated, the predictions on
    it are written with `util.recover2`, and the model is checkpointed under
    "./models/<name>/bilstm" whenever the validation loss improves.

    Args:
        X_train, X_validation: token ids; `X_train` must expose `.shape[0]`
            and support slicing.
        y_train, y_validation: indexable; item 0 holds the PW labels and
            item 1 the PPH labels.
        len_train, len_validation: sentence lengths, fed to `seq_len_p`.
        pos_train, pos_validation: ids fed to `pos_p`.
        length_train, length_validation: ids fed to `length_p`.
        position_train, position_validation: ids fed to `position_p`.
        name: sub-directory of "./models/" that receives the checkpoints.
        print_log: accepted for interface compatibility; not used here.

    Notes:
        * Training samples beyond the last full mini-batch are not used.
        * `self.session` is used as a context manager, so it is closed when
          training ends.
        * The IPH level is currently disabled and therefore not built.
    """
    y_train_pw, y_train_pph = y_train[0], y_train[1]
    y_validation_pw, y_validation_pph = y_validation[0], y_validation[1]

    def make_stacked_cell():
        # Two stacked LSTM layers wrapped with input/output dropout.
        stacked = rnn.MultiRNNCell(cells=[
            rnn.BasicLSTMCell(num_units=self.hidden_units_num),
            rnn.BasicLSTMCell(num_units=self.hidden_units_num2),
        ])
        return rnn.DropoutWrapper(
            cell=stacked,
            input_keep_prob=self.input_keep_prob_p,
            output_keep_prob=self.output_keep_prob_p
        )

    def build_level(inputs, labels_masked, level, l2_scale):
        # Build the BiLSTM encoder, projection, predictions and loss of one
        # tagging level; `level` is the suffix used in all graph names.
        cell_fw = make_stacked_cell()
        cell_bw = make_stacked_cell()
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=self.seq_len_p,
            dtype=tf.float32,
            scope=level
        )
        # Each direction emits the output of the LAST stacked layer, so the
        # concatenated feature size is 2 * hidden_units_num2 (the previous
        # code used hidden_units_num, which is only right when both layer
        # sizes are equal).
        feature_size = self.hidden_units_num2 * 2
        # [batch_size, max_time, feature_size]
        hidden = tf.concat(values=[outputs[0], outputs[1]], axis=2)
        # [batch_size * max_time, feature_size]
        hidden = tf.reshape(tensor=hidden, shape=(-1, feature_size), name="h_" + level)
        # dropout in front of the fully connected layer
        dropped = tf.nn.dropout(x=hidden, keep_prob=self.keep_prob_p, name="dropout_h_" + level)

        # fully connected projection to the classes
        weights = tf.Variable(
            initial_value=tf.random_normal(shape=(feature_size, self.class_num)),
            name="weights_" + level
        )
        bias = tf.Variable(
            initial_value=tf.random_normal(shape=(self.class_num,)),
            name="bias_" + level
        )
        # [batch_size * max_time, class_num]
        logits = tf.matmul(dropped, weights) + bias
        # [batch_size, max_time, class_num]
        logits_normal = tf.reshape(
            tensor=logits,
            shape=(-1, self.max_sentence_size, self.class_num),
            name="logits_normal_" + level
        )
        # padding positions removed: [sum(seq_len), class_num]
        logits_masked = tf.boolean_mask(
            tensor=logits_normal,
            mask=self.mask,
            name="logits_" + level + "_masked"
        )

        # [batch_size * max_time]
        pred = tf.cast(tf.argmax(logits, 1), tf.int32, name="pred_" + level)
        # [batch_size, max_time]
        pred_normal = tf.reshape(
            tensor=pred,
            shape=(-1, self.max_sentence_size),
            name="pred_normal_" + level
        )
        # padding positions removed: [sum(seq_len)]
        pred_masked = tf.boolean_mask(
            tensor=pred_normal,
            mask=self.mask,
            name="pred_" + level + "_masked"
        )
        # [batch_size, max_time, class_num]
        pred_one_hot = tf.one_hot(
            indices=pred_normal,
            depth=self.class_num,
            name="pred_normal_one_hot_" + level
        )

        # cross entropy on the real tokens plus L2 on the projection weights
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=labels_masked,
            logits=logits_masked
        ) + tf.contrib.layers.l2_regularizer(l2_scale)(weights)
        return hidden, pred, pred_masked, pred_one_hot, loss

    # ---------------------------------------define graph---------------------------------------------#
    with self.graph.as_default():
        sentence_shape = (None, self.max_sentence_size)
        # token ids
        self.X_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="input_placeholder")
        # pos info
        self.pos_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="pos_placeholder")
        # length info
        self.length_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="length_placeholder")
        # position info
        # NOTE(review): this name duplicates length_p's. It is kept as is
        # because code that restores the graph by tensor name may rely on it.
        self.position_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="length_placeholder")
        # labels of both levels
        self.y_p_pw = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="label_placeholder_pw")
        self.y_p_pph = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="label_placeholder_pph")

        # dropout keep probabilities
        self.keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="keep_prob_p")
        self.input_keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="input_keep_prob_p")
        self.output_keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="output_keep_prob_p")

        # real length of every sequence
        self.seq_len_p = tf.placeholder(dtype=tf.int32, shape=(None,), name="seq_len")

        # mask that removes the padding positions
        self.mask = tf.sequence_mask(
            lengths=self.seq_len_p,
            maxlen=self.max_sentence_size,
            name="mask"
        )

        # labels with padding removed: [sum(seq_len)]
        y_p_pw_masked = tf.boolean_mask(tensor=self.y_p_pw, mask=self.mask, name="y_p_pw_masked")
        y_p_pph_masked = tf.boolean_mask(tensor=self.y_p_pph, mask=self.mask, name="y_p_pph_masked")

        # word embeddings loaded from file
        self.word_embeddings = tf.Variable(
            initial_value=util.readEmbeddings(file="../data/embeddings/word_vec.txt"),
            name="word_embeddings"
        )
        print("word_embeddings.shape", self.word_embeddings.shape)

        # pos one-hot
        self.pos_one_hot = tf.one_hot(indices=self.pos_p, depth=self.pos_num, name="pos_one_hot")
        print("shape of pos_one_hot:", self.pos_one_hot.shape)

        # length one-hot
        # NOTE(review): op name duplicates pos_one_hot's; kept for graph-name compatibility.
        self.length_one_hot = tf.one_hot(indices=self.length_p, depth=self.length_num, name="pos_one_hot")
        print("shape of length_one_hot:", self.length_one_hot.shape)

        # position one-hot
        # NOTE(review): op name duplicates pos_one_hot's; kept for graph-name compatibility.
        self.position_one_hot = tf.one_hot(
            indices=self.position_p, depth=self.max_sentence_size, name="pos_one_hot"
        )
        print("shape of position_one_hot:", self.position_one_hot.shape)

        # -------------------------------------PW-----------------------------------------------------
        # embedded inputs: [batch_size, max_time, embedding_size]
        inputs_pw = tf.nn.embedding_lookup(
            params=self.word_embeddings, ids=self.X_p, name="embeded_input_pw"
        )
        print("shape of inputs_pw:", inputs_pw.shape)
        # concat all information
        inputs_pw = tf.concat(
            values=[inputs_pw, self.pos_one_hot, self.length_one_hot, self.position_one_hot],
            axis=2,
            name="input_pw"
        )
        print("shape of cancated inputs_pw:", inputs_pw.shape)

        h_pw, pred_pw, pred_pw_masked, pred_normal_one_hot_pw, self.loss_pw = build_level(
            inputs_pw, y_p_pw_masked, "pw", self.lambda_pw
        )
        print("h_pw.shape", h_pw.shape)

        # ----------------------------------PPH--------------------------------------------------
        # embedded inputs: [batch_size, max_time, embedding_size]
        inputs_pph = tf.nn.embedding_lookup(
            params=self.word_embeddings, ids=self.X_p, name="embeded_input_pph"
        )
        print("shape of input_pph:", inputs_pph.shape)
        # concat all information, including the PW prediction
        inputs_pph = tf.concat(
            values=[inputs_pph, self.pos_one_hot, self.length_one_hot,
                    self.position_one_hot, pred_normal_one_hot_pw],
            axis=2,
            name="inputs_pph"
        )
        print("shape of input_pph:", inputs_pph.shape)

        _, pred_pph, pred_pph_masked, _, self.loss_pph = build_level(
            inputs_pph, y_p_pph_masked, "pph", self.lambda_pph
        )

        # learning rate decays once per pass over the training data
        global_step = tf.Variable(initial_value=1, trainable=False)
        start_learning_rate = self.learning_rate
        learning_rate = tf.train.exponential_decay(
            learning_rate=start_learning_rate,
            global_step=global_step,
            decay_steps=(X_train.shape[0] // self.batch_size) + 1,
            decay_rate=self.decay_rate,
            staircase=True,
            name="decay_learning_rate"
        )

        # total loss and optimizer
        self.loss = self.loss_pw + self.loss_pph
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
            self.loss, global_step
        )
        self.init_op = tf.global_variables_initializer()
        self.init_local_op = tf.local_variables_initializer()

    # ------------------------------------Session-----------------------------------------
    with self.session as sess:
        print("Training Start")
        sess.run(self.init_op)        # initialize all variables
        sess.run(self.init_local_op)

        train_Size = X_train.shape[0]
        # Lowest validation loss seen so far; any first result improves on it.
        self.best_validation_loss = float("inf")

        # One saver for the whole run: building a new Saver on every save
        # would keep adding save/restore ops to the graph.
        saver = tf.train.Saver()
        model_dir = "./models/" + name + "/bilstm"
        result_dir = "../result/bilstm_cbow/"

        # Feeds for the validation set, with dropout switched off.
        inference_feed = {
            self.X_p: X_validation,
            self.seq_len_p: len_validation,
            self.pos_p: pos_validation,
            self.length_p: length_validation,
            self.position_p: position_validation,
            self.keep_prob_p: 1.0,
            self.input_keep_prob_p: 1.0,
            self.output_keep_prob_p: 1.0
        }
        validation_feed = dict(inference_feed)
        validation_feed[self.y_p_pw] = y_validation_pw
        validation_feed[self.y_p_pph] = y_validation_pph

        for epoch in range(1, self.max_epoch + 1):
            print("Epoch:", epoch)
            start_time = time.time()  # time evaluation

            # per mini-batch training loss / accuracy / per-class F1
            self.train_losses = []
            self.train_accus_pw = []
            self.train_accus_pph = []
            self.c1_f_pw = []
            self.c2_f_pw = []
            self.c1_f_pph = []
            self.c2_f_pph = []
            lrs = []

            # mini batches (only full ones)
            for i in range(train_Size // self.batch_size):
                batch = slice(i * self.batch_size, (i + 1) * self.batch_size)
                # NOTE: labels and predictions fetched here are already masked
                _, train_loss, y_train_pw_masked, y_train_pph_masked, \
                    train_pred_pw, train_pred_pph, lr = sess.run(
                        fetches=[self.optimizer, self.loss,
                                 y_p_pw_masked, y_p_pph_masked,
                                 pred_pw_masked, pred_pph_masked, learning_rate],
                        feed_dict={
                            self.X_p: X_train[batch],
                            self.y_p_pw: y_train_pw[batch],
                            self.y_p_pph: y_train_pph[batch],
                            self.seq_len_p: len_train[batch],
                            self.pos_p: pos_train[batch],
                            self.length_p: length_train[batch],
                            self.position_p: position_train[batch],
                            self.keep_prob_p: self.keep_prob,
                            self.input_keep_prob_p: self.input_keep_prob,
                            self.output_keep_prob_p: self.output_keep_prob
                        }
                    )
                lrs.append(lr)
                self.train_losses.append(train_loss)

                # metrics
                accuracy_pw, f1_pw = util.eval(y_true=y_train_pw_masked, y_pred=train_pred_pw)
                accuracy_pph, f1_pph = util.eval(y_true=y_train_pph_masked, y_pred=train_pred_pph)
                self.train_accus_pw.append(accuracy_pw)
                self.train_accus_pph.append(accuracy_pph)
                # F1-score of each class
                self.c1_f_pw.append(f1_pw[0])
                self.c2_f_pw.append(f1_pw[1])
                self.c1_f_pph.append(f1_pph[0])
                self.c2_f_pph.append(f1_pph[1])

            # No mini-batch ran when the training set is smaller than one
            # batch; skip the average instead of dividing by zero.
            if lrs:
                print("learning rate:", sum(lrs) / len(lrs))

            # validation in every epoch
            self.validation_loss, y_valid_pw_masked, y_valid_pph_masked, \
                valid_pred_pw, valid_pred_pph = sess.run(
                    fetches=[self.loss, y_p_pw_masked, y_p_pph_masked,
                             pred_pw_masked, pred_pph_masked],
                    feed_dict=validation_feed
                )

            # metrics
            self.valid_accuracy_pw, self.valid_f1_pw = util.eval(
                y_true=y_valid_pw_masked, y_pred=valid_pred_pw
            )
            self.valid_accuracy_pph, self.valid_f1_pph = util.eval(
                y_true=y_valid_pph_masked, y_pred=valid_pred_pph
            )
            print("Epoch ", epoch, " finished.", "spend ",
                  round((time.time() - start_time) / 60, 2), " mins")
            self.showInfo(type="training")
            self.showInfo(type="validation")

            # Store the model whenever the validation loss improves. (The
            # comparison used to be inverted, so a checkpoint was written
            # only when the loss got worse than the best value.)
            if self.validation_loss < self.best_validation_loss:
                self.best_validation_loss = self.validation_loss
                print("New Best loss ", self.best_validation_loss, " On Validation set! ")
                print("Saving Models......\n\n")
                # create ./models/<name>/bilstm including missing parents
                os.makedirs(model_dir, exist_ok=True)
                saver.save(sess, model_dir + "/my-model-10000")
                # Generates MetaGraphDef.
                saver.export_meta_graph(model_dir + "/my-model-10000.meta")
            print("\n\n")

            # Unmasked predictions on the validation set, used to rebuild
            # the annotated text of this epoch.
            test_pred_pw, test_pred_pph = sess.run(
                fetches=[pred_pw, pred_pph],
                feed_dict=inference_feed
            )
            os.makedirs(result_dir, exist_ok=True)
            # predictions have shape [corpus_size * time_steps]
            util.recover2(
                X=X_validation,
                preds_pw=test_pred_pw,
                preds_pph=test_pred_pph,
                filename="../result/bilstm_cbow/recover_epoch_" + str(epoch) + ".txt"
            )
def lstm_cell():
    """Return a fresh ``BasicLSTMCell`` with ``hidden_size`` units.

    ``hidden_size`` is read from the enclosing scope. The cell reports its
    state as a tuple (``state_is_tuple=True``).
    """
    return rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
def lstm_cells(layers):
    """Create one ``BasicLSTMCell`` per entry of ``layers``.

    Args:
        layers: Iterable of mappings; each one's ``'steps'`` value is passed
            to ``BasicLSTMCell`` as the unit count.

    Returns:
        A list of cells in the same order as ``layers``.
    """
    cells = []
    for spec in layers:
        cells.append(rnn.BasicLSTMCell(spec['steps']))
    return cells
def build_model(self):
    """Build the two-branch (image CNN + text Bi-LSTM) classification graph.

    Image branch: three 3x3 convolutions, each followed by ReLU and a 2x2
    max-pool (dropout after the second and third), averaged over axes 1 and 2
    and projected to 128 tanh units.
    Text branch: a bidirectional LSTM over ``self.x_emb``; the final hidden
    states of both directions are concatenated and projected to 128 tanh
    units.
    Both 128-wide vectors are concatenated and fed to a softmax layer.

    Sets ``self.con1``/``con2``/``con3``, appends conv1 and conv3 to
    ``self.layers``, and sets ``self.genre_prob`` and ``self.y_pred``.
    """
    window = [1, 2, 2, 1]

    def relu_then_pool(feature_map):
        # ReLU followed by a 2x2, stride-2, VALID max-pool.
        activated = tf.nn.relu(feature_map)
        return tf.nn.max_pool(activated, ksize=window, strides=window,
                              padding='VALID')

    def dense_tanh_128(tensor):
        # 128-unit tanh projection with L2 weight regularisation.
        return tf.contrib.layers.fully_connected(
            tensor, 128, activation_fn=tf.nn.tanh,
            weights_regularizer=tf.contrib.layers.l2_regularizer(scale=0.2))

    # ---- image branch -------------------------------------------------
    stage1 = relu_then_pool(self.conv(name='c1', inputs=self.x_image,
                                      shape=[3, 3, 3, 64], s=1,
                                      padding='SAME'))
    self.con1 = stage1
    self.layers.append(stage1)

    stage2 = relu_then_pool(self.conv(name='c2', inputs=stage1,
                                      shape=[3, 3, 64, 128], s=1,
                                      padding='SAME'))
    stage2 = tf.nn.dropout(stage2, self.keep_prob)
    self.con2 = stage2  # note: stage 2 is not appended to self.layers

    stage3 = relu_then_pool(self.conv(name='c3', inputs=stage2,
                                      shape=[3, 3, 128, 256], s=1,
                                      padding='SAME'))
    stage3 = tf.nn.dropout(stage3, self.keep_prob)
    self.con3 = stage3
    self.layers.append(stage3)

    # Collapse axes 1 and 2 of the last stored feature map, then project.
    image_summary = tf.reduce_mean(self.layers[-1], [1, 2])
    fc_l1 = dense_tanh_128(image_summary)

    # ---- text branch --------------------------------------------------
    # (An earlier variant that tiled the conv3 features onto every LSTM
    # input step was disabled in the original and is not built here.)
    fw_cell = rnn.DropoutWrapper(rnn.BasicLSTMCell(self.hidden_dim),
                                 output_keep_prob=self.keep_prob)
    bw_cell = rnn.DropoutWrapper(rnn.BasicLSTMCell(self.hidden_dim),
                                 output_keep_prob=self.keep_prob)
    _, final_states = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, inputs=self.x_emb, sequence_length=self.x_len,
        dtype=tf.float32)
    fw_state, bw_state = final_states

    # Final hidden state of each direction, side by side.
    text_vec = tf.concat([fw_state.h, bw_state.h], 1)
    lstm_out = dense_tanh_128(text_vec)

    # ---- merge CNN and RNN results and classify ------------------------
    final_layer = tf.concat([fc_l1, lstm_out], axis=1)
    self.genre_prob = tf.contrib.layers.fully_connected(
        final_layer, self.class_size, activation_fn=tf.nn.softmax)
    self.y_pred = tf.to_int32(tf.argmax(self.genre_prob, 1))
def __init__(self, sequence_length, num_classes, vocab_size, lstm_hidden_size,
             fc_hidden_size, embedding_size, embedding_type, filter_sizes,
             num_filters, l2_reg_lambda=0.0, pretrained_embedding=None):
    """Build the CNN -> Bi-LSTM -> FC -> highway multi-label classifier graph.

    Args:
        sequence_length: Number of token ids per input row.
        num_classes: Width of the label and logit vectors.
        vocab_size: Row count of the randomly initialised embedding table.
        lstm_hidden_size: Units in each direction of the Bi-LSTM.
        fc_hidden_size: Units in the fully connected layer.
        embedding_size: Width of one embedding vector.
        embedding_type: With a pretrained table, 0 stores it as a constant
            and 1 as a trainable variable.
        filter_sizes: Convolution heights; one conv/pool branch is built
            per entry.
        num_filters: Output channels of every convolution branch.
        l2_reg_lambda: Multiplier for the summed L2 loss of all trainable
            variables.
        pretrained_embedding: Optional embedding table. When None, a table
            drawn uniformly between -1.0 and 1.0 is created.
    """
    # Graph inputs.
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")
    self.is_training = tf.placeholder(tf.bool, name="is_training")

    self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

    # Embedding lookup, pinned to the CPU.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        if pretrained_embedding is None:
            random_table = tf.random_uniform([vocab_size, embedding_size],
                                             -1.0, 1.0, dtype=tf.float32)
            self.embedding = tf.Variable(random_table, trainable=True,
                                         name="embedding")
        elif embedding_type == 0:
            self.embedding = tf.constant(pretrained_embedding,
                                         dtype=tf.float32, name="embedding")
        elif embedding_type == 1:
            self.embedding = tf.Variable(pretrained_embedding, trainable=True,
                                         dtype=tf.float32, name="embedding")
        self.embedded_sentence = tf.nn.embedding_lookup(self.embedding,
                                                        self.input_x)
        # Trailing channel axis so conv2d can consume the embeddings.
        self.embedded_sentence_expanded = tf.expand_dims(
            self.embedded_sentence, -1)

    # One convolution + batch-norm + ReLU + max-pool branch per filter size.
    pooled_outputs = []
    for filter_size in filter_sizes:
        with tf.name_scope("conv-filter{0}".format(filter_size)):
            kernel_shape = [filter_size, embedding_size, 1, num_filters]
            kernel = tf.Variable(
                tf.truncated_normal(shape=kernel_shape, stddev=0.1,
                                    dtype=tf.float32),
                name="W")
            kernel_bias = tf.Variable(
                tf.constant(0.1, shape=[num_filters], dtype=tf.float32),
                name="b")
            conv = tf.nn.conv2d(self.embedded_sentence_expanded, kernel,
                                strides=[1, 1, 1, 1], padding="VALID",
                                name="conv")
            conv = tf.nn.bias_add(conv, kernel_bias)
            conv_bn = tf.layers.batch_normalization(
                conv, training=self.is_training)
            conv_out = tf.nn.relu(conv_bn, name="relu")

        with tf.name_scope("pool-filter{0}".format(filter_size)):
            # The pooling window spans every position the VALID conv emits.
            pool_height = sequence_length - filter_size + 1
            pooled = tf.nn.max_pool(conv_out,
                                    ksize=[1, pool_height, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding="VALID", name="pool")

        pooled_outputs.append(pooled)

    # Merge the branches along the channel axis and flatten to one step.
    num_filters_total = num_filters * len(filter_sizes)
    self.pool = tf.concat(pooled_outputs, 3)
    self.pool_flat = tf.reshape(self.pool, [-1, 1, num_filters_total])
    self.pool_flat = tf.nn.dropout(self.pool_flat, self.dropout_keep_prob)

    # Bidirectional LSTM over the (length-1) pooled sequence.
    with tf.name_scope("Bi-lstm"):
        lstm_fw_cell = rnn.BasicLSTMCell(lstm_hidden_size)
        lstm_bw_cell = rnn.BasicLSTMCell(lstm_hidden_size)
        if self.dropout_keep_prob is not None:
            lstm_fw_cell = rnn.DropoutWrapper(
                lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
            lstm_bw_cell = rnn.DropoutWrapper(
                lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

        # `outputs` is the pair (outputs_fw, outputs_bw).
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            lstm_fw_cell, lstm_bw_cell, self.pool_flat, dtype=tf.float32)

        # Join both directions on the feature axis, then average over time.
        self.lstm_concat = tf.concat(outputs, axis=2)
        self.lstm_out = tf.reduce_mean(self.lstm_concat, axis=1)

    # Fully connected layer with batch-norm and ReLU.
    with tf.name_scope("fc"):
        fc_weights = tf.Variable(
            tf.truncated_normal(shape=[lstm_hidden_size * 2, fc_hidden_size],
                                stddev=0.1, dtype=tf.float32),
            name="W")
        fc_bias = tf.Variable(
            tf.constant(0.1, shape=[fc_hidden_size], dtype=tf.float32),
            name="b")
        self.fc = tf.nn.xw_plus_b(self.lstm_out, fc_weights, fc_bias)
        self.fc_bn = tf.layers.batch_normalization(
            self.fc, training=self.is_training)
        self.fc_out = tf.nn.relu(self.fc_bn, name="relu")

    # Single highway layer at the same width as the FC output.
    self.highway = highway(self.fc_out, self.fc_out.get_shape()[1],
                           num_layers=1, bias=0, scope="Highway")

    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

    # Per-class logits and their sigmoid scores.
    with tf.name_scope("output"):
        out_weights = tf.Variable(
            tf.truncated_normal(shape=[fc_hidden_size, num_classes],
                                stddev=0.1, dtype=tf.float32),
            name="W")
        out_bias = tf.Variable(
            tf.constant(0.1, shape=[num_classes], dtype=tf.float32),
            name="b")
        self.logits = tf.nn.xw_plus_b(self.h_drop, out_weights, out_bias,
                                      name="logits")
        self.scores = tf.sigmoid(self.logits, name="scores")

    # Batch mean of the per-example summed sigmoid cross-entropy, plus the
    # scaled L2 norm of every trainable variable.
    with tf.name_scope("loss"):
        per_label = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.input_y, logits=self.logits)
        data_loss = tf.reduce_mean(tf.reduce_sum(per_label, axis=1),
                                   name="sigmoid_losses")
        l2_terms = [tf.nn.l2_loss(tf.cast(v, tf.float32))
                    for v in tf.trainable_variables()]
        l2_losses = tf.add_n(l2_terms, name="l2_losses") * l2_reg_lambda
        self.loss = tf.add(data_loss, l2_losses, name="loss")
])), 'out': tf.Variable(tf.constant(0.1, shape=[ output_size, ])) } # todo:cell的输入 x_batch = tf.shape(X)[0] # x_time=tf.shape(X)[1] X_in = tf.reshape(X, [-1, input_size]) X_in = tf.matmul(X_in, weight['in']) + bias['in'] X_in = tf.reshape(X_in, [x_batch, -1, hidden_size]) # 构造RNN隐藏层LSTM层 lstm_cell = rnn.BasicLSTMCell(hidden_size) lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=drop_rate) lstm_cell = rnn.MultiRNNCell([lstm_cell for _ in range(layer_size)]) init_state = lstm_cell.zero_state(batch_size=x_batch, dtype=tf.float32) # 初始化状态神经元 state = init_state outputs, states = tf.nn.dynamic_rnn(lstm_cell, inputs=X_in, initial_state=state, sequence_length=X_len) final_outputs = states[layer_size - 1][1] # 返回最后一层最后一个状态元组的第二个张量,作为输出 preds = tf.matmul(final_outputs, weight['out']) + bias['out'] probs = tf.sigmoid(preds) """
def lstm():
    """Create a tuple-state ``BasicLSTMCell`` with zero forget bias.

    The unit count is ``self.hidden_size``, where ``self`` comes from the
    enclosing scope.
    """
    cell = rnn.BasicLSTMCell(self.hidden_size,
                             forget_bias=0.0,
                             state_is_tuple=True)
    return cell
def RNN(x, weights, biases, n_steps, n_layers, n_hidden, outputFrame):
    """Run a stacked LSTM over ``x`` and linearly project one chosen step.

    Args:
        x: Input tensor; it is unstacked into ``n_steps`` tensors along
            axis 1.
        weights: Mapping whose ``'out'`` entry is the projection matrix.
        biases: Mapping whose ``'out'`` entry is the projection bias.
        n_steps: Number of slices to unstack along axis 1.
        n_layers: Number of stacked ``BasicLSTMCell`` layers.
        n_hidden: Units per LSTM layer.
        outputFrame: Index into the list of per-step outputs.

    Returns:
        ``outputs[outputFrame] @ weights['out'] + biases['out']``.
    """
    step_inputs = tf.unstack(x, n_steps, 1)

    layer_cells = []
    for _ in range(n_layers):
        layer_cells.append(rnn.BasicLSTMCell(n_hidden, forget_bias=0.0))
    stacked_cell = rnn.MultiRNNCell(layer_cells)

    # The final state returned by static_rnn is not needed here.
    step_outputs, _ = rnn.static_rnn(stacked_cell, step_inputs,
                                     dtype=tf.float32)
    chosen = step_outputs[outputFrame]
    return tf.matmul(chosen, weights['out']) + biases['out']
_batch_size = tf.shape(video)[0] encode_img_w = tf.Variable( tf.random_uniform([image_dim, dim_hidden], -0.1, 0.1)) encode_img_b = tf.Variable(tf.zeros([dim_hidden])) v_flat = tf.reshape(video, [-1, image_dim]) image_feat = tf.matmul(v_flat, encode_img_w) + encode_img_b image_feat = tf.reshape(image_feat, [_batch_size, video_length, dim_hidden]) word_embedding = tf.Variable(tf.random_uniform([n_voca, dim_hidden], -0.1, 0.1)) embed_word_w = tf.Variable(tf.random_uniform([dim_hidden, n_voca], -0.1, 0.1)) embed_word_b = tf.Variable(tf.zeros([n_voca])) lstm = rnn.BasicLSTMCell(dim_hidden) """ encoding """ with tf.variable_scope('LSTM1'): output1, state1 = tf.nn.dynamic_rnn(lstm, image_feat, dtype=tf.float32) with tf.variable_scope('LSTM2'): padding = tf.zeros([_batch_size, video_length, dim_hidden], dtype=tf.float32) rnn_input = tf.concat([padding, output1], 2) output2, state2 = tf.nn.dynamic_rnn(lstm, rnn_input, dtype=tf.float32) """ decoding """ with tf.variable_scope('LSTM1', reuse=True): padding = tf.zeros([_batch_size, max_caption_length, dim_hidden]) output1, state1 = tf.nn.dynamic_rnn(lstm, padding, dtype=tf.float32)
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

# Skeleton of an unrolled LSTM forward pass. `current_input`,
# `fully_connect`, `calc_loss` and `expected_output` are not defined in this
# snippet and must be supplied before it can run.
lstm_hidden_size = 2
lstm = rnn.BasicLSTMCell(lstm_hidden_size)

# Initial (all-zero) LSTM state.
# NOTE(review): `batch_size = []` looks like a placeholder value; confirm the
# intended batch size.
batch_size = []
# Keep the zero state: the loop below feeds `state` into the first cell call,
# so discarding this result left `state` undefined.
state = lstm.zero_state(batch_size, tf.float32)

# Accumulated loss over all time steps.
loss = 0
# Maximum sequence length (number of unrolled steps).
num_steps = 20

for i in range(num_steps):
    # From the second step on, reuse the variables created by the first call.
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    # One LSTM step: consumes the previous state and returns the new one.
    lstm_output, state = lstm(current_input, state)
    final_output = fully_connect(lstm_output)
    loss += calc_loss(final_output, expected_output)
#coding=utf-8 #简单LSTM 结构的RNN 的前向传播过程实现 import tensorflow as tf from tensorflow.contrib import rnn lstm_hidden_size=1 batch_size=21 num_steps=22 lstm_cell = rnn.BasicLSTMCell(lstm_hidden_size, forget_bias=0.0, state_is_tuple=True) lstm=rnn.BasicLSTMCell(lstm_hidden_size) state = lstm.zero_state(batch_size,tf.float32) loss=0.0 # for i in range(num_steps): # if i>0: # tf.get_variable_scope().reuse_variables() # lstm_output,state = lstm(current_input,state) # final_output = fully_connected(lstm_output) # loss+=calc_loss(final_output,expected_output)
#weights and biases of appropriate shape to accomplish above task out_weights=tf.Variable(tf.random_normal([num_units,n_classes])) out_bias=tf.Variable(tf.random_normal([n_classes])) #defining placeholders #input image placeholder x=tf.placeholder("float",[None,time_steps,n_input]) #input label placeholder y=tf.placeholder("float",[None,n_classes]) #processing the input tensor from [batch_size,n_steps,n_input] to "time_steps" number of [batch_size,n_input] tensors input=tf.unstack(x ,time_steps,1) #defining the network lstm_layer=rnn.BasicLSTMCell(num_units,forget_bias=1) outputs,_=rnn.static_rnn(lstm_layer,input,dtype="float32") #As we are concerned only with input of last time step, we will generate our prediction out of it. #converting last output of dimension [batch_size,num_units] to [batch_size,n_classes] by out_weight multiplication prediction=tf.matmul(outputs[-1],out_weights)+out_bias #loss_function loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y)) #optimization opt=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) #model evaluation correct_prediction=tf.equal(tf.argmax(prediction,1),tf.argmax(y,1)) accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
chr_2_idx[i] = len(chr_2_idx) for i, j in chr_2_idx.items(): idx_2_chr[j] = i return chr_2_idx, idx_2_chr chr_to_index, index_to_chr = most_common(vocabulary) print(chr_to_index) print(index_to_chr) input_x = tf.placeholder(tf.float32, [None, 4, 1]) input_y = tf.placeholder(tf.int32, [None, 1]) cell = rnn.BasicLSTMCell(num_units=len(chr_to_index), state_is_tuple=True) initial_cell = cell.zero_state(1, tf.float32) rnn_model, states = tf.nn.dynamic_rnn(cell, input_x, initial_state=initial_cell, dtype=tf.float32) x_for_fc = tf.reshape(rnn_model, [-1, len(chr_to_index)]) real_output = tf.contrib.layers.fully_connected(inputs=x_for_fc, num_outputs=len(chr_to_index), activation_fn=None) real_output = tf.reshape(real_output, [4, len(chr_to_index), 1]) weights = tf.ones([4, len(chr_to_index)]) sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=real_output, targets=input_y,
def LSTM(self):
    """Return a new ``BasicLSTMCell`` with ``self.hidden_units`` units.

    The forget-gate bias is set to 1.0.
    """
    unit_count = self.hidden_units
    return rnn.BasicLSTMCell(unit_count, forget_bias=1.0)
def train_test_neural_network():
    """Train an LSTM binary classifier and report metrics on a held-out split.

    The data returned by ``preProcess()`` is split 80/20 in order (no
    shuffling). Each of the ``n_features`` columns is fed to the LSTM as one
    time step; the last step's output is projected to a single logit.
    Training and evaluation share one session so the evaluation sees the
    trained weights.

    Prints the training loss every 5 iterations, then accuracy and
    macro-averaged F1, recall and precision on the held-out part.
    Returns nothing.
    """
    features, labels = preProcess()

    # Ordered 80/20 train/test split.
    split_frac1 = 0.8
    idx1 = int(len(features) * split_frac1)
    train_x, test_x = features[:idx1], features[idx1:]
    train_y, test_y = labels[:idx1], labels[idx1:]

    epochs = 8
    n_classes = 1
    n_units = 200
    n_features = 29
    batch_size = 35

    graph = tf.Graph()
    with graph.as_default():
        # The graph-level seed only affects ops created after it is set, so
        # it has to come before the random initialisers below.
        tf.set_random_seed(1)

        # Inputs: [batch, n_features] features and per-row labels.
        xplaceholder = tf.placeholder('float', [None, n_features])
        yplaceholder = tf.placeholder('float')

        # Randomly initialised output projection.
        layer = {'weights': tf.Variable(tf.random_normal([n_units, n_classes])),
                 'bias': tf.Variable(tf.random_normal([n_classes]))}

        # Split along the feature axis (axis 1) into n_features tensors of
        # [batch, 1]. Splitting axis 0 would cut the batch instead and fails
        # whenever the batch size is not a multiple of n_features.
        step_inputs = tf.split(xplaceholder, n_features, 1)
        lstm_cell = rnn.BasicLSTMCell(n_units)
        outputs, _ = rnn.static_rnn(lstm_cell, step_inputs, dtype=tf.float32)

        output = tf.matmul(outputs[-1], layer['weights']) + layer['bias']
        logit = tf.reshape(output, [-1])

        # reduce_mean yields a single tensor; it cannot be unpacked in two.
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logit,
                                                    labels=yplaceholder))
        optimizer = tf.train.AdamOptimizer().minimize(cost)
        predictions = tf.round(tf.nn.sigmoid(logit))
        init_op = tf.global_variables_initializer()

    prediction_val = []
    y_val = []

    with tf.Session(graph=graph) as sess:
        sess.run(init_op)

        # ----------------- training -----------------
        iteration = 1
        for e in range(epochs):
            batches = get_batches(np.array(train_x), np.array(train_y),
                                  batch_size)
            for batch_x, batch_y in batches:
                feed = {xplaceholder: batch_x, yplaceholder: batch_y}
                loss, _ = sess.run([cost, optimizer], feed_dict=feed)

                if iteration % 5 == 0:
                    print("Epoch: {}/{}".format(e, epochs),
                          "Iteration: {}".format(iteration),
                          "Train loss: {:.3f}".format(loss))
                iteration += 1

        # ----------------- testing -----------------
        # Evaluate in the same session: a new session would start from
        # uninitialised variables.
        print("starting testing set")
        batches = get_batches(np.array(test_x), np.array(test_y), batch_size)
        for batch_x, batch_y in batches:
            batch_pred = sess.run(predictions,
                                  feed_dict={xplaceholder: batch_x})
            batch_pred = batch_pred.astype(int)
            # `predictions` is rank 1 (one value per row), so pair each
            # prediction directly with its label.
            for predicted, expected in zip(batch_pred, batch_y):
                prediction_val.append(predicted)
                y_val.append(expected)

    accuracy = accuracy_score(y_val, prediction_val)
    f1 = f1_score(y_val, prediction_val, average='macro')
    recall = recall_score(y_true=y_val, y_pred=prediction_val, average='macro')
    precision = precision_score(y_val, prediction_val, average='macro')

    print("-----------------testing validation set-----------------------------------------")
    print("Test accuracy: {:.3f}".format(accuracy))
    print("F1 Score: {:.3f}".format(f1))
    print("Recall: {:.3f}".format(recall))
    print("Precision: {:.3f}".format(precision))
def __init__(self, feature_space, action_space):
    """Build the recurrent actor-critic graph (policy, value, loss, summaries).

    Frames go through ``build_shared_network``, then through one LSTM that
    treats the incoming rows as a single sequence (a fake batch axis of
    size 1 is added). Policy logits and a scalar value are computed from
    every LSTM output row.

    Args:
        feature_space: Number of LSTM units (and width of the LSTM output).
        action_space: Number of discrete actions.

    Attributes set: ``state``, ``state_init``, ``state_in``, ``state_out``,
    ``value_logits``, ``policy_logits``, ``probs``, ``log_probs``,
    ``action``, ``entropy``, ``advantage``, ``reward``, ``acs``,
    ``policy_loss``, ``value_fcn_loss``, ``loss``, ``optimizer``,
    ``grads_and_vars`` and ``summaries``. No training op is created here;
    ``grads_and_vars`` is exposed instead.
    """
    self.state = X = tf.placeholder(shape=[None, 84, 84, 1], dtype=tf.uint8,
                                    name="X")
    X = tf.to_float(X) / 255.0

    # Feature encoding phi.
    phi = build_shared_network(X)

    # Add a leading axis of size 1 so the rows of phi become the time axis
    # of a single sequence.
    Xt = tf.expand_dims(phi, [0])

    lstm = rnn.BasicLSTMCell(num_units=feature_space, forget_bias=1.0,
                             state_is_tuple=True)

    # sequence_length holds the number of time steps for each batch row.
    # Xt is [1, time, features], so the step count is axis 1. Axis 0 is the
    # fake batch size of 1 and would stop the unroll after one step.
    step_size = tf.shape(Xt)[1:2]

    # Zero LSTM state used to reset the memory, as [c, h].
    c_init = np.zeros((1, lstm.state_size.c), np.float32)
    h_init = np.zeros((1, lstm.state_size.h), np.float32)
    self.state_init = [c_init, h_init]

    # Placeholders that carry the LSTM state over from the previous call.
    c_in = tf.placeholder(tf.float32, [1, lstm.state_size.c], name='c_in')
    h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h], name='h_in')
    self.state_in = [c_in, h_in]
    init_tuple = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)

    lstm_outputs, lstm_state = tf.nn.dynamic_rnn(lstm, Xt,
                                                 initial_state=init_tuple,
                                                 sequence_length=step_size,
                                                 time_major=False)
    c_out, h_out = lstm_state

    # One feature row per time step.
    rnn_features = tf.reshape(lstm_outputs, [-1, feature_space])
    self.state_out = [c_out[:1, :], h_out[:1, :]]

    # Value head (one scalar per row) and policy head.
    self.value_logits = tf.squeeze(
        tf.layers.dense(inputs=rnn_features, units=1, name="value_fcn",
                        activation=None),
        squeeze_dims=[1])
    self.policy_logits = tf.layers.dense(inputs=rnn_features,
                                         units=action_space, name="policy",
                                         activation=None)
    self.probs = tf.nn.softmax(self.policy_logits)
    self.log_probs = tf.nn.log_softmax(self.policy_logits)

    # Sample an action for the first row and return it one-hot encoded.
    sampled = tf.squeeze(
        input=tf.multinomial(logits=self.log_probs, num_samples=1), axis=1)
    self.action = tf.one_hot(sampled, action_space)[0, :]

    # Mean policy entropy, -sum(p * log p), over the mini-batch.
    self.entropy = -tf.reduce_mean(
        tf.reduce_sum(self.probs * self.log_probs, 1), name="entropy")

    # Training targets: advantages, returns and the one-hot actions taken.
    self.advantage = tf.placeholder(shape=[None], dtype=tf.float32)
    self.reward = tf.placeholder(shape=[None], dtype=tf.float32)
    self.acs = tf.placeholder(shape=[None, action_space], dtype=tf.float32)

    # Policy gradient term: maximise log_prob(action) * advantage.
    self.policy_loss = -tf.reduce_mean(
        tf.reduce_sum(self.log_probs * self.acs, axis=1) * self.advantage)
    self.value_fcn_loss = 0.5 * tf.reduce_mean(
        tf.squared_difference(self.value_logits, self.reward))

    # Final A3C loss. The entropy bonus is subtracted: the loss is
    # minimised, so adding it would push entropy down and discourage
    # exploration.
    self.loss = (self.policy_loss + 0.5 * self.value_fcn_loss
                 - 0.01 * self.entropy)

    self.optimizer = tf.train.RMSPropOptimizer(0.00001, 0.99, 0.0, 1e-8)
    self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
    # Drop variables that receive no gradient from this loss.
    self.grads_and_vars = [[grad, var]
                           for grad, var in self.grads_and_vars
                           if grad is not None]

    tf.summary.scalar(self.loss.op.name, self.loss)

    # Merge only the summaries whose name contains the current variable
    # scope. (A previous "local_net" filter was immediately overwritten by
    # this one and had no effect, so it is gone.)
    var_scope_name = tf.get_variable_scope().name
    summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
    sumaries = [s for s in summary_ops if var_scope_name in s.name]
    self.summaries = tf.summary.merge(sumaries)
def lstm_cell(fb):
    """Build a tuple-state ``BasicLSTMCell`` with forget bias ``fb``.

    The unit count ``size`` comes from the enclosing scope, and the cell
    inherits the ``reuse`` flag of the current variable scope.
    """
    reuse_flag = tf.get_variable_scope().reuse
    return rnn.BasicLSTMCell(size,
                             forget_bias=fb,
                             state_is_tuple=True,
                             reuse=reuse_flag)
def inference(x, y, n_batch, is_training, input_digits=None,
              output_digits=None, n_hidden=None, n_out=None):
    """Build an attention-wrapped LSTM encoder-decoder and return its output.

    Args:
        x: Encoder input, sliced as ``x[:, t, :]`` for ``input_digits`` steps.
        y: Decoder teacher input, sliced as ``y[:, t - 1, :]`` when training.
        n_batch: Batch size used to build the encoder's zero state.
        is_training: Python flag. The teacher-forcing branch is taken only
            when this is literally ``True``; any other object (including a
            tensor or placeholder) selects the free-running branch.
        input_digits: Number of encoder steps; also the attention length.
        output_digits: Number of decoder output steps.
        n_hidden: LSTM units in the encoder and the decoder.
        n_out: Number of output classes.

    Returns:
        Training: softmax over ``n_out`` classes for every decoder step.
        Otherwise: the step-by-step softmax predictions reshaped to
        ``[-1, output_digits, n_out]``.
    """
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    # Encode
    encoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder, input_digits,
                                       state_is_tuple=True)
    state = encoder.zero_state(n_batch, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(input_digits):
            # Share the cell weights across time steps.
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decode, starting from the encoder's last state and output.
    decoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder, input_digits,
                                       state_is_tuple=True)
    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Define the output layer's weight and bias up front, because the
    # free-running branch needs them inside the decoding loop.
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                # Teacher forcing: feed the ground-truth previous symbol.
                (output, state) = decoder(y[:, t - 1, :], state)
            else:
                # Compute the previous step's prediction and feed it back.
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                # `out` is a distribution over n_out classes, so the one-hot
                # depth must be n_out. Using output_digits (the sequence
                # length) gave the wrong width and zeroed any class index
                # >= output_digits.
                out = tf.one_hot(tf.argmax(out, -1), depth=n_out)
                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, output_digits, n_hidden])
        # Apply the output layer to every time step at once.
        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        return tf.nn.softmax(linear)
    else:
        # Compute the prediction for the final step.
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, output_digits, n_out])
        return output
def __init__(self, ob_space, ac_space):
    """Build a policy/value graph with three chained LSTMs and auxiliary inputs.

    The observation is encoded by a conv stack (which one depends on the
    module-level ``openai`` flag). Before each LSTM, the running features
    are concatenated with that layer's auxiliary placeholder
    (``h_aux0``/``h_aux1``/``h_aux2``). The last LSTM's outputs feed the
    action-logit and value heads.

    Args:
        ob_space: Observation shape (without the leading row axis).
        ac_space: Number of actions.

    ``state_init``, ``state_in`` and ``state_out`` are each laid out as
    ``[[c0, c1, c2], [h0, h1, h2]]``.
    """
    self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

    if openai:
        # Four identical 3x3, stride-2 conv layers named l1..l4.
        for layer_no in range(1, 5):
            x = tf.nn.relu(
                conv2d(x, 32, "l{}".format(layer_no), [3, 3], [2, 2]))
        x = flatten(x)
    else:
        conv1 = tf.contrib.layers.conv2d(x, 16, 8, 4,
                                         activation_fn=tf.nn.relu,
                                         scope="conv1")
        conv2 = tf.contrib.layers.conv2d(conv1, 32, 4, 2,
                                         activation_fn=tf.nn.relu,
                                         scope="conv2")
        x = tf.contrib.layers.fully_connected(
            inputs=tf.contrib.layers.flatten(conv2),
            num_outputs=256,
            scope="fc1")

    size = 256
    self.size = size

    # One auxiliary input per LSTM layer.
    self.h_aux0 = tf.placeholder(tf.float32, [None, size])
    self.h_aux1 = tf.placeholder(tf.float32, [None, size])
    self.h_aux2 = tf.placeholder(tf.float32, [None, size])

    # First layer input: features + aux0, with a leading axis of size 1.
    x = tf.concat([x, self.h_aux0], 1)
    x = tf.expand_dims(x, [0])

    cells = [
        rnn.BasicLSTMCell(size, forget_bias=1.0, state_is_tuple=True,
                          reuse=tf.get_variable_scope().reuse)
        for _ in range(3)
    ]

    # Number of rows fed in, used as the sequence length of every LSTM.
    step_size = tf.shape(self.x)[0:1]

    # Zero states, grouped as all c's then all h's.
    c_inits = [np.zeros((1, cell.state_size.c), np.float32) for cell in cells]
    h_inits = [np.zeros((1, cell.state_size.h), np.float32) for cell in cells]
    self.state_init = [c_inits, h_inits]

    # State placeholders, created layer by layer (c first, then h).
    c_inputs = []
    h_inputs = []
    for cell in cells:
        c_inputs.append(tf.placeholder(tf.float32, [1, cell.state_size.c]))
        h_inputs.append(tf.placeholder(tf.float32, [1, cell.state_size.h]))
    self.state_in = [c_inputs, h_inputs]

    initial_states = [rnn.LSTMStateTuple(c_ph, h_ph)
                      for c_ph, h_ph in zip(c_inputs, h_inputs)]

    # Chain the three LSTMs; layers 1 and 2 first merge the previous output
    # with their auxiliary input.
    scope_names = ('rnn0', 'rnn1', 'rnn2')
    later_aux = (self.h_aux1, self.h_aux2)
    layer_input = x
    layer_output = None
    final_states = []
    for idx, cell in enumerate(cells):
        if idx > 0:
            merged = tf.concat(
                [tf.reshape(layer_output, [-1, size]), later_aux[idx - 1]], 1)
            layer_input = tf.reshape(merged, [1, -1, size * 2])
        layer_output, layer_state = tf.nn.dynamic_rnn(
            cell, layer_input,
            initial_state=initial_states[idx],
            sequence_length=step_size,
            time_major=False,
            scope=scope_names[idx])
        final_states.append(layer_state)

    x = tf.reshape(layer_output, [-1, size])

    self.lstm_c = [layer_state.c for layer_state in final_states]
    self.lstm_h = [layer_state.h for layer_state in final_states]

    self.logits = linear(x, ac_space, "action",
                         normalized_columns_initializer(0.01))
    self.vf = tf.reshape(
        linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
    self.state_out = [self.lstm_c, self.lstm_h]
    self.sample = categorical_sample(self.logits, ac_space)[0, :]
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
def lstm(layer_size, num_layers):
    """Return ``num_layers`` independent tanh ``BasicLSTMCell`` objects.

    Args:
        layer_size: Unit count of every cell.
        num_layers: How many cells to create.

    Returns:
        A list of ``num_layers`` cells.
    """
    stack = []
    for _ in range(num_layers):
        stack.append(rnn.BasicLSTMCell(num_units=layer_size,
                                       activation=tf.tanh))
    return stack
def __init__(self, ob_space, ac_space):
    """Build a conv encoder + single LSTM policy/value graph.

    Args:
        ob_space: Observation shape (without the leading row axis).
        ac_space: Number of actions.

    ``self.x`` keeps the raw input placeholder, while the local ``x`` is
    rebound as the features flow through the network, so the two end up
    with different shapes. The rows of ``self.x`` are treated as the time
    steps of one sequence: a leading axis of size 1 is added before the
    LSTM.
    """
    self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

    if openai:
        # Four identical 3x3, stride-2 conv layers named l1..l4.
        for layer_no in range(1, 5):
            x = tf.nn.relu(
                conv2d(x, 32, "l{}".format(layer_no), [3, 3], [2, 2]))
        x = tf.expand_dims(flatten(x), [0])
    else:
        conv1 = tf.contrib.layers.conv2d(x, 16, 8, 4,
                                         activation_fn=tf.nn.relu,
                                         scope="conv1")
        conv2 = tf.contrib.layers.conv2d(conv1, 32, 4, 2,
                                         activation_fn=tf.nn.relu,
                                         scope="conv2")
        flat = tf.contrib.layers.flatten(conv2)
        fc1 = tf.contrib.layers.fully_connected(inputs=flat, num_outputs=256,
                                                scope="fc1")
        x = tf.expand_dims(fc1, [0])

    # Width of the LSTM hidden state vector h.
    size = 256
    self.size = size

    lstm = rnn.BasicLSTMCell(size, state_is_tuple=True)
    # state_size has two fields: c and h.
    self.state_size = lstm.state_size
    c_width = lstm.state_size.c
    h_width = lstm.state_size.h

    # Sequence length = number of rows in the raw input placeholder.
    step_size = tf.shape(self.x)[0:1]

    # Zero state used to reset the LSTM, as [c, h].
    self.state_init = [np.zeros((1, c_width), np.float32),
                       np.zeros((1, h_width), np.float32)]

    # Placeholders for the state carried in from the previous call.
    c_in = tf.placeholder(tf.float32, [1, c_width])
    h_in = tf.placeholder(tf.float32, [1, h_width])
    self.state_in = [c_in, h_in]

    lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
        lstm, x,
        initial_state=rnn.LSTMStateTuple(c_in, h_in),
        sequence_length=step_size,
        time_major=False)
    lstm_c, lstm_h = lstm_state

    # One feature row per time step.
    x = tf.reshape(lstm_outputs, [-1, size])

    # Action logits, i.e. the unnormalised pi(a | s).
    self.logits = linear(x, ac_space, "action",
                         normalized_columns_initializer(0.01))
    # State value V(s), flattened to one scalar per row.
    value = linear(x, 1, "value", normalized_columns_initializer(1.0))
    self.vf = tf.reshape(value, [-1])

    self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
    self.sample = categorical_sample(self.logits, ac_space)[0, :]

    # Trainable variables under the current variable scope.
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
# Graph for a dropout-wrapped LSTM regressor. `trainsize`, `batchsize`,
# `time_steps`, `n_input`, `n_classes` and `num_units` are defined outside
# this span.

# Number of whole batches in the training set.
Nbofbatch = trainsize // batchsize

# Learning-rate schedule: 0.001 * 0.1 ** learningi[0], with the exponent fed
# at run time through a one-element placeholder.
learningi = tf.placeholder("float", [1])
learning_rate = 0.001 * pow(0.1, learningi[0])

# Input sequences: [batch, time_steps, n_input].
x = tf.placeholder("float", [None, time_steps, n_input])
# Targets: [batch, n_classes].
y = tf.placeholder("float", [None, n_classes])

# static_rnn takes a list of per-step tensors, so split the input along
# axis 1 into `time_steps` tensors of [batch, n_input].
# NOTE(review): the name `input` shadows the Python builtin.
input = tf.unstack(x, time_steps, 1)

# The network: one BasicLSTMCell with a fixed 0.95 keep probability on its
# inputs, outputs and state.
# NOTE(review): the keep probabilities are constants, so dropout is also
# active whenever this graph is used for evaluation. Confirm that is intended.
#lstm_layer=rnn.BasicLSTMCell(num_units,forget_bias=1)
lstm_layer = rnn.DropoutWrapper(rnn.BasicLSTMCell(num_units, forget_bias=1), input_keep_prob=0.95, output_keep_prob=0.95, state_keep_prob=0.95)
outputs, _ = rnn.static_rnn(lstm_layer, input, dtype="float32")
# print(len(outputs))

# Head: last step's output -> 16 tanh units (L2-regularised) -> 1 tanh unit.
outputs = tf.contrib.layers.fully_connected(outputs[-1], 16, weights_regularizer=tf.contrib.layers.l2_regularizer(0.1), activation_fn=tf.nn.tanh)
prediction = tf.contrib.layers.fully_connected(outputs, 1, activation_fn=tf.nn.tanh)

# Mean squared error between targets and predictions.
# NOTE(review): `prediction` is 1 wide while `y` is n_classes wide; this
# presumably assumes n_classes == 1. Verify.
loss = tf.losses.mean_squared_error(y, prediction)
batchsize = tf.placeholder(tf.int32, name="batchsize") alpha = tf.placeholder(tf.float32, name="alpha") with tf.name_scope("input_preparation"): X = tf.placeholder(tf.uint8, [None, None], name="X") Xo = tf.one_hot(X, VOCAB_SIZE, 1.0, 0.0) Y_ = tf.placeholder(tf.uint8, [None, None], name="y") Yo_ = tf.one_hot(Y_, VOCAB_SIZE, 1.0, 0.0) if args.cell_type == 1: initial_state = tf.placeholder(tf.float32, [None, args.internal_size * (args.layers * 2)], name="initial_state") else: initial_state = tf.placeholder(tf.float32, [None, args.internal_size * (args.layers)], name="initial_state") if args.cell_type == 1: net = [rnn.BasicLSTMCell(args.internal_size, state_is_tuple=False) for _ in range(args.layers)] else: net = [rnn.GRUCell(args.internal_size) for _ in range(args.layers)] net = [rnn.DropoutWrapper(cell, input_keep_prob=dropout_prob) for cell in net] multi_rnn = rnn.MultiRNNCell(net, state_is_tuple=False) drop_multi_rnn = rnn.DropoutWrapper(multi_rnn, output_keep_prob=dropout_prob) Yr, H = tf.nn.dynamic_rnn(drop_multi_rnn, Xo, initial_state=initial_state, dtype=tf.float32) H = tf.identity(H, name="H") Yflat = tf.reshape(Yr, [-1, args.internal_size]) Ylogits = layers.linear(Yflat, VOCAB_SIZE) Yflat_ = tf.reshape(Yo_, [-1, VOCAB_SIZE])