import tensorflow as tf

# `rnncell` supplies the project's ModifiedLSTMCell / MultiRNNCell /
# DropoutWrapper variants; its import path depends on the repo layout.


def __init__(self, params, is_training=True):
    self.is_training = is_training
    batch_size = params["batch_size"]
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    grad_clip = params["grad_clip"]
    self.output_keep_prob = tf.placeholder(tf.float32)
    self.input_keep_prob = tf.placeholder(tf.float32)
    NOUT = params['n_output']

    # Transition LSTM
    cell_lst = []
    for i in range(num_layers):
        cell = rnncell.ModifiedLSTMCell(
            rnn_size,
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if is_training:  # the original `i > -1` test was always true
            cell = rnncell.DropoutWrapper(
                cell, output_keep_prob=self.output_keep_prob)
        if i > 10 and params['input_keep_prob'] < 1:  # kept as-is: never true for shallow stacks
            cell = rnncell.DropoutWrapper(
                cell, input_keep_prob=self.input_keep_prob)
        cell_lst.append(cell)
    self.cell = rnncell.MultiRNNCell(cell_lst)

    # LSTM for Q noise
    cell_lst = []
    for i in range(params['Qnlayer']):
        cell_Q_noise = rnncell.ModifiedLSTMCell(
            params['Qn_hidden'],
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if is_training:
            cell_Q_noise = rnncell.DropoutWrapper(
                cell_Q_noise, output_keep_prob=self.output_keep_prob)
        if i > 10 and params['input_keep_prob'] < 1:
            # originally wrapped the transition `cell` here by mistake
            cell_Q_noise = rnncell.DropoutWrapper(
                cell_Q_noise, input_keep_prob=self.input_keep_prob)
        cell_lst.append(cell_Q_noise)
    self.cell_Q_noise = rnncell.MultiRNNCell(cell_lst)

    # LSTM for R noise
    cell_lst = []
    for i in range(params['Rnlayer']):
        cell_R_noise = rnncell.ModifiedLSTMCell(
            params['Rn_hidden'],
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if is_training:
            cell_R_noise = rnncell.DropoutWrapper(
                cell_R_noise, output_keep_prob=self.output_keep_prob)
        if i > 10 and params['input_keep_prob'] < 1:
            # originally wrapped the transition `cell` here by mistake
            cell_R_noise = rnncell.DropoutWrapper(
                cell_R_noise, input_keep_prob=self.input_keep_prob)
        cell_lst.append(cell_R_noise)
    self.cell_R_noise = rnncell.MultiRNNCell(cell_lst)

    self.initial_state = self.cell.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    self.initial_state_Q_noise = self.cell_Q_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    self.initial_state_R_noise = self.cell_R_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)

    self.repeat_data = tf.placeholder(
        dtype=tf.int32, shape=[params["batch_size"], params['seq_length']])
    # Measurements
    self._z = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature
    self._x_inp = tf.placeholder(
        dtype=tf.float32, shape=[None, NOUT], name='Initialx')  # initial state estimate
    self.target_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature
    self._P_inp = tf.placeholder(
        dtype=tf.float32, shape=[None, NOUT, NOUT], name='P')
    self._F = 0.0  # state transition matrix
    self._alpha_sq = 1.  # fading memory control
    self.M = 0.0  # process-measurement cross correlation
    self._I = tf.placeholder(dtype=tf.float32, shape=[None, NOUT, NOUT], name='I')
    self.u = 0.0

    xres_lst = []
    xpred_lst = []
    pres_lst = []
    tres_lst = []
    kres_lst = []
    qres_lst = []
    rres_lst = []

    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable(
            "output_w1", [rnn_size, rnn_size],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1 = tf.get_variable("output_b1", [rnn_size])
        output_w2 = tf.get_variable(
            "output_w2", [rnn_size, rnn_size],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b2 = tf.get_variable("output_b2", [rnn_size])
        output_w3 = tf.get_variable(
            "output_w3", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b3 = tf.get_variable("output_b3", [NOUT])
        output_w1_Q_noise = tf.get_variable(
            "output_w_Q_noise", [params['Qn_hidden'], NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_Q_noise = tf.get_variable("output_b_Q_noise", [NOUT])
        output_w1_R_noise = tf.get_variable(
            "output_w_R_noise", [params['Rn_hidden'], NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        # output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT], initializer=tf.ones_initializer())
        output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT])
        # Lower-triangular indices; only needed by the sparse Q construction
        # that is commented out in the loop below:
        # indices = list(zip(*np.tril_indices(NOUT)))
        # indices = tf.constant([list(i) for i in indices], dtype=tf.int64)

    state_F = self.initial_state
    state_Q = self.initial_state_Q_noise
    state_R = self.initial_state_R_noise
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            z = self._z[:, time_step, :]  # bs, features
            if time_step == 0:
                # self._x = z
                self._x = self._x_inp
                self._P = self._P_inp
            with tf.variable_scope("transitionF"):
                (pred, state_F, ls_internals) = self.cell(self._x, state_F)
                # pred = tf.matmul(pred, output_w1) + output_b1
                pred = tf.nn.relu(tf.add(tf.matmul(pred, output_w1), output_b1))
                pred = tf.nn.relu(tf.add(tf.matmul(pred, output_w2), output_b2))
                pred = tf.add(tf.matmul(pred, output_w3), output_b3)
            with tf.variable_scope("noiseQ"):
                (pred_Q_noise, state_Q, ls_internals) = self.cell_Q_noise(
                    self._x, state_Q)
                pred_Q_noise = tf.matmul(
                    pred_Q_noise, output_w1_Q_noise) + output_b1_Q_noise
            # Optional measurement corruption for robustness experiments:
            # one_mask = tf.ones(shape=(batch_size, NOUT))
            # zero_mask = tf.zeros(shape=(batch_size, NOUT))
            # random_mask = tf.random_uniform(shape=(batch_size, NOUT))
            # means = tf.multiply(tf.ones(shape=(batch_size, NOUT)), 1 - self.output_keep_prob)
            # mask = tf.where(random_mask - means > 0.5, zero_mask, one_mask)
            # meas_z = tf.where(self.output_keep_prob >= 1, z, tf.multiply(z, mask))
            # norm = tf.random_normal(shape=(batch_size, NOUT), mean=0, stddev=0.01)
            # meas_z = tf.where(self.output_keep_prob >= 1, z, tf.add(z, norm))
            meas_z = z
            with tf.variable_scope("noiseR"):
                (pred_R_noise, state_R, ls_internals) = self.cell_R_noise(
                    meas_z, state_R)
                pred_R_noise = tf.matmul(
                    pred_R_noise, output_w1_R_noise) + output_b1_R_noise

            self._x = pred  # transition LSTM output is the prior estimate
            # lst = tf.unpack(pred, axis=1)
            # Q = tf.sparse_to_dense(sparse_indices=indices,
            #                        output_shape=[batch_size, NOUT, NOUT],
            #                        sparse_values=pred_Q_noise, default_value=0,
            #                        validate_indices=True)
            Q = tf.matrix_diag(tf.exp(pred_Q_noise))
            R = tf.matrix_diag(tf.exp(pred_R_noise))
            # Q = tf.matmul(tf.matrix_diag(tf.exp(pred_Q_noise)), tf.matrix_diag(tf.exp(pred_Q_noise)))
            # R = tf.matmul(tf.matrix_diag(tf.exp(pred_R_noise)), tf.matrix_diag(tf.exp(pred_R_noise)))

            # predict
            P = self._P
            self._P = P + Q
            # update
            P = self._P
            x = self._x
            self._y = meas_z - x
            # S = HPH' + R: project system uncertainty into measurement space (H = I)
            S = P + R
            # K = PH'inv(S): map system uncertainty into Kalman gain,
            # i.e. (Q + P_init) / (R + Q + P_init)
            K = tf.matmul(P, tf.matrix_inverse(S))
            # x = x + Ky: correct the prior with the gain-scaled residual
            # (K -> 1 gives _x = z; K -> 0 gives _x = x)
            self._x = x + tf.squeeze(
                tf.matmul(K, tf.expand_dims(self._y, 2)), -1)
            xpred_lst.append(x)
            xres_lst.append(self._x)
            tres_lst.append(meas_z)
            kres_lst.append(tf.matrix_diag_part(K))
            rres_lst.append(tf.matrix_diag_part(R))
            qres_lst.append(tf.matrix_diag_part(Q))
            # P = (I-KH)P(I-KH)' + KRK' (Joseph form)
            I_KH = self._I - K
            self._P = tf.matmul(
                I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(
                    K, tf.matmul(R, tf.matrix_transpose(K)))
            # self._P = tf.matmul(I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(K, tf.matrix_transpose(K))
            self._S = S
            self._K = K

    final_output = tf.reshape(
        tf.transpose(tf.stack(xres_lst), [1, 0, 2]), [-1, params['n_output']])
    final_pred_output = tf.reshape(
        tf.transpose(tf.stack(xpred_lst), [1, 0, 2]), [-1, params['n_output']])
    final_q_output = tf.reshape(
        tf.transpose(tf.stack(qres_lst), [1, 0, 2]), [-1, params['n_output']])
    final_r_output = tf.reshape(
        tf.transpose(tf.stack(rres_lst), [1, 0, 2]), [-1, params['n_output']])
    final_k_output = tf.reshape(
        tf.transpose(tf.stack(kres_lst), [1, 0, 2]), [-1, params['n_output']])
    final_meas_output = tf.reshape(
        tf.transpose(tf.stack(tres_lst), [1, 0, 2]), [-1, params['n_output']])

    # Keep only the timesteps flagged as valid in repeat_data.
    flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
    where_flt = tf.not_equal(flt, 0)
    indices = tf.where(where_flt)

    y = tf.reshape(self.target_data, [-1, params["n_output"]])
    self.final_output = tf.gather(final_output, tf.squeeze(indices, [1]))
    self.final_pred_output = tf.gather(final_pred_output, tf.squeeze(indices, [1]))
    self.final_q_output = tf.gather(final_q_output, tf.squeeze(indices, [1]))
    self.final_r_output = tf.gather(final_r_output, tf.squeeze(indices, [1]))
    self.final_k_output = tf.gather(final_k_output, tf.squeeze(indices, [1]))
    self.final_meas_output = tf.gather(final_meas_output, tf.squeeze(indices, [1]))
    self.y = tf.gather(y, tf.squeeze(indices, [1]))

    tmp = self.final_output - self.y
    loss = tf.nn.l2_loss(tmp)
    tmp_pred = self.final_pred_output - self.y
    loss_pred = tf.nn.l2_loss(tmp_pred)

    self.tvars = tf.trainable_variables()
    l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
    l2_reg = tf.multiply(l2_reg, 1e-4)
    # Posterior loss plus a weighted prior (prediction) loss and L2 regularization.
    self.cost = tf.reduce_mean(loss) + l2_reg + 0.8 * tf.reduce_mean(loss_pred)

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    total_parameters = 0
    for variable in self.tvars:
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    self.total_parameters = total_parameters

    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.states = {}
    self.states["F_t"] = state_F
    self.states["Q_t"] = state_Q
    self.states["R_t"] = state_R
    self.states["PCov_t"] = self._P
    self.states["_x_t"] = self._x
    self.xres_lst = xres_lst
    self.pres_lst = pres_lst
    self.tres_lst = tres_lst
    self.kres_lst = kres_lst
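# ---------------------------------------------------------------------------
# Reference sketch (not part of the model): the per-step arithmetic the loop
# above unrolls, written in NumPy with H = I and diagonal, exp-parametrized
# Q and R, as in the graph. In the model, the prior `x` is the transition
# LSTM's output; the names here (kf_step, q_diag, r_diag) are illustrative.
import numpy as np


def kf_step(x, P, z, q_diag, r_diag):
    """One predict/update cycle with H = I, diagonal learned noise."""
    Q = np.diag(np.exp(q_diag))              # learned process noise
    R = np.diag(np.exp(r_diag))              # learned measurement noise
    P = P + Q                                # predict covariance
    y = z - x                                # innovation (H = I)
    S = P + R                                # innovation covariance
    K = P @ np.linalg.inv(S)                 # Kalman gain
    x = x + K @ y                            # posterior state
    I_KH = np.eye(len(x)) - K
    P = I_KH @ P @ I_KH.T + K @ R @ K.T      # Joseph-form covariance update
    return x, P
# ---------------------------------------------------------------------------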
def __init__(self, params, is_training=True):
    self.is_training = is_training
    batch_size = params["batch_size"]
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    grad_clip = params["grad_clip"]
    self.output_keep_prob = tf.placeholder(tf.float32)
    NOUT = params['n_output']

    # Transition LSTM
    cell_lst = []
    for i in range(num_layers):
        cell = rnncell.ModifiedLSTMCell(
            rnn_size,
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if i > 10 and is_training:  # kept as-is: never true for shallow stacks
            cell = rnncell.DropoutWrapper(
                cell, output_keep_prob=self.output_keep_prob)
        cell_lst.append(cell)
    self.cell = rnncell.MultiRNNCell(cell_lst)

    # LSTM for Q noise
    cell_lst = []
    for i in range(params['Qnlayer']):
        cell_Q_noise = rnncell.ModifiedLSTMCell(
            params['Qn_hidden'],
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if i > 10 and is_training:
            cell_Q_noise = rnncell.DropoutWrapper(
                cell_Q_noise, output_keep_prob=self.output_keep_prob)
        cell_lst.append(cell_Q_noise)
    self.cell_Q_noise = rnncell.MultiRNNCell(cell_lst)

    # LSTM for R noise
    cell_lst = []
    for i in range(params['Rnlayer']):
        cell_R_noise = rnncell.ModifiedLSTMCell(
            params['Rn_hidden'],
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if i > 10 and is_training:
            cell_R_noise = rnncell.DropoutWrapper(
                cell_R_noise, output_keep_prob=self.output_keep_prob)
        cell_lst.append(cell_R_noise)
    self.cell_R_noise = rnncell.MultiRNNCell(cell_lst)

    self.initial_state = self.cell.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    self.initial_state_Q_noise = self.cell_Q_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    # (a duplicate assignment that used cell_Q_noise's zero state was removed)
    self.initial_state_R_noise = self.cell_R_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)

    self.repeat_data = tf.placeholder(
        dtype=tf.int32, shape=[params["batch_size"], params['seq_length']])
    # Measurements
    self._z = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature
    self.target_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature
    self._P_inp = tf.placeholder(dtype=tf.float32, shape=[None, NOUT, NOUT], name='P')
    self._F = 0.0  # state transition matrix
    self._alpha_sq = 1.  # fading memory control
    self.M = 0.0  # process-measurement cross correlation
    self._I = tf.placeholder(dtype=tf.float32, shape=[None, NOUT, NOUT], name='I')
    self.u = 0.0

    xres_lst = []
    xpred_lst = []
    pres_lst = []
    tres_lst = []
    kres_lst = []

    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable(
            "output_w", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1 = tf.get_variable("output_b", [NOUT])
        output_w1_Q_noise = tf.get_variable(
            "output_w_Q_noise", [params['Qn_hidden'], NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_Q_noise = tf.get_variable("output_b_Q_noise", [NOUT])
        output_w1_R_noise = tf.get_variable(
            "output_w_R_noise", [params['Rn_hidden'], NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT])

    state_F = self.initial_state
    state_Q = self.initial_state_Q_noise
    state_R = self.initial_state_R_noise
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            z = self._z[:, time_step, :]  # bs, features
            if time_step == 0:
                self._x = z
                self._P = self._P_inp
            with tf.variable_scope("transitionF"):
                (pred, state_F, ls_internals) = self.cell(self._x, state_F)
                pred = tf.matmul(pred, output_w1) + output_b1
            with tf.variable_scope("noiseQ"):
                (pred_Q_noise, state_Q, ls_internals) = self.cell_Q_noise(
                    self._x, state_Q)
                pred_Q_noise = tf.matmul(
                    pred_Q_noise, output_w1_Q_noise) + output_b1_Q_noise
            with tf.variable_scope("noiseR"):
                (pred_R_noise, state_R, ls_internals) = self.cell_R_noise(
                    z, state_R)
                pred_R_noise = tf.matmul(
                    pred_R_noise, output_w1_R_noise) + output_b1_R_noise

            self._x = pred
            # lst = tf.unpack(pred, axis=1)
            Q = tf.matrix_diag(tf.exp(pred_Q_noise))
            # R is not used in this variant:
            # R = tf.matrix_diag(tf.exp(pred_R_noise))

            # predict
            P = self._P
            self._P = P + Q
            # update
            P = self._P
            x = self._x
            self._y = z - x
            # S = HPH' + R: project system uncertainty into measurement space
            # S = P + R
            S = P  # this variant drops R from the innovation covariance
            # K = PH'inv(S): map system uncertainty into kalman gain
            K = tf.matmul(P, tf.matrix_inverse(S))
            # x = x + Ky: predict new x with residual scaled by the kalman gain
            self._x = x + tf.squeeze(
                tf.matmul(K, tf.expand_dims(self._y, 2)), [2])
            xpred_lst.append(x)
            xres_lst.append(self._x)
            tres_lst.append(x)
            kres_lst.append(K)
            # P = (I-KH)P(I-KH)' + KRK' (here with R dropped: ... + KK')
            I_KH = self._I - K
            # self._P = tf.matmul(I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(K, tf.matmul(R, tf.matrix_transpose(K)))
            self._P = tf.matmul(
                I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(
                    K, tf.matrix_transpose(K))
            self._S = S
            self._K = K

    final_output = tf.reshape(
        tf.transpose(tf.stack(xres_lst), [1, 0, 2]), [-1, params['n_output']])
    final_pred_output = tf.reshape(
        tf.transpose(tf.stack(xpred_lst), [1, 0, 2]), [-1, params['n_output']])

    # Keep only the timesteps flagged as valid in repeat_data (see the sketch
    # after this method).
    flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
    where_flt = tf.not_equal(flt, 0)
    indices = tf.where(where_flt)

    y = tf.reshape(self.target_data, [-1, params["n_output"]])
    self.final_output = tf.gather(final_output, tf.squeeze(indices, [1]))
    self.final_pred_output = tf.gather(final_pred_output, tf.squeeze(indices, [1]))
    self.y = tf.gather(y, tf.squeeze(indices, [1]))

    tmp = self.final_output - self.y
    loss = tf.nn.l2_loss(tmp)
    tmp_pred = self.final_pred_output - self.y
    loss_pred = tf.nn.l2_loss(tmp_pred)

    self.tvars = tf.trainable_variables()
    l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
    l2_reg = tf.multiply(l2_reg, 1e-4)
    self.cost = tf.reduce_mean(loss) + l2_reg

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    total_parameters = 0
    for variable in self.tvars:
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    self.total_parameters = total_parameters

    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.states = {}
    self.states["F_t"] = state_F
    self.states["Q_t"] = state_Q
    self.states["R_t"] = state_R
    self.states["PCov_t"] = self._P
    self.xres_lst = xres_lst
    self.pres_lst = pres_lst
    self.tres_lst = tres_lst
    self.kres_lst = kres_lst
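# ---------------------------------------------------------------------------
# Standalone sketch of the repeat_data masking used above: valid timesteps
# are flagged non-zero, and the flattened (batch*seq, n) outputs are gathered
# row-wise so padded steps never enter the loss. Toy shapes only; tensor
# names here are illustrative.
import tensorflow as tf

repeat = tf.constant([[1, 1, 0],
                      [1, 0, 0]])                        # (batch=2, seq=3)
flat = tf.squeeze(tf.reshape(repeat, [-1, 1]), [1])      # (batch*seq,)
keep = tf.squeeze(tf.where(tf.not_equal(flat, 0)), [1])  # indices of valid rows
outputs = tf.reshape(tf.cast(tf.range(12), tf.float32), [6, 2])  # fake outputs
valid_outputs = tf.gather(outputs, keep)                 # rows for valid steps
# ---------------------------------------------------------------------------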
def __init__(self, params):
    def identity_matrix(bs, n):
        # Batch of n x n identity matrices. Built from a constant; the original
        # wrapped this in tf.Variable, which made the "identity" trainable.
        diag = tf.diag(tf.ones([n], dtype=tf.float32))
        lst = []
        for i in range(bs):
            lst.append(diag)
        return tf.stack(lst)

    num_layers = params['nlayer']
    F_shape = params['F_shape']
    H_shape = params['H_shape']
    rnn_size = params['n_hidden']
    NOUT = params['n_output']
    batch_size = params["batch_size"]
    self.output_keep_prob = tf.placeholder(tf.float32)
    grad_clip = params["grad_clip"]

    self._x = tf.zeros((batch_size, F_shape[1]))  # state
    # self._P_inp = tf.placeholder(dtype=tf.float32, shape=[None, F_shape[1], F_shape[1]], name='P')
    self._P = identity_matrix(batch_size, F_shape[1])  # uncertainty covariance
    # self._I = tf.placeholder(dtype=tf.float32, shape=[None, NOUT, NOUT], name='I')
    # self._P = identity_matrix(bs, dim_x) * 500.  # uncertainty covariance
    # self._Q = identity_matrix(bs, dim_x)  # process uncertainty
    B = 0.0  # control transition matrix
    u = 0.0
    self._F = 0.0  # state transition matrix
    self._alpha_sq = 1.  # fading memory control
    self.M = 0.0  # process-measurement cross correlation
    self._I = identity_matrix(batch_size, F_shape[1])

    # LSTM for Q noise
    cell_Q_noise = rnncell.ModifiedLSTMCell(
        params['Qn_hidden'],
        forget_bias=1,
        initializer=tf.contrib.layers.xavier_initializer(),
        num_proj=None)
    cell_Q_noise = rnncell.MultiRNNCell([cell_Q_noise] * params['Qnlayer'])
    cell_Q_noise = rnncell.DropoutWrapper(
        cell_Q_noise, output_keep_prob=self.output_keep_prob)
    self.cell_Q_noise = cell_Q_noise

    # LSTM for R noise
    cell_R_noise = rnncell.ModifiedLSTMCell(
        params['Rn_hidden'],
        forget_bias=1,
        initializer=tf.contrib.layers.xavier_initializer(),
        num_proj=None)
    cell_R_noise = rnncell.MultiRNNCell([cell_R_noise] * params['Rnlayer'])
    cell_R_noise = rnncell.DropoutWrapper(
        cell_R_noise, output_keep_prob=self.output_keep_prob)
    self.cell_R_noise = cell_R_noise

    self.initial_state_Q_noise = cell_Q_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    self.initial_state_R_noise = cell_R_noise.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)

    self.input_data = tf.placeholder(
        dtype=tf.float32,
        shape=[params["batch_size"], params['seq_length'], NOUT],
        name="input_data")
    self.input_zero = tf.placeholder(
        dtype=tf.float32,
        shape=[params["batch_size"], params['seq_length'], params['n_input']],
        name="input_zero")
    self.repeat_data = tf.placeholder(
        dtype=tf.int32,
        shape=[params["batch_size"], params['seq_length']],
        name="repeat_data")
    self.target_data = tf.placeholder(
        tf.float32, [None, params['seq_length'], NOUT], name="target_data")
    self.F = tf.placeholder(dtype=tf.float32, shape=F_shape)  # batch of transition matrices
    self.H = tf.placeholder(dtype=tf.float32, shape=H_shape)  # batch of measurement matrices
    # Constant-velocity template for F and H (see the feed sketch after this method):
    # dt = 1.0  # time step
    # F = tf.Variable(initial_value=[[1, dt, 0, 0],
    #                                [0, 1, 0, 0],
    #                                [0, 0, 1, dt],
    #                                [0, 0, 0, 1]], dtype=tf.float32)
    # self.u = 0.0
    # H = tf.Variable(initial_value=[[1, 0, 0, 0], [0, 1, 0, 0],
    #                                [0, 0, 1, 0], [0, 0, 0, 1]],
    #                 dtype=tf.float32)  # measurement function
    # F = tf.stack([F] * bs)
    # H = tf.stack([H] * bs)

    with tf.variable_scope('rnnlm'):
        output_w1_Q_noise = tf.get_variable(
            "output_w_Q_noise", [rnn_size, F_shape[1]],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_Q_noise = tf.get_variable("output_b_Q_noise", [F_shape[1]])
        output_w1_R_noise = tf.get_variable(
            "output_w_R_noise", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT])

    xres_lst = []
    pres_lst = []
    tres_lst = []
    qres_lst = []
    rres_lst = []
    outputs = []
    state_Q = self.initial_state_Q_noise
    state_R = self.initial_state_R_noise
    for time_step in range(params['seq_length']):
        z = self.input_data[:, time_step, :]  # bs, features
        if time_step > 0:
            tf.get_variable_scope().reuse_variables()
        with tf.variable_scope("noiseQ"):
            (pred_Q_noise, state_Q, ls_internals) = cell_Q_noise(self._x, state_Q)
            pred_Q_noise = tf.matmul(
                pred_Q_noise, output_w1_Q_noise) + output_b1_Q_noise
        with tf.variable_scope("noiseR"):
            (pred_R_noise, state_R, ls_internals) = cell_R_noise(z, state_R)
            pred_R_noise = tf.matmul(
                pred_R_noise, output_w1_R_noise) + output_b1_R_noise

        # lst = tf.unpack(pred, axis=1)
        Q = tf.matrix_diag(tf.exp(pred_Q_noise))
        R = tf.matrix_diag(tf.exp(pred_R_noise))
        # R = tf.matmul(R, tf.matrix_transpose(R))
        # Q = tf.matmul(Q, tf.matrix_transpose(Q))
        qres_lst.append(self._P)

        # predict
        P = self._P
        # x = tf.expand_dims(tf.matmul(tf.squeeze(self._x), output_w1) + output_b1, 2)
        x = self._x
        # x = Fx + Bu
        self._x = tf.matmul(self.F, tf.expand_dims(x, 2)) + tf.multiply(B, u)
        # P = FPF' + Q
        self._P = self._alpha_sq * tf.matmul(
            self.F, tf.matmul(P, tf.matrix_transpose(self.F))) + Q

        # update
        P = self._P
        x = self._x
        Hx = tf.matmul(self.H, x)
        self._y = tf.expand_dims(z, 2) - Hx
        # S = HPH' + R: project system uncertainty into measurement space
        S = tf.matmul(self.H, tf.matmul(P, tf.matrix_transpose(self.H))) + R
        # K = PH'inv(S): map system uncertainty into kalman gain
        K = tf.matmul(
            P, tf.matmul(tf.matrix_transpose(self.H), tf.matrix_inverse(S)))
        # x = x + Ky: predict new x with residual scaled by the kalman gain
        self._x = tf.squeeze(x) + tf.squeeze(tf.matmul(K, self._y))
        xres_lst.append(self._x)
        # P = (I-KH)P(I-KH)' + KRK' (Joseph form)
        I_KH = self._I - tf.matmul(K, self.H)
        self._P = tf.matmul(
            I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(
                K, tf.matmul(R, tf.matrix_transpose(K)))
        pres_lst.append(P)
        rres_lst.append(R)
        # self._S = S
        # self._K = K

    # rnn_output = tf.reshape(tf.concat(1, outputs), [-1, params['n_hidden']])
    test_mode = params['test_mode']
    if test_mode == 'step2d':
        # Report only the position components of the constant-velocity state.
        final_output = tf.stack([
            tf.transpose(tf.stack(xres_lst), (1, 0, 2))[:, :, 0],
            tf.transpose(tf.stack(xres_lst), (1, 0, 2))[:, :, 2]
        ], axis=2)
    else:
        final_output = tf.transpose(tf.stack(xres_lst), (1, 0, 2))

    self.y = tf.reshape(self.target_data, [-1, params["n_output"]])
    self.final_output = tf.reshape(final_output, [-1, params["n_output"]])

    tmp = self.final_output - self.y
    loss = tf.nn.l2_loss(tmp)
    self.tvars = tf.trainable_variables()
    l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
    l2_reg = tf.multiply(l2_reg, 1e-4)
    self.cost = tf.reduce_mean(loss) + l2_reg
    self.lr = tf.Variable(0.0, trainable=False)

    self.states = {}
    self.states["Q_t"] = state_Q
    self.states["R_t"] = state_R
    self.pres_lst = pres_lst
    self.qres_lst = qres_lst
    self.rres_lst = rres_lst

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
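# ---------------------------------------------------------------------------
# Hypothetical feed for self.F / self.H above, following the commented-out
# constant-velocity template; the matrices are tiled over the batch because
# the matmuls in the loop are batched. `batch_size` and `model` here are
# illustrative, not from this file.
import numpy as np

batch_size = 32
dt = 1.0
F_cv = np.array([[1, dt, 0, 0],
                 [0, 1,  0, 0],
                 [0, 0,  1, dt],
                 [0, 0,  0, 1]], dtype=np.float32)   # constant-velocity F
H_cv = np.eye(4, dtype=np.float32)                   # observe the full state
F_batch = np.tile(F_cv, (batch_size, 1, 1))          # shape [batch, 4, 4]
H_batch = np.tile(H_cv, (batch_size, 1, 1))
# feed_dict = {model.F: F_batch, model.H: H_batch, ...}
# ---------------------------------------------------------------------------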
def __init__(self, params, is_training=True):
    self.is_training = tf.placeholder(tf.bool)
    self.output_keep_prob = tf.placeholder(tf.float32)
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    grad_clip = params["grad_clip"]

    cell_lst = []
    for i in range(num_layers):
        cell = rnncell.ModifiedLSTMCell(
            rnn_size,
            forget_bias=1,
            initializer=tf.contrib.layers.xavier_initializer(),
            num_proj=None,
            is_training=self.is_training)
        if is_training:  # the original `i > -1` test was always true
            cell = rnncell.DropoutWrapper(
                cell, output_keep_prob=self.output_keep_prob)
        cell_lst.append(cell)
    cell = rnncell.MultiRNNCell(cell_lst)

    NOUT = params['n_output']  # end_of_stroke + prob + 2*(mu + sig) + corr
    self.input_data = tf.placeholder(
        dtype=tf.float32,
        shape=[params["batch_size"], params['seq_length'], params['n_input']])
    self.input_zero = tf.placeholder(
        dtype=tf.float32,
        shape=[params["batch_size"], params['seq_length'], params['n_input']])
    self.repeat_data = tf.placeholder(
        dtype=tf.int32, shape=[params["batch_size"], params['seq_length']])
    self.target_data = tf.placeholder(
        tf.float32, [None, None, params["n_output"]])
    self.initial_state = cell.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)

    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable(
            "output_w1", [rnn_size, rnn_size],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1 = tf.get_variable("output_b1", [rnn_size])
        output_w2 = tf.get_variable(
            "output_w2", [rnn_size, rnn_size],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b2 = tf.get_variable("output_b2", [rnn_size])
        output_w3 = tf.get_variable(
            "output_w3", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b3 = tf.get_variable("output_b3", [NOUT])
        # output_w3_pre = tf.get_variable("output_w3", [NOUT, NOUT], initializer=tf.contrib.layers.xavier_initializer())
        # output_b3_pre = tf.get_variable("output_b3", [NOUT])

    outputs = []
    state = self.initial_state
    pre_state = state
    seq_ls_internal = []
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            # r = self.repeat_data[:, time_step]
            # where_flt = tf.not_equal(r, 0)
            (cell_output, state, ls_internals) = cell(
                self.input_data[:, time_step, :], state)
            seq_ls_internal.append(ls_internals)
            # Optional state carry-over for repeated frames:
            # new_state = []
            # for i in range(params['nlayer']):
            #     s = []
            #     s.append(tf.where(where_flt, state[i][0], pre_state[i][0]))
            #     s.append(tf.where(where_flt, state[i][1], pre_state[i][1]))
            #     new_state.append(tuple(s))
            # state = new_state
            # pre_state = state
            outputs.append(cell_output)

    rnn_output = tf.reshape(
        tf.transpose(tf.stack(outputs), [1, 0, 2]), [-1, params['n_hidden']])
    # norm = tf.nn.batch_normalization(rnn_output)
    final_output = tf.nn.relu(tf.add(tf.matmul(rnn_output, output_w1), output_b1))
    final_output = tf.nn.relu(tf.add(tf.matmul(final_output, output_w2), output_b2))
    final_output = tf.add(tf.matmul(final_output, output_w3), output_b3)
    self.seq_ls_internal = seq_ls_internal

    # Keep only the timesteps flagged as valid in repeat_data.
    flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
    where_flt = tf.not_equal(flt, 0)
    indices = tf.where(where_flt)
    y = tf.reshape(self.target_data, [-1, params["n_output"]])
    self.final_output = tf.gather(final_output, tf.squeeze(indices, [1]))
    self.y = tf.gather(y, tf.squeeze(indices, [1]))

    tmp = self.final_output - self.y
    loss = tf.nn.l2_loss(tmp)
    self.tvars = tf.trainable_variables()
    l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
    l2_reg = tf.multiply(l2_reg, 1e-4)
    self.cost = tf.reduce_mean(loss) + l2_reg

    self.states = {}
    self.states["lstm_t"] = state
    self.lr = tf.Variable(0.0, trainable=False)
    total_parameters = 0
    for variable in self.tvars:
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    self.total_parameters = total_parameters

    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, self.tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, self.tvars))
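# ---------------------------------------------------------------------------
# Usage sketch (assumed training-loop convention, not from this file): since
# self.lr is a non-trainable Variable, a driver script would set it through
# an assign op, typically with per-epoch decay. Variable names below are
# illustrative.
import tensorflow as tf

lr = tf.Variable(0.0, trainable=False)   # mirrors self.lr above
set_lr = tf.assign(lr, 0.001)            # e.g. params['lr'] * decay ** epoch
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(set_lr)                     # run before each epoch's train_op calls
# ---------------------------------------------------------------------------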
def __init__(self, params):
    def identity_matrix(bs, n):
        # Batch of n x n identity matrices (unused in this variant; kept for
        # parity with the covariance-based models). Built from a constant so
        # it is not trainable.
        diag = tf.diag(tf.ones([n], dtype=tf.float32))
        lst = []
        for i in range(bs):
            lst.append(diag)
        return tf.stack(lst)

    batch_size = params["batch_size"]
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    grad_clip = params["grad_clip"]
    self.output_keep_prob = tf.placeholder(tf.float32)
    NOUT = params['n_output']

    # Transition LSTM
    cell = rnncell.ModifiedLSTMCell(
        rnn_size,
        forget_bias=1,
        initializer=tf.contrib.layers.xavier_initializer(),
        num_proj=None)
    cell = rnncell.MultiRNNCell([cell] * num_layers)
    cell = rnncell.DropoutWrapper(cell, output_keep_prob=self.output_keep_prob)
    self.cell = cell

    # LSTM for Kalman gain
    cell_K = rnncell.ModifiedLSTMCell(
        params['Kn_hidden'],
        forget_bias=1,
        initializer=tf.contrib.layers.xavier_initializer(),
        num_proj=None)
    cell_K = rnncell.MultiRNNCell([cell_K] * params['nlayer'])
    cell_K = rnncell.DropoutWrapper(cell_K, output_keep_prob=self.output_keep_prob)
    self.cell_K = cell_K

    self.initial_state = cell.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)
    self.initial_state_K = cell_K.zero_state(
        batch_size=params['batch_size'], dtype=tf.float32)

    self.repeat_data = tf.placeholder(
        dtype=tf.int32, shape=[params["batch_size"], params['seq_length']])
    self._z = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature
    self.target_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], NOUT])  # batch size, seq length, feature

    xres_lst = []
    pres_lst = []
    tres_lst = []
    kres_lst = []

    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable(
            "output_w", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1 = tf.get_variable("output_b", [NOUT])
        output_w1_K = tf.get_variable(
            "output_w_K", [params['Kn_hidden'], NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_K = tf.get_variable("output_b_K", [NOUT])
        output_w1_K_inp = tf.get_variable(
            "output_w_K_inp", [NOUT * 2, params['K_inp']],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1_K_inp = tf.get_variable("output_b_K_inp", [params['K_inp']])

    state_F = self.initial_state
    state_K = self.initial_state_K
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            z = self._z[:, time_step, :]
            if time_step == 0:
                self._x = z
            with tf.variable_scope("transitionF"):
                (pred, state_F, ls_internals) = cell(self._x, state_F)
                self._x = tf.matmul(pred, output_w1) + output_b1
            with tf.variable_scope("gainK"):
                # The gain LSTM sees the prior and the measurement side by side.
                inp = tf.concat([self._x, z], 1)
                emb = tf.nn.relu(
                    tf.matmul(inp, output_w1_K_inp) + output_b1_K_inp)
                (pred_val, state_K, ls_internals) = cell_K(emb, state_K)
                K = tf.nn.tanh(tf.matmul(pred_val, output_w1_K) + output_b1_K)
            self._y = z - self._x
            # predict new x with residual scaled by the learned, elementwise gain
            self._x = self._x + tf.multiply(K, self._y)
            xres_lst.append(self._x)

    final_output = tf.reshape(
        tf.transpose(tf.stack(xres_lst), [1, 0, 2]), [-1, params['n_output']])

    # Keep only the timesteps flagged as valid in repeat_data.
    flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
    where_flt = tf.not_equal(flt, 0)
    indices = tf.where(where_flt)

    y = tf.reshape(self.target_data, [-1, params["n_output"]])
    self.final_output = tf.gather(final_output, tf.squeeze(indices, [1]))
    self.y = tf.gather(y, tf.squeeze(indices, [1]))

    tmp = self.final_output - self.y
    loss = tf.nn.l2_loss(tmp)
    self.tvars = tf.trainable_variables()
    l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
    l2_reg = tf.multiply(l2_reg, 1e-4)
    self.cost = tf.reduce_mean(loss) + l2_reg

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    total_parameters = 0
    for variable in self.tvars:
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    self.total_parameters = total_parameters

    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.states = {}
    self.states["F_t"] = state_F
    self.states["K_t"] = state_K
    self.xres_lst = xres_lst
    self.pres_lst = pres_lst
    self.tres_lst = tres_lst
    self.kres_lst = kres_lst
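# ---------------------------------------------------------------------------
# Reference sketch (illustrative, NumPy): unlike the covariance-based models
# above, this variant learns the gain directly. tanh bounds each
# per-dimension gain in (-1, 1), and the update blends prior and measurement
# elementwise; all names below are toy values.
import numpy as np

x_prior = np.array([0.0, 1.0], dtype=np.float32)   # transition LSTM output
z_meas = np.array([0.5, 0.5], dtype=np.float32)    # current measurement
K_gain = np.tanh(np.array([2.0, -0.1]))            # learned gain per dimension
x_post = x_prior + K_gain * (z_meas - x_prior)     # K ~ 1 trusts z, K ~ 0 trusts x
# ---------------------------------------------------------------------------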