def make_encoder(sequence, output_dim, seed):
    """Encode a sequence with a statically unrolled GRU and return its final state.

    Args:
        sequence: Tensor that is unstacked along axis 1 into per-step inputs;
            its static size on axis 1 must be known, and axis 0 is used as
            the batch size for the zero initial state.
        output_dim: Number of units of the GRU cell.
        seed: Seed passed to the uniform kernel initializer.

    Returns:
        The final state produced by ``tf.nn.static_rnn``.
    """
    kernel_init = tf.random_uniform_initializer(
        minval=-0.05, maxval=0.05, dtype=tf.float32, seed=seed)
    cell = GRUCell(
        num_units=output_dim,
        kernel_initializer=kernel_init,
        bias_initializer=tf.zeros_initializer())

    # static_rnn wants a Python list with one tensor per time step.
    step_inputs = tf.unstack(sequence, sequence.shape[1].value, 1)
    start_state = cell.zero_state(tf.shape(sequence)[0], dtype=tf.float32)

    _, final_state = tf.nn.static_rnn(
        cell=cell,
        inputs=step_inputs,
        initial_state=start_state,
    )
    return final_state
class BahdanauRnnCoverageMulAttention(BahdanauAttention):
    """BahdanauAttention extended with a coverage state updated by an RNN.

    The coverage state is held by a GRU with one row per
    (batch entry, memory position) pair, i.e. every position has its own
    coverage vector.

    Reference: https://arxiv.org/pdf/1601.04811.pdf
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 normalize=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="BahdanauCoverageAttention"):
        """Set up the base attention plus the coverage GRU and its projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All remaining arguments are forwarded unchanged to
        ``BahdanauAttention.__init__``.
        """
        super(BahdanauRnnCoverageMulAttention, self).__init__(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=normalize,
            probability_fn=probability_fn,
            score_mask_value=score_mask_value,
            dtype=dtype,
            name=name)
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one zero row per (batch, position) pair.
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size * self._alignments_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            self.coverage_layer = layers_core.Dense(
                num_units,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def __call__(self, query, state):
        """Score the memory against ``query`` and advance the coverage state.

        Args:
            query: Decoder query tensor.
            state: Previous attention state, handed to the probability function.

        Returns:
            Tuple ``(alignments, next_state)``; both are the new alignments.
        """
        with variable_scope.variable_scope(None, "bahdanau_coverage_attention", [query]):
            if self.query_layer:
                processed_query = self.query_layer(query)
            else:
                processed_query = query

            projected_coverage = self.coverage_layer(self.coverage_state)
            projected_coverage = array_ops.reshape(
                projected_coverage,
                [self.batch_size, self._alignments_size, -1])
            score = _bahdanau_coverage_mul_score(
                processed_query, self._keys, projected_coverage, self._normalize)

        alignments = self._probability_fn(score, state)

        # Update coverage_state: the GRU input is [alignments, query] ...
        gru_input = concat([alignments, query], 1)
        # ... repeated alignments_size times so every position gets a copy ...
        gru_input = tf.contrib.seq2seq.tile_batch(
            gru_input, multiplier=self._alignments_size)
        # ... followed by that position's memory value, flattened to 2-D.
        flat_values = array_ops.reshape(
            self.values, [self.batch_size * self._alignments_size, -1])
        gru_input = concat([gru_input, flat_values], 1)

        _, new_coverage = self.coverage_rnn_cell(gru_input, self.coverage_state)
        self.coverage_state = new_coverage

        return alignments, alignments
class LuongCoverageAttention(LuongAttention):
    """LuongAttention extended with coverage.

    Coverage can be seen as a weight on the score, so that memory positions
    that were already covered heavily receive a correspondingly lower score.
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 scale=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="LuongAttention"):
        """Set up the base attention plus the coverage GRU and its projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All remaining arguments are forwarded unchanged to
        ``LuongAttention.__init__``.
        """
        super(LuongCoverageAttention, self).__init__(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            scale=scale,
            probability_fn=probability_fn,
            score_mask_value=score_mask_value,
            dtype=dtype,
            name=name
        )
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one zero row per (batch, position) pair.
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size * self._alignments_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            # Unlike the Bahdanau variants, the projection width here is the
            # alignments size rather than num_units.
            self.coverage_layer = layers_core.Dense(
                self._alignments_size,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def __call__(self, query, state):
        """Score the memory against ``query`` and advance the coverage state.

        Args:
            query: Decoder query tensor.
            state: Previous attention state, handed to the probability function.

        Returns:
            Tuple ``(alignments, next_state)``; both are the new alignments.
        """
        with variable_scope.variable_scope(None, "luong_attention", [query]):
            projected_coverage = self.coverage_layer(self.coverage_state)
            projected_coverage = array_ops.reshape(
                projected_coverage,
                [self.batch_size, self._alignments_size, -1])
            score = _luong_coverage_score(
                query, self._keys, projected_coverage, self._scale)

        alignments = self._probability_fn(score, state)

        # Update coverage_state: the GRU input is [alignments, query] ...
        gru_input = concat([alignments, query], 1)
        # ... repeated alignments_size times so every position gets a copy ...
        gru_input = tf.contrib.seq2seq.tile_batch(
            gru_input, multiplier=self._alignments_size)
        # ... followed by that position's memory value, flattened to 2-D.
        flat_values = array_ops.reshape(
            self.values, [self.batch_size * self._alignments_size, -1])
        gru_input = concat([gru_input, flat_values], 1)

        _, new_coverage = self.coverage_rnn_cell(gru_input, self.coverage_state)
        self.coverage_state = new_coverage

        return alignments, alignments
def make_encoder(sequence, output_dim, seed):
    """Encode a sequence with a dynamically unrolled GRU and return its final state.

    Args:
        sequence: 3-D tensor; axes 0 and 1 are swapped before it is fed to
            ``tf.nn.dynamic_rnn`` in time-major mode, and axis 0 is used as
            the batch size for the zero initial state.
        output_dim: Number of units of the GRU cell.
        seed: Seed passed to the uniform kernel initializer.

    Returns:
        The final state produced by ``tf.nn.dynamic_rnn``.
    """
    kernel_init = tf.random_uniform_initializer(
        minval=-0.05, maxval=0.05, dtype=tf.float32, seed=seed)
    cell = GRUCell(
        num_units=output_dim,
        kernel_initializer=kernel_init,
        bias_initializer=tf.zeros_initializer())

    # dynamic_rnn is run with time_major=True, so move the time axis first.
    time_major_inputs = tf.transpose(sequence, [1, 0, 2])
    start_state = cell.zero_state(tf.shape(sequence)[0], dtype=tf.float32)

    _, final_state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=time_major_inputs,
        initial_state=start_state,
        time_major=True)
    return final_state
class BahdanauRnnCoverageAttention(BahdanauAttention):
    """BahdanauAttention extended with a coverage state updated by an RNN.

    The coverage state is a single GRU state per batch entry.
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 normalize=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="BahdanauCoverageAttention"):
        """Set up the base attention plus the coverage GRU and its projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All remaining arguments are forwarded unchanged to
        ``BahdanauAttention.__init__``.
        """
        super(BahdanauRnnCoverageAttention, self).__init__(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=normalize,
            probability_fn=probability_fn,
            score_mask_value=score_mask_value,
            dtype=dtype,
            name=name)
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one zero row per batch entry.
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            self.coverage_layer = layers_core.Dense(
                num_units,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def __call__(self, query, state):
        """Score the memory against ``query`` and advance the coverage state.

        Args:
            query: Decoder query tensor.
            state: Previous attention state, handed to the probability function.

        Returns:
            Tuple ``(alignments, next_state)``; both are the new alignments.
        """
        with variable_scope.variable_scope(None, "bahdanau_coverage_attention", [query]):
            if self.query_layer:
                processed_query = self.query_layer(query)
            else:
                processed_query = query
            projected_coverage = self.coverage_layer(self.coverage_state)
            score = _bahdanau_coverage_score(
                processed_query, self._keys, projected_coverage, self._normalize)

        alignments = self._probability_fn(score, state)

        # Update coverage_state from the new alignments and the raw query.
        gru_input = concat([alignments, query], 1)
        _, new_coverage = self.coverage_rnn_cell(gru_input, self.coverage_state)
        self.coverage_state = new_coverage

        return alignments, alignments
b.append(sample[-1, 1] - sample[-1, 0]) return a1,a2,b dataSet=tf.data.Dataset.from x1 = tf.placeholder(shape=shape, dtype=tf.float16) x2 = tf.placeholder(shape=[batch_size], dtype=tf.float16) y_ = tf.placeholder(shape=[batch_size], dtype=tf.float16) training = tf.placeholder(dtype=tf.bool) X = tf.layers.batch_normalization(x1, training=True, scale=False, center=False, axis=[0, -1]) # X=x1 gru = GRUCell(num_units=4, reuse=tf.AUTO_REUSE, activation=tf.nn.elu, kernel_initializer=tf.glorot_normal_initializer(), dtype=tf.float16) state = gru.zero_state(batch_size, dtype=tf.float16) with tf.variable_scope('RNN'): for timestep in range(long): if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output, state) = gru(X[:, timestep], state) out_put = state out = tf.nn.relu(out_put) y = ml.layer_basic(out, 1)[:, 0] loss = tf.cast(tf.reduce_mean((y - y_) * (y - y_)),dtype=tf.float16) # optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss) # optimizer_min = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
sample = data[i:i + long] a.append(sample[:-1, :5]) b.append(sample[:-1, 5:10]) c.append(sample[-1][1]) return a, b, c x = tf.placeholder(shape=[batch_size, long - 1, 5], dtype=tf.float16) y = tf.placeholder(shape=[batch_size, long - 1, 5], dtype=tf.float16) z_ = tf.placeholder(shape=[batch_size], dtype=tf.float16) X = tf.nn.sigmoid(x) - 0.5 Y = tf.nn.sigmoid(y) - 0.5 gru_x = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu) state_x = gru_x.zero_state(batch_size, dtype=tf.float16) with tf.variable_scope('RNN_x'): for timestep in range(long - 1): if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output_x, state_x) = gru_x(X[:, timestep], state_x) out_put_x = state_x gru_y = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu) state_y = gru_y.zero_state(batch_size, dtype=tf.float16) with tf.variable_scope('RNN_y'): for timestep in range(long - 1): # be careful if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output_y, state_y) = gru_y(Y[:, timestep], state_y) out_put_y = state_y
next_element = iterator.get_next() train_iterator = train_dataset.make_one_shot_iterator() test_iterator = test_dataset.make_initializable_iterator() x, y_ = iterator.get_next() X = tf.reshape(x, shape=[batch_size, x.shape[1], x.shape[2]]) # X = tf.layers.batch_normalization(x, training=True, scale=False, center=False, axis=[0, -1]) gru = GRUCell(num_units=128, reuse=tf.AUTO_REUSE, activation=tf.nn.relu, kernel_initializer=tf.glorot_normal_initializer(), dtype=dtype) state = gru.zero_state(batch_size, dtype=dtype) with tf.variable_scope('RNN'): for timestep in range(long): if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output, state) = gru(X[:, timestep], state) out_put = state out = tf.nn.relu(out_put) y = tf.layers.dense(out, 1)[:, 0] loss = tf.reduce_mean((y - y_) * (y - y_)) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops):
def __init__(self, batch_size, max_seq_length, vocab_size,
             start_token_id=1, end_token_id=2, pad_token_id=0,
             unk_token_id=3, emb_size=100, memory_size=100,
             keep_prob=0.5, temperature=0.5, antilm=0.55,
             learning_rate=0.001, grad_clip=5.0, infer=False):
    """Build the placeholders, cells, loss and training op of the model.

    Args:
        batch_size: Fixed batch size used for every placeholder.
        max_seq_length: Fixed width of the input/output id placeholders.
        vocab_size: Number of rows of the embedding matrix.
        start_token_id, end_token_id, pad_token_id, unk_token_id: Special
            token ids, stored on the instance.
        emb_size: Embedding width; also the unit count of both encoder GRUs.
        memory_size: Unit count of the decoder cell; the context cell uses
            twice this many units.
        keep_prob: Dropout keep probability; dropout is applied only when
            it is below 1 and ``infer`` is false.
        temperature, antilm: Stored on the instance; not used in this method.
        learning_rate: Learning rate of the Adam optimizer.
        grad_clip: Global-norm threshold for gradient clipping.
        infer: When true, dropout is disabled.
    """
    self._batch_size = batch_size
    self._vocab_size = vocab_size
    self._memory_size = memory_size
    self._max_seq_length = max_seq_length
    # Each special-token id is stored exactly once (the original assigned
    # the start/end ids twice).
    self._start_token_id = start_token_id
    self._end_token_id = end_token_id
    self._pad_token_id = pad_token_id
    self._unk_token_id = unk_token_id
    self._keep_prob = keep_prob
    self._temperature = temperature
    self._infer = infer
    self._antilm = antilm

    self.input_data = tf.placeholder(
        tf.int32, [batch_size, max_seq_length], name="input_data")
    self.input_lengths = tf.placeholder(
        tf.int32, shape=[batch_size], name="input_lengths")
    self.output_data = tf.placeholder(
        tf.int32, [batch_size, max_seq_length], name='output_data')
    self.output_lengths = tf.placeholder(
        tf.int32, [batch_size], name='output_lengths')
    self.global_step = tf.Variable(0, name="global_step", trainable=False)

    # Evaluated once instead of repeating the same condition twice.
    use_dropout = self._keep_prob < 1 and not infer

    with tf.device("/cpu:0"):
        self.embedding = tf.get_variable("embedding", [vocab_size, emb_size])
        inputs = tf.nn.embedding_lookup(self.embedding, self.input_data)
    if use_dropout:
        inputs = tf.nn.dropout(inputs, keep_prob=self._keep_prob)

    with tf.variable_scope("encoder", initializer=glorot()):
        fw_cell = GRUCell(emb_size)
        bw_cell = GRUCell(emb_size)
        if use_dropout:
            fw_cell = DropoutWrapper(fw_cell, output_keep_prob=self._keep_prob)
            bw_cell = DropoutWrapper(bw_cell, output_keep_prob=self._keep_prob)

    with tf.variable_scope("context", initializer=glorot()):
        ctx_cell = GRUCell(memory_size * 2)
        self.ctx_w = tf.get_variable(
            "context_w", [memory_size * 2, memory_size])
        self.ctx_b = tf.get_variable(
            "context_b", [memory_size],
            initializer=init_ops.zeros_initializer())
        self.initial_state = ctx_cell.zero_state(self._batch_size, tf.float32)

    with tf.variable_scope("decoder", initializer=glorot()):
        # GRU with conditional distribution in sec 2.2 of
        # https://arxiv.org/pdf/1406.1078.pdf
        dec_cell = GRUCellCond(memory_size)

    # NOTE(review): the source layout was lost; this call is assumed to sit
    # outside the "decoder" scope, mirroring the encoder/context scopes
    # above. Confirm against the original file before relying on variable
    # names or initializers.
    self.outputs, self.output_ids, _, self.final_state = self.seq2seq(
        inputs, fw_cell, bw_cell, ctx_cell, dec_cell)

    loss = self.get_loss(self.outputs)
    self.loss = tf.reduce_mean(loss)
    tf.summary.scalar('loss', self.loss)

    tvars = tf.trainable_variables()
    print("parameter size:", _count_param_size(tvars))

    # Clip by global norm before applying the Adam update.
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.loss, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    self.train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)
class MultiMemoryRNN(RNNCell):
    """RNN cell that mixes a core GRU with several memory cells through gates.

    The packed state is the core GRU state followed by the state of every
    memory cell, concatenated along axis 1.
    """

    def __init__(self, memories, size):
        """Store the memory cells and create the core GRU.

        Args:
            memories: Sequence of cells, each exposing ``state_size``,
                ``zero_state`` and being callable as
                ``cell(inputs, state, scope)``.
            size: Number of units of the core GRU; also the output size.
        """
        self._rnn_memories = memories
        self._cell = GRUCell(size)
        self._size = size

    def __call__(self, inputs, state, scope=None):
        """Run one step and return ``(output, new_state)``."""
        with tf.variable_scope(scope or type(self).__name__):
            # Unpack the flat state: core GRU slice first, then one slice
            # per memory cell, in order.
            core_width = self._cell.state_size
            cell_state = tf.slice(state, [0, 0], [-1, core_width])
            memory_states = []
            offset = core_width
            for memory in self._rnn_memories:
                width = memory.state_size
                memory_states.append(tf.slice(state, [0, offset], [-1, width]))
                offset += width

            cell_output, _ = self._cell(inputs, cell_state)

            # read from memories
            memory_input = tf.concat(axis=1, values=[cell_output, inputs])
            memory_results = []
            for index, (memory, mem_state) in enumerate(
                    zip(self._rnn_memories, memory_states)):
                memory_results.append(
                    memory(memory_input, mem_state, "memory" + str(index)))

            slot_count = len(self._rnn_memories) + 1

            # [B, N+1, S]
            candidates = [tf.expand_dims(result[0], 1) for result in memory_results]
            candidates.append(tf.expand_dims(cell_output, 1))
            stacked = tf.concat(axis=1, values=candidates)

            # [B, N+1]
            gates = tf.contrib.layers.fully_connected(
                tf.reshape(stacked, [-1, slot_count * self._size]),
                slot_count,
                activation_fn=tf.sigmoid,
                weights_initializer=None,
                biases_initializer=tf.constant_initializer(0.0))

            # [B, N+1, S] -> gate every candidate, then sum over candidates.
            gated = stacked * tf.expand_dims(gates, 2)
            output = tf.reduce_sum(gated, [1])

            # Repack the state; the core cell's output is stored as its state.
            next_parts = [cell_output]
            next_parts.extend(result[1] for result in memory_results)
            new_state = tf.concat(axis=1, values=next_parts)

            return output, new_state

    def zero_state(self, batch_size, dtype):
        """Return the concatenated zero states of the core GRU and all memories."""
        parts = [self._cell.zero_state(batch_size, dtype)]
        for memory in self._rnn_memories:
            parts.append(memory.zero_state(batch_size, dtype))
        return tf.concat(axis=1, values=parts)

    @property
    def state_size(self):
        """Total width of the packed state."""
        memory_width = sum(m.state_size for m in self._rnn_memories)
        return self._cell.state_size + memory_width

    @property
    def output_size(self):
        """Width of the cell output (the ``size`` given at construction)."""
        return self._size
sample = data[i:i + long] a.append(sample[:-1, :11]) b.append(sample[:-1, :11]) c.append(sample[-1][:4]) return a, b, c x = tf.placeholder(shape=[batch_size, long - 1, 10], dtype=tf.float16) y = tf.placeholder(shape=[batch_size, long - 1, 10], dtype=tf.float16) z_ = tf.placeholder(shape=[batch_size, 4], dtype=tf.float16) X = tf.nn.sigmoid(x) - 0.5 Y = tf.nn.sigmoid(y) - 0.5 gru_x_open = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu) state_x_open = gru_x_open.zero_state(batch_size, dtype=tf.float16) with tf.variable_scope('RNN_x_open'): for timestep in range(long - 1): if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output_x_open, state_x_open) = gru_x_open(X[:, timestep], state_x_open) out_put_x_open = state_x_open gru_x_high = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu) state_x_high = gru_x_high.zero_state(batch_size, dtype=tf.float16) with tf.variable_scope('RNN_x_high'): for timestep in range(long - 1): if timestep == 1: tf.get_variable_scope().reuse_variables() (cell_output_x_high,
class WGAN(object):
    """GRU-based WGAN used to generate sequences and impute missing values.

    A GRU generator and a GRU discriminator are trained adversarially with
    weight clipping; afterwards a per-batch noise variable is tuned so that
    the generated sequence matches the observed entries (see `imputation`).

    NOTE(review): the layout of this class was reconstructed from source
    whose indentation had been lost. Places where the nesting could not be
    derived from the code itself carry a NOTE(review) comment.
    """

    model_name = "WGAN_no_mask"  # name for checkpoint

    def __init__(self, sess, args, datasets):
        """Copy hyper-parameters from ``args`` and create the two GRU cells.

        Args:
            sess: TensorFlow session used for every ``run`` call.
            args: Namespace providing the hyper-parameters read below.
            datasets: Data provider exposing ``maxLength``, ``x``,
                ``shuffle`` and ``nextBatch``.
        """
        self.sess = sess
        self.isbatch_normal = args.isBatch_normal
        self.lr = args.lr
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.n_inputs = args.n_inputs  # size of the last axis of every sequence tensor
        self.n_steps = datasets.maxLength  # time steps
        self.n_hidden_units = args.n_hidden_units  # neurons in hidden layer
        self.n_classes = args.n_classes  # stored only; not used in this class
        self.gpus = args.gpus
        self.pretrain_epoch = args.pretrain_epoch
        self.impute_iter = args.impute_iter
        self.g_loss_lambda = args.g_loss_lambda
        self.datasets = datasets
        self.z_dim = args.z_dim  # dimension of noise-vector

        # WGAN_GP parameter
        self.lambd = 0.25  # The higher value, the more stable, but the slower convergence
        self.disc_iters = args.disc_iters  # The number of critic iterations for one-step of generator

        # train
        self.learning_rate = args.lr
        self.beta1 = args.beta1
        self.Gru_g = GRUCell(self.n_hidden_units)
        self.Gru_d = GRUCell(self.n_hidden_units)

        self.num_batches = len(datasets.x) // self.batch_size

    def pretrainG(self, X, X_lengths, Keep_prob, reuse=False):
        """Run the generator GRU over real sequences (pre-training graph).

        Args:
            X: Input tensor, reshaped to ``[-1, n_steps, n_inputs]``.
            X_lengths: Per-example sequence lengths for ``dynamic_rnn``.
            Keep_prob: Dropout keep probability applied to the GRU outputs.
            reuse: Passed to the ``g_enerator`` variable scope.

        Returns:
            Predictions reshaped to ``[-1, n_steps, n_inputs]``.
        """
        with tf.variable_scope("g_enerator", reuse=reuse):
            # The RNN cell's variable scope is chosen by TensorFlow, so the
            # enclosing scope name must contain 'g_' or 'd_' for the
            # name-based variable grouping in build_model to include the
            # cell weights.
            w_out = tf.get_variable(
                "g_w_out", shape=[self.n_hidden_units, self.n_inputs],
                initializer=tf.random_normal_initializer())
            b_out = tf.get_variable(
                "g_b_out", shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))
            # g_w_z / g_b_z are not used here; they are created so that
            # generator() can later be built with reuse=True.
            w_z = tf.get_variable(
                "g_w_z", shape=[self.z_dim, self.n_inputs],
                initializer=tf.random_normal_initializer())
            b_z = tf.get_variable(
                "g_b_z", shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))

            X_in = tf.reshape(X, [-1, self.n_steps, self.n_inputs])
            init_state = self.Gru_g.zero_state(self.batch_size, dtype=tf.float32)  # all-zero state
            outputs, final_state = tf.nn.dynamic_rnn(
                self.Gru_g, X_in,
                initial_state=init_state,
                sequence_length=X_lengths,
                time_major=False)
            # outputs: batch_size * n_steps * n_hidden_units
            outputs = tf.reshape(outputs, [-1, self.n_hidden_units])
            out_predict = tf.matmul(tf.nn.dropout(outputs, Keep_prob), w_out) + b_out
            out_predict = tf.reshape(out_predict, [-1, self.n_steps, self.n_inputs])
            return out_predict

    def discriminator(self, X, X_lengths, Keep_prob, reuse=False):
        """Score sequences with the discriminator GRU.

        Args:
            X: Input tensor, reshaped to ``[batch_size, n_steps, n_inputs]``.
            X_lengths: Per-example sequence lengths for ``dynamic_rnn``.
            Keep_prob: Dropout keep probability applied to the final state.
            reuse: Passed to the ``d_iscriminator`` variable scope.

        Returns:
            Tuple ``(out, out_logit)``: sigmoid output and raw logit.
        """
        with tf.variable_scope("d_iscriminator", reuse=reuse):
            w_out = tf.get_variable(
                "d_w_out", shape=[self.n_hidden_units, 1],
                initializer=tf.random_normal_initializer())
            b_out = tf.get_variable(
                "d_b_out", shape=[1, ],
                initializer=tf.constant_initializer(0.001))

            X_in = tf.reshape(X, [self.batch_size, self.n_steps, self.n_inputs])
            init_state = self.Gru_d.zero_state(self.batch_size, dtype=tf.float32)  # all-zero state
            outputs, final_state = tf.nn.dynamic_rnn(
                self.Gru_d, X_in,
                initial_state=init_state,
                sequence_length=X_lengths,
                time_major=False)

            # final_state: batch_size * n_hidden_units
            # The output at the last padded step must not be used; the state
            # at each sequence's true length is needed. An earlier version
            # used the last output, which made the result always equal b_out.
            out_logit = tf.matmul(tf.nn.dropout(final_state, Keep_prob), w_out) + b_out
            out = tf.nn.sigmoid(out_logit)
            return out, out_logit

    def generator(self, z, Keep_prob, is_training=True, reuse=False):
        """Unroll the generator GRU for ``n_steps`` steps starting from noise.

        The projected noise is the first input; every later input is the
        previous prediction plus the projected noise.

        Args:
            z: Noise tensor of shape ``[batch_size, z_dim]``.
            Keep_prob: Dropout keep probability applied to the GRU outputs.
            is_training: Forwarded to ``bn`` when batch norm is enabled.
            reuse: Passed to the ``g_enerator`` variable scope.

        Returns:
            Per-step predictions concatenated along axis 1.
        """
        with tf.variable_scope("g_enerator", reuse=reuse):
            w_out = tf.get_variable(
                "g_w_out", shape=[self.n_hidden_units, self.n_inputs],
                initializer=tf.random_normal_initializer())
            b_out = tf.get_variable(
                "g_b_out", shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))
            w_z = tf.get_variable(
                "g_w_z", shape=[self.z_dim, self.n_inputs],
                initializer=tf.random_normal_initializer())
            b_z = tf.get_variable(
                "g_b_z", shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))

            # Change z's dimension: batch_size*z_dim --> batch_size*n_inputs
            x = tf.matmul(z, w_z) + b_z
            X_in = tf.reshape(x, [-1, 1, self.n_inputs])

            init_state = self.Gru_g.zero_state(self.batch_size, dtype=tf.float32)  # all-zero state
            seq_len = tf.constant(1, shape=[self.batch_size])

            outputs, final_state = tf.nn.dynamic_rnn(
                self.Gru_g, X_in,
                initial_state=init_state,
                sequence_length=seq_len,
                time_major=False)
            init_state = final_state
            # outputs: batch_size * 1 * n_hidden_units
            outputs = tf.reshape(outputs, [-1, self.n_hidden_units])
            # fully connected output layer
            out_predict = tf.matmul(tf.nn.dropout(outputs, Keep_prob), w_out) + b_out
            out_predict = tf.reshape(out_predict, [-1, 1, self.n_inputs])

            # Collect the per-step predictions and concatenate once at the
            # end instead of re-concatenating the whole result every step.
            step_predictions = [out_predict]

            for i in range(1, self.n_steps):
                out_predict = tf.reshape(out_predict, [self.batch_size, self.n_inputs])
                # add the projected noise z to the previous output
                out_predict = out_predict + tf.matmul(z, w_z) + b_z
                X_in = tf.reshape(out_predict, [-1, 1, self.n_inputs])

                outputs, final_state = tf.nn.dynamic_rnn(
                    self.Gru_g, X_in,
                    initial_state=init_state,
                    sequence_length=seq_len,
                    time_major=False)
                init_state = final_state
                outputs = tf.reshape(outputs, [-1, self.n_hidden_units])
                out_predict = tf.matmul(tf.nn.dropout(outputs, Keep_prob), w_out) + b_out
                out_predict = tf.reshape(out_predict, [-1, 1, self.n_inputs])
                step_predictions.append(out_predict)

            total_result = tf.concat(step_predictions, 1)

            if self.isbatch_normal:
                with tf.variable_scope("g_bn", reuse=tf.AUTO_REUSE):
                    total_result = bn(total_result, is_training=is_training, scope="g_bn_imple")

            return total_result

    def impute(self):
        """Return the trainable noise variable that is tuned during imputation."""
        with tf.variable_scope("impute", reuse=tf.AUTO_REUSE):
            z_need_tune = tf.get_variable(
                "z_needtune", shape=[self.batch_size, self.z_dim],
                initializer=tf.random_normal_initializer(mean=0, stddev=0.1))
            return z_need_tune

    def build_model(self):
        """Create placeholders, losses, optimizers, clip ops and summaries."""
        self.keep_prob = tf.placeholder(tf.float32)
        self.x = tf.placeholder(tf.float32, [self.batch_size, self.n_steps, self.n_inputs])
        self.m = tf.placeholder(tf.float32, [self.batch_size, self.n_steps, self.n_inputs])
        self.x_lengths = tf.placeholder(tf.int32, shape=[self.batch_size, ])
        self.z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim], name='z')

        # ---- Loss functions ----
        # pretrainG is built first with reuse=False: it creates every
        # generator variable that the later reuse=True calls rely on.
        Pre_out = self.pretrainG(self.x, self.x_lengths, self.keep_prob, reuse=False)

        self.pretrain_loss = (
            tf.reduce_sum(tf.square(tf.multiply(Pre_out, self.m) - self.x))
            / tf.cast(tf.reduce_sum(self.x_lengths), tf.float32))

        D_real, D_real_logits = self.discriminator(
            self.x, self.x_lengths, self.keep_prob, reuse=False)

        g_x = self.generator(self.z, self.keep_prob, is_training=True, reuse=True)

        D_fake, D_fake_logits = self.discriminator(
            g_x, self.x_lengths, self.keep_prob, reuse=True)

        # ---- Imputation loss ----
        self.z_need_tune = self.impute()

        impute_out = self.generator(self.z_need_tune, self.keep_prob, is_training=False, reuse=True)

        impute_fake, impute_fake_logits = self.discriminator(
            impute_out, self.x_lengths, self.keep_prob, reuse=True)

        # loss for imputation
        self.mask_loss = tf.reduce_mean(tf.square(tf.multiply(impute_out, self.m) - self.x))
        self.g_impute_loss = -tf.reduce_mean(impute_fake_logits)
        self.impute_loss = self.mask_loss + self.g_loss_lambda * self.g_impute_loss

        self.impute_out = impute_out

        # the imputed results: generated values where m == 0, fed x elsewhere
        self.imputed = tf.multiply((1 - self.m), self.impute_out) + self.x

        # get loss for discriminator
        d_loss_real = - tf.reduce_mean(D_real_logits)
        d_loss_fake = tf.reduce_mean(D_fake_logits)
        self.d_loss = d_loss_real + d_loss_fake

        # get loss for generator
        self.g_loss = - d_loss_fake

        # ---- Training ----
        # divide trainable variables into a group for D and a group for G
        t_vars = tf.trainable_variables()
        d_vars = [var for var in t_vars if 'd_' in var.name]
        g_vars = [var for var in t_vars if 'g_' in var.name]
        z_vars = [self.z_need_tune]

        # No L2 regularisation is added because dropout is already in use.

        # optimizers
        # Batch normalization registers update ops, so the training ops
        # must depend on the UPDATE_OPS collection.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_optim = tf.train.AdamOptimizer(
                self.learning_rate, beta1=self.beta1
            ).minimize(self.d_loss, var_list=d_vars)
            self.g_optim = tf.train.AdamOptimizer(
                self.learning_rate * self.disc_iters, beta1=self.beta1
            ).minimize(self.g_loss, var_list=g_vars)
            self.g_pre_optim = tf.train.AdamOptimizer(
                self.learning_rate * 2, beta1=self.beta1
            ).minimize(self.pretrain_loss, var_list=g_vars)
        # NOTE(review): the imputation optimizer is assumed to sit outside
        # the control_dependencies block above (imputation() never feeds
        # self.z, which the generator's batch-norm update ops depend on).
        # Confirm against the original layout.
        self.impute_optim = tf.train.AdamOptimizer(
            self.learning_rate * 7, beta1=self.beta1
        ).minimize(self.impute_loss, var_list=z_vars)

        # clip weight
        self.clip_all_vals = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in t_vars]
        self.clip_D = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in d_vars]
        self.clip_G = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in g_vars]

        # ---- Summary ----
        d_loss_real_sum = tf.summary.scalar("d_loss_real", d_loss_real)
        d_loss_fake_sum = tf.summary.scalar("d_loss_fake", d_loss_fake)
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        g_pretrain_loss_sum = tf.summary.scalar("g_pretrain_loss", self.pretrain_loss)

        # final summary operations
        self.impute_sum = tf.summary.scalar("impute_loss", self.impute_loss)
        self.g_sum = g_loss_sum
        self.g_pretrain_sum = tf.summary.merge([g_pretrain_loss_sum])
        self.d_sum = tf.summary.merge([d_loss_real_sum, d_loss_fake_sum, d_loss_sum])

    def optim(self, learning_rate, beta, loss, var):
        """Return an Adam training op whose gradients are clipped to norm 5."""
        optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta)
        grads = optimizer.compute_gradients(loss, var_list=var)
        # Variables without a gradient are passed through untouched.
        clipped = [
            (tf.clip_by_norm(g, 5), v) if g is not None else (g, v)
            for g, v in grads
        ]
        train_op = optimizer.apply_gradients(clipped)
        return train_op

    def pretrain(self, start_epoch, counter, start_time):
        """Pre-train the generator on the masked reconstruction loss.

        Args:
            start_epoch: First epoch index; nothing happens when it is not
                below ``self.pretrain_epoch``.
            counter: Step counter; incremented locally only, the caller's
                value is not updated.
            start_time: Reference time for the elapsed-time printout.
        """
        if start_epoch >= self.pretrain_epoch:
            return

        self.pretrainG_fig_loss = plt.figure()
        self.pretrainG_ax_loss = self.pretrainG_fig_loss.add_subplot(1, 1, 1)
        p_loss_list = []
        for epoch in range(start_epoch, self.pretrain_epoch):
            # get batch data
            self.datasets.shuffle(self.batch_size, True)
            idx = 0
            for data_x, data_missing, data_m, data_detla, data_x_lengths, _ in self.datasets.nextBatch():
                # pretrain
                _, summary_str, p_loss = self.sess.run(
                    [self.g_pre_optim, self.g_pretrain_sum, self.pretrain_loss],
                    feed_dict={self.x: data_x,
                               self.m: data_m,
                               self.x_lengths: data_x_lengths,
                               self.keep_prob: 0.5})
                p_loss_list.append(p_loss)
                self.pretrain_plot_loss(p_loss_list)

                counter += 1

                # display training status
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, pretrain_loss: %.8f" \
                      % (epoch, idx, self.num_batches, time.time() - start_time, p_loss))
                idx += 1

    def train(self):
        """Initialise all variables, pre-train G, then run adversarial training."""
        # graph inputs for visualize training results
        self.sample_z = np.random.standard_normal(size=(self.batch_size, self.z_dim))

        # initialize all variables
        tf.global_variables_initializer().run()
        start_epoch = 0
        counter = 1

        # loop for epoch
        start_time = time.time()

        self.pretrain(start_epoch, counter, start_time)
        if start_epoch < self.pretrain_epoch:
            start_epoch = self.pretrain_epoch

        # d_loss_plot, g_loss_plot
        self.gan_fig_loss = plt.figure()
        self.gan_ax_loss = self.gan_fig_loss.add_subplot(1, 1, 1)
        d_loss_list = []
        g_loss_list = []
        # Start value so that d_loss is defined on steps where D is skipped.
        d_loss = 0
        for epoch in range(start_epoch, self.epoch):
            # get batch data
            self.datasets.shuffle(self.batch_size, True)
            idx = 0
            for data_x, data_missing, data_m, data_deltaPre, data_x_lengths, _ in self.datasets.nextBatch():
                batch_z = np.random.standard_normal(size=(self.batch_size, self.z_dim))

                # NOTE(review): the clip + discriminator update + d_loss
                # printout are assumed to be gated together by this
                # condition (d_loss is pre-initialised above, which only
                # makes sense if the update can be skipped). Confirm
                # against the original layout.
                if counter % self.disc_iters == 0:
                    _ = self.sess.run(self.clip_all_vals)
                    _, summary_str, d_loss = self.sess.run(
                        [self.d_optim, self.d_sum, self.d_loss],
                        feed_dict={self.z: batch_z,
                                   self.x: data_x,
                                   self.m: data_m,
                                   self.x_lengths: data_x_lengths,
                                   self.keep_prob: 0.5})
                    # display training status
                    print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, counter:%4d" \
                          % (epoch, idx, self.num_batches, time.time() - start_time, d_loss, counter))

                # update G network
                _, summary_str, g_loss = self.sess.run(
                    [self.g_optim, self.g_sum, self.g_loss],
                    feed_dict={self.z: batch_z,
                               self.keep_prob: 0.5,
                               self.x_lengths: data_x_lengths})
                d_loss_list.append(d_loss)
                g_loss_list.append(g_loss)
                self.gan_plot_loss(g_loss_list, d_loss_list)
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f,counter:%4d" \
                      % (epoch, idx, self.num_batches, time.time() - start_time, g_loss, counter))

                counter += 1
                idx += 1

    def imputation(self, dataset):
        """Impute missing values of ``dataset`` by tuning the noise variable.

        For every batch the noise variable is re-initialised and optimised
        for ``self.impute_iter`` steps. Results are stored in
        ``self.imputed_list``; ``self.loss_pre`` holds the accumulated mean
        absolute error over the missing (``m == 0``) entries.

        Args:
            dataset: Data provider replacing ``self.datasets``.
        """
        self.datasets = dataset
        # Shuffling is irrelevant here: the imputed data is stored and can
        # be shuffled later when it is used for testing. The tail of the
        # dataset that does not fill a whole batch is dropped.

        # Build the initializer op once; creating it inside the batch loop
        # would add a new op to the graph for every batch.
        reset_z = tf.variables_initializer([self.z_need_tune])
        reset_z.run()

        start_time = time.time()
        batchid = 1
        impute_tune_time = 1
        counter = 1
        imputed_list = []

        # impute_loss_plot, mask_loss_plot, g_impute_loss
        self.impute_fig_loss = plt.figure()
        self.impute_ax_loss = self.impute_fig_loss.add_subplot(1, 1, 1)
        impute_loss_list = []
        mask_loss_list = []
        g_impute_loss_list = []

        loss_sum = 0
        m_sum = 0
        for data_x, data_missing, data_m, data_deltaPre, data_x_lengths, _ in self.datasets.nextBatch():
            # fresh noise for every batch
            reset_z.run()
            # NOTE(review): everything up to the progress printout is
            # assumed to run once per tuning iteration. Confirm against
            # the original layout.
            for i in range(0, self.impute_iter):
                _, impute_out, summary_str, impute_loss, imputed, mask_loss, g_impute_loss = self.sess.run(
                    [self.impute_optim, self.impute_out, self.impute_sum,
                     self.impute_loss, self.imputed, self.mask_loss,
                     self.g_impute_loss],
                    feed_dict={self.x: data_missing,
                               self.m: data_m,
                               self.x_lengths: data_x_lengths,
                               self.keep_prob: 1.0})
                impute_tune_time += 1
                counter += 1

                # Accumulate the absolute error on the missing entries and
                # normalise by the number of missing entries. The original
                # divided by sum(data_m), the count of *observed* entries,
                # which does not match the (1 - data_m) weighting.
                missing_mask = 1 - data_m
                loss_sum = loss_sum + np.sum(np.multiply(np.abs(data_x - imputed), missing_mask))
                m_sum = m_sum + np.sum(missing_mask)
                print(loss_sum / m_sum)

                impute_loss_list.append(impute_loss)
                mask_loss_list.append(mask_loss)
                g_impute_loss_list.append(g_impute_loss)
                self.impute_plot_loss(impute_loss_list, mask_loss_list, g_impute_loss_list)

                if counter % 10 == 0:
                    print("Batchid: [%2d] [%4d/%4d] time: %4.4f, impute_loss: %.8f" \
                          % (batchid, impute_tune_time, self.impute_iter, time.time() - start_time, impute_loss))

            imputed_list.append(imputed)
            batchid += 1
            impute_tune_time = 1

        self.imputed_list = np.array(imputed_list)
        self.loss_pre = loss_sum / m_sum

    @staticmethod
    def _clear_lines(axes):
        """Remove every line currently drawn on ``axes``.

        Uses each line's own ``remove()`` instead of mutating ``axes.lines``,
        and clears all lines so repeated redraws do not accumulate artists.
        """
        for line in list(axes.lines):
            line.remove()

    def pretrain_plot_loss(self, loss):
        """Redraw the generator pre-training loss curve."""
        self._clear_lines(self.pretrainG_ax_loss)
        self.pretrainG_ax_loss.plot(loss, linestyle='-', color='#2E68AA')
        plt.title("PreTrainG_loss")
        plt.ylabel("loss")
        plt.ion()
        plt.show()
        plt.pause(0.1)

    def gan_plot_loss(self, g_loss, d_loss):
        """Redraw the generator (blue) and discriminator (red) loss curves."""
        self._clear_lines(self.gan_ax_loss)
        self.gan_ax_loss.plot(g_loss, linestyle='-', color='blue')
        self.gan_ax_loss.plot(d_loss, linestyle='-', color='red')
        plt.title("gan_loss")
        plt.ylabel("loss")
        plt.ion()
        plt.show()
        plt.pause(0.1)

    def impute_plot_loss(self, impute_loss, mask_loss_list, g_impute_loss_list):
        """Redraw the total, mask (red) and adversarial (yellow) imputation losses."""
        self._clear_lines(self.impute_ax_loss)
        self.impute_ax_loss.plot(impute_loss, linestyle='-', color='#2E68AA')
        self.impute_ax_loss.plot(mask_loss_list, linestyle='-', color='red')
        self.impute_ax_loss.plot(g_impute_loss_list, linestyle='-', color='yellow')
        plt.title("impute_loss")
        plt.ylabel("loss")
        plt.ion()
        plt.show()
        plt.pause(0.1)