Beispiel #1
0
def make_encoder(sequence, output_dim, seed):
    """Encode a batch of sequences with a statically unrolled GRU.

    Args:
        sequence: Input tensor. Axis 1 is unstacked into the per-step inputs
            (its static size must be known) and axis 0 supplies the batch
            size of the initial state.
        output_dim: Number of units of the GRU cell.
        seed: Seed for the uniform kernel initializer (range [-0.05, 0.05]).

    Returns:
        The final GRU state returned by ``tf.nn.static_rnn``.
    """
    kernel_init = tf.random_uniform_initializer(
        minval=-0.05, maxval=0.05, dtype=tf.float32, seed=seed)
    encoder_cell = GRUCell(
        num_units=output_dim,
        kernel_initializer=kernel_init,
        bias_initializer=tf.zeros_initializer())

    # One tensor per time step, split along axis 1.
    step_inputs = tf.unstack(sequence, sequence.shape[1].value, 1)
    start_state = encoder_cell.zero_state(
        tf.shape(sequence)[0], dtype=tf.float32)

    # Only the final state is needed; the per-step outputs are discarded.
    _, final_state = tf.nn.static_rnn(
        cell=encoder_cell,
        inputs=step_inputs,
        initial_state=start_state)
    return final_state
class BahdanauRnnCoverageMulAttention(BahdanauAttention):
    """Bahdanau attention extended with an RNN-updated coverage state.

    The coverage is maintained by a GRU cell, and every (h, t) pair has its
    own coverage entry, which is why the coverage state is allocated with
    ``batch_size * alignments_size`` rows.

    Reference: https://arxiv.org/pdf/1601.04811.pdf
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 normalize=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="BahdanauCoverageAttention"):
        """Set up the base attention plus the coverage GRU and projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All other arguments are forwarded unchanged to ``BahdanauAttention``.
        """
        super(BahdanauRnnCoverageMulAttention, self).__init__(
            num_units=num_units, memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=normalize, probability_fn=probability_fn,
            score_mask_value=score_mask_value, dtype=dtype, name=name)
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one row per (batch element, memory slot).
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size * self._alignments_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            # Bias-free projection of the coverage state to `num_units`.
            self.coverage_layer = layers_core.Dense(
                num_units,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def _advance_coverage(self, alignments, query):
        """Run one coverage-GRU step and store the new state on the instance."""
        step_input = concat([alignments, query], 1)
        # Repeat the per-batch input once for every memory slot.
        tiled_input = tf.contrib.seq2seq.tile_batch(
            step_input, multiplier=self._alignments_size)
        # Flatten the memory values to one row per (batch element, slot).
        flat_values = array_ops.reshape(
            self.values, [self.batch_size * self._alignments_size, -1])
        cell_input = concat([tiled_input, flat_values], 1)
        _, new_state = self.coverage_rnn_cell(cell_input, self.coverage_state)
        self.coverage_state = new_state

    def __call__(self, query, state):
        """Score the memory against `query`, then update the coverage.

        Returns:
            A pair ``(alignments, next_state)``; both are the alignments.
            The coverage itself is kept on ``self.coverage_state`` rather
            than in the returned state.
        """
        with variable_scope.variable_scope(
                None, "bahdanau_coverage_attention", [query]):
            if self.query_layer:
                processed_query = self.query_layer(query)
            else:
                processed_query = query
            projected = self.coverage_layer(self.coverage_state)
            coverage_features = array_ops.reshape(
                projected, [self.batch_size, self._alignments_size, -1])
            score = _bahdanau_coverage_mul_score(
                processed_query, self._keys, coverage_features,
                self._normalize)
        alignments = self._probability_fn(score, state)
        self._advance_coverage(alignments, query)
        return alignments, alignments
class LuongCoverageAttention(LuongAttention):
    """Luong attention extended with coverage.

    Per the original author's description, the coverage can be seen as a
    weight on the score, so that memory positions whose coverage was high
    before receive a correspondingly lower score.
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 scale=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="LuongAttention"):
        """Set up the base attention plus the coverage GRU and projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All other arguments are forwarded unchanged to ``LuongAttention``.
        """
        super(LuongCoverageAttention, self).__init__(
            num_units=num_units, memory=memory,
            memory_sequence_length=memory_sequence_length,
            scale=scale, probability_fn=probability_fn,
            score_mask_value=score_mask_value, dtype=dtype, name=name)
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one row per (batch element, memory slot).
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size * self._alignments_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            # Bias-free projection of the coverage state to
            # `alignments_size` features.
            self.coverage_layer = layers_core.Dense(
                self._alignments_size,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def _advance_coverage(self, alignments, query):
        """Run one coverage-GRU step and store the new state on the instance."""
        step_input = concat([alignments, query], 1)
        # Repeat the per-batch input once for every memory slot.
        tiled_input = tf.contrib.seq2seq.tile_batch(
            step_input, multiplier=self._alignments_size)
        # Flatten the memory values to one row per (batch element, slot).
        flat_values = array_ops.reshape(
            self.values, [self.batch_size * self._alignments_size, -1])
        cell_input = concat([tiled_input, flat_values], 1)
        _, new_state = self.coverage_rnn_cell(cell_input, self.coverage_state)
        self.coverage_state = new_state

    def __call__(self, query, state):
        """Score the memory against `query`, then update the coverage.

        Returns:
            A pair ``(alignments, next_state)``; both are the alignments.
            The coverage itself is kept on ``self.coverage_state`` rather
            than in the returned state.
        """
        with variable_scope.variable_scope(None, "luong_attention", [query]):
            projected = self.coverage_layer(self.coverage_state)
            coverage_features = array_ops.reshape(
                projected, [self.batch_size, self._alignments_size, -1])
            score = _luong_coverage_score(
                query, self._keys, coverage_features, self._scale)
        alignments = self._probability_fn(score, state)
        self._advance_coverage(alignments, query)
        return alignments, alignments
Beispiel #4
0
def make_encoder(sequence, output_dim, seed):
    """Encode a batch of sequences with a GRU run through ``dynamic_rnn``.

    Args:
        sequence: Input tensor of rank 3. Axes 0 and 1 are swapped before
            the RNN is run in time-major mode, and axis 0 of the original
            tensor supplies the batch size of the initial state.
        output_dim: Number of units of the GRU cell.
        seed: Seed for the uniform kernel initializer (range [-0.05, 0.05]).

    Returns:
        The final GRU state returned by ``tf.nn.dynamic_rnn``.
    """
    kernel_init = tf.random_uniform_initializer(
        minval=-0.05, maxval=0.05, dtype=tf.float32, seed=seed)
    encoder_cell = GRUCell(
        num_units=output_dim,
        kernel_initializer=kernel_init,
        bias_initializer=tf.zeros_initializer())

    time_major_inputs = tf.transpose(sequence, [1, 0, 2])
    start_state = encoder_cell.zero_state(
        tf.shape(sequence)[0], dtype=tf.float32)

    # Only the final state is needed; the per-step outputs are discarded.
    _, final_state = tf.nn.dynamic_rnn(
        cell=encoder_cell,
        inputs=time_major_inputs,
        initial_state=start_state,
        time_major=True)
    return final_state
class BahdanauRnnCoverageAttention(BahdanauAttention):
    """Bahdanau attention extended with a coverage state updated by an RNN.

    The coverage state has one row per batch element and is advanced by a
    GRU cell after every attention step.
    """

    def __init__(self,
                 num_units,
                 memory,
                 coverage_hidden_num_units,
                 memory_sequence_length=None,
                 normalize=False,
                 probability_fn=None,
                 score_mask_value=None,
                 dtype=None,
                 name="BahdanauCoverageAttention"):
        """Set up the base attention plus the coverage GRU and projection.

        Args:
            coverage_hidden_num_units: Number of units of the coverage GRU.

        All other arguments are forwarded unchanged to ``BahdanauAttention``.
        """
        super(BahdanauRnnCoverageAttention, self).__init__(
            num_units=num_units, memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=normalize, probability_fn=probability_fn,
            score_mask_value=score_mask_value, dtype=dtype, name=name)
        coverage_dtype = dtypes.float32 if dtype is None else dtype

        # Initial coverage state: one row per batch element.
        self.coverage_rnn_cell = GRUCell(coverage_hidden_num_units)
        self.coverage_state = self.coverage_rnn_cell.zero_state(
            self.batch_size, coverage_dtype)
        with variable_scope.variable_scope("coverage"):
            # Bias-free projection of the coverage state to `num_units`.
            self.coverage_layer = layers_core.Dense(
                num_units,
                name="coverage_layer",
                use_bias=False,
                dtype=coverage_dtype)

    def _advance_coverage(self, alignments, query):
        """Run one coverage-GRU step and store the new state on the instance."""
        cell_input = concat([alignments, query], 1)
        _, new_state = self.coverage_rnn_cell(cell_input, self.coverage_state)
        self.coverage_state = new_state

    def __call__(self, query, state):
        """Score the memory against `query`, then update the coverage.

        Returns:
            A pair ``(alignments, next_state)``; both are the alignments.
            The coverage itself is kept on ``self.coverage_state`` rather
            than in the returned state.
        """
        with variable_scope.variable_scope(
                None, "bahdanau_coverage_attention", [query]):
            if self.query_layer:
                processed_query = self.query_layer(query)
            else:
                processed_query = query
            coverage_features = self.coverage_layer(self.coverage_state)
            score = _bahdanau_coverage_score(
                processed_query, self._keys, coverage_features,
                self._normalize)
        alignments = self._probability_fn(score, state)
        self._advance_coverage(alignments, query)
        return alignments, alignments
Beispiel #6
0
        b.append(sample[-1, 1] - sample[-1, 0])
    return a1,a2,b


dataSet=tf.data.Dataset.from

# Graph inputs. `shape`, `batch_size` and `long` come from outside this excerpt.
x1 = tf.placeholder(shape=shape, dtype=tf.float16)
# NOTE(review): x2 is not referenced again in the visible part of the script.
x2 = tf.placeholder(shape=[batch_size], dtype=tf.float16)
# Regression target, one value per batch element.
y_ = tf.placeholder(shape=[batch_size], dtype=tf.float16)
training = tf.placeholder(dtype=tf.bool)

# NOTE(review): normalization is hard-wired to training=True and the `training`
# placeholder above is not passed in -- confirm whether that is intended.
X = tf.layers.batch_normalization(x1, training=True, scale=False, center=False, axis=[0, -1])
# X=x1
gru = GRUCell(num_units=4, reuse=tf.AUTO_REUSE, activation=tf.nn.elu, kernel_initializer=tf.glorot_normal_initializer(),
              dtype=tf.float16)
state = gru.zero_state(batch_size, dtype=tf.float16)
# Unroll the GRU by hand for `long` steps, feeding X[:, timestep] at each step.
with tf.variable_scope('RNN'):
    for timestep in range(long):
        # From the second step on, switch the scope to reuse mode so every
        # step shares the variables created by the first call.
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output, state) = gru(X[:, timestep], state)
    # Only the state after the last step is used downstream.
    out_put = state

out = tf.nn.relu(out_put)

# `ml.layer_basic` is defined outside this excerpt; column 0 of its result is
# taken as the prediction.
y = ml.layer_basic(out, 1)[:, 0]

# Mean squared error between prediction and target, cast to float16.
loss = tf.cast(tf.reduce_mean((y - y_) * (y - y_)),dtype=tf.float16)
# optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
# optimizer_min = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
Beispiel #7
0
        sample = data[i:i + long]
        a.append(sample[:-1, :5])
        b.append(sample[:-1, 5:10])
        c.append(sample[-1][1])
    return a, b, c


# Two input sequences of `long - 1` steps with 5 features each, plus one
# float16 value per batch element in `z_`. `batch_size` and `long` come from
# outside this excerpt.
x = tf.placeholder(shape=[batch_size, long - 1, 5], dtype=tf.float16)
y = tf.placeholder(shape=[batch_size, long - 1, 5], dtype=tf.float16)
z_ = tf.placeholder(shape=[batch_size], dtype=tf.float16)

# Squash both inputs with a sigmoid and shift them into (-0.5, 0.5).
X = tf.nn.sigmoid(x) - 0.5
Y = tf.nn.sigmoid(y) - 0.5

# First GRU, unrolled by hand over the time axis of X.
gru_x = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu)
state_x = gru_x.zero_state(batch_size, dtype=tf.float16)
with tf.variable_scope('RNN_x'):
    for timestep in range(long - 1):
        # From the second step on, reuse the variables created by step 0.
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output_x, state_x) = gru_x(X[:, timestep], state_x)
    # Final state of the X branch.
    out_put_x = state_x

# Second GRU with the same configuration, unrolled over Y in its own scope.
gru_y = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu)
state_y = gru_y.zero_state(batch_size, dtype=tf.float16)
with tf.variable_scope('RNN_y'):
    # The step count must match the placeholder's time dimension (long - 1).
    for timestep in range(long - 1):  # be careful
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output_y, state_y) = gru_y(Y[:, timestep], state_y)
    # Final state of the Y branch.
    out_put_y = state_y
Beispiel #8
0
# `iterator`, `train_dataset`, `test_dataset`, `batch_size`, `long` and `dtype`
# are defined outside this excerpt.
# NOTE(review): `next_element` is not referenced again in the visible code, and
# iterator.get_next() is called a second time below -- confirm both are needed.
next_element = iterator.get_next()
train_iterator = train_dataset.make_one_shot_iterator()
test_iterator = test_dataset.make_initializable_iterator()

# Features and regression targets pulled from the iterator.
x, y_ = iterator.get_next()

# Pin the batch dimension to `batch_size`; the other two dims are kept as-is.
X = tf.reshape(x, shape=[batch_size, x.shape[1], x.shape[2]])

# X = tf.layers.batch_normalization(x, training=True, scale=False, center=False, axis=[0, -1])

gru = GRUCell(num_units=128,
              reuse=tf.AUTO_REUSE,
              activation=tf.nn.relu,
              kernel_initializer=tf.glorot_normal_initializer(),
              dtype=dtype)
state = gru.zero_state(batch_size, dtype=dtype)
# Unroll the GRU by hand for `long` steps, feeding X[:, timestep] at each step.
with tf.variable_scope('RNN'):
    for timestep in range(long):
        # From the second step on, reuse the variables created by step 0.
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output, state) = gru(X[:, timestep], state)
    # Only the state after the last step is used downstream.
    out_put = state

out = tf.nn.relu(out_put)

# Single-unit dense layer; column 0 is the scalar prediction per example.
y = tf.layers.dense(out, 1)[:, 0]

# Mean squared error between prediction and target.
loss = tf.reduce_mean((y - y_) * (y - y_))

# Ops registered under UPDATE_OPS, consumed by the control-dependency block
# that follows.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
Beispiel #9
0
    def __init__(self,
                 batch_size,
                 max_seq_length,
                 vocab_size,
                 start_token_id=1,
                 end_token_id=2,
                 pad_token_id=0,
                 unk_token_id=3,
                 emb_size=100,
                 memory_size=100,
                 keep_prob=0.5,
                 temperature=0.5,
                 antilm=0.55,
                 learning_rate=0.001,
                 grad_clip=5.0,
                 infer=False):
        """Build the whole graph: inputs, RNN cells, loss and training op.

        Args:
            batch_size: Fixed batch size of all placeholders.
            max_seq_length: Fixed sequence length of the data placeholders.
            vocab_size: Number of rows of the embedding matrix.
            start_token_id, end_token_id, pad_token_id, unk_token_id:
                Special token ids; only stored on the instance here.
            emb_size: Embedding width, also the encoder GRU size.
            memory_size: The context GRU has ``2 * memory_size`` units and
                the decoder cell ``memory_size``.
            keep_prob: Dropout keep probability, applied to the embedded
                inputs and the encoder cell outputs when below 1 and not in
                inference mode.
            temperature, antilm: Only stored on the instance here.
            learning_rate: Learning rate of the Adam optimizer.
            grad_clip: Global-norm threshold for gradient clipping.
            infer: When true, dropout is disabled.
        """
        # Plain configuration, kept on the instance for the other methods.
        self._batch_size = batch_size
        self._max_seq_length = max_seq_length
        self._vocab_size = vocab_size
        self._memory_size = memory_size
        self._start_token_id = start_token_id
        self._end_token_id = end_token_id
        self._pad_token_id = pad_token_id
        self._unk_token_id = unk_token_id
        self._keep_prob = keep_prob
        self._temperature = temperature
        self._antilm = antilm
        self._infer = infer

        use_dropout = self._keep_prob < 1 and not infer

        # Graph inputs.
        self.input_data = tf.placeholder(
            tf.int32, [batch_size, max_seq_length], name="input_data")
        self.input_lengths = tf.placeholder(
            tf.int32, shape=[batch_size], name="input_lengths")
        self.output_data = tf.placeholder(
            tf.int32, [batch_size, max_seq_length], name='output_data')
        self.output_lengths = tf.placeholder(
            tf.int32, [batch_size], name='output_lengths')
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        # The embedding table and lookup are pinned to the CPU.
        with tf.device("/cpu:0"):
            self.embedding = tf.get_variable(
                "embedding", [vocab_size, emb_size])
            inputs = tf.nn.embedding_lookup(self.embedding, self.input_data)

        if use_dropout:
            inputs = tf.nn.dropout(inputs, keep_prob=self._keep_prob)

        # Forward and backward encoder cells.
        with tf.variable_scope("encoder", initializer=glorot()):
            fw_cell = GRUCell(emb_size)
            bw_cell = GRUCell(emb_size)
            if use_dropout:
                fw_cell = DropoutWrapper(
                    fw_cell, output_keep_prob=self._keep_prob)
                bw_cell = DropoutWrapper(
                    bw_cell, output_keep_prob=self._keep_prob)

        # Context cell plus the projection variables exposed as ctx_w / ctx_b.
        with tf.variable_scope("context", initializer=glorot()):
            ctx_cell = GRUCell(memory_size * 2)
            self.ctx_w = tf.get_variable(
                "context_w", [memory_size * 2, memory_size])
            self.ctx_b = tf.get_variable(
                "context_b", [memory_size],
                initializer=init_ops.zeros_initializer())
            self.initial_state = ctx_cell.zero_state(
                self._batch_size, tf.float32)

        with tf.variable_scope("decoder", initializer=glorot()):
            # GRU with conditional distribution, see sec. 2.2 of
            # https://arxiv.org/pdf/1406.1078.pdf
            dec_cell = GRUCellCond(memory_size)

        seq2seq_result = self.seq2seq(
            inputs, fw_cell, bw_cell, ctx_cell, dec_cell)
        self.outputs, self.output_ids, _, self.final_state = seq2seq_result

        # Scalar training loss and its summary.
        per_example_loss = self.get_loss(self.outputs)
        self.loss = tf.reduce_mean(per_example_loss)
        tf.summary.scalar('loss', self.loss)

        tvars = tf.trainable_variables()

        print("parameter size:", _count_param_size(tvars))

        # Adam on gradients clipped by global norm.
        raw_grads = tf.gradients(self.loss, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)
Beispiel #10
0
class MultiMemoryRNN(RNNCell):
    """GRU controller combined with several memory cells through gates.

    The cell state is the controller state followed by the states of all
    memory cells, concatenated along axis 1.
    """

    def __init__(self, memories, size):
        """Store the memory cells and create the controller GRU.

        Args:
            memories: Sequence of cell-like objects exposing ``state_size``,
                ``zero_state`` and a ``(inputs, state, scope)`` call.
            size: Number of units of the controller GRU and output size.
        """
        self._size = size
        self._cell = GRUCell(size)
        self._rnn_memories = memories

    def __call__(self, inputs, state, scope=None):
        """Run one step and return ``(output, new_state)``."""
        memories = self._rnn_memories
        slot_count = len(memories) + 1
        with tf.variable_scope(scope or type(self).__name__):
            # Split the flat state into the controller part and one part
            # per memory cell.
            controller_state = tf.slice(
                state, [0, 0], [-1, self._cell.state_size])
            previous_states = []
            start = self._cell.state_size
            for memory in memories:
                previous_states.append(
                    tf.slice(state, [0, start], [-1, memory.state_size]))
                start += memory.state_size

            controller_out, _ = self._cell(inputs, controller_state)

            # Read from the memories: each one sees the controller output
            # together with the raw inputs.
            memory_input = tf.concat(axis=1, values=[controller_out, inputs])
            memory_results = []
            for idx, (memory, previous) in enumerate(
                    zip(memories, previous_states)):
                memory_results.append(
                    memory(memory_input, previous, "memory" + str(idx)))

            # Stack all candidate outputs, [B, N+1, S] per the original notes.
            candidates = [
                tf.expand_dims(result[0], 1) for result in memory_results
            ]
            candidates.append(tf.expand_dims(controller_out, 1))
            stacked = tf.concat(axis=1, values=candidates)

            # One sigmoid gate per candidate, [B, N+1].
            flattened = tf.reshape(stacked, [-1, slot_count * self._size])
            gates = tf.contrib.layers.fully_connected(
                flattened,
                slot_count,
                activation_fn=tf.sigmoid,
                weights_initializer=None,
                biases_initializer=tf.constant_initializer(0.0))

            # Gate every candidate and sum over the candidate axis.
            gated = stacked * tf.expand_dims(gates, 2)
            output = tf.reduce_sum(gated, [1])

            # The controller output is stored as the new controller state.
            next_parts = [controller_out]
            next_parts.extend(result[1] for result in memory_results)
            next_state = tf.concat(axis=1, values=next_parts)
            return output, next_state

    def zero_state(self, batch_size, dtype):
        """Return the concatenated zero states of controller and memories."""
        parts = [self._cell.zero_state(batch_size, dtype)]
        parts += [
            memory.zero_state(batch_size, dtype)
            for memory in self._rnn_memories
        ]
        return tf.concat(axis=1, values=parts)

    @property
    def state_size(self):
        """Controller state size plus the state sizes of all memories."""
        memory_total = sum(
            memory.state_size for memory in self._rnn_memories)
        return self._cell.state_size + memory_total

    @property
    def output_size(self):
        """Size of the gated output, equal to the controller size."""
        return self._size
Beispiel #11
0
        sample = data[i:i + long]
        a.append(sample[:-1, :11])
        b.append(sample[:-1, :11])
        c.append(sample[-1][:4])
    return a, b, c


# Two input sequences of `long - 1` steps with 10 features each and a target
# with 4 values per example. `batch_size` and `long` come from outside this
# excerpt.
# NOTE(review): the sample builder above slices `:11` columns while these
# placeholders declare 10 features -- confirm the data really has 10 columns.
x = tf.placeholder(shape=[batch_size, long - 1, 10], dtype=tf.float16)
y = tf.placeholder(shape=[batch_size, long - 1, 10], dtype=tf.float16)
z_ = tf.placeholder(shape=[batch_size, 4], dtype=tf.float16)

# Squash both inputs with a sigmoid and shift them into (-0.5, 0.5).
X = tf.nn.sigmoid(x) - 0.5
Y = tf.nn.sigmoid(y) - 0.5

# GRU unrolled by hand over the time axis of X, in its own variable scope.
gru_x_open = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu)
state_x_open = gru_x_open.zero_state(batch_size, dtype=tf.float16)
with tf.variable_scope('RNN_x_open'):
    for timestep in range(long - 1):
        # From the second step on, reuse the variables created by step 0.
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output_x_open,
         state_x_open) = gru_x_open(X[:, timestep], state_x_open)
    # Final state of this branch.
    out_put_x_open = state_x_open

gru_x_high = GRUCell(num_units=8, reuse=tf.AUTO_REUSE, activation=tf.nn.elu)
state_x_high = gru_x_high.zero_state(batch_size, dtype=tf.float16)
with tf.variable_scope('RNN_x_high'):
    for timestep in range(long - 1):
        if timestep == 1:
            tf.get_variable_scope().reuse_variables()
        (cell_output_x_high,
Beispiel #12
0
class WGAN(object):
    model_name = "WGAN_no_mask"     # name for checkpoint

    def __init__(self, sess, args, datasets):
        """Store the session, hyper-parameters, data handle and GRU cells.

        Args:
            sess: Session object, stored as ``self.sess``.
            args: Option namespace; the attributes read are listed below.
            datasets: Data holder providing ``maxLength`` (number of time
                steps) and ``x`` (its length determines the batch count).
        """
        self.sess = sess
        self.datasets = datasets

        # Data / model dimensions.
        self.batch_size = args.batch_size
        self.n_inputs = args.n_inputs              # features per time step
        self.n_steps = datasets.maxLength          # time steps
        self.n_hidden_units = args.n_hidden_units  # GRU hidden size
        self.n_classes = args.n_classes
        self.z_dim = args.z_dim                    # dimension of noise-vector

        # Training schedule and optimizer settings.
        self.lr = args.lr
        self.learning_rate = args.lr
        self.beta1 = args.beta1
        self.epoch = args.epoch
        self.pretrain_epoch = args.pretrain_epoch
        self.impute_iter = args.impute_iter
        self.g_loss_lambda = args.g_loss_lambda
        self.isbatch_normal = args.isBatch_normal
        self.gpus = args.gpus

        # WGAN_GP parameters.
        # The higher value, the more stable, but the slower convergence.
        self.lambd = 0.25
        # Number of critic iterations for one step of the generator.
        self.disc_iters = args.disc_iters

        # One GRU cell for the generator and one for the discriminator.
        self.Gru_g = GRUCell(self.n_hidden_units)
        self.Gru_d = GRUCell(self.n_hidden_units)

        self.num_batches = len(datasets.x) // self.batch_size

    def pretrainG(self, X, X_lengths, Keep_prob, reuse=False):
        """Build the generator pre-training graph.

        The generator GRU is run over the real sequences and every hidden
        output is projected back to ``n_inputs`` features.

        Args:
            X: Input batch, reshaped to ``[-1, n_steps, n_inputs]``.
            X_lengths: Per-example sequence lengths passed to ``dynamic_rnn``.
            Keep_prob: Dropout keep probability applied before the projection.
            reuse: Reuse flag for the "g_enerator" variable scope.

        Returns:
            Predictions reshaped to ``[-1, n_steps, n_inputs]``.
        """
        with tf.variable_scope("g_enerator", reuse=reuse):
            # The RNN cell's variable scope is chosen by TensorFlow; for its
            # weights to be picked up by the optimizers, the enclosing scope
            # name must contain 'g_' or 'd_'.
            proj_w = tf.get_variable(
                "g_w_out",
                shape=[self.n_hidden_units, self.n_inputs],
                initializer=tf.random_normal_initializer())
            proj_b = tf.get_variable(
                "g_b_out",
                shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))
            # g_w_z / g_b_z are not used in this method. build_model calls
            # pretrainG first (reuse=False) and generator afterwards with
            # reuse=True, so they are created here for generator to find.
            w_z = tf.get_variable(
                "g_w_z",
                shape=[self.z_dim, self.n_inputs],
                initializer=tf.random_normal_initializer())
            b_z = tf.get_variable(
                "g_b_z",
                shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))

            rnn_input = tf.reshape(X, [-1, self.n_steps, self.n_inputs])

            # All-zero initial state.
            zero_state = self.Gru_g.zero_state(
                self.batch_size, dtype=tf.float32)
            hidden, _ = tf.nn.dynamic_rnn(
                self.Gru_g,
                rnn_input,
                initial_state=zero_state,
                sequence_length=X_lengths,
                time_major=False)

            # hidden: batch_size * n_steps * n_hidden_units, flattened so a
            # single matmul projects every time step.
            flat_hidden = tf.reshape(hidden, [-1, self.n_hidden_units])
            dropped = tf.nn.dropout(flat_hidden, Keep_prob)
            flat_prediction = tf.matmul(dropped, proj_w) + proj_b
            return tf.reshape(
                flat_prediction, [-1, self.n_steps, self.n_inputs])

    def discriminator(self, X, X_lengths, Keep_prob, reuse=False):
        """Build the GRU discriminator and return ``(out, out_logit)``.

        Args:
            X: Input batch, reshaped to ``[batch_size, n_steps, n_inputs]``.
            X_lengths: Per-example sequence lengths passed to ``dynamic_rnn``.
            Keep_prob: Dropout keep probability applied to the final state.
            reuse: Reuse flag for the "d_iscriminator" variable scope.

        Returns:
            A pair ``(out, out_logit)``: the sigmoid of the logit, and the
            raw logit obtained from the final GRU state.
        """
        with tf.variable_scope("d_iscriminator", reuse=reuse):
            logit_w = tf.get_variable(
                "d_w_out",
                shape=[self.n_hidden_units, 1],
                initializer=tf.random_normal_initializer())
            logit_b = tf.get_variable(
                "d_b_out",
                shape=[1, ],
                initializer=tf.constant_initializer(0.001))

            rnn_input = tf.reshape(
                X, [self.batch_size, self.n_steps, self.n_inputs])

            # All-zero initial state.
            zero_state = self.Gru_d.zero_state(
                self.batch_size, dtype=tf.float32)
            _, final_state = tf.nn.dynamic_rnn(
                self.Gru_d,
                rnn_input,
                initial_state=zero_state,
                sequence_length=X_lengths,
                time_major=False)

            # final_state: batch_size * n_hidden_units.
            # Original author's note: the output at the last time step must
            # not be used; the state at step `length` is needed. An earlier
            # version took the last output and the result was always b_out.
            dropped = tf.nn.dropout(final_state, Keep_prob)
            out_logit = tf.matmul(dropped, logit_w) + logit_b
            return tf.nn.sigmoid(out_logit), out_logit

    def generator(self, z, Keep_prob, is_training=True, reuse=False):
        """Generate a sequence of ``n_steps`` frames from the noise `z`.

        The noise is first projected to ``n_inputs`` features and fed to the
        generator GRU for one step. Each later step feeds the previous
        prediction plus the same noise projection back into the GRU.

        Args:
            z: Noise tensor of shape ``[batch_size, z_dim]``.
            Keep_prob: Dropout keep probability applied before the output
                projection.
            is_training: Forwarded to ``bn`` when batch normalization is on.
            reuse: Reuse flag for the "g_enerator" variable scope.

        Returns:
            All per-step predictions concatenated along axis 1.
        """
        with tf.variable_scope("g_enerator", reuse=reuse):
            proj_w = tf.get_variable(
                "g_w_out",
                shape=[self.n_hidden_units, self.n_inputs],
                initializer=tf.random_normal_initializer())
            proj_b = tf.get_variable(
                "g_b_out",
                shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))
            noise_w = tf.get_variable(
                "g_w_z",
                shape=[self.z_dim, self.n_inputs],
                initializer=tf.random_normal_initializer())
            noise_b = tf.get_variable(
                "g_b_z",
                shape=[self.n_inputs, ],
                initializer=tf.constant_initializer(0.001))

            def run_step(step_input, state, lengths):
                """Advance the GRU one step and project its output."""
                hidden, new_state = tf.nn.dynamic_rnn(
                    self.Gru_g,
                    step_input,
                    initial_state=state,
                    sequence_length=lengths,
                    time_major=False)
                # hidden: batch_size * 1 * n_hidden_units
                flat_hidden = tf.reshape(hidden, [-1, self.n_hidden_units])
                # Fully connected output layer.
                projected = tf.matmul(
                    tf.nn.dropout(flat_hidden, Keep_prob), proj_w) + proj_b
                frame = tf.reshape(projected, [-1, 1, self.n_inputs])
                return frame, new_state

            # Change z's dimension: batch_size*z_dim --> batch_size*n_inputs.
            first_input = tf.matmul(z, noise_w) + noise_b
            rnn_input = tf.reshape(first_input, [-1, 1, self.n_inputs])

            # All-zero initial state; every call covers a single time step.
            state = self.Gru_g.zero_state(self.batch_size, dtype=tf.float32)
            one_step = tf.constant(1, shape=[self.batch_size])

            frame, state = run_step(rnn_input, state, one_step)
            sequence = tf.multiply(frame, 1.0)

            for _ in range(1, self.n_steps):
                previous = tf.reshape(
                    frame, [self.batch_size, self.n_inputs])
                # Add the noise projection to the previous prediction.
                previous = previous + tf.matmul(z, noise_w) + noise_b
                rnn_input = tf.reshape(previous, [-1, 1, self.n_inputs])

                frame, state = run_step(rnn_input, state, one_step)
                sequence = tf.concat([sequence, frame], 1)

            if self.isbatch_normal:
                with tf.variable_scope("g_bn", reuse=tf.AUTO_REUSE):
                    sequence = bn(
                        sequence, is_training=is_training, scope="g_bn_imple")

            return sequence
        
    def impute(self):
        """Return the noise variable that is tuned during imputation.

        The variable "z_needtune" has shape ``[batch_size, z_dim]``, lives in
        the "impute" scope (created or reused as needed) and is initialized
        from a normal distribution with mean 0 and stddev 0.1.
        """
        noise_init = tf.random_normal_initializer(mean=0, stddev=0.1)
        with tf.variable_scope("impute", reuse=tf.AUTO_REUSE):
            return tf.get_variable(
                "z_needtune",
                shape=[self.batch_size, self.z_dim],
                initializer=noise_init)
            
    def build_model(self):
        """Build placeholders, losses, optimizers, clip ops and summaries.

        Creates the input placeholders, the pre-training, discriminator,
        generator and imputation losses, one Adam optimizer per loss, the
        weight-clipping ops and the scalar summaries. Everything is stored
        on the instance; nothing is returned.
        """
        # Inputs: dropout keep probability, data x, mask m (same shape as x),
        # per-example sequence lengths and the generator noise z.
        self.keep_prob = tf.placeholder(tf.float32) 
        self.x = tf.placeholder(tf.float32, [self.batch_size, self.n_steps, self.n_inputs])
        self.m = tf.placeholder(tf.float32, [self.batch_size, self.n_steps, self.n_inputs])
        self.x_lengths = tf.placeholder(tf.int32,  shape=[self.batch_size,])
        self.z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim], name='z')

        """ Loss Function """
        # Original note: "no pre-training". The pre-training graph is still
        # built here with reuse=False, which creates the "g_enerator"
        # variables that the generator calls below pick up with reuse=True.
        Pre_out=self.pretrainG(self.x, self.x_lengths,\
                                                     self.keep_prob, \
                                                       reuse=False)
        
        # Sum of squared differences between the masked prediction and x,
        # divided by the total number of valid time steps in the batch.
        self.pretrain_loss=tf.reduce_sum(tf.square(tf.multiply(Pre_out,self.m)-self.x)) / tf.cast(tf.reduce_sum(self.x_lengths),tf.float32)

        # Discriminator on real data; only the logits are used below.
        D_real, D_real_logits = self.discriminator(self.x,  \
                                                       self.x_lengths,self.keep_prob, \
                                                      reuse=False)

        # Generated sequence from the noise placeholder.
        g_x = self.generator(self.z,self.keep_prob, is_training=True, reuse=True)
        
        # Discriminator on generated data, sharing the weights created above.
        D_fake, D_fake_logits = self.discriminator(g_x,self.x_lengths,self.keep_prob,\
                                                   reuse = True)
        
        """
        impute loss
        """
        # Trainable noise that is optimized to reproduce the observed values.
        self.z_need_tune=self.impute()
        
        impute_out=self.generator(self.z_need_tune,self.keep_prob, is_training=False, reuse=True)
        
        
        impute_fake, impute_fake_logits = self.discriminator(impute_out,self.x_lengths,\
                                                      self.keep_prob,
                                                      reuse=True )
        
        # Loss for imputation: masked reconstruction error plus a weighted
        # adversarial term (negative mean discriminator logit).

        self.mask_loss = tf.reduce_mean(tf.square(tf.multiply(impute_out,self.m)-self.x))
        self.g_impute_loss = -tf.reduce_mean(impute_fake_logits)
        self.impute_loss=self.mask_loss + self.g_loss_lambda*self.g_impute_loss
        
        self.impute_out=impute_out
        
        # The imputed result: generated values where the mask is 0, plus x.
        self.imputed=tf.multiply((1-self.m),self.impute_out)+self.x
        # Discriminator loss: mean fake logit minus mean real logit.
        d_loss_real = - tf.reduce_mean(D_real_logits)
        d_loss_fake = tf.reduce_mean(D_fake_logits)

        self.d_loss = d_loss_real + d_loss_fake

        # Generator loss: negative mean fake logit.
        self.g_loss = - d_loss_fake
        

        """ Training """
        # Divide trainable variables into a group for D and a group for G by
        # substring match on the variable name ('d_' / 'g_').
        t_vars = tf.trainable_variables()
        d_vars = [var for var in t_vars if 'd_' in var.name]
        g_vars = [var for var in t_vars if 'g_' in var.name]
        # The imputation optimizer only updates the tunable noise.
        z_vars = [self.z_need_tune]
        '''
        print("d vars:")
        for v in d_vars:
            print(v.name)
        print("g vars:")
        for v in g_vars:
            print(v.name)
        print("z vars:")
        for v in z_vars:
            print(v.name)
        '''
        
        # No L2 regularization is applied because dropout is already used;
        # the disabled regularization code is kept in the string below.
        """
        ld = 0.0
        for w in d_vars:
            ld += tf.contrib.layers.l2_regularizer(1e-4)(w)
        lg = 0.0
        for w in g_vars:
            lg += tf.contrib.layers.l2_regularizer(1e-4)(w)
        
        self.d_loss+=ld
        self.g_loss+=lg
        """
        
        # Optimizers. Batch normalization may be used in the generator, so
        # the UPDATE_OPS collection must run before these training ops.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_optim = tf.train.AdamOptimizer(self.learning_rate, beta1=self.beta1) \
                        .minimize(self.d_loss, var_list=d_vars)
            #self.d_optim=self.optim(self.learning_rate, self.beta1,self.d_loss,d_vars)
            # The generator learning rate is scaled by the critic iterations.
            self.g_optim = tf.train.AdamOptimizer(self.learning_rate*self.disc_iters, beta1=self.beta1) \
                        .minimize(self.g_loss, var_list=g_vars)
            #self.g_optim=self.optim(self.learning_rate, self.beta1,self.g_loss,g_vars)
            self.g_pre_optim=tf.train.AdamOptimizer(self.learning_rate*2,beta1=self.beta1) \
                        .minimize(self.pretrain_loss,var_list=g_vars)
        # Built outside the control-dependency block; updates z_vars only.
        self.impute_optim=tf.train.AdamOptimizer(self.learning_rate*7,beta1=self.beta1).minimize(self.impute_loss,var_list=z_vars)


        # Weight clipping ops: clamp each variable to [-0.99, 0.99].
        # NOTE(review): clip_all_vals covers every trainable variable,
        # including the tunable noise z -- confirm that is intended.
        self.clip_all_vals = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in t_vars]
        self.clip_D = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in d_vars]
        self.clip_G = [p.assign(tf.clip_by_value(p, -0.99, 0.99)) for p in g_vars]
        
        
        """" Testing """
        # for test
        # self.fake_x,self.fake_delta,_,_ = self.generator(self.z, self.keep_prob, is_training=False, reuse=True)

        """ Summary """
        d_loss_real_sum = tf.summary.scalar("d_loss_real", d_loss_real)
        d_loss_fake_sum = tf.summary.scalar("d_loss_fake", d_loss_fake)
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        g_pretrain_loss_sum=tf.summary.scalar("g_pretrain_loss", self.pretrain_loss)
        # Final summary operations exposed on the instance.
        self.impute_sum=tf.summary.scalar("impute_loss", self.impute_loss)
        self.g_sum = g_loss_sum
        self.g_pretrain_sum=tf.summary.merge([g_pretrain_loss_sum])
        self.d_sum = tf.summary.merge([d_loss_real_sum,d_loss_fake_sum, d_loss_sum])
        
    def optim(self,learning_rate,beta,loss,var):
        """Build an Adam training op whose gradients are clipped by norm.

        Args:
            learning_rate: learning rate handed to ``tf.train.AdamOptimizer``.
            beta: value used for the optimizer's ``beta1``.
            loss: tensor to minimise.
            var: variables to differentiate against (passed as ``var_list``).

        Returns:
            The op produced by ``apply_gradients`` on the clipped pairs.
        """
        adam = tf.train.AdamOptimizer(learning_rate, beta1=beta)
        # Pairs whose gradient is None are passed through unchanged; every
        # other gradient goes through tf.clip_by_norm with a clip norm of 5.
        clipped_pairs = [
            (grad, variable) if grad is None
            else (tf.clip_by_norm(grad, 5), variable)
            for grad, variable in adam.compute_gradients(loss, var_list=var)
        ]
        return adam.apply_gradients(clipped_pairs)

    def pretrain(self, start_epoch,counter,start_time):
        """Run the generator pre-training epochs, plotting the loss live.

        Returns immediately when ``start_epoch`` is not below
        ``self.pretrain_epoch``.

        Args:
            start_epoch: first epoch index to run.
            counter: running step count. Only the local copy is incremented
                here; nothing is returned to the caller.
            start_time: reference timestamp subtracted from ``time.time()``
                for the elapsed-time column of the progress line.
        """
        if not start_epoch < self.pretrain_epoch:
            return

        # Figure and axes that pretrain_plot_loss redraws after every batch.
        self.pretrainG_fig_loss = plt.figure()
        self.pretrainG_ax_loss = self.pretrainG_fig_loss.add_subplot(1, 1, 1)
        loss_history = []

        for epoch in range(start_epoch, self.pretrain_epoch):
            self.datasets.shuffle(self.batch_size,True)
            for idx, batch in enumerate(self.datasets.nextBatch()):
                # Each batch is a 6-tuple; only the values, the mask and the
                # sequence lengths are fed to the graph here.
                data_x, _, data_m, _, data_x_lengths, _ = batch

                feed = {self.x: data_x,
                        self.m: data_m,
                        self.x_lengths: data_x_lengths,
                        self.keep_prob: 0.5}
                # The summary is still fetched (and discarded) so the same
                # set of ops runs on every step.
                _, _, p_loss = self.sess.run(
                    [self.g_pre_optim, self.g_pretrain_sum, self.pretrain_loss],
                    feed_dict=feed)

                loss_history.append(p_loss)
                self.pretrain_plot_loss(loss_history)

                counter += 1

                # display training status
                elapsed = time.time() - start_time
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, pretrain_loss: %.8f" \
                      % (epoch, idx, self.num_batches, elapsed, p_loss))

    def train(self):
        """Initialise variables, pre-train the generator, then train the GAN.

        After ``self.pretrain`` has run, every batch performs one generator
        update. A discriminator update (preceded by running
        ``self.clip_all_vals``) happens only on steps where ``counter`` is a
        multiple of ``self.disc_iters``. Both losses are appended to running
        lists and re-plotted after every batch.
        """
        # Noise sample stored on the instance; not used again in this method.
        self.sample_z = np.random.standard_normal(size=(self.batch_size , self.z_dim))

        # Relies on a default session being active.
        tf.global_variables_initializer().run()
        start_epoch = 0
        counter = 1
        start_time = time.time()

        self.pretrain(start_epoch,counter,start_time)
        # Adversarial epochs are numbered after the pre-training epochs.
        if start_epoch < self.pretrain_epoch:
            start_epoch=self.pretrain_epoch

        # Figure and axes that gan_plot_loss redraws after every batch.
        self.gan_fig_loss = plt.figure()
        self.gan_ax_loss = self.gan_fig_loss.add_subplot(1, 1, 1)
        d_history = []
        g_history = []
        # Most recent discriminator loss; stays 0 until D is first updated.
        d_loss = 0

        for epoch in range(start_epoch, self.epoch):
            self.datasets.shuffle(self.batch_size,True)
            for idx, batch in enumerate(self.datasets.nextBatch()):
                data_x, _, data_m, _, data_x_lengths, _ = batch
                batch_z = np.random.standard_normal(size=(self.batch_size, self.z_dim))

                # update D network (only every disc_iters-th step)
                if counter % self.disc_iters == 0:
                    self.sess.run(self.clip_all_vals)
                    d_feed = {self.z: batch_z,
                              self.x: data_x,
                              self.m: data_m,
                              self.x_lengths: data_x_lengths,
                              self.keep_prob: 0.5}
                    _, _, d_loss = self.sess.run(
                        [self.d_optim, self.d_sum, self.d_loss],
                        feed_dict=d_feed)
                    print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, counter:%4d" \
                          % (epoch, idx, self.num_batches, time.time() - start_time, d_loss, counter))

                # update G network (every step)
                g_feed = {self.z: batch_z,
                          self.keep_prob: 0.5,
                          self.x_lengths: data_x_lengths}
                _, _, g_loss = self.sess.run(
                    [self.g_optim, self.g_sum, self.g_loss],
                    feed_dict=g_feed)

                # d_loss repeats its last value on steps where D was skipped.
                d_history.append(d_loss)
                g_history.append(g_loss)
                self.gan_plot_loss(g_history,d_history)
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f,  g_loss: %.8f,counter:%4d" \
                      % (epoch, idx, self.num_batches, time.time() - start_time, g_loss,counter))

                counter += 1

    def imputation(self,dataset):
        """Impute each batch of ``dataset`` by tuning ``self.z_need_tune``.

        For every batch the latent variable is re-initialised and
        ``self.impute_optim`` is run ``self.impute_iter`` times; the
        ``self.imputed`` output of the last iteration is kept for that batch.
        The three loss curves are re-plotted after every iteration.

        Args:
            dataset: object providing ``nextBatch()``; stored on
                ``self.datasets``.

        Sets ``self.imputed_list`` (array built from the per-batch outputs)
        and ``self.loss_pre`` (the accumulated ``loss_sum / m_sum``).
        """
        self.datasets=dataset
        tf.variables_initializer([self.z_need_tune]).run()
        # Shuffling is irrelevant here: the imputed data is stored and can be
        # shuffled later when it is used for testing.
        # Original note: samples left over when the data set is not divisible
        # by batch_size are dropped (presumably by the data set iterator).
        start_time = time.time()
        counter=1
        imputed_batches = []

        # Figure and axes that impute_plot_loss redraws after every iteration.
        self.impute_fig_loss = plt.figure()
        self.impute_ax_loss = self.impute_fig_loss.add_subplot(1, 1, 1)
        impute_history = []
        mask_history = []
        g_history = []
        loss_sum = 0
        m_sum = 0

        fetches = [self.impute_optim, self.impute_out, self.impute_sum,
                   self.impute_loss, self.imputed, self.mask_loss,
                   self.g_impute_loss]

        for batchid, batch in enumerate(self.datasets.nextBatch(), 1):
            data_x, data_missing, data_m, _, data_x_lengths, _ = batch
            # Fresh latent variable for every batch.
            tf.variables_initializer([self.z_need_tune]).run()
            feed = {self.x: data_missing,
                    self.m: data_m,
                    self.x_lengths: data_x_lengths,
                    self.keep_prob: 1.0}
            # Incremented before it is printed, so the display starts at 2.
            tune_step = 1
            for _ in range(self.impute_iter):
                (_, _, _, impute_loss, imputed,
                 mask_loss, g_impute_loss) = self.sess.run(fetches, feed_dict=feed)
                tune_step += 1
                counter+=1

                # Accumulate the absolute error on entries where the mask is 0.
                # NOTE(review): the numerator is weighted by (1 - data_m) but
                # the denominator sums data_m, and both are accumulated on
                # every tuning iteration rather than only the last one.
                # Confirm this is the intended normalisation.
                masked_error = np.multiply(np.abs(data_x - imputed),1-data_m)
                loss_sum = loss_sum + np.sum(masked_error)
                m_sum =  m_sum+np.sum(data_m)
                print(loss_sum/m_sum)

                impute_history.append(impute_loss)
                mask_history.append(mask_loss)
                g_history.append(g_impute_loss)
                self.impute_plot_loss(impute_history, mask_history, g_history)

                if counter%10==0:
                    print("Batchid: [%2d] [%4d/%4d] time: %4.4f, impute_loss: %.8f" \
                          % (batchid, tune_step, self.impute_iter, time.time() - start_time, impute_loss))
            imputed_batches.append(imputed)

        self.imputed_list = np.array(imputed_batches)
        self.loss_pre = loss_sum/m_sum

    def pretrain_plot_loss(self,loss):
        """Redraw the live generator pre-training loss curve.

        Args:
            loss: sequence of loss values collected so far; plotted against
                its index on ``self.pretrainG_ax_loss``.
        """
        axes = self.pretrainG_ax_loss
        # Remove the previously drawn curve. Artist.remove() is used because
        # newer Matplotlib releases expose Axes.lines as a read-only view, so
        # axes.lines.remove(...) is no longer available there.
        for stale_line in list(axes.lines):
            stale_line.remove()
        axes.plot(loss,linestyle='-',color='#2E68AA')

        # Label this axes explicitly rather than whichever axes pyplot
        # currently considers active.
        axes.set_title("PreTrainG_loss")
        axes.set_ylabel("loss")
        plt.ion()
        plt.show()
        # Give the GUI event loop time to draw the update.
        plt.pause(0.1)

    def gan_plot_loss(self,g_loss,d_loss):
        """Redraw the live generator/discriminator loss curves.

        Args:
            g_loss: sequence of generator losses so far (drawn in blue).
            d_loss: sequence of discriminator losses so far (drawn in red).
        """
        axes = self.gan_ax_loss
        # Two curves are added per call, so every old line must be dropped;
        # removing only the first one lets artists pile up on the axes.
        # Artist.remove() is used because newer Matplotlib releases expose
        # Axes.lines as a read-only view without a remove() method.
        for stale_line in list(axes.lines):
            stale_line.remove()
        axes.plot(g_loss,linestyle='-',color='blue')
        axes.plot(d_loss, linestyle='-', color='red')

        # Label this axes explicitly rather than whichever axes pyplot
        # currently considers active.
        axes.set_title("gan_loss")
        axes.set_ylabel("loss")
        plt.ion()
        plt.show()
        # Give the GUI event loop time to draw the update.
        plt.pause(0.1)

    def impute_plot_loss(self,impute_loss,mask_loss_list,g_impute_loss_list):
        """Redraw the live imputation loss curves.

        Args:
            impute_loss: sequence of total imputation losses so far
                (drawn in '#2E68AA').
            mask_loss_list: sequence of mask losses so far (drawn in red).
            g_impute_loss_list: sequence of generator imputation losses so
                far (drawn in yellow).
        """
        axes = self.impute_ax_loss
        # Three curves are added per call, so every old line must be dropped;
        # removing only the first one lets artists pile up on the axes.
        # Artist.remove() is used because newer Matplotlib releases expose
        # Axes.lines as a read-only view without a remove() method.
        for stale_line in list(axes.lines):
            stale_line.remove()
        axes.plot(impute_loss,linestyle='-',color='#2E68AA')
        axes.plot(mask_loss_list, linestyle='-', color='red')
        axes.plot(g_impute_loss_list, linestyle='-', color='yellow')

        # Label this axes explicitly rather than whichever axes pyplot
        # currently considers active.
        axes.set_title("impute_loss")
        axes.set_ylabel("loss")
        plt.ion()
        plt.show()
        # Give the GUI event loop time to draw the update.
        plt.pause(0.1)