def _single_cell(unit_type,
                 num_units,
                 forget_bias,
                 dropout,
                 mode,
                 residual_connection=False,
                 device_str=None,
                 residual_fn=None):
    """
    Build one RNN cell and apply the optional wrappers around it.

    The base cell is selected by ``unit_type``. It is then wrapped, in this
    order, with input dropout (only when the effective dropout is > 0), a
    residual connection and a device placement. Each step prints a short
    description without a trailing newline.

    :param unit_type: "lstm", "gru", "layer_norm_lstm" or "nas"
    :param num_units: number of units handed to the cell constructor
    :param forget_bias: forget-gate bias, used by the two LSTM variants only
    :param dropout: dropout rate (= 1 - keep_prob); forced to 0.0 unless
        ``mode`` equals ``tf.contrib.learn.ModeKeys.TRAIN``
    :param mode: run mode, compared against ``tf.contrib.learn.ModeKeys.TRAIN``
    :param residual_connection: when truthy, wrap in ``rnn.ResidualWrapper``
    :param device_str: device passed to ``rnn.DeviceWrapper``; skipped if falsy
    :param residual_fn: forwarded to ``rnn.ResidualWrapper``
    :return: the (possibly wrapped) cell
    :raises ValueError: if ``unit_type`` is not one of the known types
    """
    # Outside of training no dropout is applied at all.
    in_training = mode == tf.contrib.learn.ModeKeys.TRAIN
    if not in_training:
        dropout = 0.0

    # Base cell: announce the choice first, then construct it.
    if unit_type == "lstm":
        print("  LSTM, forget_bias=%g" % forget_bias, end='')
        cell = rnn.BasicLSTMCell(num_units, forget_bias=forget_bias)
    elif unit_type == "gru":
        print("  GRU", end='')
        cell = rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        print("  Layer Normalized LSTM, forget_bias=%g" % forget_bias, end='')
        cell = rnn.LayerNormBasicLSTMCell(
            num_units, forget_bias=forget_bias, layer_norm=True)
    elif unit_type == "nas":
        print("  NASCell", end='')
        cell = rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Input dropout, expressed to the wrapper as a keep probability.
    if dropout > 0.0:
        keep_prob = 1.0 - dropout
        cell = rnn.DropoutWrapper(cell=cell, input_keep_prob=keep_prob)
        wrapper_name = type(cell).__name__
        print("  %s, dropout=%g " % (wrapper_name, dropout), end='')

    # Residual connection around the (possibly dropout-wrapped) cell.
    if residual_connection:
        cell = rnn.ResidualWrapper(cell, residual_fn=residual_fn)
        wrapper_name = type(cell).__name__
        print("  %s" % wrapper_name, end='')

    # Device placement is the outermost wrapper.
    if device_str:
        cell = rnn.DeviceWrapper(cell, device_str)
        wrapper_name = type(cell).__name__
        print("  %s, device=%s" % (wrapper_name, device_str), end='')

    return cell
Example #2
0
    def _new_cell_wrapper(device_id=None):
        """Create a fresh cell and apply the optional dropout and device wrappers.

        Dropout is added when either keep probability taken from the
        enclosing scope is below 1.0. The device wrapper is added only
        when ``device_id`` is truthy.
        """
        cell = _new_cell()

        wants_dropout = input_keep_prob < 1.0 or output_keep_prob < 1.0
        if wants_dropout:
            cell = rnn.DropoutWrapper(
                cell,
                input_keep_prob=input_keep_prob,
                output_keep_prob=output_keep_prob)

        # A falsy device_id leaves the cell without explicit placement.
        if not device_id:
            return cell
        return rnn.DeviceWrapper(cell, device_id)
Example #3
0
 def create_cell(device):
   """Build one recurrent cell of the configured type, pinned to a GPU.

   The cell type comes from ``rnn_type`` in the enclosing scope ("GRU",
   "LSTM", "RWA" or "RAN"). The cell is placed on ``/gpu:<device>`` and then
   wrapped in ``SwitchableDropoutWrapper`` driven by ``self.is_training``.

   :param device: GPU index formatted into the device string
   :return: the wrapped cell
   :raises ValueError: if ``rnn_type`` is not one of the supported types
   """
   if rnn_type == "GRU":
     cell = rnn.GRUCell(rnn_size)
   elif rnn_type == "LSTM":
     # Probe the constructor that is actually called below. Older releases do
     # not accept `reuse`, and checking a different class could disagree.
     lstm_params = inspect.signature(rnn.LayerNormBasicLSTMCell.__init__).parameters
     if 'reuse' in lstm_params:
       cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0, reuse=tf.get_variable_scope().reuse)
     else:
       cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0)
   elif rnn_type == "RWA":
     cell = RWACell(rnn_size)
   elif rnn_type == "RAN":
     cell = RANCell(rnn_size, normalize=self.is_training)
   else:
     # Without this branch `cell` would be unbound and the failure would
     # surface as an UnboundLocalError on the wrapper line below.
     raise ValueError("Unknown rnn_type %r; expected GRU, LSTM, RWA or RAN" % (rnn_type,))
   placed_cell = rnn.DeviceWrapper(cell, device="/gpu:{}".format(device))
   return SwitchableDropoutWrapper(placed_cell, is_train=self.is_training)
Example #4
0
    def _new_cell_wrapper(residual_connection=False, device_id=None):
        """Create a fresh cell and stack the optional wrappers on top of it.

        Wrappers are applied in a fixed order: dropout (when either keep
        probability from the enclosing scope is below 1.0), then a residual
        connection (when ``residual_connection`` is truthy), then device
        placement (when ``device_id`` is truthy).
        """
        wrapped = _new_cell()

        dropout_enabled = input_keep_prob < 1.0 or output_keep_prob < 1.0
        if dropout_enabled:
            wrapped = rnn.DropoutWrapper(
                wrapped,
                input_keep_prob=input_keep_prob,
                output_keep_prob=output_keep_prob)

        if residual_connection:
            wrapped = rnn.ResidualWrapper(wrapped)

        # Device placement stays outermost; a falsy id skips it.
        if not device_id:
            return wrapped
        return rnn.DeviceWrapper(wrapped, device_id)
Example #5
0
    def __init__(self,
                 batch_size,
                 num_input,
                 num_hidden,
                 layer_depth,
                 rnn_type,
                 seq_length,
                 learning_rate,
                 keep_drop=0.5,
                 grad_clip=5.0,
                 is_training=False):
        """Build the graph of a recurrent variational autoencoder.

        An encoder RNN reads ``self.x``; its last output is projected to the
        parameters of a latent Gaussian, a sample ``self.z`` is drawn, and a
        decoder RNN is trained to reconstruct the time-reversed input. The
        cost is the reconstruction error plus the latent (KL) term, minimised
        with Adam on globally clipped gradients.

        :param batch_size: fixed batch size (placeholder shape, noise sample
            and decoder zero state)
        :param num_input: size of the last axis of the input placeholder
        :param num_hidden: units per recurrent cell and size of the latent
            vector
        :param layer_depth: number of stacked cells in encoder and in decoder
        :param rnn_type: "GRU" or "RAN"
        :param seq_length: number of time steps of the input placeholder
        :param learning_rate: learning rate for the Adam optimizer
        :param keep_drop: passed as ``output_keep_prob`` to
            ``SwitchableDropoutWrapper``
        :param grad_clip: global-norm threshold for gradient clipping
        :param is_training: passed (as a constant tensor) to the dropout
            wrapper and to ``RANCell`` as ``normalize``
        :raises ValueError: if ``rnn_type`` is neither "GRU" nor "RAN"
        """
        self.num_input = num_input
        self.num_hidden = num_hidden
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.rnn_type = rnn_type
        self.layer_depth = layer_depth
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.is_training = is_training
        self.keep_drop = keep_drop
        # Input batch: [batch_size, seq_length, num_input].
        self.x = tf.placeholder(tf.float32,
                                [batch_size, seq_length, self.num_input])

        # Recurrent cells (GRU or RAN) for encoder and decoder
        def create_cell():
            if rnn_type == "GRU":
                cell = rnn.GRUCell(num_hidden)
            elif rnn_type == "RAN":
                cell = RANCell(num_hidden,
                               normalize=tf.constant(self.is_training))
            else:
                # Without this branch `cell` would be unbound and the failure
                # would surface as an UnboundLocalError on the wrapper below.
                raise ValueError(
                    "Unknown rnn_type %r; expected GRU or RAN" % (rnn_type,))
            cell = SwitchableDropoutWrapper(cell,
                                            output_keep_prob=self.keep_drop,
                                            is_train=tf.constant(
                                                self.is_training))
            return cell

        # Encoder and decoder stacks are pinned to hard-coded devices
        # (/gpu:0 and /gpu:1 respectively).
        with tf.variable_scope(
                'encoder_cells',
                initializer=tf.contrib.layers.xavier_initializer()):
            self.enc_cell = rnn.DeviceWrapper(rnn.MultiRNNCell(
                [create_cell() for _ in range(layer_depth)]),
                                              device="/gpu:0")

        with tf.variable_scope(
                'decoder_cells',
                initializer=tf.contrib.layers.xavier_initializer()):
            self.dec_cell = rnn.DeviceWrapper(rnn.MultiRNNCell(
                [create_cell() for _ in range(layer_depth)]),
                                              device="/gpu:1")

        with tf.variable_scope('encoder'):
            outputs, _ = tf.nn.dynamic_rnn(cell=self.enc_cell,
                                           inputs=self.x,
                                           time_major=False,
                                           swap_memory=True,
                                           dtype=tf.float32)
            # Only the output at the final time step summarises the sequence.
            self.enc_output = outputs[:, -1, :]

        with tf.variable_scope('latent'):
            # reparametrization trick
            with tf.name_scope("Z"):
                self.z_mean = tf.contrib.layers.fully_connected(
                    inputs=self.enc_output,
                    num_outputs=num_hidden,
                    activation_fn=None,
                    scope="z_mean")
                # Despite its name this tensor is used as a variance: the
                # sample below scales the noise by its square root and the
                # latent loss uses it directly next to its log.
                self.z_stddev = tf.contrib.layers.fully_connected(
                    inputs=self.enc_output,
                    num_outputs=num_hidden,
                    activation_fn=tf.nn.softplus,
                    scope="z_ls2")

            # sample z from the latent distribution
            with tf.name_scope("z_samples"):
                with tf.name_scope('random_normal_sample'):
                    eps = tf.random_normal(
                        (batch_size, num_hidden), 0, 1,
                        dtype=tf.float32)  # standard-normal noise
                with tf.name_scope('z_sample'):
                    self.z = self.z_mean + tf.sqrt(
                        self.z_stddev) * eps  # z = mean + sqrt(var) * eps

        with tf.variable_scope('decoder'):
            # The decoder is trained to emit the input in reverse time order.
            reversed_inputs = tf.reverse(self.x, [1])
            flat_targets = tf.reshape(reversed_inputs, [-1])
            # First decoder input ("GO" frame) is derived from the latent
            # sample; _linear is defined elsewhere - presumably a linear
            # projection to num_input with bias enabled (confirm).
            dec_first_inp = tf.nn.relu(_linear(self.z, self.num_input, True))

            # [GO, ...inputs]
            # NOTE(review): reversed_inputs[:, 1:, :] gives step t (t >= 1)
            # the same frame that is its reconstruction target; a
            # conventional teacher-forcing shift would use [:, :-1, :].
            # Left unchanged - confirm the intent before altering training.
            dec_inputs = tf.concat(
                (tf.expand_dims(dec_first_inp, 1), reversed_inputs[:, 1:, :]),
                1)
            # Output projection from decoder states back to input features.
            self.w1 = tf.get_variable(
                "w1",
                shape=[self.num_hidden, self.num_input],
                initializer=tf.contrib.layers.xavier_initializer())
            self.b1 = tf.get_variable("b1",
                                      shape=[self.num_input],
                                      initializer=tf.constant_initializer(0.0))
            self.initial_state = self.dec_cell.zero_state(batch_size,
                                                          dtype=tf.float32)
            dec_outputs, _ = tf.nn.dynamic_rnn(
                cell=self.dec_cell,
                inputs=dec_inputs,
                initial_state=self.initial_state,
                time_major=False,
                swap_memory=True,
                dtype=tf.float32)
        # Project every decoder step and flatten to match flat_targets.
        logist = tf.matmul(tf.reshape(dec_outputs, [-1, self.num_hidden]),
                           self.w1) + self.b1
        self.reconstruction = tf.reshape(logist, [-1])
        # Half mean squared error over all elements.
        self.reconstruction_loss = 0.5 * tf.reduce_mean(
            tf.pow(self.reconstruction - flat_targets, 2.0))
        # Latent term: -0.5 * (1 + log(var) - mean^2 - var) per latent unit.
        # NOTE(review): tf.log of a softplus output can become very negative
        # when the activation approaches 0 - consider a small epsilon.
        self.latent_loss = -0.5 * (1.0 + tf.log(self.z_stddev) -
                                   tf.square(self.z_mean) - self.z_stddev)
        # Sum over latent units, then normalise by sequence length and batch.
        self.latent_loss = tf.reduce_sum(self.latent_loss, 1) / tf.cast(
            seq_length, tf.float32)
        self.latent_loss = tf.reduce_sum(self.latent_loss) / tf.cast(
            batch_size, tf.float32)
        self.cost = tf.reduce_mean(self.reconstruction_loss + self.latent_loss)

        # Adam on gradients clipped by global norm.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=0.001)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))