import tensorflow as tf
from tensorflow.contrib import rnn


def _single_cell(unit_type, num_units, forget_bias, dropout, mode,
                 residual_connection=False, device_str=None, residual_fn=None):
    """Create a single RNN cell.

    :param unit_type: RNN cell type ("lstm", "gru", "layer_norm_lstm" or "nas")
    :param num_units: number of hidden units
    :param forget_bias: forget-gate bias
    :param dropout: dropout rate
    :param mode: run mode (dropout is only applied in TRAIN mode)
    :param residual_connection: whether to add a residual connection
    :param device_str: device to place the cell on
    :param residual_fn: residual function
    :return: the (possibly wrapped) RNN cell
    """
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        print(" LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.BasicLSTMCell(num_units, forget_bias=forget_bias)
    elif unit_type == "gru":
        print(" GRU", end='')
        single_cell = rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        print(" Layer Normalized LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.LayerNormBasicLSTMCell(num_units,
                                                 forget_bias=forget_bias,
                                                 layer_norm=True)
    elif unit_type == "nas":
        print(" NASCell", end='')
        single_cell = rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob)
    if dropout > 0.0:
        single_cell = rnn.DropoutWrapper(cell=single_cell,
                                         input_keep_prob=(1.0 - dropout))
        print(" %s, dropout=%g " % (type(single_cell).__name__, dropout), end='')

    # Residual
    if residual_connection:
        single_cell = rnn.ResidualWrapper(single_cell, residual_fn=residual_fn)
        print(" %s" % type(single_cell).__name__, end='')

    # Device Wrapper
    if device_str:
        single_cell = rnn.DeviceWrapper(single_cell, device_str)
        print(" %s, device=%s" % (type(single_cell).__name__, device_str), end='')

    return single_cell
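# A minimal usage sketch, not part of the original code: stacking cells built by
# `_single_cell` into a MultiRNNCell. The helper name `build_rnn_cell`, its
# defaults, and the round-robin GPU placement are assumptions for illustration.
def build_rnn_cell(unit_type, num_units, num_layers, forget_bias, dropout, mode,
                   num_gpus=1):
    # Hypothetical helper: build each layer with `_single_cell`, placing layer i
    # on GPU (i % num_gpus), then stack the layers.
    cells = []
    for i in range(num_layers):
        cell = _single_cell(unit_type=unit_type,
                            num_units=num_units,
                            forget_bias=forget_bias,
                            dropout=dropout,
                            mode=mode,
                            device_str="/gpu:%d" % (i % num_gpus))
        print()  # finish the per-cell log line started by _single_cell
        cells.append(cell)
    return cells[0] if len(cells) == 1 else rnn.MultiRNNCell(cells)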
def _new_cell_wrapper(device_id=None):
    c = _new_cell()
    if input_keep_prob < 1.0 or output_keep_prob < 1.0:
        c = rnn.DropoutWrapper(c,
                               input_keep_prob=input_keep_prob,
                               output_keep_prob=output_keep_prob)
    if device_id:
        c = rnn.DeviceWrapper(c, device_id)
    return c
def create_cell(device):
    if rnn_type == "GRU":
        cell = rnn.GRUCell(rnn_size)
    elif rnn_type == "LSTM":
        # Only pass `reuse` if the installed TF version supports it.
        if 'reuse' in inspect.signature(tf.contrib.rnn.BasicLSTMCell.__init__).parameters:
            cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0,
                                              reuse=tf.get_variable_scope().reuse)
        else:
            cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0)
    elif rnn_type == "RWA":
        cell = RWACell(rnn_size)
    elif rnn_type == "RAN":
        cell = RANCell(rnn_size, normalize=self.is_training)
    else:
        raise ValueError("Unknown rnn_type %s!" % rnn_type)
    cell = SwitchableDropoutWrapper(
        rnn.DeviceWrapper(cell, device="/gpu:{}".format(device)),
        is_train=self.is_training)
    return cell
def _new_cell_wrapper(residual_connection=False, device_id=None):
    c = _new_cell()
    if input_keep_prob < 1.0 or output_keep_prob < 1.0:
        c = rnn.DropoutWrapper(c,
                               input_keep_prob=input_keep_prob,
                               output_keep_prob=output_keep_prob)
    if residual_connection:
        c = rnn.ResidualWrapper(c)
    if device_id:
        c = rnn.DeviceWrapper(c, device_id)
    return c
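# A hedged usage sketch, not from the original source: how a wrapper like the one
# above is typically driven from an enclosing builder. The function name
# `_build_stacked_cell`, the GRU base cell, and the keep-prob defaults are
# assumptions; in the original, `_new_cell` and the keep probabilities come from
# the enclosing closure.
def _build_stacked_cell(num_layers, num_units, num_gpus=1,
                        input_keep_prob=1.0, output_keep_prob=0.5):
    def _new_cell():
        return rnn.GRUCell(num_units)  # assumed base cell for illustration

    def _new_cell_wrapper(residual_connection=False, device_id=None):
        c = _new_cell()
        if input_keep_prob < 1.0 or output_keep_prob < 1.0:
            c = rnn.DropoutWrapper(c,
                                   input_keep_prob=input_keep_prob,
                                   output_keep_prob=output_keep_prob)
        if residual_connection:
            c = rnn.ResidualWrapper(c)
        if device_id:
            c = rnn.DeviceWrapper(c, device_id)
        return c

    # First layer without a residual connection, upper layers with one;
    # each layer is placed on a round-robin GPU.
    cells = [_new_cell_wrapper(residual_connection=(i > 0),
                               device_id="/gpu:%d" % (i % num_gpus))
             for i in range(num_layers)]
    return rnn.MultiRNNCell(cells)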
def __init__(self, batch_size, num_input, num_hidden, layer_depth, rnn_type,
             seq_length, learning_rate, keep_drop=0.5, grad_clip=5.0,
             is_training=False):
    self.num_input = num_input
    self.num_hidden = num_hidden
    self.seq_length = seq_length
    self.batch_size = batch_size
    self.rnn_type = rnn_type
    self.layer_depth = layer_depth
    self.learning_rate = learning_rate
    self.grad_clip = grad_clip
    self.is_training = is_training
    self.keep_drop = keep_drop
    self.x = tf.placeholder(tf.float32, [batch_size, seq_length, self.num_input])

    # RNN cells for encoder and decoder
    def create_cell():
        if rnn_type == "GRU":
            cell = rnn.GRUCell(num_hidden)
        elif rnn_type == "RAN":
            cell = RANCell(num_hidden, normalize=tf.constant(self.is_training))
        cell = SwitchableDropoutWrapper(cell,
                                        output_keep_prob=self.keep_drop,
                                        is_train=tf.constant(self.is_training))
        return cell

    with tf.variable_scope('encoder_cells',
                           initializer=tf.contrib.layers.xavier_initializer()):
        self.enc_cell = rnn.DeviceWrapper(
            rnn.MultiRNNCell([create_cell() for _ in range(layer_depth)]),
            device="/gpu:0")

    with tf.variable_scope('decoder_cells',
                           initializer=tf.contrib.layers.xavier_initializer()):
        self.dec_cell = rnn.DeviceWrapper(
            rnn.MultiRNNCell([create_cell() for _ in range(layer_depth)]),
            device="/gpu:1")

    with tf.variable_scope('encoder'):
        outputs, _ = tf.nn.dynamic_rnn(cell=self.enc_cell,
                                       inputs=self.x,
                                       time_major=False,
                                       swap_memory=True,
                                       dtype=tf.float32)
        self.enc_output = outputs[:, -1, :]

    with tf.variable_scope('latent'):
        # reparameterization trick
        with tf.name_scope("Z"):
            self.z_mean = tf.contrib.layers.fully_connected(
                inputs=self.enc_output,
                num_outputs=num_hidden,
                activation_fn=None,
                scope="z_mean")
            self.z_stddev = tf.contrib.layers.fully_connected(
                inputs=self.enc_output,
                num_outputs=num_hidden,
                activation_fn=tf.nn.softplus,
                scope="z_ls2")

        # sample z from the latent distribution
        with tf.name_scope("z_samples"):
            with tf.name_scope('random_normal_sample'):
                # draw a standard-normal random sample
                eps = tf.random_normal((batch_size, num_hidden), 0, 1,
                                       dtype=tf.float32)
            with tf.name_scope('z_sample'):
                # sample from Z -> z
                self.z = self.z_mean + tf.sqrt(self.z_stddev) * eps

    with tf.variable_scope('decoder'):
        reversed_inputs = tf.reverse(self.x, [1])
        flat_targets = tf.reshape(reversed_inputs, [-1])
        dec_first_inp = tf.nn.relu(_linear(self.z, self.num_input, True))
        # decoder inputs: [GO, ...inputs]
        dec_inputs = tf.concat(
            (tf.expand_dims(dec_first_inp, 1), reversed_inputs[:, 1:, :]), 1)
        self.w1 = tf.get_variable(
            "w1",
            shape=[self.num_hidden, self.num_input],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b1 = tf.get_variable("b1",
                                  shape=[self.num_input],
                                  initializer=tf.constant_initializer(0.0))
        self.initial_state = self.dec_cell.zero_state(batch_size, dtype=tf.float32)
        dec_outputs, _ = tf.nn.dynamic_rnn(cell=self.dec_cell,
                                           inputs=dec_inputs,
                                           initial_state=self.initial_state,
                                           time_major=False,
                                           swap_memory=True,
                                           dtype=tf.float32)
        logits = tf.matmul(tf.reshape(dec_outputs, [-1, self.num_hidden]),
                           self.w1) + self.b1
        self.reconstruction = tf.reshape(logits, [-1])

    self.reconstruction_loss = 0.5 * tf.reduce_mean(
        tf.pow(self.reconstruction - flat_targets, 2.0))
    self.latent_loss = -0.5 * (1.0 + tf.log(self.z_stddev) -
                               tf.square(self.z_mean) - self.z_stddev)
    self.latent_loss = tf.reduce_sum(self.latent_loss, 1) / tf.cast(
        seq_length, tf.float32)
    self.latent_loss = tf.reduce_sum(self.latent_loss) / tf.cast(
        batch_size, tf.float32)
    self.cost = tf.reduce_mean(self.reconstruction_loss + self.latent_loss)

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      self.grad_clip)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=0.001)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
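# A minimal training-loop sketch, assumed rather than taken from the original
# repo: driving the sequence-VAE constructor above. The function name
# `train_demo`, the hyperparameters, and the random data are placeholders; it
# also assumes the model's own dependencies (RANCell, SwitchableDropoutWrapper,
# _linear) are importable, and that `model_cls` is the class that owns __init__.
import numpy as np

def train_demo(model_cls, num_epochs=1):
    model = model_cls(batch_size=32, num_input=128, num_hidden=64, layer_depth=2,
                      rnn_type="GRU", seq_length=20, learning_rate=1e-3,
                      is_training=True)
    # allow_soft_placement lets the /gpu:0 and /gpu:1 DeviceWrappers fall back
    # to available devices on machines without two GPUs.
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(num_epochs):
            batch = np.random.randn(32, 20, 128).astype(np.float32)
            cost, _ = sess.run([model.cost, model.train_op],
                               feed_dict={model.x: batch})
            print("epoch %d, cost %.4f" % (epoch, cost))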