def build_enc_dec(self, X, with_drop=True, dropout_rate=[0.1, 0.1, 0.],
                  filters=[4, 5], strides=[2, 2], name='enc_dec'):
    """Builds a convolutional encoder-decoder with a tanh bottleneck of size z_dim."""
    # NOTE: `strides` is currently unused; the conv2d/deconv2d helpers use
    # their default stride.
    l_relu = lambda v: tf.nn.leaky_relu(v, alpha=0.01)
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # Encoder: input dropout, two convolutions, then a fully connected
        # bottleneck l_e3 of size z_dim.
        X_c = tf.layers.dropout(X, rate=dropout_rate[0], training=with_drop)
        l_e1 = tf.layers.dropout(
            conv2d(X_c, 50, activation_fn=l_relu, kernel_size=filters[0], name='l_e1'),
            rate=dropout_rate[1], training=with_drop)
        l_e2 = tf.layers.dropout(
            conv2d(l_e1, 50, activation_fn=l_relu, kernel_size=filters[1], name='l_e2'),
            rate=dropout_rate[2], training=with_drop)
        l_e2_flat = tf.contrib.layers.flatten(l_e2)
        l_e3 = fc(l_e2_flat, self.config.z_dim, activation_fn=tf.tanh, name='l_e3')

        # Decoder: mirror the encoder back to an image with c channels.
        l_d2_flat = fc(l_e3, l_e2_flat.get_shape()[1], activation_fn=l_relu,
                       name='l_d2_flat')
        l_d2 = tf.reshape(l_d2_flat, tf.shape(l_e2))
        l_d1 = deconv2d(l_d2, 50, activation_fn=l_relu, kernel_size=filters[1],
                        name='l_d1')
        l_d0 = deconv2d(l_d1, self.config.c, activation_fn=tf.tanh,
                        kernel_size=filters[0], name='l_d0')
    return l_e1, l_e2, l_e3, l_d2, l_d1, l_d0
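# A minimal usage sketch for build_enc_dec above, assuming a `model` instance
# whose config defines z_dim and c and whose class provides the method; the
# variable names here are hypothetical, not from the original repo.
images = tf.placeholder(tf.float32, [None, 32, 32, 3])
# Training graph with dropout on; tf.AUTO_REUSE inside build_enc_dec lets an
# evaluation graph with dropout off share the same weights.
_, _, z, _, _, recon = model.build_enc_dec(images, with_drop=True)
_, _, z_eval, _, _, recon_eval = model.build_enc_dec(images, with_drop=False)
recon_loss = tf.reduce_mean(tf.square(recon - images))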
def build_feat_image(x, ndim=3):
    # Decodes a residual feature map back to an `ndim`-channel image.
    # `residual_dim`, `output_side` and `large_ksize` come from the enclosing
    # scope (this function also closes over `self`).
    for i in range(self.common_length):
        self.res_cnt += 1
        name = 'Res%d' % self.res_cnt
        x = residual_block(name, x, residual_dim)
    x = ops.get_norm(x, name=self.norm_mtd, training=self.training,
                     reuse=tf.AUTO_REUSE)
    # Upsample with stride-2 deconvolutions, then project to `ndim` channels
    # with a final tanh convolution.
    for depth in output_side:
        self.conv_cnt += 1
        x = ops.deconv2d("deconv%d" % self.conv_cnt, x, depth, 3, 2,
                         activation_fn=tf.nn.relu, normalizer_mode=self.norm_mtd,
                         training=self.training, reuse=tf.AUTO_REUSE)
    x = ops.conv2d("deconv%d" % (self.conv_cnt + 1), x, ndim, large_ksize, 1,
                   activation_fn=tf.nn.tanh, normalizer_mode=None,
                   training=self.training, reuse=tf.AUTO_REUSE)
    return x
def build_inference(self, x):
    # Discriminator: a stack of stride-2 convolutions followed by a 1x1
    # convolution that emits an unnormalized logit map.
    ndf = 64
    ksize = 4
    layer_depth = [ndf * 4, ndf * 8, ndf * 16, ndf * 4, ndf * 8]
    self.norm_mtd = "inst"
    x = L.conv2d(x, ndf, 7, 2, padding='SAME', scope='conv1',
                 reuse=tf.AUTO_REUSE, activation_fn=ops.LeakyReLU)
    conv_cnt = 1
    for depth in layer_depth:
        conv_cnt += 1
        name = "conv%d" % conv_cnt
        x = ops.conv2d(name, x, depth, ksize, 2, activation_fn=ops.LeakyReLU,
                       normalizer_mode=self.norm_mtd, training=self.training,
                       reuse=tf.AUTO_REUSE)
    self.disc_out = ops.conv2d("conv%d" % (conv_cnt + 1), x, 1, 1, 1,
                               activation_fn=None, training=self.training,
                               reuse=tf.AUTO_REUSE)
    print("ImageConditionalDeepDiscriminator shape:")
    print(self.disc_out.get_shape())
    return self.disc_out
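# A sketch of how the patch-wise logits from build_inference would typically
# enter a GAN loss. The non-saturating objective below is an assumption, not
# taken from this repo; `disc`, `real_images` and `fake_images` are
# hypothetical. tf.AUTO_REUSE makes the two calls share weights.
real_logits = disc.build_inference(real_images)
fake_logits = disc.build_inference(fake_images)
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(real_logits), logits=real_logits) +
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(fake_logits), logits=fake_logits))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(fake_logits), logits=fake_logits))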
def build_add_noise(image_feat, noise_feat):
    # Concatenate noise features with the image features along the channel
    # axis and fuse them with a 3x3 convolution.
    concat_feat = tf.concat([image_feat, noise_feat], axis=3)
    new_feat = ops.conv2d("conv_add", concat_feat, residual_dim, 3, 1,
                          activation_fn=tf.nn.relu, normalizer_mode=self.norm_mtd,
                          training=self.training, reuse=tf.AUTO_REUSE)
    return new_feat
def tdnn(input_, kernels, kernel_features, scope='TDNN'):
    """
    Time Delay Neural Network

    :param input_: float tensor of shape [(batch_size * num_unroll_steps) x max_word_length x embed_size]
    :param kernels: array of kernel sizes
    :param kernel_features: array of kernel feature sizes (parallel to kernels)
    """
    assert len(kernels) == len(kernel_features), \
        'kernels and kernel_features must have the same length'

    # input_ arrives with shape [batch, sentence_length, max_word_length, embed_size];
    # reshape it to [batch * sentence_length, 1, max_word_length, embed_size]
    # (here the embedding size is ALPHABET_SIZE) so it can be fed to conv2d.
    input_ = tf.reshape(input_, [-1, self.max_word_length, ALPHABET_SIZE])
    input_ = tf.expand_dims(input_, 1)

    layers = []
    with tf.variable_scope(scope):
        for kernel_size, kernel_feature_size in zip(kernels, kernel_features):
            # A width-`kernel_size` kernel leaves this many valid positions.
            reduced_length = self.max_word_length - kernel_size + 1

            # [batch_size * sentence_length x 1 x reduced_length x kernel_feature_size]
            conv = conv2d(input_, kernel_feature_size, 1, kernel_size,
                          name="kernel_%d" % kernel_size)

            # Max-over-time pooling collapses the length dimension:
            # [batch_size * sentence_length x 1 x 1 x kernel_feature_size]
            pool = tf.nn.max_pool(tf.tanh(conv), [1, 1, reduced_length, 1],
                                  [1, 1, 1, 1], 'VALID')
            layers.append(tf.squeeze(pool, [1, 2]))

        output = tf.concat(layers, 1) if len(kernels) > 1 else layers[0]
    return output
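# Worked example for tdnn (illustrative numbers, not from the original repo):
# with max_word_length = 10, kernels = [1, 2, 3] and
# kernel_features = [50, 100, 150], a kernel of size k sees 10 - k + 1 valid
# positions (10, 9, 8); max-over-time pooling collapses each to one vector,
# so the concatenated word representation has 50 + 100 + 150 = 300 features.
# `char_input` is a hypothetical character tensor.
word_repr = tdnn(char_input, kernels=[1, 2, 3], kernel_features=[50, 100, 150])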
from functools import reduce  # built-in in Python 2, needs the import in Python 3


def build_DQN(s_t, action_size, target_q_t, action, learning_rate_step,
              cnn_format='NHWC'):
    min_delta = -1
    max_delta = 1
    learning_rate_initial = 0.00025
    learning_rate_minimum = 0.00025
    learning_rate_decay = 0.96
    learning_rate_decay_step = 50

    w = {}
    # initializer = tf.contrib.layers.xavier_initializer()
    initializer = tf.truncated_normal_initializer(0, 0.02)
    activation_fn = tf.nn.relu

    with tf.variable_scope('Q_network'):
        # Three convolutional layers over the stacked input frames.
        l1, w['l1_w'], w['l1_b'] = conv2d(s_t, 32, [8, 8], [4, 4], initializer,
                                          activation_fn, cnn_format, name='l1')
        l2, w['l2_w'], w['l2_b'] = conv2d(l1, 64, [4, 4], [2, 2], initializer,
                                          activation_fn, cnn_format, name='l2')
        l3, w['l3_w'], w['l3_b'] = conv2d(l2, 64, [3, 3], [1, 1], initializer,
                                          activation_fn, cnn_format, name='l3')

        shape = l3.get_shape().as_list()
        l3_flat = tf.reshape(l3, [-1, reduce(lambda x, y: x * y, shape[1:])])

        l4, w['l4_w'], w['l4_b'] = linear(l3_flat, 512,
                                          activation_fn=activation_fn, name='l4')
        q, w['q_w'], w['q_b'] = linear(l4, action_size, name='q')

        # tf.summary.histogram/merge replace the pre-1.0 tf.histogram_summary
        # and tf.merge_summary ops.
        q_summary = []
        avg_q = tf.reduce_mean(q, 0)
        for idx in range(action_size):
            q_summary.append(tf.summary.histogram('q/%s' % idx, avg_q[idx]))
        q_summary = tf.summary.merge(q_summary, name='q_summary')

    with tf.variable_scope('optimizer'):
        action_one_hot = tf.one_hot(action, action_size, 1.0, 0.0,
                                    name='action_one_hot')
        q_acted = tf.reduce_sum(q * action_one_hot, axis=1, name='q_acted')

        # TD error, clipped to [min_delta, max_delta] before squaring.
        delta = target_q_t - q_acted
        clipped_delta = tf.clip_by_value(delta, min_delta, max_delta,
                                         name='clipped_delta')
        loss = tf.reduce_mean(tf.square(clipped_delta), name='loss')

        learning_rate = tf.maximum(
            learning_rate_minimum,
            tf.train.exponential_decay(learning_rate_initial, learning_rate_step,
                                       learning_rate_decay_step,
                                       learning_rate_decay, staircase=True))
        optim = tf.train.RMSPropOptimizer(learning_rate, momentum=0.95,
                                          epsilon=0.01).minimize(loss)
    return w, q, q_summary, optim, loss
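# A minimal training-step sketch for build_DQN, assuming an Atari-style
# 84x84 state with 4 stacked frames; all placeholder names are hypothetical.
s_t = tf.placeholder(tf.float32, [None, 84, 84, 4], name='s_t')
target_q_t = tf.placeholder(tf.float32, [None], name='target_q_t')
action = tf.placeholder(tf.int64, [None], name='action')
learning_rate_step = tf.placeholder(tf.int64, [], name='learning_rate_step')

w, q, q_summary, optim, loss = build_DQN(
    s_t, action_size=4, target_q_t=target_q_t, action=action,
    learning_rate_step=learning_rate_step)
# One gradient step; targets would come from a separate target network as
# r + gamma * max_a' Q_target(s', a'):
# sess.run([optim, loss], {s_t: states, target_q_t: targets,
#                          action: actions, learning_rate_step: step})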
def _create_a3c_network(self):
    """ Creates the A3C network """
    # Input image is of shape [84 x 84 x 3].
    self.input = tf.placeholder("float", [None, 84, 84, 3])
    # The action is a one-hot encoded vector of shape [self._action_size]
    # and the reward is a floating point. We pass both values as a
    # concatenated vector of shape [self._action_size + 1].
    self.last_action_reward = tf.placeholder("float",
                                             [None, self._action_size + 1])

    # We use the same network as Mnih et al.'s A3C implementation.
    # [batch_size x 20 x 20 x 16]
    cnn = conv2d(self.input, 16, 8, 8, stride=4, name='conv0')
    # [batch_size x 9 x 9 x 32]
    cnn = conv2d(cnn, 32, 4, 4, stride=2, name='conv1')

    # Reshape the output of the conv layer to [batch_size x 32 * 9 * 9 = 2592].
    # lstm_input is of shape [batch_size x 256]; note that in our case the
    # batch_size is the number of frames. In our implementation we compute the
    # outputs of the LSTM frame by frame and backpropagate every 20 frames,
    # thus sequence_length (forward) = 1 and sequence_length (backward) = 20.
    lstm_input = fc_layer(tf.reshape(cnn, [-1, 2592]), 256, name='fc0')

    with tf.variable_scope('lstm') as scope:
        # In the paper, the downsampled observation is concatenated with the
        # last action and reward before being fed to the LSTM.
        lstm_input = tf.concat([lstm_input, self.last_action_reward], 1)

        # dynamic_rnn takes an input of shape
        # [batch_size x sequence_length x input_dim]; in our case
        # batch_size = 1, sequence_length = unroll_step (default: 20) and
        # input_dim = 256 + action_size + 1 (fc output + one-hot action +
        # reward).
        lstm_input = tf.reshape(lstm_input,
                                [1, -1, 256 + self._action_size + 1])

        # The LSTM cell is created in the _create_network method; here we only
        # initialize its state.
        initial_state = self.lstm_cell.zero_state(batch_size=1,
                                                  dtype=tf.float32)

        # Fetch the output and the last state of the LSTM. Given the cell
        # state of an LSTM and the input at time t, we can compute the output
        # and cell state at time t + 1 (t = 0, 1, ...); we therefore use the
        # returned state to forward-propagate manually. This becomes clear in
        # the actual training loop.
        self.lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
            self.lstm_cell, lstm_input, initial_state=initial_state,
            scope=scope, dtype=tf.float32)

        # self.lstm_outputs is of shape [batch_size=1 x seq_length x n_units];
        # reshape it to [seq_length x n_units].
        self.lstm_outputs = tf.reshape(self.lstm_outputs, shape=[-1, 256])

    # Given the output of the LSTM, we compute the policy (pi) and the value
    # function (v) for this frame; both are approximated with a fully
    # connected layer.

    # pi is of shape [batch_size=1, self._action_size]: the probability
    # distribution from which the actions are sampled.
    with tf.variable_scope('policy'):
        self.pi = fc_layer(self.lstm_outputs, self._action_size,
                           name='fc_pi', activation=tf.nn.softmax)

    # v is of shape [batch_size=1, 1] (floating point).
    with tf.variable_scope('value'):
        self.v = fc_layer(self.lstm_outputs, 1, name='fc_v', activation=None)
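# A sketch of the frame-by-frame forward pass this network is built for,
# assuming the zero state created above is also kept on the agent (the
# attribute names below are hypothetical). Each call runs the LSTM for a
# single frame and carries the cell state across calls; training later
# re-runs up to 20 frames as one sequence for backpropagation.
# pi_out, v_out, agent.last_state = sess.run(
#     [agent.pi, agent.v, agent.lstm_state],
#     feed_dict={agent.input: [frame],
#                agent.last_action_reward: [prev_action_reward],
#                agent.initial_state.c: agent.last_state.c,
#                agent.initial_state.h: agent.last_state.h})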