예제 #1
0
    def build_enc_dec(self,
                      X,
                      with_drop=True,
                      dropout_rate=(0.1, 0.1, 0.),
                      filters=(4, 5),
                      strides=(2, 2),
                      name='enc_dec'):
        """Build a small convolutional encoder/decoder graph on top of ``X``.

        Encoder: dropout on the input, then two ``conv2d`` layers with 50
        outputs each (leaky ReLU, each followed by dropout), a flatten, and a
        fully connected layer with ``self.config.z_dim`` outputs and ``tanh``.
        Decoder: a fully connected layer back to the flattened encoder width,
        a reshape to the run-time shape of the second encoder layer, and two
        ``deconv2d`` layers, the last one with ``self.config.c`` outputs and
        ``tanh``.

        Args:
            X: Input tensor.
                NOTE(review): presumably a 4-D image batch -- confirm against
                the ``conv2d`` helper, which is not visible here.
            with_drop: Passed as ``training`` to every ``tf.layers.dropout``
                call, so dropout is only applied when this is true.
            dropout_rate: Three drop rates, in order: on the input, after
                ``l_e1``, after ``l_e2``.
            filters: Two kernel sizes. ``filters[0]`` is used by the first
                encoder layer and the last decoder layer, ``filters[1]`` by
                the second encoder layer and the first decoder layer.
            strides: Not read anywhere in this method; kept in the signature
                so existing callers that pass it keep working.
            name: Variable scope name. The scope is opened with
                ``reuse=tf.AUTO_REUSE``.

        Returns:
            The tuple ``(l_e1, l_e2, l_e3, l_d2, l_d1, l_d0)``: both encoder
            layers (after dropout), the latent code, the reshaped decoder
            input, and the two decoder layers.
        """
        # The sequence defaults are tuples rather than lists so that no
        # mutable default object is shared between calls. Lists passed by
        # callers still work because the values are only ever indexed.

        l_relu = lambda v: tf.nn.leaky_relu(v, alpha=0.01)
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):

            # Input corruption.
            X_c = tf.layers.dropout(X,
                                    rate=dropout_rate[0],
                                    training=with_drop)

            # Encoder layer 1.
            l_e1 = tf.layers.dropout(conv2d(X_c,
                                            50,
                                            activation_fn=l_relu,
                                            kernel_size=filters[0],
                                            name='l_e1'),
                                     rate=dropout_rate[1],
                                     training=with_drop)

            # Encoder layer 2.
            l_e2 = tf.layers.dropout(conv2d(l_e1,
                                            50,
                                            activation_fn=l_relu,
                                            kernel_size=filters[1],
                                            name='l_e2'),
                                     rate=dropout_rate[2],
                                     training=with_drop)

            # Latent code.
            l_e2_flat = tf.contrib.layers.flatten(l_e2)
            l_e3 = fc(l_e2_flat,
                      self.config.z_dim,
                      activation_fn=tf.tanh,
                      name='l_e3')

            # Project the code back to the flattened encoder width (taken
            # from the static shape) and restore the layout of l_e2 using
            # its run-time shape.
            l_d2_flat = fc(l_e3,
                           l_e2_flat.get_shape()[1],
                           activation_fn=l_relu,
                           name='l_d2_flat')
            l_d2 = tf.reshape(l_d2_flat, tf.shape(l_e2))

            # Decoder layers use the kernel sizes in reverse order.
            l_d1 = deconv2d(l_d2,
                            50,
                            activation_fn=l_relu,
                            kernel_size=filters[1],
                            name='l_d1')
            l_d0 = deconv2d(l_d1,
                            self.config.c,
                            activation_fn=tf.tanh,
                            kernel_size=filters[0],
                            name='l_d0')

            return l_e1, l_e2, l_e3, l_d2, l_d1, l_d0
예제 #2
0
        def build_feat_image(x, ndim=3):
            """Turn the feature map ``x`` into an ``ndim``-channel output.

            Pipeline: ``self.common_length`` residual blocks, one
            normalization layer, one ``ops.deconv2d`` per entry of
            ``output_side``, and a final ``ops.conv2d`` with ``tanh``.

            This is a closure: it reads ``self``, ``residual_dim``,
            ``output_side`` and ``large_ksize`` from the enclosing scope and
            advances ``self.res_cnt`` / ``self.conv_cnt``, which are used to
            build the layer names.
            """
            # Residual stack; the running counter gives each block its name.
            for _ in range(self.common_length):
                self.res_cnt += 1
                x = residual_block('Res%d' % self.res_cnt, x, residual_dim)

            x = ops.get_norm(
                x,
                name=self.norm_mtd,
                training=self.training,
                reuse=tf.AUTO_REUSE,
            )

            # One transposed convolution per requested output depth.
            # NOTE(review): the positional 3 and 2 are presumably kernel size
            # and stride -- confirm against ops.deconv2d.
            for out_depth in output_side:
                self.conv_cnt += 1
                layer_name = "deconv%d" % self.conv_cnt
                x = ops.deconv2d(
                    layer_name,
                    x,
                    out_depth,
                    3,
                    2,
                    activation_fn=tf.nn.relu,
                    normalizer_mode=self.norm_mtd,
                    training=self.training,
                    reuse=tf.AUTO_REUSE,
                )

            # The output layer is named with the next index, but
            # self.conv_cnt itself is not advanced here.
            output_name = "deconv%d" % (self.conv_cnt + 1)
            return ops.conv2d(
                output_name,
                x,
                ndim,
                large_ksize,
                1,
                activation_fn=tf.nn.tanh,
                normalizer_mode=None,
                training=self.training,
                reuse=tf.AUTO_REUSE,
            )
예제 #3
0
    def build_inference(self, x):
        """Build the discriminator graph on ``x`` and return its output.

        Layers: ``conv1`` (``L.conv2d``, 64 outputs), then ``conv2`` ..
        ``conv6`` (``ops.conv2d`` with 256, 512, 1024, 256, 512 outputs,
        ``ops.LeakyReLU`` and ``self.norm_mtd`` normalization), then ``conv7``
        with a single output and no activation.

        Side effects: sets ``self.norm_mtd`` to ``"inst"``, stores the result
        in ``self.disc_out`` and prints its static shape.

        Returns:
            ``self.disc_out``.
        """
        base_width = 64
        kernel = 4
        width_multipliers = (4, 8, 16, 4, 8)
        self.norm_mtd = "inst"

        x = L.conv2d(
            x,
            base_width,
            7,
            2,
            padding='SAME',
            scope='conv1',
            reuse=tf.AUTO_REUSE,
            activation_fn=ops.LeakyReLU,
        )

        # 'conv1' is taken by the stem above, so the stack starts at 2.
        for layer_idx, mult in enumerate(width_multipliers, start=2):
            x = ops.conv2d(
                "conv%d" % layer_idx,
                x,
                base_width * mult,
                kernel,
                2,
                activation_fn=ops.LeakyReLU,
                normalizer_mode=self.norm_mtd,
                training=self.training,
                reuse=tf.AUTO_REUSE,
            )

        # Single-output head, named with the index after the last stack layer.
        head_name = "conv%d" % (layer_idx + 1)
        self.disc_out = ops.conv2d(
            head_name,
            x,
            1,
            1,
            1,
            activation_fn=None,
            training=self.training,
            reuse=tf.AUTO_REUSE,
        )

        print("ImageConditionalDeepDiscriminator shape:")
        print(self.disc_out.get_shape())

        return self.disc_out
예제 #4
0
 def build_add_noise(image_feat, noise_feat):
     """Merge image and noise features with one convolution.

     The two tensors are concatenated along axis 3 and passed through
     ``ops.conv2d`` (scope name ``"conv_add"``, ``residual_dim`` outputs,
     ReLU). ``self`` and ``residual_dim`` come from the enclosing scope.
     """
     stacked = tf.concat([image_feat, noise_feat], axis=3)
     # NOTE(review): the positional 3 and 1 are presumably kernel size and
     # stride -- confirm against ops.conv2d.
     return ops.conv2d(
         "conv_add",
         stacked,
         residual_dim,
         3,
         1,
         activation_fn=tf.nn.relu,
         normalizer_mode=self.norm_mtd,
         training=self.training,
         reuse=tf.AUTO_REUSE,
     )
예제 #5
0
        def tdnn(input_, kernels, kernel_features, scope='TDNN'):
            """Time Delay Neural Network: parallel convolutions + max pooling.

            :input_:          float tensor; it is reshaped here to
                              [-1, self.max_word_length, ALPHABET_SIZE]
            :kernels:         sequence of kernel sizes
            :kernel_features: sequence of feature counts (parallel to kernels)
            :scope:           variable scope the convolutions are created in
            :returns:         the pooled features of every kernel, concatenated
                              along axis 1 (or the single branch when only one
                              kernel is given)
            """
            assert len(kernels) == len(
                kernel_features), 'Kernel and Features must have the same size'

            # Collapse the leading dimensions and insert a size-1 axis at
            # position 1 so a 2-D convolution can be applied:
            # [N, 1, max_word_length, ALPHABET_SIZE].
            words = tf.expand_dims(
                tf.reshape(input_, [-1, self.max_word_length, ALPHABET_SIZE]),
                1)

            def _branch(width, n_features):
                # One conv -> tanh -> max-pool branch for a single kernel size.
                # NOTE(review): the pooling window assumes the conv output has
                # length max_word_length - width + 1 along axis 2 (i.e. the
                # conv2d helper uses VALID padding) -- confirm.
                span = self.max_word_length - width + 1
                conv = conv2d(words,
                              n_features,
                              1,
                              width,
                              name="kernel_%d" % width)
                pooled = tf.nn.max_pool(tf.tanh(conv),
                                        [1, 1, span, 1],
                                        [1, 1, 1, 1],
                                        'VALID')
                # Drop axes 1 and 2, leaving [N, n_features].
                return tf.squeeze(pooled, [1, 2])

            with tf.variable_scope(scope):
                layers = [
                    _branch(width, n_features)
                    for width, n_features in zip(kernels, kernel_features)
                ]
                output = tf.concat(layers, 1) if len(kernels) > 1 else layers[0]

            return output
예제 #6
0
def build_DQN(s_t,
              action_size,
              target_q_t,
              action,
              learning_rate_step,
              cnn_format='NHWC'):
    """Build a Q-network (3 conv + 2 linear layers) and its training op.

    Args:
        s_t: Input tensor of the first convolution.
            NOTE(review): presumably a batch of stacked frames laid out
            according to ``cnn_format`` -- confirm with the callers.
        action_size: Number of actions as a Python int (it is used with
            ``range``); width of ``q`` and of the one-hot action mask.
        target_q_t: Target values; ``q_acted`` is subtracted from them.
        action: Indices of the taken actions; one-hot encoded here.
        learning_rate_step: Step value handed to
            ``tf.train.exponential_decay``.
        cnn_format: Forwarded unchanged to the ``conv2d`` helper.

    Returns:
        The tuple ``(w, q, q_summary, optim, loss)``. ``w`` is a dict that
        maps ``'<layer>_w'`` / ``'<layer>_b'`` to the second and third values
        returned by the layer helpers, ``q`` is the network output,
        ``q_summary`` the merged per-action histogram summaries, ``optim``
        the RMSProp minimize op and ``loss`` the scalar loss.
    """
    min_delta = -1
    max_delta = 1
    learning_rate_initial = 0.00025
    learning_rate_minimum = 0.00025
    learning_rate_decay = 0.96
    learning_rate_decay_step = 50

    w = {}
    # Alternative initializer: tf.contrib.layers.xavier_initializer()
    initializer = tf.truncated_normal_initializer(0, 0.02)
    activation_fn = tf.nn.relu

    with tf.variable_scope('Q_network'):

        # Each helper call returns (output, weights, biases).
        # NOTE(review): the positional arguments after the input are
        # presumably output depth, kernel size and stride -- confirm against
        # the conv2d helper.
        l1, w['l1_w'], w['l1_b'] = conv2d(s_t,
                                          32, [8, 8], [4, 4],
                                          initializer,
                                          activation_fn,
                                          cnn_format,
                                          name='l1')
        l2, w['l2_w'], w['l2_b'] = conv2d(l1,
                                          64, [4, 4], [2, 2],
                                          initializer,
                                          activation_fn,
                                          cnn_format,
                                          name='l2')
        l3, w['l3_w'], w['l3_b'] = conv2d(l2,
                                          64, [3, 3], [1, 1],
                                          initializer,
                                          activation_fn,
                                          cnn_format,
                                          name='l3')

        # Flatten everything but the first dimension. The product is computed
        # with a plain loop so this function does not depend on ``reduce``
        # being a builtin (it is not on Python 3).
        shape = l3.get_shape().as_list()
        flat_dim = 1
        for dim in shape[1:]:
            flat_dim *= dim
        l3_flat = tf.reshape(l3, [-1, flat_dim])

        l4, w['l4_w'], w['l4_b'] = linear(l3_flat,
                                          512,
                                          activation_fn=activation_fn,
                                          name='l4')
        q, w['q_w'], w['q_b'] = linear(l4, action_size, name='q')

        # One histogram summary per action of the mean Q value over axis 0.
        q_summary = []
        avg_q = tf.reduce_mean(q, 0)
        for idx in range(action_size):
            q_summary.append(tf.histogram_summary('q/%s' % idx, avg_q[idx]))
        q_summary = tf.merge_summary(q_summary, 'q_summary')

    # The scope name spelling is kept as-is: it is part of the graph's
    # variable and op names.
    with tf.variable_scope('optimzier'):

        # Select the Q value of the action that was taken.
        action_one_hot = tf.one_hot(action,
                                    action_size,
                                    1.0,
                                    0.0,
                                    name='action_one_hot')
        q_acted = tf.reduce_sum(q * action_one_hot,
                                reduction_indices=1,
                                name='q_acted')

        delta = target_q_t - q_acted
        # NOTE(review): squaring a value-clipped delta yields zero gradient
        # wherever |delta| > 1; DQN error clipping is usually implemented as
        # a Huber loss instead -- confirm this is intended.
        clipped_delta = tf.clip_by_value(delta,
                                         min_delta,
                                         max_delta,
                                         name='clipped_delta')

        loss = tf.reduce_mean(tf.square(clipped_delta), name='loss')
        # With the constants above the floor equals the initial rate, so for
        # non-negative steps the effective learning rate stays at 0.00025.
        learning_rate = tf.maximum(
            learning_rate_minimum,
            tf.train.exponential_decay(learning_rate_initial,
                                       learning_rate_step,
                                       learning_rate_decay_step,
                                       learning_rate_decay,
                                       staircase=True))

        optim = tf.train.RMSPropOptimizer(learning_rate,
                                          momentum=0.95,
                                          epsilon=0.01).minimize(loss)

    return w, q, q_summary, optim, loss
예제 #7
0
파일: a3c.py 프로젝트: charlesashby/RL_tuts
    def _create_a3c_network(self):
        """Build the A3C graph: conv -> fc -> LSTM, then policy/value heads.

        Creates the placeholders ``self.input`` and
        ``self.last_action_reward`` and sets ``self.lstm_outputs``,
        ``self.lstm_state``, ``self.pi`` and ``self.v``. ``self.lstm_cell``
        must already exist when this method is called.
        """
        fc_width = 256
        # One-hot action (self._action_size wide) plus one float for the
        # reward.
        action_reward_width = self._action_size + 1

        # Observation frames: [batch_size x 84 x 84 x 3].
        self.input = tf.placeholder("float", [None, 84, 84, 3])

        # Previous action and reward, concatenated into one vector per frame.
        self.last_action_reward = tf.placeholder("float",
                                                 [None, action_reward_width])

        # Same convolutional trunk as the A3C implementation of Mnih et al.
        # Expected shapes: [batch_size x 20 x 20 x 16], then
        # [batch_size x 9 x 9 x 32].
        conv0 = conv2d(self.input, 16, 8, 8, stride=4, name='conv0')
        cnn = conv2d(conv0, 32, 4, 4, stride=2, name='conv1')

        # Flatten the conv output to [batch_size x 32 * 9 * 9] and project it
        # to [batch_size x 256]. The batch dimension is the number of frames:
        # the LSTM is stepped frame by frame in the forward pass and
        # backpropagated every 20 frames.
        flat = tf.reshape(cnn, [-1, 32 * 9 * 9])
        features = fc_layer(flat, fc_width, name='fc0')

        with tf.variable_scope('lstm') as scope:
            # As in the paper, the encoded frame is joined with the last
            # action and reward before it is fed to the LSTM.
            joined = tf.concat([features, self.last_action_reward], 1)

            # dynamic_rnn expects [batch_size x sequence_length x input_dim].
            # Here batch_size is 1, the frames form the sequence, and
            # input_dim is 256 + action_size + 1.
            lstm_input = tf.reshape(joined,
                                    [1, -1, fc_width + action_reward_width])

            # The cell itself is created in _create_network; only its zero
            # state is requested here.
            initial_state = self.lstm_cell.zero_state(batch_size=1,
                                                      dtype=tf.float32)

            # Keep both the outputs and the final state: the state is what
            # allows forward propagation to be continued manually from one
            # call to the next.
            self.lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
                self.lstm_cell,
                lstm_input,
                initial_state=initial_state,
                scope=scope,
                dtype=tf.float32)

            # Drop the size-1 batch axis:
            # [1 x seq_length x 256] -> [seq_length x 256].
            self.lstm_outputs = tf.reshape(self.lstm_outputs, shape=[-1, 256])

        # Both heads are fully connected layers on top of the LSTM outputs.

        # Policy head: softmax over self._action_size actions, i.e. the
        # distribution the actions are sampled from.
        with tf.variable_scope('policy') as scope:
            self.pi = fc_layer(self.lstm_outputs,
                               self._action_size,
                               name='fc_pi',
                               activation=tf.nn.softmax)

        # Value head: a single linear output per frame.
        with tf.variable_scope('value') as scope:
            self.v = fc_layer(self.lstm_outputs,
                              1,
                              name='fc_v',
                              activation=None)