def reward_prediction_mid(input_images):
    """A reward predictor network from intermediate layers.

    The inputs can be any image size (usually the intermediate conv outputs).
    The model runs 3 conv layers on each input, followed by a dense layer.
    The resulting encodings are combined with 2 additional dense layers.

    Args:
      input_images: the input images. size is arbitrary.

    Returns:
      the predicted reward.
    """
    encoded = []
    for i, x in enumerate(input_images):
        enc = x
        enc = tfl.conv2d(enc,
                         16, [3, 3],
                         strides=(1, 1),
                         activation=tf.nn.relu)
        enc = tfl.conv2d(enc, 8, [3, 3], strides=(2, 2), activation=tf.nn.relu)
        enc = tfl.conv2d(enc, 4, [3, 3], strides=(2, 2), activation=tf.nn.relu)
        enc = tfl.flatten(enc)
        enc = tfl.dense(enc, 8, activation=tf.nn.relu, name="rew_enc_%d" % i)
        encoded.append(enc)
    x = encoded
    x = tf.stack(x, axis=1)
    x = tfl.flatten(x)
    x = tfl.dense(x, 32, activation=tf.nn.relu, name="rew_dense1")
    x = tfl.dense(x, 16, activation=tf.nn.relu, name="rew_dense2")
    return x
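A minimal usage sketch (the shapes and the tfl = tf.layers alias are assumptions consistent with the snippet): each intermediate feature map is encoded separately, and the encodings are merged into a 16-dimensional reward feature.

import tensorflow as tf

tfl = tf.layers  # alias assumed by the snippet above

# Hypothetical intermediate conv outputs from two layers of a video model.
feat_a = tf.placeholder(tf.float32, [None, 32, 32, 64])
feat_b = tf.placeholder(tf.float32, [None, 16, 16, 128])

reward_features = reward_prediction_mid([feat_a, feat_b])  # -> [batch, 16]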
Example #2
    def __call__(self, x, reuse=True):
        with tf.variable_scope(self.name) as vs:
            if reuse:
                vs.reuse_variables()
            fc = x
            fc = tf.reshape(fc, shape=[-1, 56, 56, 3])
            fc = layers.conv2d(fc, filters=self.nfilt,
                               kernel_initializer=tf.keras.initializers.glorot_normal(),
                               kernel_size=self.k, padding='same',
                               strides=[self.s, self.s], activation=None, name='h1')
            # fc = bn(fc, 'eb1')
            fc = tf.nn.leaky_relu(fc)
            fc = layers.conv2d(fc, filters=self.nfilt * 2,
                               kernel_initializer=tf.keras.initializers.glorot_normal(),
                               kernel_size=self.k, padding='same',
                               strides=[self.s, self.s], activation=None, name='h2')
            # fc = bn(fc, 'eb2')
            fc = tf.nn.leaky_relu(fc)
            fc = layers.conv2d(fc, filters=self.nfilt * 4,
                               kernel_initializer=tf.keras.initializers.glorot_normal(),
                               kernel_size=self.k, padding='same',
                               strides=[self.s, self.s], activation=None, name='h3')
            # fc = bn(fc, 'eb3')
            fc = tf.nn.leaky_relu(fc)
            fc = layers.flatten(fc)
            fc = layers.dense(
                fc, self.num_at - 1,
                activation=self.act_at,
                kernel_initializer=tf.keras.initializers.glorot_normal()
            )
            return fc
Example #3
def _basic_discrete_domain_network(min_vals, max_vals, num_actions, state,
                                   num_atoms=None):
  """Builds a basic network for discrete domains, rescaling inputs to [-1, 1].

  Args:
    min_vals: float, minimum attainable values (must be same shape as `state`).
    max_vals: float, maximum attainable values (must be same shape as `state`).
    num_actions: int, number of actions.
    state: `tf.Tensor`, the state input.
    num_atoms: int or None, if None will construct a DQN-style network,
      otherwise will construct a Rainbow-style network.

  Returns:
    The Q-values for DQN-style agents or logits for Rainbow-style agents.
  """
  net = tf.cast(state, tf.float32)
  net = layers.flatten(net)
  net -= min_vals
  net /= max_vals - min_vals
  net = 2.0 * net - 1.0  # Rescale in range [-1, 1].
  net = layers.fully_connected(net, 512)
  net = layers.fully_connected(net, 512)
  if num_atoms is None:
    # We are constructing a DQN-style network.
    return layers.fully_connected(net, num_actions, activation_fn=None)
  else:
    # We are constructing a Rainbow-style network.
    return layers.fully_connected(
        net, num_actions * num_atoms, activation_fn=None)
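A usage sketch for this helper (assuming layers is tf.contrib.slim, as in Dopamine, and hypothetical CartPole-style bounds):

import numpy as np
import tensorflow as tf

# Bounds must broadcast against the flattened state (shapes are assumptions).
min_vals = np.array([-2.4, -5.0, -0.2, -5.0], dtype=np.float32)
max_vals = -min_vals

state = tf.placeholder(tf.float32, [None, 4])
q_values = _basic_discrete_domain_network(
    min_vals, max_vals, num_actions=2, state=state)  # -> [batch, 2]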
Example #4
def fourier_dqn_network(min_vals,
                        max_vals,
                        num_actions,
                        state,
                        fourier_basis_order=3):
  """Builds the function approximator used to compute the agent's Q-values.

  It uses FourierBasis features and a linear layer.

  Args:
    min_vals: float, minimum attainable values (must be same shape as `state`).
    max_vals: float, maximum attainable values (must be same shape as `state`).
    num_actions: int, number of actions.
    state: `tf.Tensor`, contains the agent's current state.
    fourier_basis_order: int, order of the Fourier basis functions.

  Returns:
    The Q-values for each action.
  """
  net = tf.cast(state, tf.float32)
  net = layers.flatten(net)

  # Feed state through Fourier basis.
  feature_generator = FourierBasis(
      net.get_shape().as_list()[-1],
      min_vals,
      max_vals,
      order=fourier_basis_order)
  net = feature_generator.compute_features(net)

  # Q-values are always linear w.r.t. last layer.
  q_values = layers.fully_connected(
      net, num_actions, activation_fn=None, biases_initializer=None)
  return q_values
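FourierBasis is defined elsewhere in Dopamine; as a rough sketch of what such a feature generator computes (an assumption, not the exact implementation): features of the form cos(pi * c . s) for every coefficient vector c in {0, ..., order}^dim, with the state s rescaled to [0, 1].

import itertools
import numpy as np
import tensorflow as tf

class FourierBasis(object):
    # Illustrative sketch only: order-n Fourier basis features.
    def __init__(self, nvars, min_vals=0.0, max_vals=None, order=3):
        self.min_vals = min_vals
        self.max_vals = max_vals
        terms = itertools.product(range(order + 1), repeat=nvars)
        self.multipliers = tf.constant(
            [list(t) for t in terms], dtype=tf.float32)

    def compute_features(self, state):
        # Rescale each state dimension to [0, 1] before the cosine features.
        if self.max_vals is not None:
            state = (state - self.min_vals) / (self.max_vals - self.min_vals)
        return tf.cos(np.pi * tf.matmul(state, self.multipliers,
                                        transpose_b=True))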
Example #5
def reward_prediction_video_conv(frames, rewards, prediction_len):
    """A reward predictor network from observed/predicted images.

    The input is a list of frames.

    Args:
      frames: the list of input images.
      rewards: previously observed rewards.
      prediction_len: the length of the reward vector.

    Returns:
      the predicted rewards.
    """
    x = tf.concat(frames, axis=-1)
    x = tfl.conv2d(x, 32, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 32, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 16, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 8, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.flatten(x)

    y = tf.concat(rewards, axis=-1)
    y = tfl.dense(y, 32, activation=tf.nn.relu)
    y = tfl.dense(y, 16, activation=tf.nn.relu)
    y = tfl.dense(y, 8, activation=tf.nn.relu)

    z = tf.concat([x, y], axis=-1)
    z = tfl.dense(z, 32, activation=tf.nn.relu)
    z = tfl.dense(z, 16, activation=tf.nn.relu)
    z = tfl.dense(z, prediction_len, activation=None)
    z = tf.expand_dims(z, axis=-1)
    return z
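A usage sketch with hypothetical shapes: four 64x64 context frames and four previous scalar rewards, predicting the next three rewards.

frames = [tf.placeholder(tf.float32, [None, 64, 64, 3]) for _ in range(4)]
rewards = [tf.placeholder(tf.float32, [None, 1]) for _ in range(4)]
pred = reward_prediction_video_conv(frames, rewards, prediction_len=3)
# pred has shape [batch, 3, 1].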
Example #6
def decode_to_shape(inputs, shape, scope):
    """Encode the given tensor to given image shape."""
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        x = inputs
        x = tfl.flatten(x)
        x = tfl.dense(x, shape[2], activation=None, name="dec_dense")
        x = tf.expand_dims(x, axis=1)
        return x
Example #7
def encode_to_shape(inputs, shape, scope):
    """Encode the given tensor to given image shape."""
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        w, h = shape[1], shape[2]
        x = inputs
        x = tfl.flatten(x)
        x = tfl.dense(x, w * h, activation=None, name="enc_dense")
        x = tf.reshape(x, (-1, w, h, 1))
        return x
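These two helpers form a rough round trip; a sketch with assumed shapes: encode_to_shape lifts a reward vector onto the image grid so it can be concatenated with frames, and decode_to_shape projects features back to a reward-shaped tensor.

reward = tf.placeholder(tf.float32, [None, 1, 5])
as_image = encode_to_shape(reward, (None, 64, 64, 3), "enc")   # -> [batch, 64, 64, 1]
features = tf.placeholder(tf.float32, [None, 8, 8, 16])
as_reward = decode_to_shape(features, (None, 1, 5), "dec")     # -> [batch, 1, 5]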
Example #8
def discriminator_L(input, reuse, name):
    with tf.compat.v1.variable_scope(name):
        # image is 256 x 256 x input_c_dim
        if reuse:
            tf.compat.v1.get_variable_scope().reuse_variables()
        else:
            assert tf.compat.v1.get_variable_scope().reuse is False

        p = tf.pad(tensor=input,
                   paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                   mode="REFLECT")
        L1 = layers.conv2d(p,
                           64, [5, 5],
                           strides=2,
                           padding='VALID',
                           activation=None)
        #L1 = instance_norm(L1, 'di1l')
        L1 = tf.nn.leaky_relu(L1)  # 32 32 64

        p = tf.pad(tensor=L1,
                   paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                   mode="REFLECT")
        L2 = layers.conv2d(p,
                           128, [5, 5],
                           strides=2,
                           padding='VALID',
                           activation=None)
        #L2 = instance_norm(L2, 'di2l')
        L2 = tf.nn.leaky_relu(L2)  # 16 16 128

        p = tf.pad(tensor=L2,
                   paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                   mode="REFLECT")
        L3 = layers.conv2d(p,
                           256, [5, 5],
                           strides=2,
                           padding='VALID',
                           activation=None)
        #L3 = instance_norm(L3, 'di3l')
        L3 = tf.nn.leaky_relu(L3)  # 8 8 256

        p = tf.pad(tensor=L3,
                   paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                   mode="REFLECT")
        L4 = layers.conv2d(p,
                           512, [5, 5],
                           strides=2,
                           padding='VALID',
                           activation=None)
        #L4 = instance_norm(L4, 'di4l')
        L4 = tf.nn.leaky_relu(L4)  # 4 4 512
        L4 = layers.flatten(L4)

        L5 = tf.compat.v1.layers.dense(L4, 1)

        return L5
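A usage sketch for a GAN-style setup (shapes and scope name are assumptions): the first call builds the variables, the second reuses them for the generated batch.

real = tf.placeholder(tf.float32, [None, 64, 64, 3])
fake = tf.placeholder(tf.float32, [None, 64, 64, 3])
d_real = discriminator_L(real, reuse=False, name="disc_L")
d_fake = discriminator_L(fake, reuse=True, name="disc_L")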
Example #9
def reward_prediction_big(input_images, input_reward, action, latent,
                          action_injection, small_mode):
    """A big reward predictor network that incorporates lots of additional info.

    Args:
      input_images: context frames.
      input_reward: context rewards.
      action: next action.
      latent: predicted latent vector for this frame.
      action_injection: action injection method.
      small_mode: smaller convs for faster runtime.

    Returns:
      the predicted reward.
    """
    conv_size = common.tinyify([32, 32, 16, 8], False, small_mode)

    x = tf.concat(input_images, axis=3)
    x = tfcl.layer_norm(x)

    if not small_mode:
        x = tfl.conv2d(x,
                       conv_size[1], [3, 3],
                       strides=(2, 2),
                       activation=tf.nn.relu,
                       name="reward_conv1")
        x = tfcl.layer_norm(x)

    # Inject additional inputs
    if action is not None:
        x = layers.inject_additional_input(x, action, "action_enc",
                                           action_injection)
    if input_reward is not None:
        x = layers.inject_additional_input(x, input_reward, "reward_enc")
    if latent is not None:
        latent = tfl.flatten(latent)
        latent = tf.expand_dims(latent, axis=1)
        latent = tf.expand_dims(latent, axis=1)
        x = layers.inject_additional_input(x, latent, "latent_enc")

    x = tfl.conv2d(x,
                   conv_size[2], [3, 3],
                   strides=(2, 2),
                   activation=tf.nn.relu,
                   name="reward_conv2")
    x = tfcl.layer_norm(x)
    x = tfl.conv2d(x,
                   conv_size[3], [3, 3],
                   strides=(2, 2),
                   activation=tf.nn.relu,
                   name="reward_conv3")
    return x
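The layers.inject_additional_input helper is project-specific; a common way to implement this kind of injection (a sketch of the usual idea, not the actual helper) is to broadcast the extra vector over the spatial grid and concatenate it as channels:

def inject_additional_input_sketch(x, vector, name):
    # Illustrative only: tile a [batch, d] or [batch, 1, 1, d] vector over
    # the height/width of x and append it along the channel axis.
    with tf.variable_scope(name):
        if len(vector.get_shape()) == 2:
            vector = vector[:, None, None, :]
        height, width = x.get_shape().as_list()[1:3]
        tiled = tf.tile(vector, [1, height, width, 1])
        return tf.concat([x, tiled], axis=-1)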
Example #10
def atari_model(img_in, num_actions, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # out = layers.convolution2d(out, num_outputs=32,
            #         kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            # out = layers.convolution2d(out, num_outputs=64,
            #         kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            # out = layers.convolution2d(out, num_outputs=64,
            #         kernel_size=3, stride=1, activation_fn=tf.nn.relu)
            # out = layers.flatten(out)
            print(out.get_shape())
            out = layers.conv2d(out,
                                filters=32,
                                kernel_size=8,
                                strides=(4, 4),
                                activation=tf.nn.relu)
            print(out.get_shape())
            out = layers.conv2d(out,
                                filters=64,
                                kernel_size=4,
                                strides=(2, 2),
                                activation=tf.nn.relu)
            print(out.get_shape())
            out = layers.conv2d(out,
                                filters=64,
                                kernel_size=3,
                                strides=(1, 1),
                                activation=tf.nn.relu)
            print(out.get_shape())
            out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            # out = layers.fully_connected(out, num_outputs=512,
            #         activation_fn=tf.nn.relu)
            # out = layers.fully_connected(out, num_outputs=num_actions,
            #         activation_fn=None)
            print(out.get_shape())
            out = layers.dense(out, units=512, activation=tf.nn.relu)
            out = layers.dense(out, units=num_actions, activation=None)
        return out
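A usage sketch (scope names and shapes are assumptions): online and target networks share this graph-building code and differ only by scope.

img_in = tf.placeholder(tf.uint8, [None, 84, 84, 4])
obs = tf.cast(img_in, tf.float32) / 255.0
q_online = atari_model(obs, num_actions=6, scope="q_func")
q_target = atari_model(obs, num_actions=6, scope="target_q_func")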
Example #11
    def get_q_values_op(self, state, scope, reuse=False):
        """
        Returns Q values for all actions

        Args:
            state: (tf tensor)
                shape = (batch_size, img height, img width, nchannels x config.state_history)
            scope: (string) scope name, that specifies if target network or not
            reuse: (bool) reuse of variables in the scope

        Returns:
            out: (tf tensor) of shape = (batch_size, num_actions)
        """
        # this information might be useful
        num_actions = self.env.action_space.n

        ##############################################################
        """
        TODO:
            Implement a fully connected with no hidden layer (linear
            approximation with bias) using tensorflow.

        HINT:
            - You may find the following functions useful:
                - tf.layers.flatten
                - tf.layers.dense

            - Make sure to also specify the scope and reuse
        """
        ##############################################################
        ################ YOUR CODE HERE - 2-3 lines ##################

        out = layers.flatten(state)
        out = layers.dense(out, units=num_actions, name=scope, reuse=reuse)
        ##############################################################
        ######################## END YOUR CODE #######################

        return out
Example #12
def network(
    x: tf.placeholder,
    grayscale: bool,
    normalize: bool,
    low_keep_prob: float,
    high_keep_prob: float,
):
    """
    Multilayer network to classify traffic sign images.
    @param x: input images
    @param grayscale: whether the images should be converted to grayscale
    @param normalize: whether the converted images should be normalized
    @param low_keep_prob: a lower probability of keeping values for the dropout regularization
    @param high_keep_prob: a higher probability of keeping values for the dropout regularization
    """
    depth = 3
    if grayscale:
        x = tf.image.rgb_to_grayscale(x)
        depth = 1
        if normalize:
            x = ly.normalize_grayscale(x)

    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    layer_1 = ly.convolutional_network(x, 32, 1, 5, 6)

    # Activation.
    layer_1 = tf.nn.relu(layer_1)
    layer_1 = tf.nn.dropout(layer_1, high_keep_prob)

    # Layer 2: Convolutional. Input = 28x28x6. Output = 10x10x16.
    layer_2 = ly.convolutional_network(layer_1, 28, 6, 5, 16)

    # Activation.
    layer_2 = tf.nn.relu(layer_2)

    # Pooling. Input = 10x10x16. Output = 5x5x16.
    k = [1, 2, 2, 1]
    strides = [1, 2, 2, 1]
    padding = "VALID"
    layer_2 = tf.nn.max_pool(layer_2, k, strides, padding)

    # Layer 3: Convolutional. Input = 5x5x16. Output = 8x8x512.
    layer_3 = ly.convolutional_network(layer_2, 5, 16, 5, 512)

    # Flatten. Input = 8x8x512. Output = 32768.
    fc = flatten(layer_3)
    fc = tf.nn.dropout(fc, high_keep_prob)

    # Layer 4: Fully Connected. Input = 32768. Output = 256.
    layer_4 = ly.linear_network(fc, 32768, 256)

    # Activation.
    layer_4 = tf.nn.relu(layer_4)
    layer_4 = tf.nn.dropout(layer_4, low_keep_prob)

    # Layer 5: Fully Connected. Input = 256. Output = 128.
    layer_5 = ly.linear_network(layer_4, 256, 128)

    # Activation.
    layer_5 = tf.nn.relu(layer_5)
    layer_5 = tf.nn.dropout(layer_5, low_keep_prob)

    # Layer 6: Fully Connected. Input = 128. Output = 43.
    logits = ly.linear_network(layer_5, 128, 43)

    return logits
Example #13
def loss(self, net_out):
    """
    Takes net.out and the placeholder values
    returned by the batch() func above
    to build the loss.
    """
    # meta
    m = self.meta
    sprob = float(m['class_scale'])
    sconf = float(m['object_scale'])
    snoob = float(m['noobject_scale'])
    scoor = float(m['coord_scale'])
    S, B, C = m['side'], m['num'], m['classes']
    SS = S * S  # number of grid cells

    print('{} loss hyper-parameters:'.format(m['model']))
    print('\tside    = {}'.format(m['side']))
    print('\tbox     = {}'.format(m['num']))
    print('\tclasses = {}'.format(m['classes']))
    print('\tscales  = {}'.format([sprob, sconf, snoob, scoor]))

    size1 = [None, SS, C]
    size2 = [None, SS, B]

    # return the below placeholders
    _probs = tf.placeholder(tf.float32, size1)
    _confs = tf.placeholder(tf.float32, size2)
    _coord = tf.placeholder(tf.float32, size2 + [4])
    # weights term for L2 loss
    _proid = tf.placeholder(tf.float32, size1)
    # material calculating IOU
    _areas = tf.placeholder(tf.float32, size2)
    _upleft = tf.placeholder(tf.float32, size2 + [2])
    _botright = tf.placeholder(tf.float32, size2 + [2])

    self.placeholders = {
        'probs': _probs,
        'confs': _confs,
        'coord': _coord,
        'proid': _proid,
        'areas': _areas,
        'upleft': _upleft,
        'botright': _botright
    }

    # Extract the coordinate prediction from net.out
    coords = net_out[:, SS * (C + B):]
    coords = tf.reshape(coords, [-1, SS, B, 4])
    wh = tf.pow(coords[:, :, :, 2:4], 2) * S  # unit: grid cell
    area_pred = wh[:, :, :, 0] * wh[:, :, :, 1]  # unit: grid cell^2
    centers = coords[:, :, :, 0:2]  # [batch, SS, B, 2]
    floor = centers - (wh * .5)  # [batch, SS, B, 2]
    ceil = centers + (wh * .5)  # [batch, SS, B, 2]

    # calculate the intersection areas
    intersect_upleft = tf.maximum(floor, _upleft)
    intersect_botright = tf.minimum(ceil, _botright)
    intersect_wh = intersect_botright - intersect_upleft
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect = tf.multiply(intersect_wh[:, :, :, 0], intersect_wh[:, :, :, 1])

    # calculate the best IOU, set 0.0 confidence for worse boxes
    iou = tf.truediv(intersect, _areas + area_pred - intersect)
    best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
    best_box = tf.to_float(best_box)
    confs = tf.multiply(best_box, _confs)

    # take care of the weight terms
    conid = snoob * (1. - confs) + sconf * confs
    weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
    cooid = scoor * weight_coo
    proid = sprob * _proid

    # flatten 'em all
    probs = slim.flatten(_probs)
    proid = slim.flatten(proid)
    confs = slim.flatten(confs)
    conid = slim.flatten(conid)
    coord = slim.flatten(_coord)
    cooid = slim.flatten(cooid)

    self.fetch += [probs, confs, conid, cooid, proid]
    true = tf.concat([probs, confs, coord], 1)
    wght = tf.concat([proid, conid, cooid], 1)
    print('Building {} loss'.format(m['model']))
    loss = tf.pow(net_out - true, 2)
    loss = tf.multiply(loss, wght)
    loss = tf.reduce_sum(loss, 1)
    self.loss = .5 * tf.reduce_mean(loss)
    tf.summary.scalar('{} loss'.format(m['model']), self.loss)
Example #14
    def forward(self):
        temp = tf.transpose(self.inp.out, [0, 3, 1, 2])
        self.out = slim.flatten(temp, scope=self.scope)
Example #15
    def __init__(self,
                 myScope,
                 h_size,
                 agent,
                 env,
                 trace_length,
                 batch_size,
                 reuse=None,
                 step=False):
        if step:
            trace_length = 1
        with tf.variable_scope(myScope, reuse=reuse):
            self.batch_size = batch_size
            zero_state = tf.zeros((batch_size, h_size * 2), dtype=tf.float32)
            self.gamma_array = tf.placeholder(shape=[1, trace_length],
                                              dtype=tf.float32,
                                              name='gamma_array')
            self.gamma_array_inverse = tf.placeholder(shape=[1, trace_length],
                                                      dtype=tf.float32,
                                                      name='gamma_array_inv')

            self.lstm_state = tf.placeholder(shape=[batch_size, h_size * 2],
                                             dtype=tf.float32,
                                             name='lstm_state')

            if step:
                self.state_input = tf.placeholder(shape=[self.batch_size] +
                                                  env.ob_space_shape,
                                                  dtype=tf.float32,
                                                  name='state_input')
                lstm_state = self.lstm_state
            else:
                self.state_input = tf.placeholder(
                    shape=[batch_size * trace_length] + env.ob_space_shape,
                    dtype=tf.float32,
                    name='state_input')
                lstm_state = zero_state

            self.sample_return = tf.placeholder(shape=[None, trace_length],
                                                dtype=tf.float32,
                                                name='sample_return')
            self.sample_reward = tf.placeholder(shape=[None, trace_length],
                                                dtype=tf.float32,
                                                name='sample_reward')
            with tf.variable_scope('input_proc', reuse=reuse):
                output = layers.conv2d(self.state_input,
                                       kernel_size=(3, 3),
                                       filters=20,
                                       activation=tf.nn.relu,
                                       padding='same')
                output = layers.conv2d(output,
                                       kernel_size=(3, 3),
                                       filters=20,
                                       activation=tf.nn.relu,
                                       padding='same')
                output = layers.flatten(output)
                print('values', output.get_shape())
                self.value = tf.reshape(layers.dense(tf.nn.relu(output), 1),
                                        [-1, trace_length])
            if step:
                output_seq = batch_to_seq(output, self.batch_size, 1)
            else:
                output_seq = batch_to_seq(output, self.batch_size,
                                          trace_length)
            output_seq, state_output = lstm(output_seq,
                                            lstm_state,
                                            scope='rnn',
                                            nh=h_size)
            output = seq_to_batch(output_seq)

            output = layers.dense(output,
                                  units=env.NUM_ACTIONS,
                                  activation=None)
            self.log_pi = tf.nn.log_softmax(output)
            self.lstm_state_output = state_output

            self.actions = tf.placeholder(shape=[None],
                                          dtype=tf.int32,
                                          name='actions')
            self.actions_onehot = tf.one_hot(self.actions,
                                             env.NUM_ACTIONS,
                                             dtype=tf.float32)

            predict = tf.multinomial(self.log_pi, 1)
            self.predict = tf.squeeze(predict)

            self.next_value = tf.placeholder(shape=[None, 1],
                                             dtype=tf.float32,
                                             name='next_value')
            self.next_v = tf.matmul(self.next_value, self.gamma_array_inverse)
            self.target = self.sample_return + self.next_v

            self.td_error = tf.square(self.target - self.value) / 2
            self.loss = tf.reduce_mean(self.td_error)

        self.parameters = []
        self.value_params = []
        for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=myScope):
            if not ('value_params' in i.name):
                self.parameters.append(i)  # i.name if you want just a name
            if 'input_proc' in i.name:
                self.value_params.append(i)

        if not step:
            self.log_pi_action = tf.reduce_mean(tf.multiply(
                self.log_pi, self.actions_onehot),
                                                reduction_indices=1)
            self.log_pi_action_bs = tf.reduce_sum(
                tf.reshape(self.log_pi_action, [-1, trace_length]), 1)
            self.log_pi_action_bs_t = tf.reshape(
                self.log_pi_action, [self.batch_size, trace_length])
            self.trainer = tf.train.GradientDescentOptimizer(learning_rate=1)
            self.updateModel = self.trainer.minimize(
                self.loss, var_list=self.value_params)

        self.setparams = SetFromFlat(self.parameters)
        self.getparams = GetFlat(self.parameters)
        self.param_len = len(self.parameters)

        for var in self.parameters:
            print(var.name, var.get_shape())
Example #16
    def __init__(self, state_size, action_size, learning_rate, name='DQLearner'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with v1.variable_scope(name):
            # We create the placeholders.
            # *state_size unpacks the elements of the state_size tuple,
            # so this is the same as writing [None, 84, 84, 4].
            self.inputs_ = v1.placeholder(tf.float32, [None, *state_size], name="inputs")
            self.actions_ = v1.placeholder(tf.float32, [None, 3], name="actions_")

            # Remember that target_Q is the R(s,a) + ymax Qhat(s', a')
            self.target_Q = v1.placeholder(tf.float32, [None], name="target")

            """
            First convnet:
            CNN
            BatchNormalization
            ELU
            """
            # Input is 84x84x4
            self.conv1 = v1l.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8, 8],
                                          strides=[4, 4],
                                          padding="VALID",
                                          kernel_initializer=v1.initializers.glorot_uniform(),
                                          name="conv1")

            self.conv1_batchnorm = v1l.batch_normalization(self.conv1,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm1')

            self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")
            ## --> [20, 20, 32]

            """
            Second convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv2 = v1l.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=v1.initializers.glorot_uniform(),
                                          name="conv2")

            self.conv2_batchnorm = v1l.batch_normalization(self.conv2,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm2')

            self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")
            ## --> [9, 9, 64]

            """
            Third convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv3 = v1l.conv2d(inputs=self.conv2_out,
                                  filters=128,
                                  kernel_size=[4, 4],
                                  strides=[2, 2],
                                  padding="VALID",
                                  kernel_initializer=v1.initializers.glorot_uniform(),
                                  name="conv3")

            self.conv3_batchnorm = v1l.batch_normalization(self.conv3,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm3')

            self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")
            ## --> [3, 3, 128]

            self.flatten = v1l.flatten(self.conv3_out)
            ## --> [1152]

            self.fc = v1l.dense(inputs=self.flatten,
                              units=512,
                              activation=tf.nn.elu,
                              kernel_initializer=v1.initializers.glorot_uniform(),
                              name="fc1")

            self.output = v1l.dense(inputs=self.fc,
                                  kernel_initializer=v1.initializers.glorot_uniform(),
                                  units=3,
                                  activation=None)

            # Q is our predicted Q value.
            self.Q = tf.math.reduce_sum(tf.math.multiply(self.output, self.actions_), axis=1)

            # The loss is the difference between our predicted Q_values and the Q_target
            # Sum(Qtarget - Q)^2
            self.loss = tf.math.reduce_mean(tf.math.square(self.target_Q - self.Q))

            self.optimizer = v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
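A minimal instantiation sketch (the v1/v1l aliases are assumptions consistent with the snippet):

import tensorflow as tf
v1 = tf.compat.v1
v1l = tf.compat.v1.layers

net = DQLearner(state_size=(84, 84, 4), action_size=3, learning_rate=2.5e-4)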
Example #17
def LeNet(x):
    # Arguments for tf.random.truncated_normal, which randomly initializes
    # the weights and biases for each layer.
    mu = 0
    sigma = 0.1

    weights = {
        # The shape of the filter weight is (height, width, input_depth, output_depth)
        'conv1':
        tf.Variable(
            tf.random.truncated_normal(shape=(5, 5, 1, 6),
                                       mean=mu,
                                       stddev=sigma)),
        'conv2':
        tf.Variable(
            tf.random.truncated_normal(shape=(5, 5, 6, 16),
                                       mean=mu,
                                       stddev=sigma)),
        'fl1':
        tf.Variable(
            tf.random.truncated_normal(shape=(5 * 5 * 16, 120),
                                       mean=mu,
                                       stddev=sigma)),
        'fl2':
        tf.Variable(
            tf.random.truncated_normal(shape=(120, 84), mean=mu,
                                       stddev=sigma)),
        'out':
        tf.Variable(
            tf.random.truncated_normal(shape=(84, n_classes),
                                       mean=mu,
                                       stddev=sigma))
    }

    biases = {
        # The shape of the filter bias is (output_depth,)
        'conv1': tf.Variable(tf.zeros(6)),
        'conv2': tf.Variable(tf.zeros(16)),
        'fl1': tf.Variable(tf.zeros(120)),
        'fl2': tf.Variable(tf.zeros(84)),
        'out': tf.Variable(tf.zeros(n_classes))
    }

    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1 = tf.nn.conv2d(input=x,
                         filters=weights['conv1'],
                         strides=[1, 1, 1, 1],
                         padding='VALID')
    conv1 = tf.nn.bias_add(conv1, biases['conv1'])
    # Activation.
    conv1 = tf.nn.relu(conv1)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.avg_pool2d(input=conv1,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='VALID')

    # Layer 2: Convolutional. Output = 10x10x16.
    conv2 = tf.nn.conv2d(input=conv1,
                         filters=weights['conv2'],
                         strides=[1, 1, 1, 1],
                         padding='VALID')
    conv2 = tf.nn.bias_add(conv2, biases['conv2'])
    # Activation.
    conv2 = tf.nn.relu(conv2)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.avg_pool2d(input=conv2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fl0 = flatten(conv2)

    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fl1 = tf.add(tf.matmul(fl0, weights['fl1']), biases['fl1'])
    # Activation.
    fl1 = tf.nn.relu(fl1)

    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fl2 = tf.add(tf.matmul(fl1, weights['fl2']), biases['fl2'])
    # Activation.
    fl2 = tf.nn.relu(fl2)

    # Layer 5: Fully Connected. Input = 84. Output = n_classes.
    logits = tf.add(tf.matmul(fl2, weights['out']), biases['out'])

    return logits
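A minimal training wiring for the LeNet above (a sketch; it assumes n_classes = 43 and that flatten comes from tf.contrib.layers, as the snippet implies):

from tensorflow.contrib.layers import flatten  # assumed import

n_classes = 43  # assumed; LeNet reads this global
x = tf.placeholder(tf.float32, [None, 32, 32, 1])
y = tf.placeholder(tf.int32, [None])

logits = LeNet(x)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)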