def reward_prediction_mid(input_images):
    """Predict a reward from a collection of intermediate feature maps.

    Each element of `input_images` goes through its own tower of three 3x3
    convolutions (16, 8 and 4 filters with strides 1, 2 and 2), is flattened
    and projected to 8 units. The per-input encodings are then stacked along
    axis 1, flattened and passed through two dense layers (32 and 16 units).

    Args:
      input_images: iterable of image-like tensors; spatial size is arbitrary.

    Returns:
      The output of the final 16-unit ReLU dense layer.
    """
    # (filters, strides) of the per-input convolution tower.
    tower = ((16, (1, 1)), (8, (2, 2)), (4, (2, 2)))

    per_input = []
    for index, image in enumerate(input_images):
        feat = image
        for filters, strides in tower:
            feat = tfl.conv2d(
                feat, filters, [3, 3], strides=strides, activation=tf.nn.relu)
        feat = tfl.dense(
            tfl.flatten(feat), 8, activation=tf.nn.relu,
            name="rew_enc_%d" % index)
        per_input.append(feat)

    merged = tfl.flatten(tf.stack(per_input, axis=1))
    hidden = tfl.dense(merged, 32, activation=tf.nn.relu, name="rew_dense1")
    return tfl.dense(hidden, 16, activation=tf.nn.relu, name="rew_dense2")
def __call__(self, x, reuse=True):
    """Run the convolutional network on `x` inside the scope `self.name`.

    The input is reshaped to [-1, 56, 56, 3] and passed through three
    same-padded convolutions named 'h1', 'h2' and 'h3' with `nfilt`,
    `2 * nfilt` and `4 * nfilt` filters, each followed by a leaky ReLU
    (no normalisation in between). The result is flattened and projected
    by a dense layer with `num_at - 1` units and activation `act_at`.

    Args:
        x: input tensor; must be reshapeable to [-1, 56, 56, 3].
        reuse: when truthy, variables of the scope are reused.

    Returns:
        The output tensor of the final dense layer.
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        # (layer name, number of filters) for each convolution stage.
        stages = (
            ('h1', self.nfilt),
            ('h2', self.nfilt * 2),
            ('h3', self.nfilt * 4),
        )

        net = tf.reshape(x, shape=[-1, 56, 56, 3])
        for layer_name, width in stages:
            net = layers.conv2d(
                net,
                filters=width,
                kernel_initializer=tf.keras.initializers.glorot_normal(),
                kernel_size=self.k,
                padding='same',
                strides=[self.s, self.s],
                activation=None,
                name=layer_name,
            )
            net = tf.nn.leaky_relu(net)

        return layers.dense(
            layers.flatten(net),
            self.num_at - 1,
            activation=self.act_at,
            kernel_initializer=tf.keras.initializers.glorot_normal(),
        )
def _basic_discrete_domain_network(min_vals, max_vals, num_actions, state,
                                   num_atoms=None):
    """Build a two-hidden-layer network on inputs rescaled to [-1, 1].

    Args:
      min_vals: float, minimum attainable values (must be same shape as
        `state`).
      max_vals: float, maximum attainable values (must be same shape as
        `state`).
      num_actions: int, number of actions.
      state: `tf.Tensor`, the state input.
      num_atoms: int or None. With None the output layer has `num_actions`
        units (DQN-style); otherwise it has `num_actions * num_atoms` units
        (Rainbow-style).

    Returns:
      The output of the final linear layer: Q-values for DQN-style agents or
      logits for Rainbow-style agents.
    """
    flat_state = layers.flatten(tf.cast(state, tf.float32))

    # Map [min_vals, max_vals] onto [0, 1], then onto [-1, 1].
    unit_range = (flat_state - min_vals) / (max_vals - min_vals)
    hidden = 2.0 * unit_range - 1.0

    for _ in range(2):
        hidden = layers.fully_connected(hidden, 512)

    output_size = num_actions if num_atoms is None else num_actions * num_atoms
    return layers.fully_connected(hidden, output_size, activation_fn=None)
def fourier_dqn_network(min_vals, max_vals, num_actions, state,
                        fourier_basis_order=3):
    """Build a Q-value approximator: Fourier-basis features + linear layer.

    Args:
      min_vals: float, minimum attainable values (must be same shape as
        `state`).
      max_vals: float, maximum attainable values (must be same shape as
        `state`).
      num_actions: int, number of actions.
      state: `tf.Tensor`, contains the agent's current state.
      fourier_basis_order: int, order of the Fourier basis functions.

    Returns:
      The output of a bias-free linear layer with `num_actions` units applied
      to the Fourier features of the flattened state.
    """
    flat_state = layers.flatten(tf.cast(state, tf.float32))

    # The basis is sized from the static width of the flattened state.
    state_dim = flat_state.get_shape().as_list()[-1]
    basis = FourierBasis(
        state_dim, min_vals, max_vals, order=fourier_basis_order)
    features = basis.compute_features(flat_state)

    # Q-values are always linear w.r.t. the feature layer.
    return layers.fully_connected(
        features, num_actions, activation_fn=None, biases_initializer=None)
def reward_prediction_video_conv(frames, rewards, prediction_len):
    """Predict a reward vector from a list of frames and past rewards.

    Frames are concatenated on the last axis and encoded by four stride-2
    3x3 convolutions; rewards are concatenated on the last axis and encoded
    by three dense layers. Both encodings are joined and mapped through two
    ReLU dense layers and a final linear layer of width `prediction_len`.

    Args:
      frames: the list of input images.
      rewards: previously observed rewards.
      prediction_len: the length of the reward vector.

    Returns:
      The predicted rewards, with a trailing axis of size 1 appended.
    """
    image_feat = tf.concat(frames, axis=-1)
    for filters in (32, 32, 16, 8):
        image_feat = tfl.conv2d(
            image_feat, filters, [3, 3], strides=(2, 2),
            activation=tf.nn.relu)
    image_feat = tfl.flatten(image_feat)

    reward_feat = tf.concat(rewards, axis=-1)
    for units in (32, 16, 8):
        reward_feat = tfl.dense(reward_feat, units, activation=tf.nn.relu)

    joint = tf.concat([image_feat, reward_feat], axis=-1)
    for units in (32, 16):
        joint = tfl.dense(joint, units, activation=tf.nn.relu)

    prediction = tfl.dense(joint, prediction_len, activation=None)
    return tf.expand_dims(prediction, axis=-1)
def decode_to_shape(inputs, shape, scope):
    """Project the flattened `inputs` to `shape[2]` units.

    The projection is a linear dense layer named "dec_dense" created under
    `scope` (variables are auto-reused). An axis of size 1 is inserted at
    position 1 of the result.

    Args:
      inputs: tensor to decode; it is flattened before the projection.
      shape: indexable target shape; only `shape[2]` is read.
      scope: variable scope name.

    Returns:
      The projected tensor with an extra axis at position 1.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        projected = tfl.dense(
            tfl.flatten(inputs), shape[2], activation=None, name="dec_dense")
        return tf.expand_dims(projected, axis=1)
def encode_to_shape(inputs, shape, scope):
    """Project the flattened `inputs` onto a single-channel image.

    A linear dense layer named "enc_dense" (created under `scope`, variables
    auto-reused) maps the flattened input to `shape[1] * shape[2]` units,
    which are then reshaped to (-1, shape[1], shape[2], 1).

    Args:
      inputs: tensor to encode; it is flattened before the projection.
      shape: indexable target shape; `shape[1]` and `shape[2]` are read.
      scope: variable scope name.

    Returns:
      A tensor of shape (-1, shape[1], shape[2], 1).
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        rows, cols = shape[1], shape[2]
        projected = tfl.dense(
            tfl.flatten(inputs), rows * cols, activation=None,
            name="enc_dense")
        return tf.reshape(projected, (-1, rows, cols, 1))
def discriminator_L(input, reuse, name):
    """Build a four-stage convolutional discriminator with a scalar output.

    Each stage reflect-pads the feature map by 2 pixels per side, applies a
    5x5 stride-2 VALID convolution and a leaky ReLU; channel widths are
    64, 128, 256 and 512. The last feature map is flattened and projected to
    a single unit.

    Args:
        input: input image batch (4-D tensor; the padding spec has four
            axes with the two middle ones padded).
        reuse: when truthy, variables of the scope are reused; otherwise the
            scope is asserted not to be in reuse mode.
        name: variable scope name.

    Returns:
        The output of the final 1-unit dense layer.
    """
    with tf.compat.v1.variable_scope(name):
        current_scope = tf.compat.v1.get_variable_scope()
        if reuse:
            current_scope.reuse_variables()
        else:
            assert current_scope.reuse is False

        features = input
        for channels in (64, 128, 256, 512):
            # Pad 2 + kernel 5 + stride 2 (VALID) halves the spatial size,
            # rounding up.
            padded = tf.pad(
                tensor=features,
                paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                mode="REFLECT",
            )
            features = layers.conv2d(
                padded, channels, [5, 5], strides=2, padding='VALID',
                activation=None)
            features = tf.nn.leaky_relu(features)

        return tf.compat.v1.layers.dense(layers.flatten(features), 1)
def reward_prediction_big(input_images, input_reward, action, latent,
                          action_injection, small_mode):
    """Build a reward predictor that can consume several extra inputs.

    Context frames are concatenated on the channel axis and layer-normalised.
    Unless `small_mode` is set, a first stride-2 convolution (plus layer
    norm) is applied. Whichever of `action`, `input_reward` and `latent` is
    not None is then injected into the feature map, followed by two more
    stride-2 convolutions.

    Args:
      input_images: context frames.
      input_reward: context rewards, or None to skip.
      action: next action, or None to skip.
      latent: predicted latent vector for this frame, or None to skip.
      action_injection: action injection method.
      small_mode: smaller convs for faster runtime.

    Returns:
      The output of the last convolution ("reward_conv3").
    """
    widths = common.tinyify([32, 32, 16, 8], False, small_mode)

    net = tfcl.layer_norm(tf.concat(input_images, axis=3))

    if not small_mode:
        net = tfl.conv2d(
            net, widths[1], [3, 3], strides=(2, 2), activation=tf.nn.relu,
            name="reward_conv1")
        net = tfcl.layer_norm(net)

    # Inject whichever additional inputs were supplied.
    if action is not None:
        net = layers.inject_additional_input(
            net, action, "action_enc", action_injection)
    if input_reward is not None:
        net = layers.inject_additional_input(net, input_reward, "reward_enc")
    if latent is not None:
        # Flatten, then add two unit axes after the batch axis.
        latent_vec = tfl.flatten(latent)
        latent_vec = tf.expand_dims(
            tf.expand_dims(latent_vec, axis=1), axis=1)
        net = layers.inject_additional_input(net, latent_vec, "latent_enc")

    net = tfl.conv2d(
        net, widths[2], [3, 3], strides=(2, 2), activation=tf.nn.relu,
        name="reward_conv2")
    net = tfcl.layer_norm(net)
    return tfl.conv2d(
        net, widths[3], [3, 3], strides=(2, 2), activation=tf.nn.relu,
        name="reward_conv3")
def atari_model(img_in, num_actions, scope, reuse=False):
    """Build a three-conv, two-dense action-value network.

    Variables are created under `scope`, with the convolutions in the
    sub-scope "convnet" and the dense layers in "action_value". The result
    of `tf.shape` is printed before every convolution, before the flatten
    and before the first dense layer (debug output kept as in the original).

    Args:
        img_in: input image batch.
        num_actions: number of units of the output layer.
        scope: outer variable scope name.
        reuse: passed to the outer variable scope.

    Returns:
        The output of the final linear layer with `num_actions` units.
    """
    # (filters, kernel_size, strides) per convolution.
    conv_specs = (
        (32, 8, (4, 4)),
        (64, 4, (2, 2)),
        (64, 3, (1, 1)),
    )

    with tf.variable_scope(scope, reuse=reuse):
        net = img_in

        with tf.variable_scope("convnet"):
            for filters, kernel, strides in conv_specs:
                print(tf.shape(net))
                net = layers.conv2d(
                    net, filters=filters, kernel_size=kernel,
                    strides=strides, activation=tf.nn.relu)
            print(tf.shape(net))
            net = layers.flatten(net)

        with tf.variable_scope("action_value"):
            print(tf.shape(net))
            hidden = layers.dense(net, units=512, activation=tf.nn.relu)
            q_values = layers.dense(
                hidden, units=num_actions, activation=None)

        return q_values
def get_q_values_op(self, state, scope, reuse=False):
    """Return Q-values for all actions using a linear approximation.

    The state is flattened and fed through a single dense layer (weights and
    bias, no hidden layer, no activation) whose width is the number of
    actions of `self.env.action_space`.

    Args:
        state: (tf tensor)
            shape = (batch_size, img height, img width,
                     nchannels x config.state_history)
        scope: (string) scope name, that specifies if target network or not;
            used as the dense layer's name.
        reuse: (bool) reuse of variables in the scope

    Returns:
        out: (tf tensor) of shape = (batch_size, num_actions)
    """
    num_actions = self.env.action_space.n

    # The dense layer must consume the *flattened* state. Applied to the raw
    # 4-D tensor it would only act on the last axis and return
    # (batch_size, height, width, num_actions).
    flat_state = layers.flatten(state)
    out = layers.dense(flat_state, units=num_actions, name=scope, reuse=reuse)

    return out
def network(
    x: tf.placeholder,
    grayscale: bool,
    normalize: bool,
    low_keep_prob: float,
    high_keep_prob: float,
):
    """
    Multilayer network to classify traffic sign images.

    Three convolutional layers are followed by three fully connected layers;
    the last one produces 43 logits.

    NOTE(review): the shape comments below assume that
    `ly.convolutional_network(x, size, in_depth, filter, out_depth)` is a
    stride-1 VALID convolution and that the input is 32x32 -- confirm against
    the helper. They are consistent with the 32768-wide flatten used below.

    @param x: input images
    @param grayscale: whether the images should be converted to grayscale
    @param normalize: whether the converted images should be normalized
    @param low_keep_prob: a lower probability of keeping values for the
        dropout regularization
    @param high_keep_prob: a higher probability of keeping values for the
        dropout regularization
    @return: the logits tensor (43 classes)
    """
    # Number of input channels seen by the first convolution.
    depth = 3
    if grayscale:
        x = tf.image.rgb_to_grayscale(x)
        depth = 1
    if normalize:
        x = ly.normalize_grayscale(x)

    # Layer 1: Convolutional. Input = 32x32x{depth}. Output = 28x28x6.
    # Use the actual channel count so that colour input (grayscale=False)
    # is not declared as single-channel.
    layer_1 = ly.convolutional_network(x, 32, depth, 5, 6)
    layer_1 = tf.nn.relu(layer_1)
    layer_1 = tf.nn.dropout(layer_1, high_keep_prob)

    # Layer 2: Convolutional. Input = 28x28x6. Output = 24x24x16.
    layer_2 = ly.convolutional_network(layer_1, 28, 6, 5, 16)
    layer_2 = tf.nn.relu(layer_2)

    # Pooling. Input = 24x24x16. Output = 12x12x16.
    k = [1, 2, 2, 1]
    strides = [1, 2, 2, 1]
    padding = "VALID"
    layer_2 = tf.nn.max_pool(layer_2, k, strides, padding)

    # Layer 3: Convolutional. Input = 12x12x16. Output = 8x8x512.
    # NOTE(review): the size argument (5) does not match the 12x12 input
    # implied by the layers above -- confirm how the helper uses it.
    layer_3 = ly.convolutional_network(layer_2, 5, 16, 5, 512)

    # Flatten. Input = 8x8x512. Output = 32768.
    fc = flatten(layer_3)
    fc = tf.nn.dropout(fc, high_keep_prob)

    # Layer 4: Fully Connected. Input = 32768. Output = 256.
    layer_4 = ly.linear_network(fc, 32768, 256)
    layer_4 = tf.nn.relu(layer_4)
    layer_4 = tf.nn.dropout(layer_4, low_keep_prob)

    # Layer 5: Fully Connected. Input = 256. Output = 128.
    layer_5 = ly.linear_network(layer_4, 256, 128)
    layer_5 = tf.nn.relu(layer_5)
    layer_5 = tf.nn.dropout(layer_5, low_keep_prob)

    # Layer 6: Fully Connected. Input = 128. Output = 43.
    logits = ly.linear_network(layer_5, 128, 43)

    return logits
def loss(self, net_out):
    """
    Build the weighted squared-error detection loss for `net_out`.

    Creates the ground-truth placeholders (stored in `self.placeholders`),
    derives per-box weights from the IOU between predicted and ground-truth
    boxes, and stores the resulting scalar in `self.loss`. The flattened
    target/weight tensors are appended to `self.fetch` and a scalar summary
    of the loss is registered. Hyper-parameters are read from `self.meta`
    and printed.

    `net_out` is treated as a 2-D tensor whose columns are laid out as
    SS*C class probabilities, then SS*B confidences, then SS*B*4 box
    coordinates (S = side, B = boxes per cell, C = classes, SS = S*S).

    Nothing is returned.
    """
    # meta: loss scales and grid geometry
    m = self.meta
    sprob = float(m['class_scale'])
    sconf = float(m['object_scale'])
    snoob = float(m['noobject_scale'])
    scoor = float(m['coord_scale'])
    S, B, C = m['side'], m['num'], m['classes']
    SS = S * S  # number of grid cells

    print('{} loss hyper-parameters:'.format(m['model']))
    print('\tside = {}'.format(m['side']))
    print('\tbox = {}'.format(m['num']))
    print('\tclasses = {}'.format(m['classes']))
    print('\tscales = {}'.format([sprob, sconf, snoob, scoor]))

    size1 = [None, SS, C]  # per-cell, per-class tensors
    size2 = [None, SS, B]  # per-cell, per-box tensors

    # ground-truth placeholders, exposed through self.placeholders below
    _probs = tf.placeholder(tf.float32, size1)
    _confs = tf.placeholder(tf.float32, size2)
    _coord = tf.placeholder(tf.float32, size2 + [4])
    # weights term for L2 loss
    _proid = tf.placeholder(tf.float32, size1)
    # material for calculating IOU
    _areas = tf.placeholder(tf.float32, size2)
    _upleft = tf.placeholder(tf.float32, size2 + [2])
    _botright = tf.placeholder(tf.float32, size2 + [2])

    self.placeholders = {
        'probs': _probs, 'confs': _confs, 'coord': _coord, 'proid': _proid,
        'areas': _areas, 'upleft': _upleft, 'botright': _botright
    }

    # Extract the coordinate prediction from net.out (everything after the
    # class-probability and confidence columns)
    coords = net_out[:, SS * (C + B):]
    coords = tf.reshape(coords, [-1, SS, B, 4])
    # width/height are predicted as square roots, hence the squaring
    wh = tf.pow(coords[:, :, :, 2:4], 2) * S  # unit: grid cell
    area_pred = wh[:, :, :, 0] * wh[:, :, :, 1]  # unit: grid cell^2
    centers = coords[:, :, :, 0:2]  # [batch, SS, B, 2]
    floor = centers - (wh * .5)  # [batch, SS, B, 2]
    ceil = centers + (wh * .5)  # [batch, SS, B, 2]

    # calculate the intersection areas (clamped at 0 when boxes are disjoint)
    intersect_upleft = tf.maximum(floor, _upleft)
    intersect_botright = tf.minimum(ceil, _botright)
    intersect_wh = intersect_botright - intersect_upleft
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect = tf.multiply(intersect_wh[:, :, :, 0], intersect_wh[:, :, :, 1])

    # calculate the best IOU, set 0.0 confidence for worse boxes
    iou = tf.truediv(intersect, _areas + area_pred - intersect)
    # mask selecting, per cell, the box(es) reaching the maximum IOU
    best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
    best_box = tf.to_float(best_box)
    confs = tf.multiply(best_box, _confs)

    # take care of the weight terms
    conid = snoob * (1. - confs) + sconf * confs
    # repeat the confidence mask over the 4 coordinates of each box
    weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
    cooid = scoor * weight_coo
    proid = sprob * _proid

    # flatten 'em all
    probs = slim.flatten(_probs)
    proid = slim.flatten(proid)
    confs = slim.flatten(confs)
    conid = slim.flatten(conid)
    coord = slim.flatten(_coord)
    cooid = slim.flatten(cooid)

    self.fetch += [probs, confs, conid, cooid, proid]

    # targets and weights, concatenated in the same column order as net_out
    true = tf.concat([probs, confs, coord], 1)
    wght = tf.concat([proid, conid, cooid], 1)

    print('Building {} loss'.format(m['model']))
    loss = tf.pow(net_out - true, 2)
    loss = tf.multiply(loss, wght)
    loss = tf.reduce_sum(loss, 1)
    # half the batch mean of the per-example weighted squared error
    self.loss = .5 * tf.reduce_mean(loss)
    tf.summary.scalar('{} loss'.format(m['model']), self.loss)
def forward(self):
    """Flatten the input layer's output after moving axis 3 to position 1.

    `self.inp.out` is transposed with permutation [0, 3, 1, 2] and the
    result is flattened under `self.scope`; the outcome is stored in
    `self.out`.
    """
    self.out = slim.flatten(
        tf.transpose(self.inp.out, [0, 3, 1, 2]),
        scope=self.scope,
    )
def __init__(self, myScope, h_size, agent, env, trace_length, batch_size, reuse=None, step=False):
    """Build a recurrent policy network with a value head.

    Two 3x3 convolutions encode the observations; a dense layer on the
    flattened encoding gives `self.value`, and an LSTM followed by a linear
    layer gives the action log-probabilities `self.log_pi`. A squared TD
    error between `sample_return + next_value * gamma_array_inverse` and
    `self.value` is exposed as `self.loss`.

    Args:
        myScope: name of the variable scope holding the network; also used
            to collect its trainable variables.
        h_size: LSTM hidden size (the LSTM state has width `h_size * 2`).
        agent: not referenced in this constructor.
        env: environment object; `ob_space_shape` (list) and `NUM_ACTIONS`
            are read.
        trace_length: sequence length used when `step` is False.
        batch_size: number of sequences per batch.
        reuse: forwarded to the variable scopes.
        step: when True the graph is built for single-step use: the trace
            length is forced to 1, the LSTM state is fed through
            `self.lstm_state`, and no training ops are created.
    """
    if step:
        trace_length = 1
    else:
        trace_length = trace_length
    with tf.variable_scope(myScope, reuse=reuse):
        self.batch_size = batch_size
        # Initial LSTM state used when unrolling whole traces.
        zero_state = tf.zeros((batch_size, h_size * 2), dtype=tf.float32)
        self.gamma_array = tf.placeholder(shape=[1, trace_length], dtype=tf.float32, name='gamma_array')
        self.gamma_array_inverse = tf.placeholder(shape=[1, trace_length], dtype=tf.float32, name='gamma_array_inv')
        self.lstm_state = tf.placeholder(shape=[batch_size, h_size * 2], dtype=tf.float32, name='lstm_state')

        if step:
            # Single step: one observation per batch entry, state fed in.
            self.state_input = tf.placeholder(shape=[self.batch_size] + env.ob_space_shape, dtype=tf.float32, name='state_input')
            lstm_state = self.lstm_state
        else:
            # Full traces: observations of all time steps in one batch axis.
            self.state_input = tf.placeholder(
                shape=[batch_size * trace_length] + env.ob_space_shape, dtype=tf.float32, name='state_input')
            lstm_state = zero_state

        self.sample_return = tf.placeholder(shape=[None, trace_length], dtype=tf.float32, name='sample_return')
        self.sample_reward = tf.placeholder(shape=[None, trace_length], dtype=tf.float32, name='sample_reward')

        # NOTE(review): the value head is assumed to be created inside
        # 'input_proc', so that its variables land in self.value_params --
        # confirm against the original layout.
        with tf.variable_scope('input_proc', reuse=reuse):
            output = layers.conv2d(self.state_input, kernel_size=(3, 3), filters=20, activation=tf.nn.relu, padding='same')
            output = layers.conv2d(output, kernel_size=(3, 3), filters=20, activation=tf.nn.relu, padding='same')
            output = layers.flatten(output)
            print('values', output.get_shape())
            # One value per observation, arranged as [-1, trace_length].
            self.value = tf.reshape(layers.dense(tf.nn.relu(output), 1), [-1, trace_length])

        if step:
            output_seq = batch_to_seq(output, self.batch_size, 1)
        else:
            output_seq = batch_to_seq(output, self.batch_size, trace_length)
        output_seq, state_output = lstm(output_seq, lstm_state, scope='rnn', nh=h_size)
        output = seq_to_batch(output_seq)

        # Policy head: linear layer to action logits, then log-softmax.
        output = layers.dense(output, units=env.NUM_ACTIONS, activation=None)
        self.log_pi = tf.nn.log_softmax(output)
        self.lstm_state_output = state_output

        self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name='actions')
        self.actions_onehot = tf.one_hot(self.actions, env.NUM_ACTIONS, dtype=tf.float32)

        # Sample one action per row from the policy.
        predict = tf.multinomial(self.log_pi, 1)
        self.predict = tf.squeeze(predict)

        # Value target: sampled return plus the bootstrap value scaled by
        # gamma_array_inverse ([None, 1] x [1, trace_length]).
        self.next_value = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='next_value')
        self.next_v = tf.matmul(self.next_value, self.gamma_array_inverse)
        self.target = self.sample_return + self.next_v
        self.td_error = tf.square(self.target - self.value) / 2
        self.loss = tf.reduce_mean(self.td_error)

    # Partition the trainable variables of this scope.
    self.parameters = []
    self.value_params = []
    for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=myScope):
        if not ('value_params' in i.name):
            self.parameters.append(i)  # i.name if you want just a name
        if 'input_proc' in i.name:
            self.value_params.append(i)

    if not step:
        # NOTE(review): this is a mean (not a sum) over the action axis of
        # the one-hot-masked log-probabilities -- confirm this is intended.
        self.log_pi_action = tf.reduce_mean(tf.multiply(
            self.log_pi, self.actions_onehot), reduction_indices=1)
        self.log_pi_action_bs = tf.reduce_sum(
            tf.reshape(self.log_pi_action, [-1, trace_length]), 1)
        self.log_pi_action_bs_t = tf.reshape(
            self.log_pi_action, [self.batch_size, trace_length])
        # Only the 'input_proc' variables are updated by the value loss.
        self.trainer = tf.train.GradientDescentOptimizer(learning_rate=1)
        self.updateModel = self.trainer.minimize(
            self.loss, var_list=self.value_params)

    # Helpers built over the collected parameter list.
    self.setparams = SetFromFlat(self.parameters)
    self.getparams = GetFlat(self.parameters)
    self.param_len = len(self.parameters)

    for var in self.parameters:
        print(var.name, var.get_shape())
def __init__(self, state_size, action_size, learning_rate, name='DQLearner'):
    """Build the DQN graph: placeholders, conv stack, Q head, loss, optimizer.

    Three conv -> batch-norm -> ELU stages feed a 512-unit ELU dense layer
    and a linear output layer with one unit per action. The predicted Q
    value of the taken action is selected with the one-hot `actions_`
    placeholder and regressed onto `target_Q` with a mean squared error,
    minimised by RMSProp.

    Args:
        state_size: iterable of input dimensions without the batch axis
            (e.g. (84, 84, 4)).
        action_size: number of actions; sets the width of the `actions_`
            placeholder and of the output layer.
        learning_rate: learning rate of the RMSProp optimizer.
        name: variable scope under which the graph is built.
    """
    self.state_size = state_size
    self.action_size = action_size
    self.learning_rate = learning_rate

    with v1.variable_scope(name):
        # Placeholders. *state_size unpacks the tuple, so for
        # state_size == (84, 84, 4) the shape is [None, 84, 84, 4].
        self.inputs_ = v1.placeholder(tf.float32, [None, *state_size], name="inputs")
        # One-hot encoded actions; width follows action_size instead of a
        # hard-coded 3 so the network matches the configured action space.
        self.actions_ = v1.placeholder(tf.float32, [None, self.action_size], name="actions_")

        # target_Q is R(s, a) + gamma * max_a' Qhat(s', a').
        self.target_Q = v1.placeholder(tf.float32, [None], name="target")

        # NOTE(review): batch normalization is built with training=True in
        # every stage, so batch statistics are always used -- confirm this
        # is intended for inference as well.

        # First convnet: CNN -> BatchNormalization -> ELU.
        # For an 84x84x4 input the output is 20x20x32.
        self.conv1 = v1l.conv2d(inputs=self.inputs_,
                                filters=32,
                                kernel_size=[8, 8],
                                strides=[4, 4],
                                padding="VALID",
                                kernel_initializer=v1.initializers.glorot_uniform(),
                                name="conv1")
        self.conv1_batchnorm = v1l.batch_normalization(self.conv1,
                                                       training=True,
                                                       epsilon=1e-5,
                                                       name='batch_norm1')
        self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")

        # Second convnet: CNN -> BatchNormalization -> ELU.
        # 20x20x32 -> 9x9x64.
        self.conv2 = v1l.conv2d(inputs=self.conv1_out,
                                filters=64,
                                kernel_size=[4, 4],
                                strides=[2, 2],
                                padding="VALID",
                                kernel_initializer=v1.initializers.glorot_uniform(),
                                name="conv2")
        self.conv2_batchnorm = v1l.batch_normalization(self.conv2,
                                                       training=True,
                                                       epsilon=1e-5,
                                                       name='batch_norm2')
        self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")

        # Third convnet: CNN -> BatchNormalization -> ELU.
        # 9x9x64 -> 3x3x128.
        self.conv3 = v1l.conv2d(inputs=self.conv2_out,
                                filters=128,
                                kernel_size=[4, 4],
                                strides=[2, 2],
                                padding="VALID",
                                kernel_initializer=v1.initializers.glorot_uniform(),
                                name="conv3")
        self.conv3_batchnorm = v1l.batch_normalization(self.conv3,
                                                       training=True,
                                                       epsilon=1e-5,
                                                       name='batch_norm3')
        self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")

        # 3x3x128 -> 1152.
        self.flatten = v1l.flatten(self.conv3_out)

        self.fc = v1l.dense(inputs=self.flatten,
                            units=512,
                            activation=tf.nn.elu,
                            kernel_initializer=v1.initializers.glorot_uniform(),
                            name="fc1")

        # One Q value per action.
        self.output = v1l.dense(inputs=self.fc,
                                kernel_initializer=v1.initializers.glorot_uniform(),
                                units=self.action_size,
                                activation=None)

        # Q is the predicted Q value of the action actually taken.
        self.Q = tf.math.reduce_sum(tf.math.multiply(self.output, self.actions_), axis=1)

        # Loss: mean over the batch of (Qtarget - Q)^2.
        self.loss = tf.math.reduce_mean(tf.math.square(self.target_Q - self.Q))

        self.optimizer = v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
def LeNet(x):
    """Build a LeNet-style classifier and return its logits.

    Two (5x5 VALID conv -> bias -> ReLU -> 2x2 average pool) stages are
    followed by three fully connected layers of width 120, 84 and
    `n_classes`. Weights are drawn from a truncated normal (mean 0,
    stddev 0.1); biases start at zero. The weight shapes expect a
    single-channel input whose flattened second-stage output has
    5 * 5 * 16 = 400 features (e.g. a 32x32x1 image).

    Args:
        x: input image batch.

    Returns:
        The logits tensor with `n_classes` columns.
    """
    # Parameters of tf.random.truncated_normal used for every weight tensor.
    mu = 0
    sigma = 0.1

    def _weight(shape):
        return tf.Variable(
            tf.random.truncated_normal(shape=shape, mean=mu, stddev=sigma))

    # Conv filters are (height, width, input_depth, output_depth); dense
    # weights are (inputs, outputs). Variables are created in this order.
    weight_shapes = (
        ('conv1', (5, 5, 1, 6)),
        ('conv2', (5, 5, 6, 16)),
        ('fl1', (5 * 5 * 16, 120)),
        ('fl2', (120, 84)),
        ('out', (84, n_classes)),
    )
    weights = {key: _weight(shape) for key, shape in weight_shapes}

    # Bias shape is (output_depth,).
    bias_sizes = (
        ('conv1', 6),
        ('conv2', 16),
        ('fl1', 120),
        ('fl2', 84),
        ('out', n_classes),
    )
    biases = {key: tf.Variable(tf.zeros(size)) for key, size in bias_sizes}

    def _conv_stage(inputs, key):
        """5x5 VALID convolution, bias, ReLU, then 2x2 average pooling."""
        conv = tf.nn.conv2d(input=inputs, filters=weights[key],
                            strides=[1, 1, 1, 1], padding='VALID')
        conv = tf.nn.relu(tf.nn.bias_add(conv, biases[key]))
        return tf.nn.avg_pool2d(input=conv, ksize=[1, 2, 2, 1],
                                strides=[1, 2, 2, 1], padding='VALID')

    def _affine(inputs, key):
        """Fully connected layer without activation."""
        return tf.add(tf.matmul(inputs, weights[key]), biases[key])

    # Stage 1: 32x32x1 -> 28x28x6 -> (pool) 14x14x6.
    stage1 = _conv_stage(x, 'conv1')
    # Stage 2: 14x14x6 -> 10x10x16 -> (pool) 5x5x16.
    stage2 = _conv_stage(stage1, 'conv2')

    # Flatten: 5x5x16 -> 400.
    flat = flatten(stage2)

    # Fully connected: 400 -> 120 -> 84 -> n_classes.
    hidden1 = tf.nn.relu(_affine(flat, 'fl1'))
    hidden2 = tf.nn.relu(_affine(hidden1, 'fl2'))
    return _affine(hidden2, 'out')