def build_encoder_conv_(self, x, namespace, share_weights=False):
    """Build the convolutional encoder and draw a noisy sample from it.

    The network is conv(16 filters, 3x3) -> flatten -> dense(128) followed by
    two linear 32-unit heads created inside the "latent" scope.

    Args:
        x: Input tensor. A trailing axis is appended before the convolution
            (presumably x is (batch, height, width) -- TODO confirm).
        namespace: Name of the variable scope that wraps every layer.
        share_weights: Forwarded as ``reuse`` to that variable scope.

    Returns:
        Tuple ``(mu, var, sd, sample, noise)`` where ``sd`` is the softplus
        of the second head, ``var`` is ``sd`` squared, ``noise`` is standard
        normal with shape (batch, 32) and ``sample`` is ``mu + noise * sd``.
    """
    def layer_kwargs():
        # Each layer gets its own regularizer/initializer objects.
        return {
            "kernel_regularizer": agent_utils.get_weight_regularizer(
                self.weight_decay),
            "kernel_initializer": agent_utils.get_mrsa_initializer(),
        }

    # same network as for minatar, more or less
    with tf.variable_scope(namespace, reuse=share_weights):
        features = x[:, :, :, tf.newaxis]

        with tf.variable_scope("conv1"):
            features = tf.layers.conv2d(
                features, 16, 3, (1, 1), padding="same",
                activation=tf.nn.relu, **layer_kwargs())

        features = tf.layers.flatten(features)

        with tf.variable_scope("fc1"):
            features = tf.layers.dense(
                features, 128, activation=tf.nn.relu, **layer_kwargs())

        with tf.variable_scope("latent"):
            # Creation order matters: the first head is named "dense", the
            # second "dense_1" inside this scope.
            mu = tf.layers.dense(
                features, 32, activation=None, **layer_kwargs())
            sigma = tf.layers.dense(
                features, 32, activation=None, **layer_kwargs())

            # softplus keeps the standard deviation positive
            sd = tf.nn.softplus(sigma)
            noise = tf.random_normal(
                shape=(tf.shape(mu)[0], 32), mean=0, stddev=1.0)
            sample = mu + noise * sd
            var = tf.square(sd)

    return mu, var, sd, sample, noise
def build_encoder_fc_(self, x, namespace, share_weights=False):
    """Build the fully-connected encoder and draw a noisy sample from it.

    The network is flatten -> dense(128) -> dense(128) followed by two linear
    32-unit heads created inside the "latent" scope.

    Args:
        x: Input tensor; it is flattened before the first dense layer.
        namespace: Name of the variable scope that wraps every layer.
        share_weights: Forwarded as ``reuse`` to that variable scope.

    Returns:
        Tuple ``(mu, var, sd, sample, noise)`` where ``sd`` is the softplus
        of the second head, ``var`` is ``sd`` squared, ``noise`` is standard
        normal with shape (batch, 32) and ``sample`` is ``mu + noise * sd``.
    """
    def layer_kwargs():
        # Each layer gets its own regularizer/initializer objects.
        return {
            "kernel_regularizer": agent_utils.get_weight_regularizer(
                self.weight_decay),
            "kernel_initializer": agent_utils.get_mrsa_initializer(),
        }

    with tf.variable_scope(namespace, reuse=share_weights):
        hidden = tf.layers.flatten(x)

        # Two identical ReLU layers, each in its own named scope.
        with tf.variable_scope("fc1"):
            hidden = tf.layers.dense(
                hidden, 128, activation=tf.nn.relu, **layer_kwargs())

        with tf.variable_scope("fc2"):
            hidden = tf.layers.dense(
                hidden, 128, activation=tf.nn.relu, **layer_kwargs())

        with tf.variable_scope("latent"):
            # Creation order matters: the first head is named "dense", the
            # second "dense_1" inside this scope.
            mu = tf.layers.dense(
                hidden, 32, activation=None, **layer_kwargs())
            sigma = tf.layers.dense(
                hidden, 32, activation=None, **layer_kwargs())

            # softplus keeps the standard deviation positive
            sd = tf.nn.softplus(sigma)
            noise = tf.random_normal(
                shape=(tf.shape(mu)[0], 32), mean=0, stddev=1.0)
            sample = mu + noise * sd
            var = tf.square(sd)

    return mu, var, sd, sample, noise
def build_predictors_(self):
    """Attach a linear Q-value head to the sampled state.

    Sets:
        self.q_prediction_t: Output of a linear dense layer with
            ``self.NUM_ACTIONS`` units applied to ``self.state_sample_t``.
        self.masked_q_prediction_t: ``self.q_prediction_t`` multiplied by
            ``self.actions_mask_t`` and summed over axis 1.
    """
    # The head lives under <ENCODER_NAMESPACE>/predict_q.
    with tf.variable_scope(self.ENCODER_NAMESPACE), \
            tf.variable_scope("predict_q"):
        q_values = tf.layers.dense(
            self.state_sample_t,
            self.NUM_ACTIONS,
            activation=None,
            kernel_regularizer=agent_utils.get_weight_regularizer(
                self.weight_decay),
            kernel_initializer=agent_utils.get_mrsa_initializer())
        self.q_prediction_t = q_values

        # presumably actions_mask_t is one-hot, so this selects the Q-value
        # of the taken action -- verify against where the mask is built
        self.masked_q_prediction_t = tf.reduce_sum(
            self.actions_mask_t * q_values, axis=1)
def build_network_(self, namespace):
    """Build the conv/fc network and return hand-state-masked logits.

    The input is ``self.depth_pl``, bilinearly resized to a square of side
    ``self.target_size`` when that attribute is not None. It then passes
    through the conv stack described by ``self.num_filters_list``,
    ``self.filter_size_list`` and ``self.stride_list``, is flattened, and goes
    through one ReLU dense layer per entry of ``self.hiddens``.

    Args:
        namespace: Name of the variable scope that wraps every layer.

    Returns:
        Logits tensor reshaped to
        ``(-1, self.output_shape[0] * self.output_shape[1])``.
    """
    with tf.variable_scope(namespace):
        x = (
            tf.image.resize_bilinear(
                self.depth_pl, size=(self.target_size, self.target_size))
            if self.target_size is not None
            else self.depth_pl
        )

        with tf.variable_scope("convs"):
            for idx, num_filters in enumerate(self.num_filters_list):
                with tf.variable_scope("conv{:d}".format(idx + 1)):
                    x = tf.layers.conv2d(
                        x,
                        num_filters,
                        self.filter_size_list[idx],
                        self.stride_list[idx],
                        padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=utils.get_mrsa_initializer())

        x = tf.layers.flatten(x, name="flatten")

        with tf.variable_scope("fcs"):
            for layer_no, units in enumerate(self.hiddens, start=1):
                with tf.variable_scope("fc{:d}".format(layer_no)):
                    x = tf.layers.dense(x, units, activation=tf.nn.relu)

        with tf.variable_scope("logits"):
            rows = self.output_shape[0]
            cols = self.output_shape[1]

            # predict for both hand empty and hand full
            logits = tf.layers.dense(x, rows * cols * 2)
            # NOTE(review): the dense layer emits 2 * rows * cols values per
            # input row, and this reshape ends in a dimension of size 1, so
            # the leading dimension becomes twice the batch size. Presumably
            # mask_hand_states expects that layout -- confirm.
            logits = tf.reshape(logits, shape=(-1, rows, cols, 1))

            # mask hand states
            logits = self.mask_hand_states(logits, self.hand_states_pl)

            return tf.reshape(logits, shape=(-1, rows * cols))
def build_network_(self, namespace):
    """Build the conv/fc network and return per-action logits.

    ``self.depth_pl`` passes through the conv stack described by
    ``self.num_filters_list``, ``self.filter_size_list`` and
    ``self.stride_list``, is flattened, and goes through one ReLU dense layer
    per entry of ``self.hiddens``.

    Args:
        namespace: Name of the variable scope that wraps every layer.

    Returns:
        Output of a final linear dense layer with ``self.num_actions`` units.
    """
    with tf.variable_scope(namespace):
        net = self.depth_pl

        with tf.variable_scope("convs"):
            for idx, num_filters in enumerate(self.num_filters_list):
                with tf.variable_scope("conv{:d}".format(idx + 1)):
                    net = tf.layers.conv2d(
                        net,
                        num_filters,
                        self.filter_size_list[idx],
                        self.stride_list[idx],
                        padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=utils.get_mrsa_initializer())

        net = tf.layers.flatten(net, name="flatten")

        with tf.variable_scope("fcs"):
            for layer_no, units in enumerate(self.hiddens, start=1):
                with tf.variable_scope("fc{:d}".format(layer_no)):
                    net = tf.layers.dense(net, units, activation=tf.nn.relu)

        # No activation on the output layer: callers receive raw logits.
        with tf.variable_scope("logits"):
            logits = tf.layers.dense(net, self.num_actions)

    return logits
def build_model_conv_(self):
    """Build the convolutional reward and transition model.

    ``self.states_pl`` gets a trailing axis and passes through
    conv(16 filters, 3x3) -> flatten -> dense(128), with optional dropout,
    before feeding two linear heads.

    Sets:
        self.reward_prediction_t: Column 0 of a single-unit linear head.
        self.transition_prediction_t: Linear head reshaped to
            (batch, NUM_ACTIONS, height, width).
        self.masked_transition_prediction_t: The transition prediction
            multiplied by ``self.actions_mask_t`` (broadcast over the two
            trailing axes) and summed over axis 1.
        self.masked_transition_prediction_softmax_t: Softmax of the masked
            prediction taken jointly over all height * width cells, reshaped
            to (batch, height, width).
    """
    def layer_kwargs():
        # Each layer gets its own regularizer/initializer objects.
        return {
            "kernel_regularizer": agent_utils.get_weight_regularizer(
                self.weight_decay),
            "kernel_initializer": agent_utils.get_mrsa_initializer(),
        }

    # same network as for minatar, more or less
    with tf.variable_scope(self.MODEL_NAMESPACE):
        x = self.states_pl[:, :, :, tf.newaxis]
        # dynamic batch size, reused by the reshapes further down
        batch_size = tf.shape(x)[0]

        with tf.variable_scope("conv1"):
            x = tf.layers.conv2d(
                x, 16, 3, (1, 1), padding="same",
                activation=tf.nn.relu, **layer_kwargs())

        x = tf.layers.flatten(x)

        with tf.variable_scope("fc1"):
            x = tf.layers.dense(
                x, 128, activation=tf.nn.relu, **layer_kwargs())

        # Dropout is only added for a strictly positive probability.
        if self.dropout_prob is not None and self.dropout_prob > 0.0:
            x = tf.layers.dropout(
                x, rate=self.dropout_prob, training=self.is_training)

        with tf.variable_scope("predict_reward"):
            reward = tf.layers.dense(
                x, 1, activation=None, **layer_kwargs())
            # drop the singleton unit axis
            self.reward_prediction_t = reward[:, 0]

        with tf.variable_scope("predict_transition"):
            flat_transitions = tf.layers.dense(
                x, self.NUM_ACTIONS * self.height * self.width,
                activation=None, **layer_kwargs())

            # one height x width map per action
            self.transition_prediction_t = tf.reshape(
                flat_transitions,
                (batch_size, self.NUM_ACTIONS, self.height, self.width))

            self.masked_transition_prediction_t = tf.reduce_sum(
                self.transition_prediction_t
                * self.actions_mask_t[:, :, tf.newaxis, tf.newaxis],
                axis=1)

            # Softmax over all cells at once, then restore the 2-D layout.
            flat_masked = tf.reshape(
                self.masked_transition_prediction_t, shape=[batch_size, -1])
            cell_probs = tf.nn.softmax(flat_masked, axis=1)
            self.masked_transition_prediction_softmax_t = tf.reshape(
                cell_probs, [batch_size, self.height, self.width])