def sac_cnn_lstm(scaled_images, **kwargs):
    activ = tf.nn.relu
    conv1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=5, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
    conv2 = activ(conv(conv1, 'c2', n_filters=64, filter_size=3, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
    conv3 = activ(conv(conv2, 'c3', n_filters=64, filter_size=3, stride=2,
                       init_scale=np.sqrt(2), **kwargs))
    conv3 = conv_to_fc(conv3)  # try w/o LSTM first
    return activ(linear(conv3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def modified_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=64, filter_size=2, stride=1, **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=128, filter_size=2, stride=1, **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=256, filter_size=2, stride=1, **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def Cnn1(image, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(image, 'c1', n_filters=32, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    super(NatureCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                    reuse=reuse, scale=True)
    with tf.variable_scope("model", reuse=reuse):
        activ = tf.nn.relu
        input = self.processed_obs
        layer_1 = activ(conv(input, 'c1', n_filters=32, filter_size=8, stride=4,
                             init_scale=np.sqrt(2), **kwargs))
        layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=4, stride=2,
                             init_scale=np.sqrt(2), **kwargs))
        layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                             init_scale=np.sqrt(2), **kwargs))
        layer_3 = conv_to_fc(layer_3)
        extracted_features = activ(linear(layer_3, 'fc1', n_hidden=256, init_scale=np.sqrt(2)))
        value_fn = tf.layers.dense(extracted_features, 1, name='vf')
        self.proba_distribution, self.policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(extracted_features, extracted_features,
                                                       init_scale=0.01)
    self.value_fn = value_fn
    self.initial_state = None
    self._setup_init()

    # Print a per-variable parameter breakdown and the total count.
    total = 0
    for v in tf.trainable_variables():
        dims = v.get_shape().as_list()
        num = int(np.prod(dims))
        total += num
        print(' %s \t\t Num: %d \t\t Shape %s ' % (v.name, num, dims))
    print('\nTotal number of params: %d' % total)
def ppo_cnn(scaled_images, **kwargs):
    activ = tf.nn.elu
    conv1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=5, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
    conv2 = activ(conv(conv1, 'c2', n_filters=64, filter_size=3, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
    conv3 = activ(conv(conv2, 'c3', n_filters=64, filter_size=3, stride=2,
                       init_scale=np.sqrt(2), **kwargs))
    conv3 = conv_to_fc(conv3)
    return activ(linear(conv3, 'fc1', n_hidden=512, init_scale=0.01))
def proba_distribution_from_latent_infer(self, infer_latent_vector, init_scale=1.0,
                                         init_bias=0.0, std_normal=False, prior_std=0):
    if std_normal:
        pdparam = tf.concat([tf.zeros([1, self.size]),
                             prior_std * tf.ones([1, self.size])], axis=1)
        mean = tf.zeros([1, self.size])
        return self.proba_distribution_from_flat(pdparam), mean
    else:
        mean = linear(infer_latent_vector, 'infer', self.size,
                      init_scale=init_scale, init_bias=init_bias)
        logstd = tf.get_variable(name='infer/logstd', shape=[1, self.size],
                                 initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        return self.proba_distribution_from_flat(pdparam), mean
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False,
             net_arch=None, act_fun=tf.tanh, feature_extraction="cnn", **kwargs):
    super(RelationalPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                           reuse=reuse, scale=(feature_extraction == "cnn"))
    self._kwargs_check(feature_extraction, kwargs)
    with tf.variable_scope("model", reuse=reuse):
        print('self.processed_obs', self.processed_obs)
        relation_block_output = self.relation_block(self.processed_obs)
        pi_latent = vf_latent = tf.layers.flatten(relation_block_output)  # original code
        self._value_fn = linear(vf_latent, 'vf', 1)
        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)
    self._setup_init()
def tic_tac_toe_cnn(scaled_images, **kwargs):
    """
    Custom CNN for Tic Tac Toe env.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Must contain 'cnn_arch' (filter counts per conv layer; the last
        entry is the width of the final linear layer), 'filter_size' and 'pad'
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.nn.relu
    layer = scaled_images
    net_arch = kwargs['cnn_arch']
    filter_size = kwargs['filter_size']
    pad = kwargs['pad']
    for i, f in enumerate(net_arch[:-1], start=1):
        layer = activ(conv(layer, 'c' + str(i), n_filters=f, filter_size=filter_size,
                           stride=1, pad=pad, data_format='NCHW'))
    layer = conv_to_fc(layer)
    return activ(linear(layer, 'fc1', n_hidden=net_arch[-1]))
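# A minimal usage sketch for tic_tac_toe_cnn. Assumptions: stable-baselines 2.x on
# TF1, and a registered image-observation env called 'TicTacToe-v0' (the env id and
# the architecture values below are illustrative, not from the original code). Extra
# entries in policy_kwargs reach the extractor through **kwargs, exactly as the
# FeedForwardPolicy further down forwards them to cnn_extractor.
import gym
from stable_baselines import PPO2

model = PPO2('CnnPolicy', gym.make('TicTacToe-v0'),
             policy_kwargs=dict(cnn_extractor=tic_tac_toe_cnn,
                                cnn_arch=[32, 64, 64],  # last entry: fc1 width
                                filter_size=3, pad='SAME'))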
def cnn_3d(scaled_voxels, n_hidden, filters, filter_sizes, strides, **kwargs):
    """
    CNN in 3D.

    :param scaled_voxels: (TensorFlow Tensor) Voxel input placeholder
    :param n_hidden: (int) Number of nodes in the last linear layer
    :param filters: (array) Filter counts for the convolutional layers of the CNN
    :param filter_sizes: (array) Filter sizes for the convolutional layers of the CNN
    :param strides: (array) Strides for the convolutional layers of the CNN
    :param kwargs: (dict) Extra keyword parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.tanh
    layers = []
    for i, (n_filter, filter_size, stride) in enumerate(zip(filters, filter_sizes, strides)):
        input_layer = scaled_voxels if i == 0 else layers[-1]
        label = 'c%d' % (i + 1)
        layer = activ(conv3d(input_layer, label, n_filters=n_filter, filter_size=filter_size,
                             stride=stride, init_scale=np.sqrt(2), **kwargs))
        layers.append(layer)
        print('layer_%d' % (i + 1), layer.shape)
    layer = conv_to_fc(layers[-1])
    return tf.tanh(linear(layer, 'fc1', n_hidden=n_hidden, init_scale=np.sqrt(2)))
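# Call sketch for cnn_3d (the placeholder shape and layer lists are illustrative,
# not from the original code). Position i of the three lists configures conv layer
# 'c%d' % (i + 1), so the lists must all have the same length.
voxels_ph = tf.placeholder(tf.float32, shape=[None, 16, 16, 16, 1])
voxel_features = cnn_3d(voxels_ph, n_hidden=256,
                        filters=[32, 64], filter_sizes=[4, 3], strides=[2, 1])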
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None,
             net_arch=None, act_fun=tf.tanh, cnn_extractor=nature_cnn,
             feature_extraction="cnn", **kwargs):
    super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                            reuse=reuse, scale=(feature_extraction == "cnn"))
    self._kwargs_check(feature_extraction, kwargs)

    if layers is not None:
        warnings.warn("Usage of the `layers` parameter is deprecated! Use net_arch instead "
                      "(it has different semantics, though).", DeprecationWarning)
        if net_arch is not None:
            warnings.warn("The new `net_arch` parameter overrides the deprecated `layers` "
                          "parameter!", DeprecationWarning)

    if net_arch is None:
        if layers is None:
            layers = [64, 64]
        net_arch = [dict(vf=layers, pi=layers)]

    with tf.variable_scope("model", reuse=reuse):
        if feature_extraction == "cnn":
            pi_latent = vf_latent = cnn_extractor(self.processed_obs, **kwargs)
        else:
            pi_latent, vf_latent = mlp_extractor(tf.layers.flatten(self.processed_obs),
                                                 net_arch, act_fun)
        self.value_fn = linear(vf_latent, 'vf', 1)
        self.proba_distribution, self.policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)
    self.initial_state = None
    self._setup_init()
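# Sketch of the net_arch semantics consumed above (standard stable-baselines 2.x
# pattern; the class name is illustrative): leading ints are layers shared by actor
# and critic, and the trailing dict splits into separate 'pi' and 'vf' heads.
from stable_baselines.common.policies import FeedForwardPolicy

class TwoHeadMlpPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        super(TwoHeadMlpPolicy, self).__init__(
            *args, **kwargs,
            net_arch=[64, dict(pi=[64, 64], vf=[64, 64])],
            feature_extraction="mlp")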
def embedding(entities, n_heads, embedding_sizes, scope):
    """
    :param entities: (TensorFlow Tensor) The input entities: [B,N,D]
    :param n_heads: (int) The number of attention heads to use
    :param embedding_sizes: (list of int) Per-head widths of the query, key and value projections
    :param scope: (str) The TensorFlow variable scope
    :return: (list of TensorFlow Tensors) One [B,n_heads,N,embedding_sizes[i]] tensor
        per entry of embedding_sizes
    """
    with tf.variable_scope(scope):
        N = entities.shape[1].value
        channels = entities.shape[2].value
        # total_size is denoted F below, n_heads is denoted H
        total_size = sum(embedding_sizes) * n_heads
        # [B,N,D] -> [B*N,D]
        entities = tf.reshape(entities, [-1, channels])
        # [B*N,D] -> [B*N,F]
        embedded_entities = linear(entities, "mlp", total_size)
        # [B*N,F] -> [B,N,F]
        embedded_entities = tf.reshape(embedded_entities, [-1, N, total_size])
        qkv = layerNorm(embedded_entities, "ln")
        # alternatives that were tried: batchNorm, instanceNorm, FRNorm
        # [B,N,F] -> [B,N,H,sum(embedding_sizes)]
        qkv = tf.reshape(qkv, [-1, N, n_heads, sum(embedding_sizes)])
        # [B,N,H,sum(embedding_sizes)] -> [B,H,N,sum(embedding_sizes)]
        qkv = tf.transpose(qkv, [0, 2, 1, 3])
        return tf.split(qkv, embedding_sizes, -1)
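# Numpy stand-in tracing the reshape/transpose/split pipeline of embedding().
# B, N, D, H and embedding_sizes are illustrative values, not from the original code.
import numpy as np

B, N, H = 4, 5, 3
embedding_sizes = [16, 16, 32]                   # query, key, value widths per head
F = sum(embedding_sizes) * H                     # total projection width
x = np.zeros((B * N, F))                         # after the shared linear layer
x = x.reshape(-1, N, H, sum(embedding_sizes))    # [B,N,H,sum(sizes)]
x = x.transpose(0, 2, 1, 3)                      # [B,H,N,sum(sizes)]
q, k, v = np.split(x, np.cumsum(embedding_sizes)[:-1], axis=-1)
print(q.shape, k.shape, v.shape)                 # (4, 3, 5, 16) (4, 3, 5, 16) (4, 3, 5, 32)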
def minigrid_extractor_small(scaled_images, **kwargs):
    """
    CNN for MiniGrid environments with variable grid sizes
    """
    activ = tf.nn.relu
    # first layer is just an embedding finder
    layer_1 = conv(scaled_images, 'c1', n_filters=32, filter_size=1, stride=1,
                   init_scale=np.sqrt(2), **kwargs)
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_4 = conv_to_fc(layer_3)
    print(layer_3)
    return activ(linear(layer_4, 'fc1', n_hidden=128, init_scale=np.sqrt(2)))
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    pdparam = linear(pi_latent_vector, 'pi', self.size,
                     init_scale=init_scale, init_bias=init_bias)
    q_values = linear(vf_latent_vector, 'q', self.size,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), pdparam, q_values
def nature_cnn(scaled_images, **kwargs):
    """
    CNN from Nature paper.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keyword parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=8, stride=4,
                         init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=4, stride=2,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
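# Hooking a custom extractor into a CnnPolicy, following the standard
# stable-baselines 2.x custom-policy pattern. The Atari env id and timestep
# budget are illustrative; any extractor in this file with the same
# (scaled_images, **kwargs) signature can be substituted for nature_cnn.
from stable_baselines import A2C

model = A2C("CnnPolicy", "BreakoutNoFrameskip-v4", verbose=1,
            policy_kwargs=dict(cnn_extractor=nature_cnn))
model.learn(total_timesteps=10000)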
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm=256, reuse=False,
             layers=None, net_arch=None, layer_norm=False, feature_extraction="cnn", **kwargs):
    # state_shape = [n_lstm * 2] dim because of the cell and hidden states of the LSTM
    super(RelationalLstmPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                               state_shape=(2 * n_lstm,), reuse=reuse,
                                               scale=(feature_extraction == "cnn"))
    self._kwargs_check(feature_extraction, kwargs)
    with tf.variable_scope("model", reuse=reuse):
        print('self.processed_obs', self.processed_obs)
        relation_block_output = self.relation_block(self.processed_obs)  # original code
        input_sequence = batch_to_seq(relation_block_output, self.n_env, n_steps)
        print('input_sequence', input_sequence)
        masks = batch_to_seq(self.dones_ph, self.n_env, n_steps)
        rnn_output, self.snew = lstm(input_sequence, masks, self.states_ph, 'lstm1',
                                     n_hidden=n_lstm, layer_norm=layer_norm)
        rnn_output = seq_to_batch(rnn_output)
        value_fn = linear(rnn_output, 'vf', 1)
        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(rnn_output, rnn_output)
    self._value_fn = value_fn
    self._setup_init()
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    super(NatureCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                    reuse=reuse, scale=True)
    with tf.variable_scope("model", reuse=reuse):
        activ = tf.nn.relu
        input = self.processed_obs
        layer_1 = activ(conv(input, 'c1', n_filters=32, filter_size=8, stride=4,
                             init_scale=np.sqrt(2), **kwargs))
        layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=4, stride=2,
                             init_scale=np.sqrt(2), **kwargs))
        layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                             init_scale=np.sqrt(2), **kwargs))
        layer_3 = conv_to_fc(layer_3)
        extracted_features = activ(linear(layer_3, 'fc1', n_hidden=256, init_scale=np.sqrt(2)))
        value_fn = tf.layers.dense(extracted_features, 1, name='vf')
        self.proba_distribution, self.policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(extracted_features, extracted_features,
                                                       init_scale=0.01)
    self.value_fn = value_fn
    self.initial_state = None
    self._setup_init()
def modified_cnn(unscaled_images, **kwargs):
    import tensorflow as tf
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=1, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=32, filter_size=2, stride=2,
                         init_scale=np.sqrt(2), **kwargs))
    layer_2 = conv_to_fc(layer_2)
    return activ(linear(layer_2, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def modified_cnn(scaled_images, **kwargs):
    import tensorflow as tf
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def modified_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    # layer 1: parallel 1x2 and 2x1 convolutions over the input
    conv1 = conv(scaled_images, "c1", n_filters=128, filter_size=(1, 2), stride=1,
                 init_scale=np.sqrt(2), **kwargs)
    conv2 = conv(scaled_images, "c2", n_filters=128, filter_size=(2, 1), stride=1,
                 init_scale=np.sqrt(2), **kwargs)
    relu1 = activ(conv1)
    relu2 = activ(conv2)
    # layer 2: each branch is again split into 1x2 and 2x1 convolutions; variable
    # scopes must be unique, so the second branch uses "c5"/"c6" (the original reused
    # "c3"/"c4", which raises a variable-scope error in TF1)
    conv11 = conv(relu1, "c3", n_filters=128, filter_size=(1, 2), stride=1,
                  init_scale=np.sqrt(2), **kwargs)
    conv12 = conv(relu1, "c4", n_filters=128, filter_size=(2, 1), stride=1,
                  init_scale=np.sqrt(2), **kwargs)
    conv21 = conv(relu2, "c5", n_filters=128, filter_size=(1, 2), stride=1,
                  init_scale=np.sqrt(2), **kwargs)
    conv22 = conv(relu2, "c6", n_filters=128, filter_size=(2, 1), stride=1,
                  init_scale=np.sqrt(2), **kwargs)
    relu11 = activ(conv11)
    relu12 = activ(conv12)
    relu21 = activ(conv21)
    relu22 = activ(conv22)
    # flatten all six activations and concatenate them into one feature vector
    flat = []
    for r in (relu1, relu2, relu11, relu12, relu21, relu22):
        shape = r.get_shape().as_list()
        flat.append(tf.reshape(r, [-1, shape[1] * shape[2] * shape[3]]))
    hidden = tf.concat(flat, axis=1)
    linear_1 = activ(linear(hidden, scope="fc1", n_hidden=512, init_scale=np.sqrt(2)))
    linear_2 = activ(linear(linear_1, scope="fc2", n_hidden=128, init_scale=np.sqrt(2)))
    return linear_2
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0,
                                   mult_tensors=None, policy=None):
    if mult_tensors is not None:
        mean = linear_with_mult(mult_tensors, pi_latent_vector, 'pi', self.size,
                                init_scale=init_scale, init_bias=init_bias)
        logstd = get_mult_variable(mult_tensors, name="pi/logstd", shape=[1, self.size],
                                   initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        q_values = linear_with_mult(mult_tensors, vf_latent_vector, 'q', self.size,
                                    init_scale=init_scale, init_bias=init_bias)
        return self.proba_distribution_from_flat(pdparam), mean, q_values
    else:
        mean = linear(pi_latent_vector, 'pi', self.size,
                      init_scale=init_scale, init_bias=init_bias)
        policy.policy_neurons.append(mean)
        logstd = tf.get_variable(name='pi/logstd', shape=[1, self.size],
                                 initializer=tf.zeros_initializer())
        policy.policy_neurons.append(logstd)
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        q_values = linear(vf_latent_vector, 'q', self.size,
                          init_scale=init_scale, init_bias=init_bias)
        return self.proba_distribution_from_flat(pdparam), mean, q_values
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    super(CustomWPPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                         reuse=reuse, scale=False)
    with tf.variable_scope("model", reuse=reuse):
        activ = tf.nn.tanh
        measurement_features = tf.expand_dims(self.processed_obs[:, -1], axis=1)
        measurement_features_flat = tf.layers.flatten(measurement_features)
        pi_h = activ(linear(measurement_features_flat, "pi_vae_fc", 64, init_scale=np.sqrt(2)))
        pi_latent = activ(linear(pi_h, "pi_fc", 64, init_scale=np.sqrt(2)))
        vf_h = activ(linear(measurement_features_flat, "vf_vae_fc", 64, init_scale=np.sqrt(2)))
        # the value head reads from vf_h (the original fed pi_h here, leaving vf_h unused)
        vf_latent = activ(linear(vf_h, "vf_fc", 64, init_scale=np.sqrt(2)))
        value_fn = linear(vf_latent, 'vf', 1, init_scale=np.sqrt(2))
        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)
    self._value_fn = value_fn
    self._setup_init()
def vf_builder(vf_arch: str, latent: tf.Tensor, act_fun: tf.function,
               shared_graph: GraphsTuple = None, input_graph: GraphsTuple = None,
               layer_size: int = 64, layer_count: int = 3,
               iterations: int = 10) -> tf.Tensor:
    """
    Builds the value function network.

    Args:
        vf_arch: arch to use as a string ("shared", "graph" or "mlp")
        latent: the observation input
        act_fun: activation function
        shared_graph: the gnn output from the policy
        input_graph: GraphsTuple before any processing
        layer_size: width of the graph-network layers
        layer_count: number of layers (currently unused)
        iterations: number of iterations of message passing

    Returns:
        A tensor which will hold the value
    """
    if vf_arch == "shared":
        output_globals_vf = tf.reshape(shared_graph.globals, [-1, layer_size])
        latent_vf = output_globals_vf
        latent_vf = act_fun(linear(latent_vf, "vf_fc0", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(linear(latent_vf, "vf_fc1", 128, init_scale=np.sqrt(2)))
    elif vf_arch == "graph":
        model_vf = DDRGraphNetwork(layer_size=layer_size)
        output_graph_vf = model_vf(input_graph, iterations)
        output_globals_vf = tf.reshape(output_graph_vf.globals, [-1, layer_size])
        latent_vf = output_globals_vf
    elif vf_arch == "mlp":
        latent_vf = latent
        latent_vf = act_fun(linear(latent_vf, "vf_fc0", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(linear(latent_vf, "vf_fc1", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(linear(latent_vf, "vf_fc2", 128, init_scale=np.sqrt(2)))
    else:
        raise Exception("No such vf network")

    return latent_vf
def my_small_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=3, stride=1, **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3, stride=1, **kwargs))
    layer_3 = conv_to_fc(layer_2)
    return activ(linear(layer_3, 'fc1', n_hidden=32, init_scale=np.sqrt(2)))
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None,
             net_arch=None, act_fun=tf.tanh, cnn_extractor=None, feature_extraction="cnn",
             **kwargs):
    super(CustomFeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps,
                                                  n_batch, reuse=reuse, scale=False)
    self._kwargs_check(feature_extraction, kwargs)
    with tf.variable_scope("model", reuse=reuse):
        if feature_extraction == "cnn":
            pi_latent = vf_latent = cnn_extractor(self.processed_obs, **kwargs)
        else:
            raise Exception("nope")  # the original created this exception without raising it
        assert str(type(self.pdtype)) == \
            "<class 'stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType'>"
        self._value_fn = linear(vf_latent, 'vf', 1)
        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)
        # alternative kept from the original (builds the distribution manually):
        # mean = pi_latent
        # n_jets = mean.shape[1]
        # logstd = tf.get_variable(name='pi/logstd', shape=[1, n_jets],
        #                          initializer=tf.zeros_initializer())
        # pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        # we take the last layer of our cnn for policy and q value:
        # self._proba_distribution = self.pdtype.proba_distribution_from_flat(pdparam)
        # self._policy = pi_latent
        # self.q_value = vf_latent
    self._setup_init()
def attention_cnn(scaled_images, **kwargs):
    """Nature CNN with region-sensitive module"""

    def softmax_2d(tensor):
        b, h, w, c = tensor.shape
        tensor = tf.reshape(tensor, (-1, h * w, c))
        tensor = tf.nn.softmax(tensor, axis=1)
        tensor = tf.reshape(tensor, (-1, h, w, c))
        return tensor

    c1 = tf.nn.relu(conv(scaled_images, 'c1', n_filters=32, filter_size=8, stride=4,
                         init_scale=np.sqrt(2), **kwargs))
    c2 = tf.nn.relu(conv(c1, 'c2', n_filters=64, filter_size=4, stride=2,
                         init_scale=np.sqrt(2), **kwargs))
    c3 = tf.nn.relu(conv(c2, 'c3', n_filters=64, filter_size=3, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    c3 = tf.nn.l2_normalize(c3, axis=-1)
    a1 = tf.nn.elu(conv(c3, 'a1', n_filters=512, filter_size=1, stride=1,
                        init_scale=np.sqrt(2), **kwargs))
    a2 = softmax_2d(conv(a1, 'a2', n_filters=2, filter_size=1, stride=1,
                         init_scale=np.sqrt(2), **kwargs))
    a2 = tf.identity(a2, name='attn')
    x = c3 * tf.reduce_sum(a2, axis=-1, keepdims=True)
    x = conv_to_fc(x)
    return tf.nn.relu(linear(x, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   action_mask_vector=None, init_scale=1.0, init_bias=0.0):
    pdparam = linear(pi_latent_vector, 'pi', sum(self.n_vec),
                     init_scale=init_scale, init_bias=init_bias)
    q_values = linear(vf_latent_vector, 'q', sum(self.n_vec),
                      init_scale=init_scale, init_bias=init_bias)
    return (self.proba_distribution_from_flat(pdparam, action_mask_vector=action_mask_vector),
            pdparam, q_values)
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None,
             cnn_extractor=nature_cnn, feature_extraction="cnn", obs_phs=None,
             layer_norm=False, **kwargs):
    super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                            n_lstm=256, reuse=reuse,
                                            scale=(feature_extraction == "cnn"), obs_phs=obs_phs)
    if layers is None:
        layers = [64, 64]

    with tf.variable_scope("model", reuse=reuse):
        if feature_extraction == "cnn":
            extracted_features = cnn_extractor(self.processed_x, **kwargs)
            pi_latent = extracted_features
        else:
            activ = tf.nn.relu
            processed_x = tf.layers.flatten(self.processed_x)
            pi_h = processed_x
            for i, layer_size in enumerate(layers):
                pi_h = linear(pi_h, 'pi_fc' + str(i), n_hidden=layer_size,
                              init_scale=np.sqrt(2))
                if layer_norm:
                    pi_h = tf.contrib.layers.layer_norm(pi_h, center=True, scale=True)
                pi_h = activ(pi_h)
            pi_latent = pi_h
        self.proba_distribution, self.policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, pi_latent, init_scale=0.01)
    self.value_fn = self.policy
    self.initial_state = None
    self._setup_init()
def cnn_extractor(scaled_images, channels=c, w=w, h=h):
    # Note: the defaults c, w and h are captured from the enclosing scope, so this
    # function is meant to be defined inside a closure that provides them.
    print(f"========= REAL SHAPE: {scaled_images.shape} ===========")
    original_shape = scaled_images.shape[1]
    print(f"========= SHAPE: {original_shape} ===========")
    # the flat observation is reshaped back into an image before the convolutions
    scaled_images = tf.reshape(scaled_images, (-1, h, w, channels))
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=w, stride=1,
                         init_scale=np.sqrt(2)))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=1, stride=1,
                         init_scale=np.sqrt(2)))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=128, filter_size=1, stride=1,
                         init_scale=np.sqrt(2)))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=128, init_scale=np.sqrt(2)))
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    mean = linear(pi_latent_vector, 'pi', self.size,
                  init_scale=init_scale, init_bias=init_bias)
    logstd = tf.get_variable(name='pi/logstd', shape=[1, self.size],
                             initializer=tf.zeros_initializer())
    # `mean * 0.0 + logstd` broadcasts the state-independent log-std to batch shape
    pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    q_values = linear(vf_latent_vector, 'q', self.size,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), mean, q_values
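# Numpy stand-in showing why the `mean * 0.0 + logstd` idiom above works: it tiles
# the single [1, size] log-std row up to the batch dimension of `mean`, so the
# concat yields one flat [batch, 2 * size] parameter vector per sample. The shapes
# and values here are illustrative.
import numpy as np

mean = np.zeros((7, 3))            # [batch, size] Gaussian means
logstd = np.full((1, 3), -0.5)     # [1, size] shared, state-independent log-std
pdparam = np.concatenate([mean, mean * 0.0 + logstd], axis=1)
print(pdparam.shape)               # (7, 6)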
def build_actor_critic_network_actionsadded(x, layers, action_indices, state_indices, reuse):
    activ = tf.nn.relu
    with tf.variable_scope("actor_critic", reuse=tf.AUTO_REUSE):
        actions = tf.gather(x, action_indices, axis=1)
        actions = tf.reduce_sum(actions, axis=1, keepdims=True)
        state = tf.gather(x, state_indices, axis=1)
        vf_h = tf.layers.flatten(tf.concat([actions, state], axis=1))
        for j, layer_size in enumerate(layers):
            vf_h = activ(linear(vf_h, 'vf_fc' + str(j), n_hidden=layer_size,
                                init_scale=np.sqrt(2)))
        vf_latent = activ(linear(vf_h, 'vf_head', len(action_indices)))
        value_fn = linear(vf_latent, 'vf', 1)
        pi_latent = build_policy(x, layers, action_indices, state_indices, activ)
    return pi_latent, vf_latent, value_fn