def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    """
    Build the policy graph: one convolution shared by separate policy and value heads.

    The parent constructor is called with ``scale=True``. A single 1024-filter
    convolution is applied to ``self.processed_obs`` and passed through
    ``conv_to_fc``; the result feeds two independent dense stacks (1024 then
    512 units, ReLU), one for the policy and one for the value function.

    :param sess: Forwarded unchanged to the parent constructor
    :param ob_space: Forwarded unchanged to the parent constructor
    :param ac_space: Forwarded unchanged to the parent constructor
    :param n_env: Forwarded unchanged to the parent constructor
    :param n_steps: Forwarded unchanged to the parent constructor
    :param n_batch: Forwarded unchanged to the parent constructor
    :param reuse: (bool) Passed to the parent and to the "model" variable scope
    :param kwargs: (dict) Extra keyword arguments forwarded to ``conv``
    """
    super(CustomPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                       reuse=reuse, scale=True)
    relu = tf.nn.relu

    def dense_stack(features, prefix, widths):
        # Dense layers are named '<prefix>0', '<prefix>1', ... in creation order.
        out = features
        for index, width in enumerate(widths):
            out = relu(tf.layers.dense(out, width, name=prefix + str(index)))
        return out

    with tf.variable_scope("model", reuse=reuse):
        conv_out = relu(conv(self.processed_obs, 'c1', n_filters=1024, filter_size=2,
                             stride=1, init_scale=np.sqrt(2), **kwargs))
        shared_features = conv_to_fc(conv_out)

        # Policy stack is created before the value stack, as in the original.
        pi_latent = dense_stack(shared_features, 'pi_fc', [1024, 512])
        vf_latent = dense_stack(shared_features, 'vf_fc', [1024, 512])
        value_fn = tf.layers.dense(vf_latent, 1, name='vf')

        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

    self._value_fn = value_fn
    self._setup_init()
def custom_extractor(scaled_images, **kwargs):
    """
    Three ReLU convolutions followed by a 512-unit ReLU dense layer.

    :param scaled_images: (TensorFlow Tensor) Input fed to the first convolution
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) Output of the ``fc1`` layer after ReLU
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    # (scope name, number of filters, filter size, stride), in application order
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    hidden = scaled_images
    for scope, n_filters, filter_size, stride in conv_specs:
        hidden = relu(conv(hidden, scope, n_filters=n_filters, filter_size=filter_size,
                           stride=stride, init_scale=gain, **kwargs))
    flattened = conv_to_fc(hidden)
    return relu(linear(flattened, 'fc1', n_hidden=512, init_scale=gain))
def Cnn1(image, **kwargs):
    """
    Three stride-1, size-3 ReLU convolutions followed by a 512-unit ReLU dense layer.

    :param image: (TensorFlow Tensor) Input fed to the first convolution
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) Output of the ``fc1`` layer after ReLU
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    features = image
    # Every convolution uses filter_size=3 and stride=1; only the filter count varies.
    for scope, n_filters in (('c1', 32), ('c2', 64), ('c3', 64)):
        features = relu(conv(features, scope, n_filters=n_filters, filter_size=3,
                             stride=1, init_scale=gain, **kwargs))
    features = conv_to_fc(features)
    return relu(linear(features, 'fc1', n_hidden=512, init_scale=gain))
def nature_cnn(scaled_images, **kwargs):
    """
    CNN from Nature paper: three ReLU convolutions and one 512-unit ReLU dense layer.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output layer
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    # One dict of conv hyper-parameters per layer, keyed by the layer scope name.
    layer_configs = [
        ('c1', dict(n_filters=32, filter_size=8, stride=4)),
        ('c2', dict(n_filters=64, filter_size=4, stride=2)),
        ('c3', dict(n_filters=64, filter_size=3, stride=1)),
    ]
    out = scaled_images
    for scope, config in layer_configs:
        out = relu(conv(out, scope, init_scale=gain, **config, **kwargs))
    out = conv_to_fc(out)
    return relu(linear(out, 'fc1', n_hidden=512, init_scale=gain))
def small_convnet(x, activ=tf.nn.relu, **kwargs):
    """
    Three activated convolutions followed by a 512-unit linear layer.

    Unlike the convolutions, the final ``fc1`` layer is returned without an
    activation applied.

    :param x: (TensorFlow Tensor) Input fed to the first convolution
    :param activ: (callable) Activation applied after each convolution
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``tf_layers.conv`` call
    :return: (TensorFlow Tensor) Raw output of the ``fc1`` linear layer
    """
    gain = np.sqrt(2)
    # (scope name, number of filters, filter size, stride), in application order
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    hidden = x
    for scope, n_filters, filter_size, stride in conv_specs:
        pre_activation = tf_layers.conv(hidden, scope, n_filters=n_filters,
                                        filter_size=filter_size, stride=stride,
                                        init_scale=gain, **kwargs)
        hidden = activ(pre_activation)
    flattened = tf_layers.conv_to_fc(hidden)
    return tf_layers.linear(flattened, 'fc1', n_hidden=512, init_scale=gain)
def cnn3D(input_space, **kwargs):
    """
    Custom 3d CNN: two 3D convolutions, a 3D max-pool and three dense layers.

    :param input_space: (TensorFlow Tensor) Input fed to the first 3D convolution
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) ReLU output of the 32-unit ``fc3`` layer
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    volume = input_space
    # Both convolutions share the same hyper-parameters and differ only in scope name.
    for scope in ('c1', 'c2'):
        volume = relu(conv3d(volume, scope, n_filters=16, filter_size=3, stride=1,
                             init_scale=gain, **kwargs))
    pooled = maxpool3d(volume, 2, 1, 'VALID')

    dense = conv_to_fc(pooled)
    for scope, width in (('fc1', 16), ('fc2', 16), ('fc3', 32)):
        dense = relu(linear(dense, scope, n_hidden=width, init_scale=gain))
    return dense
def modified_cnn(unscaled_images, **kwargs):
    """
    Scale the input by 1/255, then apply two ReLU convolutions and a dense layer.

    :param unscaled_images: (TensorFlow Tensor) Input; cast to float32 and divided by 255
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) ReLU output of the 512-unit ``fc1`` layer
    """
    import tensorflow as tf

    relu = tf.nn.relu
    gain = np.sqrt(2)

    as_float = tf.cast(unscaled_images, tf.float32)
    hidden = as_float / 255.
    # (scope name, filter size, stride); both layers use 32 filters.
    for scope, filter_size, stride in (('c1', 1, 1), ('c2', 2, 2)):
        hidden = relu(conv(hidden, scope, n_filters=32, filter_size=filter_size,
                           stride=stride, init_scale=gain, **kwargs))
    flattened = conv_to_fc(hidden)
    return relu(linear(flattened, 'fc1', n_hidden=512, init_scale=gain))
def tiny_filter_deep_nature_cnn(scaled_images, **kwargs):
    """
    Five small-filter ReLU convolutions followed by three ReLU dense layers.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) ReLU output of the 128-unit ``fc3`` layer
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # (scope name, number of filters, filter size); every convolution has stride 1.
    conv_specs = (
        ('c1', 6, 2),
        ('c2', 8, 2),
        ('c3', 10, 2),
        ('c4', 12, 3),
        ('c5', 14, 3),
    )
    tensor = scaled_images
    for scope, n_filters, filter_size in conv_specs:
        tensor = relu(conv(tensor, scope, n_filters=n_filters, filter_size=filter_size,
                           stride=1, init_scale=gain, **kwargs))

    tensor = conv_to_fc(tensor)
    for scope, width in (('fc1', 256), ('fc2', 128), ('fc3', 128)):
        tensor = relu(linear(tensor, scope, n_hidden=width, init_scale=gain))
    return tensor
def cnn(input_tensor, **kwargs):
    """
    Process the first 49 input columns as a 7x7 image and pass the next 50 through.

    Columns 0..48 of ``input_tensor`` are reshaped to ``(-1, 7, 7, 1)``, run
    through one ReLU convolution and a 49-unit ReLU dense layer. Columns
    49..98 are concatenated to that result unchanged.

    :param input_tensor: (TensorFlow Tensor) Rank-2 input (sliced with 2-element begin/size)
    :param kwargs: (dict) Extra keyword arguments forwarded to ``conv``
    :return: (TensorFlow Tensor) Dense image features concatenated with the untouched columns
    """
    n_pixels = 49
    n_passthrough = 50
    relu = tf.nn.relu
    gain = np.sqrt(2)

    image_columns = tf.slice(input_tensor, [0, 0], [-1, n_pixels], name='input_img')
    passthrough = tf.slice(input_tensor, [0, n_pixels], [-1, n_passthrough],
                           name='prev_outputs')
    image = tf.reshape(image_columns, (-1, 7, 7, 1))

    conv_out = relu(conv(image, 'c1', n_filters=16, filter_size=3, stride=1,
                         init_scale=gain, **kwargs))
    flattened = conv_to_fc(conv_out)
    image_features = relu(linear(flattened, 'fc1', n_hidden=n_pixels, init_scale=gain))

    return tf.concat([image_features, passthrough], 1)
def augmented_nature_cnn(scaled_images, **kwargs):
    """
    Nature-CNN head where the last image channel carries direct (non-image) features.

    The last channel is flattened and truncated to ``num_direct_features``
    entries (a name that must be defined outside this function). The
    remaining channels go through three ReLU convolutions and a 512-unit
    ReLU dense layer, and the direct features are appended to that output.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output concatenated with the direct features
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # Split off the last channel; only its first num_direct_features values are kept.
    direct_channel = scaled_images[..., -1]
    direct_features = tf.contrib.slim.flatten(direct_channel)[:, :num_direct_features]
    image_channels = scaled_images[..., :-1]

    # (scope name, number of filters, filter size, stride), in application order
    conv_specs = (
        ('cnn1', 32, 8, 4),
        ('cnn2', 64, 4, 2),
        ('cnn3', 64, 3, 1),
    )
    hidden = image_channels
    for scope, n_filters, filter_size, stride in conv_specs:
        hidden = relu(conv(hidden, scope, n_filters=n_filters, filter_size=filter_size,
                           stride=stride, init_scale=gain, **kwargs))
    flattened = conv_to_fc(hidden)
    image_features = relu(linear(flattened, 'cnn_fc1', n_hidden=512, init_scale=gain))

    return tf.concat((image_features, direct_features), axis=1)
def nature_cnn(scaled_images, **kwargs):
    """
    CNN from Nature paper, with an optional alternating input mask.

    When ``view`` is present in ``kwargs`` the input is multiplied by a 0/1
    mask built over the flattened ``h * w`` positions: ``'even'`` keeps
    positions whose flat index is odd (``i % 2``), ``'odd'`` keeps the
    others (``1 - i % 2``). The ``view`` entry is removed before the
    remaining kwargs are forwarded to ``conv``.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder (rank 4 when ``view`` is used)
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN,
        plus the optional ``view`` entry (``'even'`` or ``'odd'``)
    :return: (TensorFlow Tensor) The CNN output layer
    :raises NotImplementedError: if ``view`` is given but is neither ``'even'`` nor ``'odd'``
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    if 'view' in kwargs:
        # Remove 'view' so it is not passed on to conv().
        view_type = kwargs.pop('view')
        _, h, w, _ = scaled_images.shape
        if view_type == 'even':
            shift = 0
        elif view_type == 'odd':
            shift = 1
        else:
            raise NotImplementedError
        # (i + 1) % 2 equals 1 - i % 2 for the non-negative indices used here.
        flat_mask = np.array([(i + shift) % 2 for i in range(h * w)])
        mask = flat_mask.reshape((1, h, w, 1))
        scaled_images = scaled_images * tf.constant(mask, dtype=tf.float32)

    # (scope name, number of filters, filter size, stride), in application order
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    hidden = scaled_images
    for scope, n_filters, filter_size, stride in conv_specs:
        hidden = relu(conv(hidden, scope, n_filters=n_filters, filter_size=filter_size,
                           stride=stride, init_scale=gain, **kwargs))
    flattened = conv_to_fc(hidden)
    return relu(linear(flattened, 'fc1', n_hidden=512, init_scale=gain))
def customizedCNN(scaled_images, **kwargs):
    """
    Shared CNN trunk that ends in separate policy and value latent layers.

    Three 8-filter ReLU convolutions and two ReLU dense layers (256, 128)
    form the trunk; two independent 64-unit tanh layers are then built on
    top of it, one named ``pi_fc1`` and one named ``vf_fc1``.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (tuple of TensorFlow Tensor) ``(pi, vf)`` latent tensors
    """
    relu = tf.nn.relu
    squash = tf.tanh
    gain = np.sqrt(2)

    # (scope name, filter size, stride); every convolution uses 8 filters.
    trunk = scaled_images
    for scope, filter_size, stride in (('c1', 6, 3), ('c2', 3, 2), ('c3', 3, 1)):
        trunk = relu(conv(trunk, scope, n_filters=8, filter_size=filter_size,
                          stride=stride, init_scale=gain, **kwargs))

    trunk = conv_to_fc(trunk)
    for scope, width in (('fc1', 256), ('fc2', 128)):
        trunk = relu(linear(trunk, scope, n_hidden=width, init_scale=gain))

    # The policy head is created before the value head, as in the original.
    pi = squash(linear(trunk, "pi_fc{}".format(1), 64, init_scale=gain))
    vf = squash(linear(trunk, "vf_fc{}".format(1), 64, init_scale=gain))
    return pi, vf
def custom_cnn_extractor(input_images, **kwargs):
    """
    Two 8-filter ReLU convolutions followed by a 256-unit ReLU dense layer.

    Extra keyword arguments are accepted and forwarded to each ``conv`` call,
    matching the other extractors in this file. Without ``**kwargs`` the
    function raised ``TypeError`` whenever a caller passed extractor keyword
    arguments; calls with only ``input_images`` behave exactly as before.

    :param input_images: (TensorFlow Tensor) Input fed to the first convolution
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) ReLU output of the ``fc1`` layer
    """
    activ = tf.nn.relu
    init_scale = np.sqrt(2)

    features = input_images
    # Both convolutions share hyper-parameters and differ only in scope name.
    for scope in ('c1', 'c2'):
        features = activ(conv(features, scope, n_filters=8, filter_size=3, stride=1,
                              init_scale=init_scale, **kwargs))
    features = conv_to_fc(features)
    return activ(linear(features, 'fc1', n_hidden=256, init_scale=init_scale))
def modified_cnn(scaled_images, **kwargs):
    """
    Two ReLU convolutions followed by a 512-unit ReLU dense layer.

    :param scaled_images: (TensorFlow Tensor) Input fed to the first convolution
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) ReLU output of the ``fc1`` layer
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    first = conv(scaled_images, 'c1', n_filters=32, filter_size=4, stride=2,
                 init_scale=gain, **kwargs)
    second = conv(relu(first), 'c2', n_filters=64, filter_size=4, stride=1,
                  init_scale=gain, **kwargs)
    flattened = conv_to_fc(relu(second))
    dense = linear(flattened, 'fc1', n_hidden=512, init_scale=gain)
    return relu(dense)
def FullyConv1(image, n_tools, **kwargs):
    """
    Fully convolutional trunk producing an action map plus a separate value branch.

    Eight 'SAME'-padded, size-3, stride-1 ReLU convolutions (``c1``..``c8``)
    are stacked; the last one has ``n_tools`` filters and its output, passed
    through ``conv_to_fc``, is the action output. The value branch applies
    two further ReLU convolutions (``v1``, ``v4``) to that same ``c8`` output.
    ``kwargs`` is accepted but not used by this function.

    :param image: (TensorFlow Tensor) Input fed to the first convolution
    :param n_tools: (int) Number of filters of the last trunk convolution
    :param kwargs: (dict) Ignored
    :return: (tuple of TensorFlow Tensor) ``(act, val)``
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # Filter counts for c1..c8: 32, then six layers of 64, then n_tools.
    trunk_filters = [32] + [64] * 6 + [n_tools]
    trunk = image
    for index, n_filters in enumerate(trunk_filters, start=1):
        trunk = relu(conv(trunk, 'c{}'.format(index), n_filters=n_filters, filter_size=3,
                          stride=1, pad='SAME', init_scale=gain))
    act = conv_to_fc(trunk)

    # Value branch: no explicit padding argument, so conv's own default applies.
    val = relu(conv(trunk, 'v1', n_filters=64, filter_size=3, stride=2, init_scale=gain))
    val = relu(conv(val, 'v4', n_filters=64, filter_size=1, stride=1, init_scale=gain))
    val = conv_to_fc(val)

    return act, val
def nature_cnn(scaled_images, **kwargs):
    """
    CNN whose layer hyper-parameters are read from a JSON config file on each call.

    The file ``../stable-baselines/config.json`` (relative to the current
    working directory) must provide ``cnn_params`` with entries ``l1``,
    ``l2``, ``l3`` (each with ``n_filters``, ``filter_size``, ``stride``,
    ``init_scale``) and ``fc`` (with ``n_hidden``, ``init_scale``). The
    input is batch-normalised, then goes through three conv + ReLU +
    max-pool stages and one ReLU dense layer, all inside a "model"
    variable scope opened with ``reuse=False``.

    :param scaled_images: (TensorFlow Tensor) Input fed to the batch normalisation
    :param kwargs: (dict) Extra keyword arguments forwarded to every ``conv`` call
    :return: (TensorFlow Tensor) ReLU output of the ``fc1`` layer
    """
    with open('../stable-baselines/config.json', 'r') as config_file:
        config = json.load(config_file)

    relu = tf.nn.relu

    def conv_stage(inputs, scope, key):
        # Hyper-parameters are looked up only when the stage is built.
        params = config['cnn_params'][key]
        conv_out = relu(conv(inputs, scope,
                             n_filters=params['n_filters'],
                             filter_size=params['filter_size'],
                             stride=params['stride'],
                             init_scale=params['init_scale'],
                             **kwargs))
        return tf.nn.max_pool(value=conv_out, ksize=2, strides=[1, 2, 2, 1],
                              padding='VALID', data_format='NHWC')

    with tf.variable_scope("model", reuse=False):
        hidden = tf.contrib.layers.batch_norm(scaled_images)
        for scope, key in (('c1', 'l1'), ('c2', 'l2'), ('c3', 'l3')):
            hidden = conv_stage(hidden, scope, key)
        flattened = conv_to_fc(hidden)
        fc_params = config['cnn_params']['fc']
        return relu(linear(flattened, 'fc1', n_hidden=fc_params['n_hidden'],
                           init_scale=fc_params['init_scale']))
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False,
             h_shape=(10, 10, 1024), **kwargs):
    """
    Build the policy graph with per-layer scale/shift modulation driven by ``gamma``.

    The last axis of ``self.processed_obs`` is split into three parts: all
    but the final 25 entries are reshaped to ``h_shape`` and used as the
    convolutional input, the next 24 entries are ``nu_bbox``, and the very
    last entry is ``gamma``. After each of ``c1``, ``c2`` and ``fc1`` the
    activations are multiplied and shifted by the two outputs of
    ``self.create_mlp_for_yoto`` evaluated on ``gamma``.

    ``h_shape`` now defaults to a tuple instead of a list so the default
    object cannot be mutated across calls; it is only ever indexed, so
    callers passing a list are unaffected.

    :param sess: Forwarded unchanged to the parent constructor
    :param ob_space: Forwarded unchanged to the parent constructor
    :param ac_space: Forwarded unchanged to the parent constructor
    :param n_env: Forwarded unchanged to the parent constructor
    :param n_steps: Forwarded unchanged to the parent constructor
    :param n_batch: Forwarded unchanged to the parent constructor
    :param reuse: (bool) Passed to the parent, the "model" scope and ``create_mlp_for_yoto``
    :param h_shape: (sequence of 3 ints) Shape the feature part of the observation is reshaped to
    :param kwargs: (dict) Extra keyword arguments forwarded to ``conv``
    """
    super(CropPolicyYOTO, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                         reuse=reuse, scale=True)

    with tf.variable_scope("model", reuse=reuse):
        # Split the observation along its last axis.
        nu_bbox = self.processed_obs[:, :, -25:-1]
        nu_bbox = tf.reshape(nu_bbox, [tf.shape(nu_bbox)[0], nu_bbox.shape[2]])
        h_obs = self.processed_obs[:, :, :-25]
        h_obs = tf.reshape(
            h_obs, [tf.shape(h_obs)[0], h_shape[0], h_shape[1], h_shape[2]])
        gamma = self.processed_obs[:, :, -1:]
        gamma = tf.reshape(gamma, [tf.shape(gamma)[0], gamma.shape[2]])

        activ = tf.nn.relu

        def modulate(features, layer_name, out_size, hidden_size, spatial):
            # Scale then shift `features` with the two gamma-conditioned MLP outputs.
            mean_mlp, std_mlp = self.create_mlp_for_yoto(gamma=gamma,
                                                         out_layer_size=out_size,
                                                         hidden_layer_size=hidden_size,
                                                         layer_name=layer_name,
                                                         reuse=reuse)
            if spatial:
                # Insert two axes after the batch axis so the MLP outputs
                # broadcast against the convolution output.
                mean_mlp = tf.expand_dims(mean_mlp, 1)
                mean_mlp = tf.expand_dims(mean_mlp, 1)
                std_mlp = tf.expand_dims(std_mlp, 1)
                std_mlp = tf.expand_dims(std_mlp, 1)
            scaled = tf.multiply(std_mlp, features)
            return tf.add(mean_mlp, scaled)

        x = activ(conv(h_obs, 'c1', n_filters=128, filter_size=3, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
        x = modulate(x, 'c1', out_size=128, hidden_size=64, spatial=True)

        x = activ(conv(x, 'c2', n_filters=128, filter_size=3, stride=1,
                       init_scale=np.sqrt(2), **kwargs))
        x = modulate(x, 'c2', out_size=128, hidden_size=64, spatial=True)

        x = tf.layers.max_pooling2d(x, 2, 2, name='pool1')
        x = conv_to_fc(x)
        x = activ(linear(x, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
        x = modulate(x, 'fc1', out_size=512, hidden_size=128, spatial=False)

        extracted_features = tf.layers.flatten(x)
        # Append the nu_bbox entries to the extracted features.
        extracted_features = tf.concat([extracted_features, nu_bbox], axis=1)

        pi_h = extracted_features
        for i, layer_size in enumerate([128, 128, 128]):
            pi_h = activ(tf.layers.dense(pi_h, layer_size, name='pi_fc' + str(i)))
        pi_latent = pi_h

        vf_h = extracted_features
        for i, layer_size in enumerate([32, 32]):
            vf_h = activ(tf.layers.dense(vf_h, layer_size, name='vf_fc' + str(i)))
        value_fn = tf.layers.dense(vf_h, 1, name='vf')
        vf_latent = vf_h

        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

    self._value_fn = value_fn
    self._setup_init()
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False,
             h_shape=(23, 23, 1024), **kwargs):
    """
    Build the policy graph: a small CNN over the feature part of the observation.

    The last axis of ``self.processed_obs`` is split in two: the final 24
    entries are ``nu_bbox`` and everything before them is reshaped to
    ``h_shape`` and fed to two ReLU convolutions, a 2x2 max-pool and a
    512-unit ReLU dense layer. ``nu_bbox`` is concatenated to those features
    before the separate policy (3 x 128) and value (2 x 32) dense stacks.

    ``h_shape`` now defaults to a tuple instead of a list so the default
    object cannot be mutated across calls; it is only ever indexed, so
    callers passing a list are unaffected.

    :param sess: Forwarded unchanged to the parent constructor
    :param ob_space: Forwarded unchanged to the parent constructor
    :param ac_space: Forwarded unchanged to the parent constructor
    :param n_env: Forwarded unchanged to the parent constructor
    :param n_steps: Forwarded unchanged to the parent constructor
    :param n_batch: Forwarded unchanged to the parent constructor
    :param reuse: (bool) Passed to the parent and to the "model" variable scope
    :param h_shape: (sequence of 3 ints) Shape the feature part of the observation is reshaped to
    :param kwargs: (dict) Extra keyword arguments forwarded to ``conv``
    """
    super(CropPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                     reuse=reuse, scale=True)

    with tf.variable_scope("model", reuse=reuse):
        # Split the observation along its last axis.
        nu_bbox = self.processed_obs[:, :, -24:]
        nu_bbox = tf.reshape(nu_bbox, [tf.shape(nu_bbox)[0], nu_bbox.shape[2]])
        h_obs = self.processed_obs[:, :, :-24]
        h_obs = tf.reshape(
            h_obs, [tf.shape(h_obs)[0], h_shape[0], h_shape[1], h_shape[2]])

        activ = tf.nn.relu
        layer_1 = activ(conv(h_obs, 'c1', n_filters=128, filter_size=3, stride=1,
                             init_scale=np.sqrt(2), **kwargs))
        layer_2 = activ(conv(layer_1, 'c2', n_filters=128, filter_size=3, stride=1,
                             init_scale=np.sqrt(2), **kwargs))
        layer_3 = tf.layers.max_pooling2d(layer_2, 2, 2, name='pool1')
        layer_3 = conv_to_fc(layer_3)
        extracted_features = activ(
            linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
        extracted_features = tf.layers.flatten(extracted_features)
        # Append the nu_bbox entries to the extracted features.
        extracted_features = tf.concat([extracted_features, nu_bbox], axis=1)

        pi_h = extracted_features
        for i, layer_size in enumerate([128, 128, 128]):
            pi_h = activ(tf.layers.dense(pi_h, layer_size, name='pi_fc' + str(i)))
        pi_latent = pi_h

        vf_h = extracted_features
        for i, layer_size in enumerate([32, 32]):
            vf_h = activ(tf.layers.dense(vf_h, layer_size, name='vf_fc' + str(i)))
        value_fn = tf.layers.dense(vf_h, 1, name='vf')
        vf_latent = vf_h

        self._proba_distribution, self._policy, self.q_value = \
            self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

    self._value_fn = value_fn
    self._setup_init()