def _transform_into_images(config, data, img_type="all"):
    """Reshape node data of shape (batch_size, feature_length) into the
    required image shape with a leading batch dimension, e.g. (1,120,160,7)."""
    target_shape = get_correct_image_shape(config, get_type=img_type)
    # strip the trailing 6 non-visual elements (velocity + position) first
    visual_part = data[:, :-6]
    return tf.reshape(visual_part, [-1, *target_shape])
def _transform_edge_into_images(config, data, img_type="all", output_cnn_2_filter_maps=False):
    """Reshape edge data of shape (batch_size, feature_length) into the
    required image shape with a leading batch dimension, e.g. (1,120,160,7)."""
    target_shape = get_correct_image_shape(config, get_type=img_type)
    if output_cnn_2_filter_maps:
        # override: the second CNN emits two filter maps at full resolution
        target_shape = (120, 160, 2)
    return tf.reshape(data, [-1, *target_shape])
def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.7):
    """Encode a segmentation image into a flat feature vector.

    Reshapes `inputs` into (?, H, W, C) for the "seg" image type, then applies
    four stride-2 3x3 convolutions (spatial size 120x160 -> 60x80 -> 30x40 ->
    15x20 -> 8x10) and flattens the result.
    """
    cls = EncodeProcessDecode_v7_edge_segmentation_no_edges_dropout
    activation = tf.nn.tanh if cls.convnet_tanh else tf.nn.relu

    img_shape = get_correct_image_shape(
        config=None, get_type="seg", depth_data_provided=cls.depth_data_provided)
    # -1 keeps the batch dimension variable
    outputs = tf.reshape(inputs, [-1, *img_shape])
    print(outputs.get_shape())

    for channels in (32, 32, 16, 5):
        outputs = snt.Conv2D(output_channels=channels, kernel_shape=3,
                             stride=2, padding="SAME")(outputs)
        outputs = activation(outputs)
        if cls.conv_layer_instance_norm:
            # NOTE(review): flag says "instance_norm" but a BatchNorm module is
            # applied — confirm the naming is intentional
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        print(outputs.get_shape())

    # flatten (?, 8, 10, 5) -> (?, 400)
    return tf.layers.flatten(outputs)
def _build(self, inputs, name, verbose=VERBOSITY, keep_dropout_prop=0.9):
    # CNN encoder: maps per-node feature vectors (flattened image + 6 trailing
    # non-visual elements) to a flattened visual latent vector. Stores three
    # intermediate activations as self.skip1/2/3 for a decoder to consume.
    # NOTE(review): class name says "no_skip" yet skips are stored — confirm.
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    """ velocity (x,y,z) and position (x,y,z) """
    n_non_visual_elements = 6
    filter_sizes = [
        EncodeProcessDecode_v5_no_skip_batch_norm.n_conv_filters,
        EncodeProcessDecode_v5_no_skip_batch_norm.n_conv_filters * 2
    ]
    """ shape: (batch_size, features), get everything except velocity and position """
    img_data = inputs[:, :-n_non_visual_elements]
    img_shape = get_correct_image_shape(
        config=None,
        get_type="all",
        depth_data_provided=EncodeProcessDecode_v5_no_skip_batch_norm.
        depth_data_provided)
    img_data = tf.reshape(
        img_data, [-1, *img_shape])  # -1 means "all", i.e. batch dimension
    ''' Layer1 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs1 = tf.layers.conv2d(
        img_data,
        filters=64,
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    # NOTE(review): flag is named "instance_norm" but snt.BatchNorm is applied
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs1 = snt.BatchNorm()(outputs1, is_training=self._is_training)
    l1_shape = outputs1.get_shape()
    ''' Layer2 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs = tf.layers.conv2d(
        outputs1,
        filters=64,
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l2_shape = outputs.get_shape()
    ''' Layer3 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l3_shape = outputs.get_shape()
    # dropout only at train time; keep_prob=1.0 keeps the op a no-op at test time
    if self._is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer4 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[0],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l4_shape = outputs.get_shape()
    ''' Layer5 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[0],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    # --------------- SKIP CONNECTION --------------- #
    # skip2 is captured BEFORE the optional normalization below
    outputs2 = outputs
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l5_shape = outputs.get_shape()
    ''' Layer6 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l6_shape = outputs.get_shape()
    if self._is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer7 encoder output shape (?, 30, 40, filter_sizes[1]) '''
    # NOTE(review): banner above says filter_sizes[1] but filters=filter_sizes[0]
    # is used here — confirm which is intended
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[0],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l7_shape = outputs.get_shape()
    ''' Layer8 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[0],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l8_shape = outputs.get_shape()
    ''' Layer9 encoder output shape (?, 15, 20, filter_sizes[0]) '''
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l9_shape = outputs.get_shape()
    if self._is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer10 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[1],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l10_shape = outputs.get_shape()
    ''' Layer11 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[1],
        kernel_size=3,
        strides=1,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    # --------------- SKIP CONNECTION --------------- #
    # skip3 is captured BEFORE the optional normalization below
    outputs3 = outputs
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l11_shape = outputs.get_shape()
    ''' Layer12 encoder output shape (?, 7, 10, filter_sizes[1]) '''
    # NOTE(review): unlike the earlier pooling stages, no dropout follows here
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l12_shape = outputs.get_shape()
    ''' Layer13 encoder output shape (?, 4, 5, filter_sizes[1]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[1],
        kernel_size=3,
        strides=2,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l13_shape = outputs.get_shape()
    ''' Layer14 encoder output shape (?, 2, 3, filter_sizes[1]) '''
    outputs = tf.layers.conv2d(
        outputs,
        filters=filter_sizes[1],
        kernel_size=3,
        strides=2,
        padding='same',
        activation=activation,
        use_bias=False,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v5_no_skip_batch_norm.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l14_shape = outputs.get_shape()
    ''' Layer15 encoder output shape (?, 1, 1, filter_sizes[1]) '''
    if EncodeProcessDecode_v5_no_skip_batch_norm.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l15_shape = outputs.get_shape()
    if verbose:
        print("Layer1 encoder output shape", l1_shape)
        print("Layer2 encoder output shape", l2_shape)
        print("Layer3 encoder output shape", l3_shape)
        print("Layer4 encoder output shape", l4_shape)
        print("Layer5 encoder output shape", l5_shape)
        print("Layer6 encoder output shape", l6_shape)
        print("Layer7 encoder output shape", l7_shape)
        print("Layer8 encoder output shape", l8_shape)
        print("Layer9 encoder output shape", l9_shape)
        print("Layer10 encoder output shape", l10_shape)
        print("Layer11 encoder output shape", l11_shape)
        print("Layer12 encoder output shape", l12_shape)
        print("Layer13 encoder output shape", l13_shape)
        print("Layer14 encoder output shape", l14_shape)
        print("Layer15 encoder output shape", l15_shape)
    #' shape (?, 7, 10, filter_sizes[1]) -> (?, n_neurons_nodes_total_dim-n_neurons_nodes_non_visual) '
    visual_latent_output = tf.layers.flatten(outputs)
    #visual_latent_output = tf.layers.dense(inputs=visual_latent_output, units=EncodeProcessDecode_v4_172_improve_shapes_exp1.n_neurons_nodes_total_dim - EncodeProcessDecode_v4_172_improve_shapes_exp1.n_neurons_nodes_non_visual)
    # --------------- SKIP CONNECTION --------------- #
    # expose pre-norm activations for a decoder with skip connections
    self.skip1 = outputs1
    self.skip2 = outputs2
    self.skip3 = outputs3
    return visual_latent_output
def _build(self, inputs, name, is_training=True, verbose=False):
    # CNN encoder (v1): strips the trailing non-visual elements from each node
    # feature vector, reshapes the rest into an image, runs a conv/pool stack,
    # and projects the flattened result to the visual latent dimension.
    if EncodeProcessDecode_v1.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    # global feature vectors carry 5 non-visual elements, node vectors carry 6
    if "global" in name:
        n_non_visual_elements = 5
    else:
        n_non_visual_elements = 6
    filter_sizes = [EncodeProcessDecode_v1.n_conv_filters, EncodeProcessDecode_v1.n_conv_filters * 2]
    img_data = inputs[:, :-n_non_visual_elements]  # shape: (batch_size, features)
    img_shape = get_correct_image_shape(config=None, get_type="all", depth_data_provided=EncodeProcessDecode_v1.depth_data_provided)
    img_data = tf.reshape(img_data, [-1, *img_shape])  # -1 means "all", i.e. batch dimension
    ''' layer 1'''
    # NOTE(review): conv2d already applies `activation`; the explicit
    # activation(outputs) below applies it a second time (matters for tanh,
    # idempotent for relu) — confirm this double application is intended
    outputs = tf.layers.conv2d(img_data, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    l1_shape = outputs.get_shape()
    ''' layer 2'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    l2_shape = outputs.get_shape()
    ''' layer 3'''
    if EncodeProcessDecode_v1.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l3_shape = outputs.get_shape()
    ''' layer 4'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    l4_shape = outputs.get_shape()
    ''' layer 5'''
    if EncodeProcessDecode_v1.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l5_shape = outputs.get_shape()
    ''' layer 6'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    l6_shape = outputs.get_shape()
    ''' layer 7'''
    if EncodeProcessDecode_v1.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l7_shape = outputs.get_shape()
    ''' layer 8'''
    # NOTE(review): this is the only conv layer with no batch_normalization
    # after it — confirm the omission is intentional
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    l8_shape = outputs.get_shape()
    ''' layer 9'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    l9_shape = outputs.get_shape()
    ''' layer 10'''
    if EncodeProcessDecode_v1.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l10_shape = outputs.get_shape()
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    if verbose:
        print("Layer0 encoder output shape", l1_shape)
        print("Layer1 encoder output shape", l2_shape)
        print("Layer2 encoder output shape", l3_shape)
        print("Layer3 encoder output shape", l4_shape)
        print("Layer4 encoder output shape", l5_shape)
        print("Layer5 encoder output shape", l6_shape)
        print("Layer6 encoder output shape", l7_shape)
        print("Layer7 encoder output shape", l8_shape)
        print("Layer8 encoder output shape", l9_shape)
        print("Layer9 encoder output shape", l10_shape)
    ' shape (?, 7, 10, 32) -> (?, dimensions_latent_repr-n_neurons_mlp_nonvisual) '
    visual_latent_output = tf.layers.flatten(outputs)
    ''' layer 11'''
    # dense projection to the visual part of the latent representation
    visual_latent_output = tf.layers.dense(inputs=visual_latent_output, units=EncodeProcessDecode_v1.dimensions_latent_repr - EncodeProcessDecode_v1.n_neurons_mlp_nonvisual)
    return visual_latent_output
def _build(self, inputs, is_training=True, verbose=False):
    # CNN decoder (v1): takes the visual part of a latent vector, reshapes it
    # to a (7,10,15) feature map and upsamples with conv2d_transpose layers
    # back to the full image resolution (120,160,img_channels).
    filter_sizes = [EncodeProcessDecode_v1.n_conv_filters, EncodeProcessDecode_v1.n_conv_filters * 2]
    if EncodeProcessDecode_v1.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    img_shape = get_correct_image_shape(config=None, get_type='all', depth_data_provided=EncodeProcessDecode_v1.depth_data_provided)
    """ get image data, get everything >except< last n elements which are non-visual """
    image_data = inputs[:, :-EncodeProcessDecode_v1.n_neurons_mlp_nonvisual]
    visual_latent_space_dim = EncodeProcessDecode_v1.dimensions_latent_repr - EncodeProcessDecode_v1.n_neurons_mlp_nonvisual
    """ in order to apply 1x1 2D convolutions, transform shape (batch_size, features) -> shape (batch_size, 1, 1, features)"""
    image_data = tf.expand_dims(image_data, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?,1,1,latent_dim)
    #assert is_square(visual_latent_space_dim), "dimension of visual latent space vector (dimensions of latent representation: ({}) - " \
    #                                           "dimensions of non visual latent representation({})) must be square".format(
    #    EncodeProcessDecode.dimensions_latent_repr, EncodeProcessDecode.n_neurons_mlp_nonvisual)
    #image_data = tf.reshape(image_data, (-1, int(math.sqrt(visual_latent_space_dim)), int(math.sqrt(visual_latent_space_dim)), 1))
    #image_data = tf.reshape(image_data, (-1, 7, 10, 5))
    # NOTE(review): hard-coded (7,10,15) implies visual latent dim == 1050 —
    # confirm this matches dimensions_latent_repr - n_neurons_mlp_nonvisual
    image_data = tf.reshape(image_data, (-1, 7, 10, 15))
    ''' layer 1 (7,10,5) -> (7,10,filter_sizes[1])'''
    outputs = tf.layers.conv2d_transpose(image_data, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l1_shape = outputs.get_shape()
    ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    # kernel (3,2) with stride 2 and 'valid' padding maps 7x10 -> 15x20 exactly
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=(3, 2), strides=2, padding='valid')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l2_shape = outputs.get_shape()
    ''' layer 2 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l3_shape = outputs.get_shape()
    ''' layer 2 (15,20,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    # NOTE(review): banner above claims upsampling but strides=1 keeps (15,20);
    # the following strides=2 layer performs the (15,20)->(30,40) step —
    # the banner labels look stale/off by one layer
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l4_shape = outputs.get_shape()
    ''' layer 3 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=2, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l5_shape = outputs.get_shape()
    ''' layer 4 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l6_shape = outputs.get_shape()
    ''' layer 5 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l7_shape = outputs.get_shape()
    ''' layer 5 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l8_shape = outputs.get_shape()
    ''' layer 5 (60,80,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l9_shape = outputs.get_shape()
    ''' layer 5 (120,160,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l10_shape = outputs.get_shape()
    ''' layer 6 (120,160,filter_sizes[0]) -> (120,160,3 or 4 or 7]) '''
    # 1x1 transpose conv projects to the target image channel count
    outputs = tf.layers.conv2d_transpose(outputs, filters=img_shape[2], kernel_size=1, strides=1, padding='same')
    outputs = tf.layers.batch_normalization(outputs, training=is_training)
    outputs = activation(outputs)
    l11_shape = outputs.get_shape()
    visual_latent_output = tf.layers.flatten(outputs)
    if verbose:
        print("Image data shape", image_data.get_shape())
        print("Layer1 decoder output shape", l1_shape)
        print("Layer2 decoder output shape", l2_shape)
        print("Layer3 decoder output shape", l3_shape)
        print("Layer4 decoder output shape", l4_shape)
        print("Layer5 decoder output shape", l5_shape)
        print("Layer6 decoder output shape", l6_shape)
        print("Layer7 decoder output shape", l7_shape)
        print("Layer8 decoder output shape", l8_shape)
        print("Layer9 decoder output shape", l9_shape)
        print("Layer10 decoder output shape", l10_shape)
        print("Layer11 decoder output shape", l11_shape)
        print("decoder shape before adding non-visual data", visual_latent_output.get_shape())
    # outputs = tf.nn.dropout(outputs, keep_prob=tf.constant(1.0))  # todo: deal with train/test time
    return visual_latent_output
def graph_to_input_and_targets_single_experiment(config, graph, features, initial_pos_vel_known, return_only_unpadded=False):
    """Build input and target graph sequences for one experiment rollout.

    Args:
      config: experiment configuration (gripper_as_global, n_rollouts,
        global_output_size, remove_edges, remove_pos_vel, ...).
      graph: An `nx.DiGraph` template whose nodes/edges get features filled in.
      features: dict of per-step arrays (object_segments, gripperpos,
        grippervel, objpos, objvel, img, seg, depth, experiment metadata).
      initial_pos_vel_known: if False, position/velocity info is zeroed out.
      return_only_unpadded: if True, drop padded steps (padding flag != 0).

    Returns:
      A tuple (input_graphs, target_graphs, experiment_id); inputs are shifted
      by one step relative to targets (the first state is used for init).

    Raises:
      ValueError: unknown node type
    """
    gripper_as_global = config.gripper_as_global
    data_offset_manipulable_objects = config.data_offset_manipulable_objects
    experiment_length = features['experiment_length']
    experiment_id = features['experiment_id']

    # handles the testing cycles when a different number of rollouts shall be
    # predicted than seen in training.
    # BUGFIX: was `is not`, an identity comparison that only works by accident
    # for small cached ints — use value inequality
    if config.n_rollouts != experiment_length:
        experiment_length = config.n_rollouts
    target_graphs = [graph.copy() for _ in range(experiment_length)]

    def create_node_feature(attr, features, step, config):
        # Build the flat feature vector for a single node at `step`.
        if attr['type_name'] == 'container':
            # container only has object segmentations; pad up to fixed size
            # since sonnet can only handle fixed-sized features
            res = attr['features']
            if config.use_object_seg_data_only_for_init:
                feature = features['object_segments'][1].flatten()
            else:
                feature = features['object_segments'][step][1].flatten()
            res[:feature.shape[0]] = feature
            return res
        elif attr['type_name'] == 'gripper':
            # gripper only has obj segs and gripper pos/vel
            if config.use_object_seg_data_only_for_init:
                obj_seg = features['object_segments'][0].flatten()
            else:
                obj_seg = features['object_segments'][step][0].flatten()
            pos = features['gripperpos'][step].flatten().astype(np.float32)
            vel = features['grippervel'][step].flatten().astype(np.float32)
            return np.concatenate((obj_seg, vel, pos))
        elif "manipulable" in attr['type_name']:
            # we assume shape (image features, vel(3dim), pos(3dim))
            obj_id = int(attr['type_name'].split("_")[2])
            obj_id_segs = obj_id + data_offset_manipulable_objects
            # obj_seg will have data as following: (rgb, seg, optionally: depth)
            if config.use_object_seg_data_only_for_init:
                # nodes carry static visual information over time
                obj_seg = features['object_segments'][obj_id].flatten()
            else:
                # nodes carry dynamic visual information over time
                obj_seg = features['object_segments'][step][obj_id_segs].astype(np.float32)
                # nodes have full access to scene observation (i.e. rgb and depth)
                if config.nodes_get_full_rgb_depth:
                    rgb = features["img"][step].astype(np.float32)
                    depth = features["depth"][step].astype(np.float32)
                    obj_seg[:, :, :3] = rgb
                    obj_seg[:, :, -3:] = depth
                obj_seg = obj_seg.flatten()
            pos = features['objpos'][step][obj_id].flatten().astype(np.float32)
            # normalize velocity
            # """ (normalized) velocity is computed here since rolled indexing in
            # tfrecords seems not straightforward """
            # if step == 0:
            #     diff = np.zeros(shape=3, dtype=np.float32)
            # else:
            #     diff = features['objpos'][step-1][obj_id] - features['objpos'][step][obj_id]
            # if config.normalize_data:
            #     vel = normalize_list([diff])[0]
            # vel = (diff * 240.0).flatten().astype(np.float32)
            vel = features['objvel'][step][obj_id].flatten().astype(np.float32)
            if config.remove_pos_vel:
                pos = np.zeros(shape=np.shape(pos), dtype=np.float32)
                vel = np.zeros(shape=np.shape(vel), dtype=np.float32)
            return np.concatenate((obj_seg, vel, pos))

    # NOTE(review): currently unused in this function — kept for callers/
    # experiments that may rely on it; consider removing if confirmed dead
    def create_edge_feature_distance(receiver, sender, target_graph_i):
        # Relative position between receiver and sender nodes.
        node_feature_rcv = target_graph_i.nodes(data=True)[receiver]
        node_feature_snd = target_graph_i.nodes(data=True)[sender]
        # the position is always the last three elements of the flattened feature vector
        pos1 = node_feature_rcv['features'][-3:]
        pos2 = node_feature_snd['features'][-3:]
        return (pos1 - pos2).astype(np.float32)

    def create_edge_feature(sender, target_graph, target_graph_previous, seg_as_edges, img_shape=None):
        # Edge feature from the sender node: either (vel_t, pos_{t-1}, pos_t)
        # or, when seg_as_edges, the sender's segmentation channel.
        if not seg_as_edges:
            node_feature_snd_prev = target_graph_previous.nodes(data=True)[sender]
            node_feature_snd = target_graph.nodes(data=True)[sender]
            # the position is always the last three elements of the flattened feature vector
            pos_prev = node_feature_snd_prev["features"][-3:]
            vel_pos = node_feature_snd['features'][-6:]
            vel_pos = np.insert(vel_pos, 3, pos_prev)  # yields (vel_t, pos_{t-1}, pos_t)
            return vel_pos.astype(np.float32)
        else:
            node_feature = target_graph.nodes(data=True)[sender]['features'][:-6]
            node_feature = np.reshape(node_feature, img_shape)
            # channel 3 is the segmentation mask
            return node_feature[:, :, 3].flatten()

    input_control_graphs = []
    for step in range(experiment_length):
        for node_index, node_feature in graph.nodes(data=True):
            node_feature = create_node_feature(node_feature, features, step, config)
            target_graphs[step].add_node(node_index, features=node_feature)

        # if gripper_as_global = True, graphs will have one node less;
        # add globals (image, segmentation, depth, gravity, time_step)
        if gripper_as_global:
            if config.global_output_size == 5:
                global_features = np.concatenate(
                    (np.atleast_1d(step), np.atleast_1d(constants.g),
                     features['gripperpos'][step].flatten())).astype(np.float32)
            elif config.global_output_size == 9:
                # padding flag marks steps beyond the real (unpadded) rollout
                padding_flag = 1 if step >= features["unpadded_experiment_length"] else 0
                global_features = np.concatenate((
                    np.atleast_1d(padding_flag),
                    np.atleast_1d(step),
                    np.atleast_1d(constants.g),
                    features['gripperpos'][step].flatten(),
                    features['grippervel'][step].flatten(),
                )).astype(np.float32)
            else:
                global_features = np.concatenate(
                    (features['img'][step].flatten(),
                     features['seg'][step].flatten(),
                     features['depth'][step].flatten(),
                     np.atleast_1d(step), np.atleast_1d(constants.g),
                     features['gripperpos'][step].flatten())).astype(np.float32)
            target_graphs[step].graph["features"] = global_features

            # assign gripperpos to input control graphs (node/edge features wiped)
            input_control_graph = graph.copy()
            for i in range(input_control_graph.number_of_nodes()):
                input_control_graph.nodes(data=True)[i]["features"] = None
            # NOTE(review): edges(data=True) yields (u, v, data); the reversed
            # G[sender][receiver] access relies on the graph being connected in
            # both directions — confirm
            for receiver, sender, edge_feature in input_control_graph.edges(data=True):
                input_control_graph[sender][receiver][0]['features'] = None
            input_control_graph.graph["features"] = global_features
            assert target_graphs[step].graph["features"].shape[0] == config.global_output_size
            assert input_control_graph.graph["features"].shape[0] == config.global_output_size
            input_control_graphs.append(input_control_graph)
        else:
            if config.global_output_size == 2:
                target_graphs[step].graph["features"] = np.concatenate(
                    (np.atleast_1d(step), np.atleast_1d(constants.g))).astype(np.float32)
                #assert target_graphs[step].graph["features"].shape[0]-3 == config.global_output_size
            input_control_graphs = None

    # compute distances between every manipulable object (and gripper if not gripper_as_global)
    for step in range(experiment_length):
        for sender, receiver, edge_feature in target_graphs[step].edges(data="features"):
            if step == 0:
                target_graphs_previous = target_graphs[step]
            else:
                target_graphs_previous = target_graphs[step - 1]
            edge_feature = create_edge_feature(
                sender=sender,
                target_graph=target_graphs[step],
                target_graph_previous=target_graphs_previous,
                seg_as_edges=config.edges_carry_segmentation_data,
                img_shape=get_correct_image_shape(config, get_type='all'))
            if config.remove_edges:
                edge_feature = np.zeros(shape=np.shape(edge_feature), dtype=np.float32)
            target_graphs[step].add_edge(sender, receiver, key=0, features=edge_feature)

    input_graphs = []
    for i in range(experiment_length - 1):
        inp = target_graphs[i].copy()
        # gripperpos and grippervel always reflect the current step. However,
        # we are interested in predicting the effects of a new/next control
        # command --> shift by one
        inp.graph["features"] = input_control_graphs[i + 1].graph["features"]
        input_graphs.append(inp)
    target_graphs = target_graphs[1:]  # first state is used for init

    # todo: following code assumes all nodes are of type 'manipulable'
    # set velocity and position info to zero
    if not initial_pos_vel_known:
        for graph in input_graphs:
            # for all nodes
            for idx, node_feature in graph.nodes(data=True):
                feat = node_feature['features']
                feat[-6:] = 0
                graph.add_node(idx, features=feat)
            # for all edges
            for receiver, sender, edge_feature in graph.edges(data=True):
                feat = edge_feature['features']
                feat[:] = 0
                graph.add_edge(sender, receiver, features=feat)

    if return_only_unpadded:
        # keep only steps whose padding flag (first global element) is 0
        input_graphs = [
            graph for graph in input_graphs if graph.graph['features'][0] == 0
        ]
        target_graphs = [
            graph for graph in target_graphs if graph.graph['features'][0] == 0
        ]

    # check if the gripper pos+vel in the input graph are values from the next time step
    assert (input_graphs[0].graph['features'] == target_graphs[0].graph['features']).all()

    return input_graphs, target_graphs, experiment_id
def _build(self, inputs, name, verbose=VERBOSITY, keep_dropout_prop=0.7):
    # CNN encoder (v4, 1082-dim latent, seg-only skips): conv/pool/dropout
    # stack with layer normalization; stores three intermediate activations as
    # self.skip1/2/3 for the decoder's skip connections.
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    n_non_visual_elements = 6  # velocity (x,y,z) and position (x,y,z)
    filter_sizes = [EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_conv_filters,
                    EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_conv_filters * 2]
    img_data = inputs[:, :-n_non_visual_elements]  # shape: (batch_size, features)
    img_shape = get_correct_image_shape(config=None, get_type="all",
                                        depth_data_provided=EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.depth_data_provided)
    img_data = tf.reshape(img_data, [-1, *img_shape])  # -1 means "all", i.e. batch dimension
    ''' layer 1'''
    # NOTE(review): conv2d already applies `activation`; the explicit
    # activation(outputs1) below applies it a second time — confirm intended.
    # skip1 (outputs1) is captured after the conv's activation but before the
    # second activation and layer_norm.
    outputs1 = tf.layers.conv2d(img_data, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs1)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l1_shape = outputs.get_shape()
    ''' layer 2'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l2_shape = outputs.get_shape()
    ''' layer 3'''
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l3_shape = outputs.get_shape()
    # dropout only at train time; keep_prob=1.0 keeps the op a no-op otherwise
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 4'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l4_shape = outputs.get_shape()
    ''' layer 5'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation)
    # --------------- SKIP CONNECTION --------------- #
    # skip2 is captured before the extra activation and layer_norm below
    outputs2 = outputs
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l5_shape = outputs.get_shape()
    ''' layer 6'''
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l6_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 7'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l7_shape = outputs.get_shape()
    ''' layer 8'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l8_shape = outputs.get_shape()
    ''' layer 9'''
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l9_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 10'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l10_shape = outputs.get_shape()
    ''' layer 11'''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation)
    # --------------- SKIP CONNECTION --------------- #
    # skip3 is captured before the extra activation and layer_norm below
    outputs3 = outputs
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l11_shape = outputs.get_shape()
    ''' layer 12'''
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l12_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    if verbose:
        print("Layer1 encoder output shape", l1_shape)
        print("Layer2 encoder output shape", l2_shape)
        print("Layer3 encoder output shape", l3_shape)
        print("Layer4 encoder output shape", l4_shape)
        print("Layer5 encoder output shape", l5_shape)
        print("Layer6 encoder output shape", l6_shape)
        print("Layer7 encoder output shape", l7_shape)
        print("Layer8 encoder output shape", l8_shape)
        print("Layer9 encoder output shape", l9_shape)
        print("Layer10 encoder output shape", l10_shape)
        print("Layer11 encoder output shape", l11_shape)
        print("Layer12 encoder output shape", l12_shape)
    ' shape (?, 7, 10, 32) -> (?, n_neurons_nodes_total_dim-n_neurons_nodes_non_visual) '
    visual_latent_output = tf.layers.flatten(outputs)
    ''' layer 11'''
    # dense projection to the visual part of the node latent representation
    visual_latent_output = tf.layers.dense(inputs=visual_latent_output,
                                           units=EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_neurons_nodes_total_dim - EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_neurons_nodes_non_visual)
    # --------------- SKIP CONNECTION --------------- #
    # expose intermediate activations for the decoder's skip connections
    self.skip1 = outputs1
    self.skip2 = outputs2
    self.skip3 = outputs3
    return visual_latent_output
def _build(self, inputs, name, verbose=VERBOSITY, keep_dropout_prop=0.7):
    """Visual decoder with skip/residual merges (TF1-style graph construction).

    Strips the trailing non-visual elements from the node feature vector,
    reinterprets the remaining visual latent as a (7, 10, 15) feature map, and
    upsamples it back to a (120, 160, 1) logit image which is returned flattened.

    Args:
        inputs: 2-D tensor (batch_size, features); the last
            ``n_neurons_nodes_non_visual`` features are non-visual
            (position/velocity) and are dropped before decoding.
        name: unused in the body; kept for interface compatibility.
        verbose: if truthy, print every intermediate layer shape.
        keep_dropout_prop: dropout keep probability used when ``self.is_training``.

    Returns:
        2-D tensor (batch_size, 120*160) of flattened single-channel logits
        (final conv has ``activation=None``).

    Requires:
        ``self.skip3`` must already be set by the matching encoder.
    """
    filter_sizes = [EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_conv_filters,
                    EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_conv_filters * 2]
    if EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    img_shape = get_correct_image_shape(config=None, get_type='all', depth_data_provided=EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.depth_data_provided)
    """ get image data, get everything >except< last n elements which are non-visual (position and velocity) """
    image_data = inputs[:, :-EncodeProcessDecode_v4_1082_latent_dim_only_seg_skip_connection_one_step.n_neurons_nodes_non_visual]
    #visual_latent_space_dim = EncodeProcessDecode_v3.n_neurons_nodes_total_dim - EncodeProcessDecode_v3.n_neurons_nodes_total_dim
    """ in order to apply 1x1 2D convolutions, transform shape (batch_size, features) -> shape (batch_size, 1, 1, features)"""
    image_data = tf.expand_dims(image_data, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?,1,1,latent_dim)
    # NOTE(review): the reshape below overrides the two expand_dims above; the
    # visual latent is treated as a (7, 10, 15) map, i.e. it assumes the visual
    # latent dimension is exactly 7*10*15 = 1050 — confirm against the encoder.
    image_data = tf.reshape(image_data, (-1, 7, 10, 15))
    ''' layer 1 (7,10,5) -> (7,10,filter_sizes[1])'''
    outputs = tf.layers.conv2d_transpose(image_data, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l1_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)  # keep_prob=1.0 is a no-op at inference
    ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    # 'valid' transposed conv with kernel (3, 2) and stride 2 maps (7, 10) to exactly (15, 20)
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=(3, 2), strides=2, padding='valid')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l2_shape = outputs.get_shape()
    outputsl2 = outputs  # saved: merged additively with the skip branch built next
    ''' layer 2_2 (15,20,filter_sizes[1] -> (15,20,filter_sizes[1]) '''
    # --------------- SKIP CONNECTION --------------- #
    #outputs = tf.concat([outputs, self.skip3], axis=3)
    #outputs = outputs + self.skip3
    #after_skip3 = outputs.get_shape()
    # --------------- SKIP CONNECTION --------------- #
    # skip branch: a conv applied to the encoder tensor self.skip3, then added to layer-2 output
    outputs = tf.layers.conv2d(self.skip3, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l1_2_shape = outputs.get_shape()
    outputs = outputsl2 + outputs
    after_skip3 = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 3 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l3_shape = outputs.get_shape()
    ''' layer 4 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l4_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 5 (15,20,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    # NOTE(review): strides=1, so this layer does NOT change spatial size; the
    # (30,40) in the banner above appears stale — upsampling happens in "layer 7".
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l5_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 7 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=2, padding='same')  # 2x spatial upsample
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l7_shape = outputs.get_shape()
    ''' layer 8 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l8_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 9 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l9_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 11 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')  # 2x spatial upsample
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l11_shape = outputs.get_shape()
    ''' layer 12 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l12_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 13 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l13_shape = outputs.get_shape()
    outputsl13 = outputs  # saved for the residual add after layer 14
    # --------------- SKIP CONNECTION --------------- #
    #outputs = tf.concat([outputs, self.skip2], axis=3)
    #outputs = outputs + self.skip2
    #after_skip2 = outputs.get_shape()
    ''' layer 14 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l14_shape = outputs.get_shape()
    # --------------- SKIP CONNECTION --------------- #
    # NOTE(review): despite the banner, this is a local residual (layer13 output +
    # layer14 output), not a merge with the encoder's self.skip2 — that variant is
    # commented out above.
    outputs = outputsl13 + outputs
    after_skip2 = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 15 (60,80,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')  # 2x upsample to full resolution
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l15_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    # --------------- SKIP CONNECTION --------------- #
    #outputs = outputs + self.skip1
    #outputs = tf.concat([outputs, self.skip1], axis=3)
    #after_skip1 = outputs.get_shape()
    ''' layer 18 (120,160,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=1, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l18_shape = outputs.get_shape()
    ''' layer 17 (120,160,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l17_shape = outputs.get_shape()
    # final 1x1 projection to a single channel; no activation so the output stays raw logits
    outputs = tf.layers.conv2d(outputs, filters=1, kernel_size=1, strides=1, padding='same', activation=None)  # activation None for logits
    l19_shape = outputs.get_shape()
    visual_latent_output = tf.layers.flatten(outputs)
    if verbose:
        print("Image data shape", image_data.get_shape())
        print("Layer1 decoder output shape", l1_shape)
        print("Layer1_2 decoder output shape", l1_2_shape)
        print("Layer2 decoder output shape", l2_shape)
        print("Layer3 decoder output shape", l3_shape)
        print("Layer4 decoder output shape", l4_shape)
        print("Layer5 decoder output shape", l5_shape)
        print("Layer6 decoder output shape", l7_shape)
        print("Layer7 decoder output shape", l8_shape)
        print("Layer8 decoder output shape", l9_shape)
        print("Layer9 decoder output shape", l11_shape)
        print("Layer10 decoder output shape", l12_shape)
        print("Layer11 decoder output shape", l13_shape)
        print("Layer12 decoder output shape", l14_shape)
        print("Layer13 decoder output shape", l15_shape)
        print("Layer14 decoder output shape", l17_shape)
        print("Layer15 decoder output shape", l18_shape)
        print("Layer16 decoder output shape", l19_shape)
        print("decoder shape before adding non-visual data", visual_latent_output.get_shape())
        print("shape before skip3 {}".format(l1_shape))
        print("shape after skip3 {}".format(after_skip3))
        print("shape before skip2 {}".format(l11_shape))
        print("shape after skip2 {}".format(after_skip2))
        print("shape before skip1 {}".format(l17_shape))
        #print("shape after skip1 {}".format(after_skip1))
    return visual_latent_output
def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
    """Visual + gripper encoder (TF1/Sonnet graph construction).

    Splits ``inputs`` into a flattened-image part and 15 trailing non-visual
    features (6 node elements + 9 globals), encodes the image with a conv/pool
    stack (saving three skip tensors on ``self``), encodes the gripper globals
    with an MLP, and concatenates both latent vectors.

    Args:
        inputs: 2-D tensor (batch_size, features).
        verbose: if truthy, print intermediate layer shapes.
        keep_dropout_prop: unused — every dropout call below is commented out.

    Returns:
        2-D tensor: concatenation of the flattened visual latent and the gripper
        latent along axis 1.

    Side effects:
        Sets ``self.skip1`` / ``self.skip2`` / ``self.skip3`` for the decoder.
    """
    if EncodeProcessDecode_v6_no_core.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    """ velocity (x,y,z) and position (x,y,z) """
    n_globals = 9
    n_non_visual_elements = 6
    filter_sizes = [EncodeProcessDecode_v6_no_core.n_conv_filters, EncodeProcessDecode_v6_no_core.n_conv_filters * 2]
    """ shape: (batch_size, features), get everything except velocity and position """
    img_data = inputs[:, :-(n_non_visual_elements + n_globals)]
    img_shape = get_correct_image_shape(config=None, get_type="all", depth_data_provided=EncodeProcessDecode_v6_no_core.depth_data_provided)
    img_data = tf.reshape(img_data, [-1, *img_shape])  # -1 means "all", i.e. batch dimension
    ''' Layer1 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs1 = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(img_data)
    outputs1 = activation(outputs1)
    #outputs1 = tf.layers.conv2d(img_data, filters=64, kernel_size=3, strides=1, padding='same', activation=activation, use_bias=False,
    #                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs1 = snt.BatchNorm()(outputs1, is_training=self._is_training)
        #outputs1 = tf.contrib.layers.instance_norm(outputs1)
    l1_shape = outputs1.get_shape()
    ''' Layer2 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(outputs1)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs1, filters=64, kernel_size=3, strides=1, padding='same', activation=activation, use_bias=False,
    #                           kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    #if EncodeProcessDecode_v5_no_skip_no_core_no_training_flags_new.conv_layer_instance_norm:
    #    outputs = tf.contrib.layers.instance_norm(outputs)
    l2_shape = outputs.get_shape()
    ''' Layer3 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    if EncodeProcessDecode_v6_no_core.convnet_pooling:
        outputs = tf.layers.average_pooling2d(outputs, 2, 2)  # halves spatial dims
    l3_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer4 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l4_shape = outputs.get_shape()
    ''' Layer5 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    # --------------- SKIP CONNECTION --------------- #
    # skip tensor is captured BEFORE normalization
    outputs2 = outputs
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l5_shape = outputs.get_shape()
    ''' Layer6 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    if EncodeProcessDecode_v6_no_core.convnet_pooling:
        outputs = tf.layers.average_pooling2d(outputs, 2, 2)
    l6_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer7 encoder output shape (?, 30, 40, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l7_shape = outputs.get_shape()
    ''' Layer8 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l8_shape = outputs.get_shape()
    ''' Layer9 encoder output shape (?, 15, 20, filter_sizes[0]) '''
    if EncodeProcessDecode_v6_no_core.convnet_pooling:
        outputs = tf.layers.average_pooling2d(outputs, 2, 2)
    l9_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer10 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l10_shape = outputs.get_shape()
    ''' Layer11 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    # --------------- SKIP CONNECTION --------------- #
    # skip tensor is captured BEFORE normalization
    outputs3 = outputs
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l11_shape = outputs.get_shape()
    ''' Layer12 encoder output shape (?, 7, 10, filter_sizes[1]) '''
    if EncodeProcessDecode_v6_no_core.convnet_pooling:
        outputs = tf.layers.average_pooling2d(outputs, 2, 2)
    l12_shape = outputs.get_shape()
    ''' Layer13 encoder output shape (?, 4, 5, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=2, padding="SAME")(outputs)  # strided conv halves spatial dims
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=2, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l13_shape = outputs.get_shape()
    ''' Layer14 encoder output shape (?, 2, 3, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[1], kernel_size=3, strides=2, padding='same', activation=activation,
    #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        #outputs = tf.contrib.layers.instance_norm(outputs)
    l14_shape = outputs.get_shape()
    ''' Layer15 encoder output shape (?, 1, 1, filter_sizes[1]) '''
    if EncodeProcessDecode_v6_no_core.convnet_pooling:
        outputs = tf.layers.average_pooling2d(outputs, 2, 2)
    l15_shape = outputs.get_shape()
    if verbose:
        print("Layer1 encoder output shape", l1_shape)
        print("Layer2 encoder output shape", l2_shape)
        print("Layer3 encoder output shape", l3_shape)
        print("Layer4 encoder output shape", l4_shape)
        print("Layer5 encoder output shape", l5_shape)
        print("Layer6 encoder output shape", l6_shape)
        print("Layer7 encoder output shape", l7_shape)
        print("Layer8 encoder output shape", l8_shape)
        print("Layer9 encoder output shape", l9_shape)
        print("Layer10 encoder output shape", l10_shape)
        print("Layer11 encoder output shape", l11_shape)
        print("Layer12 encoder output shape", l12_shape)
        print("Layer13 encoder output shape", l13_shape)
        print("Layer14 encoder output shape", l14_shape)
        print("Layer15 encoder output shape", l15_shape)
    # ' shape (?, 7, 10, filter_sizes[1]) -> (?, n_neurons_nodes_total_dim-n_neurons_nodes_non_visual) '
    visual_latent_output = tf.layers.flatten(outputs)
    # visual_latent_output = tf.layers.dense(inputs=visual_latent_output, units=EncodeProcessDecode_v4_172_improve_shapes_exp1.n_neurons_nodes_total_dim - EncodeProcessDecode_v4_172_improve_shapes_exp1.n_neurons_nodes_non_visual)
    # --------------- SKIP CONNECTION --------------- #
    # expose skip tensors for the decoder
    self.skip1 = outputs1
    self.skip2 = outputs2
    self.skip3 = outputs3
    n_globals = 9
    n_non_visual_elements = 6
    gripper_input = inputs[:, -n_globals:]  # get x,y,z-gripper position and x,y,z-gripper velocity
    n_neurons = EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual
    # NOTE(review): n_layers is also set from n_neurons_nodes_non_visual — this
    # makes the MLP depth equal the per-layer width; looks like a copy-paste,
    # confirm this is intended.
    n_layers = EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual
    output_size = EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual
    net = snt.nets.MLP([n_neurons] * n_layers, activate_final=False)
    """ map velocity and position into a latent space, concatenate with visual latent space vector """
    gripper_latent_output = snt.Sequential([net, snt.LayerNorm(), snt.Linear(output_size)])(gripper_input)
    outputs = tf.concat([visual_latent_output, gripper_latent_output], axis=1)
    if verbose:
        print("final encoder output shape", outputs.get_shape())
    return outputs
def _build(self, inputs, name, verbose=VERBOSITY, keep_dropout_prop=0.7):
    """Visual decoder: latent vector -> full-resolution image (TF1 graph).

    Strips the trailing non-visual features, reinterprets the visual latent as a
    (7, 10, 15) feature map, and upsamples it through a stack of transposed
    convolutions to (120, 160, img_shape[2]), returned flattened.

    Args:
        inputs: 2-D tensor (batch_size, features); the last
            ``n_neurons_nodes_non_visual`` features are non-visual and dropped.
        name: unused in the body; kept for interface compatibility.
        verbose: if truthy, print every intermediate layer shape.
        keep_dropout_prop: dropout keep probability used when ``self.is_training``.

    Returns:
        2-D tensor of the flattened reconstructed image (final layer IS passed
        through ``activation``, unlike a pure logits head).
    """
    filter_sizes = [
        EncodeProcessDecode_v3_1114_latent_dim.n_conv_filters,
        EncodeProcessDecode_v3_1114_latent_dim.n_conv_filters * 2
    ]
    if EncodeProcessDecode_v3_1114_latent_dim.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    img_shape = get_correct_image_shape(
        config=None,
        get_type='all',
        depth_data_provided=EncodeProcessDecode_v3_1114_latent_dim.
        depth_data_provided)
    """ get image data, get everything >except< last n elements which are non-visual (position and velocity) """
    image_data = inputs[:, :-EncodeProcessDecode_v3_1114_latent_dim.
                        n_neurons_nodes_non_visual]
    #visual_latent_space_dim = EncodeProcessDecode_v3.n_neurons_nodes_total_dim - EncodeProcessDecode_v3.n_neurons_nodes_total_dim
    """ in order to apply 1x1 2D convolutions, transform shape (batch_size, features) -> shape (batch_size, 1, 1, features)"""
    image_data = tf.expand_dims(image_data, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?,1,1,latent_dim)
    # NOTE(review): the reshape below overrides the expand_dims; assumes the
    # visual latent dim is exactly 7*10*15 = 1050 — confirm against the encoder.
    image_data = tf.reshape(image_data, (-1, 7, 10, 15))
    ''' layer 1 (7,10,5) -> (7,10,filter_sizes[1])'''
    outputs = tf.layers.conv2d_transpose(image_data, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l1_shape = outputs.get_shape()
    ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    # 'valid' transposed conv with kernel (3, 2) and stride 2 maps (7, 10) to exactly (15, 20)
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=(3, 2), strides=2, padding='valid')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l2_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)  # keep_prob=1.0 is a no-op at inference
    ''' layer 2 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l3_shape = outputs.get_shape()
    ''' layer 2 (15,20,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    # NOTE(review): strides=1 — no upsampling here despite the banner; the 2x
    # upsample to (30,40) happens in the next strided layer.
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l4_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 3 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[1]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=3, strides=2, padding='same')  # 2x spatial upsample
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l5_shape = outputs.get_shape()
    ''' layer 4 (30,40,filter_sizes[1]) -> (30,40,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l6_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 5 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')  # 2x spatial upsample
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l7_shape = outputs.get_shape()
    ''' layer 5 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l8_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 5 (60,80,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same')  # 2x upsample to full resolution
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l9_shape = outputs.get_shape()
    ''' layer 5 (120,160,filter_sizes[0]) -> (120,160,filter_sizes[0]) '''
    outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same')
    outputs = activation(outputs)
    outputs = tf.contrib.layers.layer_norm(outputs)
    l10_shape = outputs.get_shape()
    if self.is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    else:
        outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' layer 6 (120,160,filter_sizes[0]) -> (120,160,3 or 4 or 7]) '''
    # 1x1 projection to the output image's channel count (3, 4, or 7 depending on depth data)
    outputs = tf.layers.conv2d_transpose(outputs, filters=img_shape[2], kernel_size=1, strides=1, padding='same')
    outputs = activation(outputs)
    l11_shape = outputs.get_shape()
    visual_latent_output = tf.layers.flatten(outputs)
    if verbose:
        print("Image data shape", image_data.get_shape())
        print("Layer1 decoder output shape", l1_shape)
        print("Layer2 decoder output shape", l2_shape)
        print("Layer3 decoder output shape", l3_shape)
        print("Layer4 decoder output shape", l4_shape)
        print("Layer5 decoder output shape", l5_shape)
        print("Layer6 decoder output shape", l6_shape)
        print("Layer7 decoder output shape", l7_shape)
        print("Layer8 decoder output shape", l8_shape)
        print("Layer9 decoder output shape", l9_shape)
        print("Layer10 decoder output shape", l10_shape)
        print("Layer11 decoder output shape", l11_shape)
        print("decoder shape before adding non-visual data", visual_latent_output.get_shape())
    return visual_latent_output
def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
    """Visual encoder for the edge-segmentation variant (TF1/Sonnet graph).

    Strips the trailing 6 non-visual features (position/velocity), encodes the
    image with a conv/max-pool stack, saves three skip tensors on ``self`` for
    the decoder, and returns the flattened visual latent.

    Args:
        inputs: 2-D tensor (batch_size, features); the last 6 features are
            non-visual and are dropped before encoding.
        verbose: if truthy, print intermediate layer shapes.
        keep_dropout_prop: unused — every dropout call below is commented out.

    Returns:
        2-D tensor (batch_size, visual_latent_dim): flattened encoder output.

    Side effects:
        Sets ``self.skip1`` / ``self.skip2`` / ``self.skip3`` for the decoder.

    Bug fix vs. previous revision: in Layer2 the conditional BatchNorm was
    applied to ``outputs1`` (the Layer1 tensor, which had already been
    normalized and is later exposed as ``self.skip1``) instead of the freshly
    computed Layer2 tensor ``outputs`` — a copy-paste error that left Layer2
    un-normalized and double-normalized the skip tensor. It now normalizes
    ``outputs``, matching every other layer in this encoder and the sibling
    encoder implementations in this file.
    """
    if EncodeProcessDecode_v8_edge_segmentation.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu
    """ velocity (x,y,z) and position (x,y,z) """
    n_non_visual_elements = 6
    filter_sizes = [
        EncodeProcessDecode_v8_edge_segmentation.n_conv_filters,
        EncodeProcessDecode_v8_edge_segmentation.n_conv_filters * 2
    ]
    """ shape: (batch_size, features), get everything except velocity and position """
    img_data = inputs[:, :-n_non_visual_elements]
    img_shape = get_correct_image_shape(
        config=None,
        get_type="all",
        depth_data_provided=EncodeProcessDecode_v8_edge_segmentation.
        depth_data_provided)
    img_data = tf.reshape(
        img_data, [-1, *img_shape])  # -1 means "all", i.e. batch dimension
    ''' Layer1 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs1 = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(img_data)
    outputs1 = activation(outputs1)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs1 = snt.BatchNorm()(outputs1, is_training=self._is_training)
    l1_shape = outputs1.get_shape()
    ''' Layer2 encoder output shape (?, 120, 160, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(outputs1)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        # FIX: normalize the Layer2 output, not outputs1 (which was already
        # normalized above and is kept as skip1)
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l2_shape = outputs.get_shape()
    ''' Layer3 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    if EncodeProcessDecode_v8_edge_segmentation.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)  # halves spatial dims
    l3_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer4 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l4_shape = outputs.get_shape()
    ''' Layer5 encoder output shape (?, 60, 80, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    # --------------- SKIP CONNECTION --------------- #
    # skip tensor is captured BEFORE normalization
    outputs2 = outputs
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l5_shape = outputs.get_shape()
    ''' Layer6 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    if EncodeProcessDecode_v8_edge_segmentation.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l6_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer7 encoder output shape (?, 30, 40, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l7_shape = outputs.get_shape()
    ''' Layer8 encoder output shape (?, 30, 40, filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l8_shape = outputs.get_shape()
    ''' Layer9 encoder output shape (?, 15, 20, filter_sizes[0]) '''
    if EncodeProcessDecode_v8_edge_segmentation.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l9_shape = outputs.get_shape()
    #if is_training:
    #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    #else:
    #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    ''' Layer10 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l10_shape = outputs.get_shape()
    ''' Layer11 encoder output shape (?, 15, 20, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    # --------------- SKIP CONNECTION --------------- #
    # skip tensor is captured BEFORE normalization
    outputs3 = outputs
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l11_shape = outputs.get_shape()
    ''' Layer12 encoder output shape (?, 7, 10, filter_sizes[1]) '''
    if EncodeProcessDecode_v8_edge_segmentation.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l12_shape = outputs.get_shape()
    ''' Layer13 encoder output shape (?, 4, 5, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=2, padding="SAME")(outputs)  # strided conv halves spatial dims
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l13_shape = outputs.get_shape()
    ''' Layer14 encoder output shape (?, 2, 3, filter_sizes[1]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[1], kernel_shape=3, stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l14_shape = outputs.get_shape()
    ''' Layer15 encoder output shape (?, 1, 1, filter_sizes[1]) '''
    if EncodeProcessDecode_v8_edge_segmentation.convnet_pooling:
        outputs = tf.layers.max_pooling2d(outputs, 2, 2)
    l15_shape = outputs.get_shape()
    if verbose:
        print("Layer1 encoder output shape", l1_shape)
        print("Layer2 encoder output shape", l2_shape)
        print("Layer3 encoder output shape", l3_shape)
        print("Layer4 encoder output shape", l4_shape)
        print("Layer5 encoder output shape", l5_shape)
        print("Layer6 encoder output shape", l6_shape)
        print("Layer7 encoder output shape", l7_shape)
        print("Layer8 encoder output shape", l8_shape)
        print("Layer9 encoder output shape", l9_shape)
        print("Layer10 encoder output shape", l10_shape)
        print("Layer11 encoder output shape", l11_shape)
        print("Layer12 encoder output shape", l12_shape)
        print("Layer13 encoder output shape", l13_shape)
        print("Layer14 encoder output shape", l14_shape)
        print("Layer15 encoder output shape", l15_shape)
    # ' shape (?, 7, 10, filter_sizes[1]) -> (?, n_neurons_nodes_total_dim-n_neurons_nodes_non_visual) '
    visual_latent_output = tf.layers.flatten(outputs)
    # --------------- SKIP CONNECTION --------------- #
    # expose skip tensors for the decoder
    self.skip1 = outputs1
    self.skip2 = outputs2
    self.skip3 = outputs3
    if verbose:
        print("final encoder output shape", outputs.get_shape())
    return visual_latent_output