def _load_and_concatenate_image_channels(rgb_path=None, rendered_path=None,
                                         depth_path=None, seg_path=None,
                                         crop_size=512):
    """Load the given images, crop them centrally and stack them channel-wise.

    Args:
        rgb_path: optional path to the RGB image.
        rendered_path: optional path to the rendered image. Only its first
            three channels are kept unless `opts.use_alpha` is set.
        depth_path: optional path to the depth image.
        seg_path: optional path to the segmentation image.
        crop_size: value passed to `utils.get_central_crop` as both the crop
            height and the crop width.

    Returns:
        A float32 array with all loaded images stacked along the last axis
        (order: rgb, rendered, depth, seg) and rescaled with
        `x * (2.0 / 255) - 1.0`.

    Raises:
        ValueError: if every path is None.
    """
    paths = (rgb_path, rendered_path, depth_path, seg_path)
    if all(path is None for path in paths):
        raise ValueError('At least one of the inputs has to be not None')

    def _load_cropped(path):
        # Read an image as float32 and take its central crop.
        img = np.array(Image.open(path)).astype(np.float32)
        return utils.get_central_crop(img, crop_size, crop_size)

    channels = []
    if rgb_path is not None:
        channels.append(_load_cropped(rgb_path))
    if rendered_path is not None:
        rendered_img = _load_cropped(rendered_path)
        if not opts.use_alpha:
            rendered_img = rendered_img[:, :, :3]  # drop the alpha channel
        channels.append(rendered_img)
    if depth_path is not None:
        channels.append(_load_cropped(depth_path))
    if seg_path is not None:
        # The segmentation map used to be stacked uncropped, which made
        # np.dstack fail whenever its size differed from crop_size. Crop it
        # like every other input.
        # NOTE(review): assumes utils.get_central_crop returns an image that
        # already has the requested size unchanged -- confirm.
        channels.append(_load_cropped(seg_path))

    # Concatenate and normalize channels
    img = np.dstack(channels)
    img = img * (2.0 / 255) - 1.0
    return img
def next(self):
    """Return the next example as a dict of raw image buffers.

    The rendered image path is taken from `self.filenames[self.iter_idx]`.
    The depth, reference and (optionally) segmentation image paths are derived
    from it by replacing the trailing 'color.png' with another suffix.

    Returns:
        A dict with 'height' and 'width' (taken from the reference image) and
        the raw bytes of the 'rendered', 'depth' and 'real' images, plus 'seg'
        when `self.use_semantic_map` is set.

    Raises:
        StopIteration: when every entry of `self.filenames` was consumed.
        AssertionError: if the loaded images do not have matching shapes.
    """
    if self.iter_idx >= len(self.filenames):
        raise StopIteration()

    rendered_img_name = self.filenames[self.iter_idx]
    basename = rendered_img_name[:-9]  # remove the 'color.png' suffix
    ref_img_name = basename + 'reference.png'
    depth_img_name = basename + 'depth.png'

    # Read the 3D rendered image
    img_rendered = cv2.imread(rendered_img_name, cv2.IMREAD_UNCHANGED)
    # Change BGR (default cv2 format) to RGB; it has a 4th alpha channel
    img_rendered = img_rendered[:, :, [2, 1, 0, 3]]

    # Read the depth image
    img_depth = cv2.imread(depth_img_name, cv2.IMREAD_UNCHANGED)
    # Workaround as some depth images are read with a different data type!
    img_depth = img_depth.astype(np.uint16)

    # Read reference image if exists, otherwise replace with a zero image.
    if osp.exists(ref_img_name):
        img_ref = cv2.imread(ref_img_name)
        img_ref = img_ref[:, :, ::-1]  # Change BGR to RGB format.
    else:
        # use a dummy 3-channel zero image as a placeholder
        print('Warning: no reference image found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_ref = np.zeros((img_height, img_width, 3), dtype=np.uint8)

    img_seg = None
    if self.use_semantic_map:
        semantic_seg_img_name = basename + 'seg_rgb.png'
        img_seg = cv2.imread(semantic_seg_img_name)
        img_seg = img_seg[:, :, ::-1]  # Change from BGR to RGB
        # A 512x512 segmentation map triggers a central crop (with the
        # helper's default size) of the other images.
        if img_seg.shape[0] == 512 and img_seg.shape[1] == 512:
            img_ref = utils.get_central_crop(img_ref)
            img_rendered = utils.get_central_crop(img_rendered)
            img_depth = utils.get_central_crop(img_depth)

    img_shape = img_depth.shape
    # The segmentation image only exists when semantic maps are enabled;
    # checking it unconditionally raised NameError otherwise.
    if img_seg is not None:
        assert img_seg.shape == (img_shape + (3,)), (
            'error in seg image %s %s' % (basename, str(img_seg.shape)))
    assert img_ref.shape == (img_shape + (3,)), (
        'error in ref image %s %s' % (basename, str(img_ref.shape)))
    assert img_rendered.shape == (img_shape + (4,)), (
        'error in rendered image %s %s' % (basename, str(img_rendered.shape)))
    assert len(img_depth.shape) == 2, (
        'error in depth image %s %s' % (basename, str(img_depth.shape)))

    # ndarray.tostring() is a deprecated alias (removed in NumPy 2.0) of
    # tobytes(); both return the same C-order bytes.
    raw_example = dict()
    raw_example['height'] = img_ref.shape[0]
    raw_example['width'] = img_ref.shape[1]
    raw_example['rendered'] = img_rendered.tobytes()
    raw_example['depth'] = img_depth.tobytes()
    raw_example['real'] = img_ref.tobytes()
    if img_seg is not None:
        raw_example['seg'] = img_seg.tobytes()

    self.iter_idx += 1
    return raw_example
def compute_pairwise_style_loss_v2(image_paths_list):
    """Build a symmetric matrix of pairwise style distances between images.

    Every image is loaded, rescaled with `x * 2 / 255 - 1`, centrally cropped
    to `opts.train_resolution` and fed through `compute_gram_matrices` for the
    layers 'conv1_2' .. 'conv5_2'. The distance between two images is the sum,
    over those layers, of the mean squared difference of their gram matrices.

    Args:
        image_paths_list: sequence of image file paths.

    Returns:
        A (n, n) numpy array with a zero diagonal, where n is the number of
        images and entry [i, j] == [j, i] is the style distance.
    """
    num_images = len(image_paths_list)
    resolution = opts.train_resolution
    image_ph = tf.placeholder(tf.float32,
                              shape=[1, resolution, resolution, 3])
    # conv1 through conv5
    layer_names = ['conv%d_2' % level for level in range(1, 6)]
    gram_tensors = compute_gram_matrices(image_ph, layer_names)

    # One list of per-layer gram matrices for every image.
    per_image_grams = []
    with tf.Session() as sess:
        for count, path in enumerate(image_paths_list, 1):
            print('Computing gram matrices for image #%d' % count)
            pixels = np.array(Image.open(path), dtype=np.float32)
            pixels = pixels * 2. / 255. - 1  # normalize image
            pixels = utils.get_central_crop(pixels, resolution, resolution)
            batch = np.expand_dims(pixels, axis=0)
            per_image_grams.append(
                sess.run(gram_tensors, feed_dict={image_ph: batch}))

    print('Number of images = %d' % len(per_image_grams))
    print('Gram matrices per image:')
    for layer_idx, gram in enumerate(per_image_grams[0]):
        print('gram_matrix[%d].shape = %s' % (layer_idx, gram.shape))

    distances = np.zeros((num_images, num_images))
    for row in range(num_images):
        print('Computing distances for image #%d' % row)
        for col in range(row + 1, num_images):
            # Accumulate the loss over the gram matrices of all layers.
            style_loss = sum(
                np.mean((gram_a - gram_b) ** 2, axis=(1, 2))
                for gram_a, gram_b in zip(per_image_grams[row],
                                          per_image_grams[col]))
            distances[row][col] = style_loss
            distances[col][row] = style_loss
    return distances
def segment_images(images_path, xception_frozen_graph_path, save_dir,
                   crop_height=512, crop_width=512):
    """Run a frozen segmentation graph over images and save the label maps.

    Every image is centrally cropped to (crop_height, crop_width), fed to the
    graph's "ImageTensor:0" input and the "SemanticPredictions:0" output is
    written to `save_dir` as '<basename>_seg.png'. Images that are not
    3-channel are reported as corrupted and every file sharing their prefix is
    moved to `<save_dir>/corrupted`.

    Args:
        images_path: sequence of image file paths; each is expected to end
            with '_reference.png' (that suffix is stripped by length).
        xception_frozen_graph_path: path to the serialized frozen GraphDef.
        save_dir: directory where the segmentation images are written.
        crop_height: height of the central crop fed to the network.
        crop_width: width of the central crop fed to the network.

    Raises:
        OSError: if the frozen graph file does not exist.
        AssertionError: if a predicted label does not fit into uint8.
    """
    if not osp.exists(xception_frozen_graph_path):
        raise OSError('Xception frozen graph not found at %s' %
                      xception_frozen_graph_path)

    with tf.gfile.GFile(xception_frozen_graph_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        new_input = tf.placeholder(tf.uint8, [1, crop_height, crop_width, 3],
                                   name="new_input")
        # Rewire the graph's image input to the fixed-size placeholder.
        tf.import_graph_def(graph_def,
                            input_map={"ImageTensor:0": new_input},
                            return_elements=None,
                            name="sem_seg",
                            op_dict=None,
                            producer_op_list=None)

    corrupted_dir = osp.join(save_dir, 'corrupted')
    if not osp.exists(corrupted_dir):
        os.makedirs(corrupted_dir)

    with tf.Session(graph=graph) as sess:
        for i, img_path in enumerate(images_path):
            print('Segmenting image %05d / %05d: %s' % (i + 1,
                                                        len(images_path),
                                                        img_path))
            # Close the file handle right away instead of waiting for GC.
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img)

            if len(img.shape) == 2 or img.shape[2] != 3:
                print('Warning! corrupted image %s' % img_path)
                # remove the '_reference.png' suffix
                img_base_path = img_path[:-14]
                srcs = sorted(glob.glob(img_base_path + '_*'))
                # A plain str works on Python 2 and 3; the former `unicode()`
                # call raised NameError on Python 3.
                dest = corrupted_dir + '/.'
                for src in srcs:
                    shutil.move(src, dest)
                continue

            img = utils.get_central_crop(img, crop_height=crop_height,
                                         crop_width=crop_width)
            img = np.expand_dims(img, 0)  # convert to NHWC format
            seg = sess.run("sem_seg/SemanticPredictions:0",
                           feed_dict={new_input: img})
            assert np.max(seg) <= 255, (
                'segmentation image is not of type uint8!')
            # convert to uint8 and drop the singleton batch dimension
            seg = np.squeeze(np.uint8(seg))

            basename = osp.splitext(osp.basename(img_path))[0]
            basename = basename[:-10]  # remove the '_reference' suffix
            seg_filepath = osp.join(save_dir, basename + "_seg.png")
            # Save segmentation image
            Image.fromarray(seg).save(seg_filepath)
def conv_model(self, inputs, step):
    """Construct the CNN applied to the patches of one RNN step.

    The input is reshaped to
    [batch_size * num_patches, patch_shape[0], patch_shape[1], num_channels]
    and passed through three `conv2d_bn` + 2x2 max-pooling stages. The third
    convolution output is centrally cropped to the spatial size of the last
    pooling output, concatenated with it along axis 3 and flattened per batch
    element.

    Args:
        inputs: Tensor with shape
            [n, num_landmarks, patch_shape, patch_shape, 3]
        step(int): RNN step; used for the name scope and the visualisations.

    Returns:
        A tuple (features, net) where `features` is the flattened tensor of
        shape [batch_size, -1] and `net` maps layer names ('conv_1',
        'pool_1', ..., 'conv_3_cropped', 'concat') to their tensors.
    """
    net = {}
    stage_names = (('conv_1', 'pool_1'),
                   ('conv_2', 'pool_2'),
                   ('conv_3', 'pool_3'))
    with tf.name_scope('mdm_conv{}'.format(step), values=[inputs]):
        flat_patch_shape = (self.batch_size * self.num_patches,
                            self.patch_shape[0],
                            self.patch_shape[1],
                            self.num_channels)
        inputs = tf.reshape(inputs, flat_patch_shape)

        # Convolution 1..3, each followed by a 2x2 max pooling
        for conv_name, pool_name in stage_names:
            inputs = self.conv2d_bn(inputs, name=conv_name)
            self.visualize_cnn_mean(step, inputs, conv_name)
            net[conv_name] = inputs
            inputs = tf.layers.max_pooling2d(inputs, [2, 2], [2, 2])
            net[pool_name] = inputs

        # Crop conv_3 to the spatial size of pool_3 so both can be joined
        pooled_hw = inputs.get_shape().as_list()[1:3]
        net['conv_3_cropped'] = utils.get_central_crop(net['conv_3'],
                                                       box=pooled_hw)
        inputs = tf.concat([net['conv_3_cropped'], inputs], 3)

        # Flatten
        inputs = tf.reshape(inputs, (self.batch_size, -1))
        net['concat'] = inputs
    return inputs, net
def conv_model(inputs, is_training=True, scope=''):
    """Build the two-stage convolutional feature extractor.

    Two 3x3, 32-filter 'VALID' ReLU convolutions, each followed by a 2x2 max
    pooling. The second convolution output is centrally cropped to the spatial
    size of the second pooling output and concatenated with it along axis 3.

    Args:
        inputs: input tensor handed to the first convolution.
        is_training: forwarded to `ops.conv2d` / `ops.fc` through arg_scope.
        scope: name scope for the created ops ('mdm_conv' is the default name
            used when `scope` is None).

    Returns:
        A dict mapping 'conv_1', 'pool_1', 'conv_2', 'pool_2',
        'conv_2_cropped' and 'concat' to the corresponding tensors.
    """
    net = {}

    # tf.op_scope(values, name, default_name) was removed in TensorFlow 1.0;
    # tf.name_scope(name, default_name, values) is its replacement.
    with tf.name_scope(scope, 'mdm_conv', [inputs]):
        with scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training):
            with scopes.arg_scope([ops.conv2d], activation=tf.nn.relu,
                                  padding='VALID'):
                net['conv_1'] = ops.conv2d(inputs, 32, [3, 3],
                                           scope='conv_1')
                net['pool_1'] = ops.max_pool(net['conv_1'], [2, 2])
                net['conv_2'] = ops.conv2d(net['pool_1'], 32, [3, 3],
                                           scope='conv_2')
                net['pool_2'] = ops.max_pool(net['conv_2'], [2, 2])

                crop_size = net['pool_2'].get_shape().as_list()[1:3]
                net['conv_2_cropped'] = utils.get_central_crop(
                    net['conv_2'], box=crop_size)

                # Since TensorFlow 1.0 tf.concat takes (values, axis); the old
                # (axis, values) order is rejected.
                net['concat'] = tf.concat(
                    [net['conv_2_cropped'], net['pool_2']], 3)
    return net
def conv_model(inputs, is_training=True, scope=''):
    """Build the two-stage convolutional feature extractor.

    Args:
        inputs: input tensor handed to the first convolution.
        is_training: forwarded to `ops.conv2d` / `ops.fc` through arg_scope.
        scope: name scope for the created ops ('mdm_conv' is the default name
            used when `scope` is None).

    Returns:
        A dict mapping 'conv_1', 'pool_1', 'conv_2', 'pool_2',
        'conv_2_cropped' and 'concat' to the corresponding tensors.
    """
    # Prefix the names of the ops created below with 'mdm_conv'.
    with tf.name_scope(scope, 'mdm_conv', [inputs]):
        with scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training):
            with scopes.arg_scope([ops.conv2d], activation=tf.nn.relu,
                                  padding='VALID'):
                # Two convolutional layers with 32 filters and 3x3 kernels,
                # each followed by a 2x2 max-pooling layer.
                conv_1 = ops.conv2d(inputs, 32, [3, 3], scope='conv_1')
                pool_1 = ops.max_pool(conv_1, [2, 2])
                conv_2 = ops.conv2d(pool_1, 32, [3, 3], scope='conv_2')
                pool_2 = ops.max_pool(conv_2, [2, 2])

                # The central crop of the conv_2 activations is joined with
                # the second pooling output through a skip connection, which
                # keeps local information that max pooling would otherwise
                # discard.
                pooled_hw = pool_2.get_shape().as_list()[1:3]
                conv_2_cropped = utils.get_central_crop(conv_2, box=pooled_hw)
                concat = tf.concat([conv_2_cropped, pool_2], 3)  # axis=3

    net = {}
    net['conv_1'] = conv_1
    net['pool_1'] = pool_1
    net['conv_2'] = conv_2
    net['pool_2'] = pool_2
    net['conv_2_cropped'] = conv_2_cropped
    net['concat'] = concat
    return net
def conv_model(inputs, is_training=True, scope=''):
    """Build the two-stage convolutional feature extractor ('rdn_conv').

    Two 3x3, 32-filter 'VALID' ReLU convolutions, each followed by a 2x2 max
    pooling. The second convolution output is centrally cropped to the spatial
    size of the second pooling output and concatenated with it along axis 3.

    Args:
        inputs: input tensor handed to the first convolution.
        is_training: forwarded to `ops.conv2d` / `ops.fc` through arg_scope.
        scope: name scope for the created ops ('rdn_conv' is the default name
            used when `scope` is None).

    Returns:
        A dict mapping 'conv_1', 'pool_1', 'conv_2', 'pool_2',
        'conv_2_cropped' and 'concat' to the corresponding tensors.
    """
    layers = {}
    stages = (('conv_1', 'pool_1'), ('conv_2', 'pool_2'))

    with tf.name_scope(scope, 'rdn_conv', [inputs]), \
            scopes.arg_scope([ops.conv2d, ops.fc],
                             is_training=is_training), \
            scopes.arg_scope([ops.conv2d], activation=tf.nn.relu,
                             padding='VALID'):
        tensor = inputs
        for conv_key, pool_key in stages:
            layers[conv_key] = ops.conv2d(tensor, 32, [3, 3], scope=conv_key)
            layers[pool_key] = ops.max_pool(layers[conv_key], [2, 2])
            tensor = layers[pool_key]

        # Crop conv_2 to the spatial size of pool_2 so both can be joined.
        target_hw = tensor.get_shape().as_list()[1:3]
        layers['conv_2_cropped'] = utils.get_central_crop(layers['conv_2'],
                                                          box=target_hw)
        layers['concat'] = tf.concat([layers['conv_2_cropped'], tensor], 3)
    return layers
def next(self):
    """Return the next example as a dict of raw image buffers.

    The rendered image path is taken from `self.filenames[self.iter_idx]`.
    The paths of the depth, reference, normal, wc, point and (optionally)
    segmentation files are derived from it by replacing the trailing
    'color.png' with another suffix. Missing reference / normal / wc / point
    files are replaced by all-zero uint8 placeholders.

    Returns:
        A dict with 'height' and 'width' (taken from the reference image) and
        the raw bytes of the 'rendered', 'depth', 'real', 'normal', 'wc' and
        'point' images, plus 'seg' when `self.use_semantic_map` is set.

    Raises:
        StopIteration: when every entry of `self.filenames` was consumed.
        AssertionError: if the loaded images do not have matching shapes.
    """
    if self.iter_idx >= len(self.filenames):
        raise StopIteration()

    rendered_img_name = self.filenames[self.iter_idx]
    basename = rendered_img_name[:-9]  # remove the 'color.png' suffix
    ref_img_name = basename + 'reference.png'
    depth_img_name = basename + 'depth.png'
    normal_img_name = basename + 'normal.png'
    wc_img_name = basename + 'wc.png'
    point_json_name = basename + 'point.txt'

    # Read the 3D rendered image
    img_rendered = cv2.imread(rendered_img_name, cv2.IMREAD_UNCHANGED)
    # Change BGR (default cv2 format) to RGB
    img_rendered = img_rendered[:, :, [2, 1, 0]]

    # Read the depth image
    img_depth = cv2.imread(depth_img_name, cv2.IMREAD_UNCHANGED)
    # Workaround as some depth images are read with a different data type!
    img_depth = img_depth.astype(np.uint16)

    def _blank_like_depth(*trailing_dims):
        # All-zero uint8 image with the height/width of the depth image.
        img_height, img_width = img_depth.shape
        return np.zeros((img_height, img_width) + trailing_dims,
                        dtype=np.uint8)

    # Read reference image if exists, otherwise replace with a zero image.
    if osp.exists(ref_img_name):
        img_ref = cv2.imread(ref_img_name)
        img_ref = img_ref[:, :, ::-1]  # Change BGR to RGB format.
    else:
        # use a dummy 3-channel zero image as a placeholder
        print('Warning: no reference image found! Using a dummy placeholder!')
        img_ref = _blank_like_depth(3)

    if osp.exists(normal_img_name):
        img_normal = cv2.imread(normal_img_name, cv2.IMREAD_UNCHANGED)
    else:
        print('Warning: no normal image found! Using a dummy placeholder!')
        img_normal = _blank_like_depth(3)

    if osp.exists(wc_img_name):
        img_wc = cv2.imread(wc_img_name, cv2.IMREAD_UNCHANGED)
    else:
        print('Warning: no wc image found! Using a dummy placeholder!')
        img_wc = _blank_like_depth(3)

    if osp.exists(point_json_name):
        with open(point_json_name) as json_file:
            json_point = json.load(json_file)
        # Keys are stringified (x, y) tuples; parse them back safely.
        json_point = {ast.literal_eval(k): v for k, v in json_point.items()}
        img_point = _blank_like_depth()
        for (x, y), value in json_point.items():
            # NOTE bug in the zbuffer algo
            # NOTE(review): the result is stored as uint8 -- confirm that
            # value + 1 always fits.
            img_point[y][x] = value + 1
    else:
        print('Warning: no point json found! Using a dummy placeholder!')
        img_point = _blank_like_depth()

    img_seg = None
    if self.use_semantic_map:
        semantic_seg_img_name = basename + 'seg_rgb.png'
        img_seg = cv2.imread(semantic_seg_img_name)
        img_seg = img_seg[:, :, ::-1]  # Change from BGR to RGB
        # A 512x512 segmentation map triggers a central crop (with the
        # helper's default size) of the other images.
        if img_seg.shape[0] == 512 and img_seg.shape[1] == 512:
            img_ref = utils.get_central_crop(img_ref)
            img_rendered = utils.get_central_crop(img_rendered)
            img_depth = utils.get_central_crop(img_depth)
            img_normal = utils.get_central_crop(img_normal)
            img_wc = utils.get_central_crop(img_wc)
            img_point = utils.get_central_crop(img_point)

    img_shape = img_depth.shape
    # The segmentation image only exists when semantic maps are enabled;
    # checking it unconditionally raised NameError otherwise.
    if img_seg is not None:
        assert img_seg.shape == (img_shape + (3,)), (
            'error in seg image %s %s' % (basename, str(img_seg.shape)))
    assert img_ref.shape == (img_shape + (3,)), (
        'error in ref image %s %s' % (basename, str(img_ref.shape)))
    assert img_rendered.shape == (img_shape + (3,)), (
        'error in rendered image %s %s' % (basename, str(img_rendered.shape)))
    assert img_normal.shape == (img_shape + (3,)), (
        'error in normal image %s %s' % (basename, str(img_normal.shape)))
    assert img_wc.shape == (img_shape + (3,)), (
        'error in wc image %s %s' % (basename, str(img_wc.shape)))
    assert len(img_depth.shape) == 2, (
        'error in depth image %s %s' % (basename, str(img_depth.shape)))
    assert len(img_point.shape) == 2, (
        'error in point image %s %s' % (basename, str(img_point.shape)))

    # ndarray.tostring() is a deprecated alias (removed in NumPy 2.0) of
    # tobytes(); both return the same C-order bytes.
    raw_example = dict()
    raw_example['height'] = img_ref.shape[0]
    raw_example['width'] = img_ref.shape[1]
    raw_example['rendered'] = img_rendered.tobytes()
    raw_example['depth'] = img_depth.tobytes()
    raw_example['real'] = img_ref.tobytes()
    raw_example['normal'] = img_normal.tobytes()
    raw_example['wc'] = img_wc.tobytes()
    raw_example['point'] = img_point.tobytes()
    if img_seg is not None:
        raw_example['seg'] = img_seg.tobytes()

    self.iter_idx += 1
    return raw_example