def _prepare_augmented_batch(self, tensors, batch_size, image_size=None):
    """Augment one example and give every resulting tensor a leading batch axis.

    For the "train" subset the augmentors listed under the ``augmentors_train``
    config entry are applied one after another, the result is passed through
    ``assemble_input_tensors`` and each value gets a new axis 0 via
    ``tf.expand_dims``. For every other subset the example is repeated
    ``batch_size`` times, each copy is augmented with the ``augmentors_val``
    augmentors and assembled, and the copies are stacked along axis 0.

    Args:
        tensors: mapping holding the tensors of a single example.
        batch_size: number of entries in the batch; must be 1 for training.
        image_size: forwarded to ``assemble_input_tensors`` in the training
            branch; falls back to ``self.image_size`` when None. It is not
            used in the non-training branch.

    Returns:
        dict with the keys produced by ``assemble_input_tensors``, each value
        carrying an additional leading axis.

    Raises:
        AssertionError: if ``batch_size != 1`` during training, or if "scale"
            or "translation" is listed in ``augmentors_val``.
    """
    if self.subset == "train":
        assert batch_size == 1, "only implemented for 1 so far"
        augmentor_strs = self.config.unicode_list("augmentors_train", [])
        augmentors = parse_augmentors(augmentor_strs, self.void_label())
        for augmentor in augmentors:
            tensors = augmentor.apply(tensors)
        if image_size is None:
            image_size = self.image_size
        tensors = assemble_input_tensors(tensors, image_size)

        # batchify
        keys = tensors.keys()
        tensors = {k: tf.expand_dims(tensors[k], axis=0) for k in keys}
    else:
        augmentor_strs = self.config.unicode_list("augmentors_val", [])
        assert "scale" not in augmentor_strs, "scale augmentation during test time not implemented yet"
        assert "translation" not in augmentor_strs, "translation augmentation during test time not implemented yet"
        augmentors = parse_augmentors(augmentor_strs, self.void_label())
        # `range` instead of the Python-2-only `xrange`, so this branch also
        # runs on Python 3; the list it feeds is tiny, so laziness is moot.
        tensors = [tensors for _ in range(batch_size)]
        tensors = [apply_augmentors(t, augmentors) for t in tensors]
        tensors = [assemble_input_tensors(t) for t in tensors]

        # batchify
        keys = tensors[0].keys()
        tensors = {k: tf.stack([t[k] for t in tensors], axis=0) for k in keys}
    return tensors
def _load_crop_helper(self, img_file_name, img_bbox):
    """Load an image, augment it and return it normalized and unnormalized.

    ``img_bbox`` is accepted but not read by this variant: no cropping is
    performed here.

    Returns:
        Tuple ``(normalized_image, augmented_image, None)``.
    """
    raw = load_image_tensorflow(img_file_name, jpg=self.jpg, channels=3)
    raw.set_shape(self.input_size + (3,))

    # The augmentors operate on a dict keyed by "unnormalized_img".
    augmented = apply_augmentors({"unnormalized_img": raw}, self.augmentors)["unnormalized_img"]
    return normalize(augmented), augmented, None
def for_each_img(i_idx):
    """Load, augment and normalize the ``i_idx``-th selected image.

    Relies on ``self``, ``pers_id``, ``img_ids`` and ``p_idx`` from the
    enclosing scope.

    Returns:
        Tuple ``(image, label, tag)`` where ``label`` is ``p_idx`` and ``tag``
        is the generated file name.
    """
    full_shape = self.input_size + (3,)
    img_id = img_ids[i_idx]

    # File names look like "<5-digit person id>_<4-digit image id>.png".
    person_part = tf.as_string(pers_id, width=5, fill="0")
    image_part = tf.as_string(img_id, width=4, fill="0")
    tag = person_part + "_" + image_part + ".png"

    raw = load_image_tensorflow(self.data_dir + self.train_folder + '/' + tag, jpg=False)
    raw.set_shape(full_shape)

    augmented = apply_augmentors({"unnormalized_img": raw}, self.augmentors)
    result = normalize(augmented["unnormalized_img"])
    # The static shape is set again on the normalized tensor.
    result.set_shape(full_shape)
    return result, p_idx, tag
def _prepare_augmented_batch(self, tensors, batch_size, image_size=None):
    """Augment a single example and return it as a batch.

    Training ("train" subset): apply the ``augmentors_train`` augmentors in
    order, assemble the inputs with ``image_size`` (default
    ``self.image_size``) and prepend a batch axis of length 1 to every entry.

    Any other subset: augment ``batch_size`` copies of the example with the
    ``augmentors_val`` augmentors, assemble each copy and stack the copies
    along a new axis 0. ``image_size`` is not used in this branch.

    Raises:
        AssertionError: for a training batch size other than 1, or when
            "scale" / "translation" appear in ``augmentors_val``.
    """
    if self.subset != "train":
        val_names = self.config.unicode_list("augmentors_val", [])
        assert "scale" not in val_names, "scale augmentation during test time not implemented yet"
        assert "translation" not in val_names, "translation augmentation during test time not implemented yet"
        val_augmentors = parse_augmentors(val_names, self.void_label())

        # Augment all copies first, then assemble them, in that order.
        augmented = [apply_augmentors(tensors, val_augmentors) for _ in range(batch_size)]
        assembled = [assemble_input_tensors(entry) for entry in augmented]

        # batchify
        names = list(assembled[0].keys())
        return {name: tf.stack([entry[name] for entry in assembled], axis=0) for name in names}

    assert batch_size == 1, "only implemented for 1 so far"
    train_names = self.config.unicode_list("augmentors_train", [])
    example = tensors
    for aug in parse_augmentors(train_names, self.void_label()):
        example = aug.apply(example)
    target_size = self.image_size if image_size is None else image_size
    example = assemble_input_tensors(example, target_size)

    # batchify
    batched = {name: tf.expand_dims(example[name], axis=0) for name in list(example.keys())}

    # Debug summaries, disabled. NOTE(review): the snippet refers to `flow3`
    # and `flow4`, which it never defines, so it needs fixing before use.
    # from datasets.Util.Normalization import unnormalize
    # summ0 = tf.summary.image("inputs", unnormalize(tensors["inputs"][:, :, :, :3]))
    # summ1 = tf.summary.image("labels", tensors["labels"] * 255)
    # summ2 = tf.summary.image("old_labels", tensors["inputs"][:, :, :, 3:4])
    # flow0 = tensors["inputs"][:, :, :, 4:5]
    # flow0 -= tf.reduce_min(flow0)
    # flow0 /= tf.reduce_max(flow0)
    # flow1 = tensors["inputs"][:, :, :, 5:6]
    # flow1 -= tf.reduce_min(flow1)
    # flow1 /= tf.reduce_max(flow1)
    # summ3 = tf.summary.image("flow_future_3", flow3)
    # summ4 = tf.summary.image("flow_future_4", flow4)
    # self.summaries += [summ0, summ1, summ2, summ3, summ4]
    return batched
def _load_crop_helper(self, img_file_name, img_bbox):
    """Crop a context-enlarged box out of an image, resize, augment and normalize.

    The box is read as ``img_bbox[0..3]`` = (x, y, width, height). It is
    enlarged around its centre by ``self.context_region_factor``, rounded to
    integers, clipped against the image and cut out. The crop is resized to
    ``self.input_size``, handed to ``self.draw`` together with the original
    box, then augmented and normalized.

    Returns:
        Tuple ``(normalized_image, augmented_image, cropped_image)`` where
        ``cropped_image`` is the crop before resizing.
    """
    full_image = load_image_tensorflow(img_file_name, jpg=self.jpg, channels=3)
    full_dims = tf.shape(full_image)

    left, top = img_bbox[0], img_bbox[1]
    width, height = img_bbox[2], img_bbox[3]

    # add context region: shift the origin by half of the added extent, then grow
    ctx = self.context_region_factor
    left -= 0.5 * width * (ctx - 1.0)
    top -= 0.5 * height * (ctx - 1.0)
    width *= ctx
    height *= ctx

    # round to integer coordinates
    def as_int(value):
        return tf.cast(tf.round(value), tf.int32)

    left = as_int(left)
    top = as_int(top)
    width = as_int(width)
    height = as_int(height)

    # clip to image size: origin not below 0, extent not past the image border
    left = tf.maximum(left, 0)
    top = tf.maximum(top, 0)
    overshoot_w = tf.maximum(left + width - full_dims[1], 0)
    overshoot_h = tf.maximum(top + height - full_dims[0], 0)
    width = width - overshoot_w
    height = height - overshoot_h

    # crop
    img_cropped = full_image[top:top + height, left:left + width]

    # resize
    resized = resize_image(img_cropped, self.input_size, True)
    # image_print = sess.run(tf.Print(img, [img], summarize=128*128*3))
    resized.set_shape(self.input_size + (3,))
    self.draw(resized, img_bbox)
    # self.showimage_variable_opencv(img)

    # augment and normalize
    augmented = apply_augmentors({"unnormalized_img": resized}, self.augmentors)["unnormalized_img"]
    return normalize(augmented), augmented, img_cropped
def read_images_from_disk(input_queue, input_size, resize_mode, label_postproc_fn=lambda x: x, augmentors=(),
                          label_load_fn=load_label_default, img_load_fn=load_img_default,
                          distance_transform_fn=Util.create_distance_transform):
    """Load one image with its annotations and turn them into input tensors.

    Args:
        input_queue: sequence whose entries 0 and 1 are the image path and the
            label path; an optional entry 2 is passed on as ``img_id``.
        input_size: forwarded to ``resize``.
        resize_mode: forwarded to ``resize``.
        label_postproc_fn: applied to ``labels['label']`` when that key exists.
        augmentors: forwarded to ``apply_augmentors``.
        label_load_fn: called as ``label_load_fn(im_path, label_path)``; must
            return a mapping of annotation name to value.
        img_load_fn: called as ``img_load_fn(img_path=im_path)``.
        distance_transform_fn: used to build distance-transform inputs, either
            through ``add_distance_transform`` or through ``tf.py_func``.

    Returns:
        Tuple ``(tensors, summaries)``: the output of
        ``assemble_input_tensors`` and an (always empty) list of summaries.

    Raises:
        AssertionError: if ``Constants.USE_CLICKS`` is present in the labels
            but ``Constants.STRATEGY`` or ``Constants.IGNORE_CLASSES`` is not.
    """
    im_path = input_queue[0]
    label_path = input_queue[1]
    img_id = input_queue[2] if len(input_queue) > 2 else None
    img = img_load_fn(img_path=im_path)

    labels = label_load_fn(im_path, label_path)

    def optional(key):
        # Annotation stored under `key`, or None when the loader did not supply it.
        return labels[key] if key in labels else None

    # Per-pixel label maps get the image's static shape with a single channel.
    label = None
    if 'label' in labels:
        label = label_postproc_fn(labels['label'])
        label.set_shape(img.get_shape().as_list()[:-1] + [1])

    old_label = None
    if 'old_label' in labels:
        old_label = labels['old_label']
        old_label.set_shape(img.get_shape().as_list()[:-1] + [1])

    tensors = create_tensor_dict(
        unnormalized_img=img, label=label, old_label=old_label, u0=None, u1=None, tag=im_path, raw_label=label,
        bboxes=optional(Constants.BBOXES), ids=optional(Constants.IDS), classes=optional(Constants.CLASSES),
        img_id=img_id, ignore_regions=optional(Constants.IGNORE_REGIONS),
        scene_infos=optional(Constants.SCENE_INFOS), old_label_as_dt=optional(Constants.OLD_LABEL_AS_DT))

    tensors = resize(tensors, resize_mode, input_size)

    # Create distance transform after image resize to speed up the computation.
    if Constants.USE_CLICKS in labels:
        assert Constants.STRATEGY in labels and Constants.IGNORE_CLASSES in labels
        tensors = add_distance_transform(tensors, labels, distance_transform_fn)
    elif Constants.OLD_LABEL_AS_DT in labels:
        # Replace the old label by the distance transform of the (resized) label.
        tensors["old_label"] = tf.py_func(distance_transform_fn, [tensors["label"]], [tf.float32])[0]
        tensors["old_label"].set_shape(tensors["label"].get_shape())

    tensors = apply_augmentors(tensors, augmentors)
    tensors = assemble_input_tensors(tensors)

    summaries = []

    return tensors, summaries
def read_images_from_disk(input_queue, input_size, resize_mode, label_postproc_fn=lambda x: x, augmentors=(),
                          label_load_fn=load_label_default, img_load_fn=load_img_default):
    """Load an image, its label and optional extras, and assemble input tensors.

    ``input_queue`` holds the image path and the label path; when it has
    exactly three entries the third one is loaded as a flow image with
    ``img_load_fn``. If the loaded labels contain both ``Constants.DT_NEG``
    and ``Constants.DT_POS``, each distance transform gets a click map from
    ``create_clicks_map`` concatenated on axis 2 and the number of clicks is
    appended to the tag, separated by ":".

    Returns:
        Tuple ``(tensors, summaries)``; ``summaries`` is always an empty list.
    """
    im_path, label_path = input_queue[0], input_queue[1]
    img = img_load_fn(img_path=im_path)
    labels = label_load_fn(im_path, label_path)
    available = labels.keys()

    def single_channel_shape():
        # Static image shape with the channel dimension replaced by 1.
        return img.get_shape().as_list()[:-1] + [1]

    label = label_postproc_fn(labels['label'])
    label.set_shape(single_channel_shape())

    flow = img_load_fn(img_path=input_queue[2]) if len(input_queue) == 3 else None

    old_label = None
    if 'old_label' in available:
        old_label = labels['old_label']
        old_label.set_shape(single_channel_shape())

    def with_click_map(dt_key, clicks_key):
        # Create a click map, where the click points are denoted as 1 and the rest of it as 0,
        # and append it to the distance transform. This would majorly be used to show the
        # clicks in summaries.
        dist = labels[dt_key]
        dist.set_shape(single_channel_shape())
        [click_map] = tf.py_func(create_clicks_map, [labels[clicks_key], dist], [tf.float32],
                                 name="create_click_map")
        click_map.set_shape(single_channel_shape())
        return tf.concat([dist, click_map], axis=2)

    u0 = u1 = None
    if Constants.DT_NEG in available and Constants.DT_POS in available:
        u0 = with_click_map(Constants.DT_NEG, 'neg_clicks')
        u1 = with_click_map(Constants.DT_POS, 'pos_clicks')

        # Joining changes the tensor, so the static shape is carried over by hand.
        path_shape = im_path.get_shape()
        im_path = tf.string_join([im_path, tf.as_string(labels['num_clicks'])], separator=":", name="JoinPath")
        im_path.set_shape(path_shape)

    tensors = create_tensor_dict(unnormalized_img=img, label=label, old_label=old_label, u0=u0, u1=u1,
                                 tag=im_path, raw_label=label, flow=flow)
    tensors = resize(tensors, resize_mode, input_size)
    tensors = apply_augmentors(tensors, augmentors)
    return assemble_input_tensors(tensors), []
def create_input_tensors_dict(self, batch_size):
    """Build the image, label and tag tensors for the configured subset and mode.

    Training (``self.subset`` matches "train"), by ``self.batching_mode``:
      * "pair":   randomly sample two images, from the same person or from two
                  different persons; the label is the "same person" flag.
                  ``batch_size`` must be even and is halved (pairs per batch).
      * "group":  pick ``batch_size / self.group_size`` persons and
                  ``self.group_size`` images for each; the label is the index
                  of the person within the batch.
      * otherwise ("single"): sample one random image; the label is the
                  person id.

    Validation (any other subset), by ``self.validation_mode``:
      * "similarity": depending on ``self.use_end_network`` either emit zero
                  image pairs with a match label, or load the images indexed
                  by ``self.idx_placeholder``.
      * otherwise ("embedding"): load the images indexed by
                  ``self.idx_placeholder``.

    Args:
        batch_size: total number of images per batch.

    Returns:
        dict with the keys "inputs", "labels" and "tags". The image tensor is
        also stored in ``self.images``.

    Raises:
        AssertionError: if ``batch_size`` is not divisible by 2 (pair mode) or
            by ``self.group_size`` (group mode).
    """
    # NOTE(review): all mode checks below use `in` on strings, i.e. substring
    # tests ("" or "tr" also select "train"). Kept as-is because callers may
    # depend on it; `==` was probably intended -- confirm.
    img_shape = self.input_size + (3,)

    def image_file_name(pers_id, img_id):
        # "<5-digit person id>_<4-digit image id>.png"
        return (tf.as_string(pers_id, width=5, fill="0") + "_" +
                tf.as_string(img_id, width=4, fill="0") + ".png")

    def load_train_image(path):
        # Load a PNG, apply the configured augmentors and normalize the result.
        img = load_image_tensorflow(path, jpg=False)
        img.set_shape(img_shape)
        augmented = apply_augmentors({"unnormalized_img": img}, self.augmentors)
        return normalize(augmented["unnormalized_img"])

    ########################
    ####### TRAINING #######
    ########################
    if self.subset in "train":
        train_dir = self.data_dir + self.train_folder + '/'

        ####### Paired Batch-Mode #######
        if self.batching_mode in "pair":
            assert batch_size % 2 == 0
            # Floor division: `/` yields a float on Python 3, which would end
            # up in tf.train.batch and in the reshape dimensions below.
            batch_size = int(batch_size // 2)

            rand = tf.random_uniform([5], maxval=tf.int32.max, dtype=tf.int32)
            sample_same_person = rand[0] % 2
            pers_id_1 = ((rand[1] - 1) % self.num_train_id) + 1
            pers_1_n_imgs = self.train_counts[pers_id_1 - 1]
            img_id_1 = ((rand[2] - 1) % pers_1_n_imgs) + 1

            def if_same_person():
                # Draw from the remaining images of the same person, skipping
                # over img_id_1 so that the two images differ.
                pers_id_2 = pers_id_1
                img_id_2 = ((rand[4] - 1) % (pers_1_n_imgs - 1)) + 1
                img_id_2 = tf.cond(img_id_2 >= img_id_1, lambda: img_id_2 + 1, lambda: img_id_2)
                return pers_id_2, img_id_2

            def if_not_same_person():
                # Draw from the remaining persons, skipping over pers_id_1.
                pers_id_2 = ((rand[3] - 1) % (self.num_train_id - 1)) + 1
                pers_id_2 = tf.cond(pers_id_2 >= pers_id_1, lambda: pers_id_2 + 1, lambda: pers_id_2)
                pers_2_n_imgs = self.train_counts[pers_id_2 - 1]
                img_id_2 = ((rand[4] - 1) % pers_2_n_imgs) + 1
                return pers_id_2, img_id_2

            pers_id_2, img_id_2 = tf.cond(tf.cast(sample_same_person, tf.bool), if_same_person, if_not_same_person)

            img1 = image_file_name(pers_id_1, img_id_1)
            img2 = image_file_name(pers_id_2, img_id_2)
            tag = img1 + " " + img2 + " " + tf.as_string(sample_same_person)

            img_val1 = load_train_image(train_dir + img1)
            img_val2 = load_train_image(train_dir + img2)
            pair = tf.stack([img_val1, img_val2])
            label = sample_same_person

            imgs, labels, tags = tf.train.batch([pair, label, tag], batch_size=batch_size)

            # Merge the batch axis and the pair axis into one leading axis.
            shape = smart_shape(imgs)
            shape2 = shape[1:]
            shape2[0] *= batch_size
            imgs = tf.reshape(imgs, shape2)

        ####### Group Batch-Mode #######
        elif self.batching_mode in "group":
            assert batch_size % self.group_size == 0
            batch_size = int(batch_size // self.group_size)

            # NOTE(review): tf.range(1, n) excludes n, so the last person id
            # (and, below, the last image id) is never drawn, unlike in pair
            # mode -- possibly an off-by-one; confirm before changing.
            pers_ids = tf.random_shuffle(tf.range(1, self.num_train_id))[0:batch_size]

            def for_each_identity(p_idx):
                pers_id = pers_ids[p_idx]
                # The shuffled ids are tiled 4 times before taking group_size of them.
                img_ids = tf.tile(tf.random_shuffle(tf.range(1, self.train_counts[pers_id - 1])),
                                  [4])[0:self.group_size]

                def for_each_img(i_idx):
                    img_id = img_ids[i_idx]
                    tag = image_file_name(pers_id, img_id)
                    img = load_train_image(train_dir + tag)
                    label = p_idx
                    img.set_shape(img_shape)
                    return img, label, tag

                imgs, labels, tags = tf.map_fn(for_each_img, tf.range(0, self.group_size),
                                               dtype=(tf.float32, tf.int32, tf.string))
                return imgs, labels, tags

            imgs, labels, tags = tf.map_fn(for_each_identity, tf.range(0, batch_size),
                                           dtype=(tf.float32, tf.int32, tf.string))

            def reshape(x):
                # Merge the person axis and the per-person image axis.
                shape = smart_shape(x)
                shape2 = shape[1:]
                shape2[0] = self.group_size * batch_size
                x = tf.reshape(x, shape2)
                return x

            imgs = reshape(imgs)
            labels = reshape(labels)
            tags = reshape(tags)

        ####### Single Batch-Mode #######
        else:  # self.batching_mode in "single":
            rand = tf.random_uniform([2], maxval=tf.int32.max, dtype=tf.int32)
            pers_id_1 = ((rand[0] - 1) % self.num_train_id) + 1
            pers_1_n_imgs = self.train_counts[pers_id_1 - 1]
            img_id_1 = ((rand[1] - 1) % pers_1_n_imgs) + 1

            img1 = image_file_name(pers_id_1, img_id_1)
            tag = img1

            img_val1 = load_train_image(train_dir + img1)
            label = pers_id_1

            imgs, labels, tags = tf.train.batch([img_val1, label, tag], batch_size=batch_size)

    ##########################
    ####### Validation #######
    ##########################
    else:  # self.subset in "valid":

        ####### Similarity Validation-Mode #######
        if self.validation_mode in "similarity":
            path = self.test_case + '/'
            start_idx = self.idx_placeholder[0]
            end_idx = self.idx_placeholder[1]
            end_net = self.use_end_network

            def if_end_net():
                pdx = self.idx_placeholder[2]

                def _load_imgs(idx):
                    img1_idx = pdx + 1
                    img2_idx = idx + 1
                    # Label is 1 when both indices coincide, else 0.
                    label = tf.cond(abs(img1_idx - img2_idx) <= 0, lambda: img1_idx * 0 + 1, lambda: img1_idx * 0)

                    img1 = path + tf.as_string(img1_idx, width=4, fill="0") + "_1.png"
                    img2 = path + tf.as_string(img2_idx, width=4, fill="0") + "_2.png"
                    tag = img1 + " " + img2 + " " + tf.as_string(label)

                    # No image is loaded in this branch: the pair consists of zeros.
                    img_val1 = tf.zeros(img_shape)
                    img_val1.set_shape(img_shape)
                    img_val2 = tf.zeros(img_shape)
                    img_val2.set_shape(img_shape)
                    pair = tf.stack([img_val1, img_val2])

                    return pair, label, tag

                imgs, labels, tags = tf.map_fn(_load_imgs, tf.range(start_idx, end_idx),
                                               dtype=(tf.float32, tf.int32, tf.string))
                # Merge the index axis and the pair axis into one leading axis.
                shape = smart_shape(imgs)
                shape2 = shape[1:]
                shape2[0] *= end_idx - start_idx
                imgs = tf.reshape(imgs, shape2)
                return imgs, labels, tags

            def if_not_end_net():
                test_size = self.idx_placeholder[2]
                test_num = self.idx_placeholder[3]

                def _load_imgs(idx):
                    label = 0
                    img = path + tf.as_string(idx + 1, width=4, fill="0") + "_" + \
                        tf.as_string(test_size + test_num) + ".png"
                    tag = img
                    img = self.data_dir + img
                    img_val = load_normalized_image_tensorflow(img, jpg=False)
                    img_val.set_shape(img_shape)
                    return img_val, label, tag

                imgs, labels, tags = tf.map_fn(_load_imgs, tf.range(start_idx, end_idx),
                                               dtype=(tf.float32, tf.int32, tf.string))
                shape = smart_shape(imgs)
                imgs = tf.reshape(imgs, shape)
                return imgs, labels, tags

            imgs, labels, tags = tf.cond(end_net, if_end_net, if_not_end_net)

        ####### Embedding Validation-Mode #######
        else:  # self.validation_mode in "embedding":
            path = self.test_case + '/'
            start_idx = self.idx_placeholder[0]
            end_idx = self.idx_placeholder[1]
            test_size = self.idx_placeholder[2]
            # Not read in this branch; kept so the constructed graph is unchanged.
            test_num = self.idx_placeholder[3]

            def _load_imgs(idx):
                label = 0
                img = path + tf.as_string(test_size + 1, width=5, fill="0") + "_" + \
                    tf.as_string(idx + 1, width=4, fill="0") + ".png"
                tag = img
                img = self.data_dir + img
                img_val = load_normalized_image_tensorflow(img, jpg=False)
                img_val.set_shape(img_shape)
                return img_val, label, tag

            imgs, labels, tags = tf.map_fn(_load_imgs, tf.range(start_idx, end_idx),
                                           dtype=(tf.float32, tf.int32, tf.string))
            shape = smart_shape(imgs)
            imgs = tf.reshape(imgs, shape)

    tensors = {"inputs": imgs, "labels": labels, "tags": tags}
    self.images = imgs
    return tensors