def preproc_img(fname, boxes, klass, second_klass, is_crowd, aug):
    """Load an image, augment it with its boxes and build RPN training inputs.

    Args:
        fname: path of the image, read with ``cv2.imread``.
        boxes: float32 array of ground-truth boxes.
        klass: class labels, indexed in parallel with ``boxes``.
        second_klass: labels for the second head; only used when
            ``config.USE_SECOND_HEAD`` is set.
        is_crowd: per-box crowd flags; boxes whose flag is non-zero are
            dropped from the training targets.
        aug: augmentor providing ``augment_return_params`` and
            ``augment_coords``.

    Returns:
        ``(datapoint, params)`` where ``datapoint`` is
        ``[image, fm_labels, fm_boxes, boxes, klass]`` (plus ``second_klass``
        when the second head is enabled) and ``params`` are the augmentation
        parameters.  A bare ``None`` is returned when the sample raises
        ``MalformedData``.
    """
    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected to arrive as floating point coordinates.
    assert boxes.dtype == np.float32

    # Augment the image, then push the box corners through the same transform.
    image, params = aug.augment_return_params(image)
    corner_points = aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corner_points)

    use_second_head = config.USE_SECOND_HEAD
    try:
        # Anchors are computed against all boxes, crowd ones included.
        fm_labels, fm_boxes = get_rpn_anchor_input(image, boxes, is_crowd)
        # Crowd boxes are not used as training targets.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        if use_second_head:
            second_klass = second_klass[is_crowd == 0]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    datapoint = [image, fm_labels, fm_boxes, boxes, klass]
    if use_second_head:
        datapoint.append(second_klass)
    return datapoint, params
def preprocess(img):
    """Cut one annotated image into SNIPER chips and dump them to disk.

    Each chip that contains at least one ground-truth box is written to
    ``OUTPUT_IMG_DIR`` as ``<image name>_<chip index>`` and, when it has
    labels, one line ``<chip name> <class> <x1> <y1> <x2> <y2> ...`` is
    appended to ``out_file``.

    Args:
        img: dict with keys ``'file_name'``, ``'boxes'``, ``'class'`` and
            ``'is_crowd'``.

    Returns:
        A list of ``[chip_image, chip_boxes, chip_classes]`` entries, one per
        chip that was kept.
    """
    import copy

    fname, boxes, klass, is_crowd = (
        img['file_name'], img['boxes'], img['class'], img['is_crowd'])
    img_name = fname.split('/')[-1]
    img_id = int(img_name[3:-4])

    # Proposals of a pretrained RPN, used for negative chip extraction.
    # Work on a private copy: the in-place ``+=`` below would otherwise
    # modify the entry stored in ``proposal_pickle`` and shift the boxes again
    # every time the same image is processed.
    proposals = copy.deepcopy(
        proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)])
    # from [x,y,w,h] to [x1,y1,x2,y2]
    # NOTE(review): this slices the first axis; if ``proposals`` is an (N, 4)
    # array the intended update is probably ``proposals[:, 2:4] +=
    # proposals[:, 0:2]`` -- confirm against the pickle layout.
    proposals[2:4] += proposals[0:2]

    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    chip_generator = Im2Chip(im,
                             boxes,
                             klass,
                             proposals,
                             cfg.SNIPER.SCALES,
                             cfg.SNIPER.VALID_RANGES,
                             is_crowd=is_crowd,
                             chip_size=cfg.SNIPER.CHIP_SIZE,
                             chip_stride=cfg.SNIPER.CHIP_STRIDE)
    im, boxes, klass, _scale_indices, is_crowd = chip_generator.genChipMultiScale()

    rets = []
    for i in range(len(im)):
        # Chips without any ground-truth box are not written out.
        if len(boxes[i]) == 0:
            continue
        new_name = '%s_%d' % (img_name, i)
        cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

        num_labels = len(klass[i])
        if num_labels:
            # One annotation line per chip: name followed by class + box
            # for every object in it.
            fields = ''.join(
                ' %d %f %f %f %f' % (klass[i][j], boxes[i][j][0],
                                     boxes[i][j][1], boxes[i][j][2],
                                     boxes[i][j][3])
                for j in range(num_labels))
            out_file.write(new_name + fields + '\n')
        rets.append([im[i], boxes[i], klass[i]])
    return rets
def preprocess(roidb):
    """Turn one roidb entry into a training datapoint dict.

    Reads the image, applies the module-level augmentor ``aug`` to the image
    and its boxes, computes RPN anchor targets and drops crowd boxes from the
    ground truth.

    Returns:
        dict with keys ``'image'``, ``'anchor_labels'``, ``'anchor_boxes'``,
        ``'gt_boxes'`` and ``'gt_labels'``, or ``None`` when the sample raises
        ``MalformedData`` (for instance when no non-crowd box is left).
    """
    fname, boxes, klass, is_crowd = (
        roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd'])
    # Work on a copy of the boxes taken from the roidb entry.
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Augment image and boxes with the same parameters.
    image, params = aug.augment_return_params(image)
    boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    datapoint = {'image': image}
    try:
        anchor_labels, anchor_boxes = get_rpn_anchor_input(image, boxes, is_crowd)
        datapoint['anchor_labels'] = anchor_labels
        datapoint['anchor_boxes'] = anchor_boxes

        # Crowd boxes are skipped in the training target.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        datapoint['gt_boxes'] = boxes
        datapoint['gt_labels'] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(err)),
            'warn')
        return None
    return datapoint
def _add_detection_gt(self, img, add_mask):
    """
    Attach 'boxes', 'class' and 'is_crowd' for this image to ``img``.

    Annotations are fetched through ``self.coco``; each bbox is converted from
    (x, y, w, h) to corner form, clipped to the image and kept only if it
    passes the size checks below.  Kept annotation dicts are updated in place.
    If add_mask is True, also add 'segmentation' in coco poly format.
    """
    ann_ids = self.coco.getAnnIds(imgIds=img['id'], iscrowd=None)
    annotations = self.coco.loadAnns(ann_ids)

    img_w = img['width']
    img_h = img['height']
    kept = []
    for ann in annotations:
        if ann.get('ignore', 0) == 1:
            continue
        # bbox is stored as float (x1, y1, w, h): x1/y1 is the upper-left
        # corner and w/h the true extent, with (0.0, 0.0) assumed to be the
        # upper-left corner of the first pixel.
        left, top, box_w, box_h = ann['bbox']
        box = FloatBox(float(left), float(top),
                       float(left + box_w), float(top + box_h))
        box.clip_by_shape([img_h, img_w])

        # Require non-zero seg area and more than 1x1 box size.
        is_valid = ann['area'] > 1 and box.is_box() and box.area() >= 4
        if not is_valid:
            continue
        ann['bbox'] = [box.x1, box.y1, box.x2, box.y2]
        kept.append(ann)

        if not add_mask:
            continue
        segs = ann['segmentation']
        if isinstance(segs, list):
            # Polygons need at least three points (six numbers).
            valid_segs = [
                np.asarray(p).reshape(-1, 2) for p in segs if len(p) >= 6
            ]
            if len(valid_segs) < len(segs):
                log_once(
                    "Image {} has invalid polygons!".format(
                        img['file_name']), 'warn')
            ann['segmentation'] = valid_segs
        else:
            # Non-list segmentation is only expected on crowd annotations.
            assert ann['iscrowd'] == 1
            ann['segmentation'] = None

    # All geometrically-valid boxes are returned.
    boxes = np.asarray([ann['bbox'] for ann in kept],
                       dtype='float32')  # (n, 4)
    cls = np.asarray(
        [COCOMeta.category_id_to_class_id[ann['category_id']] for ann in kept],
        dtype='int32')  # (n,)
    is_crowd = np.asarray([ann['iscrowd'] for ann in kept], dtype='int8')

    img['boxes'] = boxes  # nx4
    img['class'] = cls  # n
    img['is_crowd'] = is_crowd  # n
    if add_mask:
        img['segmentation'] = [ann['segmentation'] for ann in kept]
def __init__(self, input_tensors, output_tensors, return_input=False, sess=None):
    """
    Args:
        input_tensors (list): list of names.
        output_tensors (list): list of names.
        return_input (bool): same as :attr:`PredictorBase.return_input`.
        sess (tf.Session): the session this predictor runs in. If None,
            will use the default session at the first call.
    """
    self.return_input = return_input
    self.input_tensors = input_tensors
    self.output_tensors = output_tensors
    self.sess = sess

    # NOTE(review): if get_tf_version_number() returns a float, versions such
    # as 1.10 would compare as 1.1 here -- confirm the helper's return type.
    self._use_callable = get_tf_version_number() >= 1.2
    if not self._use_callable:
        log_once(
            "TF>=1.2 is recommended for better performance of predictor!",
            'warn')
        return

    # Without a session the callable is left as None at construction time.
    self._callable = (
        sess.make_callable(fetches=output_tensors,
                           feed_list=input_tensors,
                           accept_options=self.ACCEPT_OPTIONS)
        if sess is not None else None)
def preprocess(roidb):
    """Build a training datapoint dict (optionally FPN / mask) from a roidb entry.

    Returns:
        dict with ``'image'``, the anchor targets (``'anchor_labels'`` /
        ``'anchor_boxes'``, or one ``*_lvl{k}`` pair per level starting at
        level 2 when ``cfg.MODE_FPN``), ``'gt_boxes'``, ``'gt_labels'`` and,
        when ``cfg.MODE_MASK``, ``'gt_masks'``.  ``None`` when the sample
        raises ``MalformedData``.
    """
    fname, boxes, klass, is_crowd = (
        roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd'])
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Same augmentation parameters for the image and the box corners.
    image, params = aug.augment_return_params(image)
    boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    datapoint = {'image': image}
    try:
        if cfg.MODE_FPN:
            per_level = get_multilevel_rpn_anchor_input(image, boxes, is_crowd)
            # Levels are numbered from 2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                datapoint['anchor_labels_lvl{}'.format(level)] = lvl_labels
                datapoint['anchor_boxes_lvl{}'.format(level)] = lvl_boxes
        else:
            single_labels, single_boxes = get_rpn_anchor_input(image, boxes, is_crowd)
            datapoint['anchor_labels'] = single_labels
            datapoint['anchor_boxes'] = single_boxes

        # Crowd boxes are skipped in the training target.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        datapoint['gt_boxes'] = boxes
        datapoint['gt_labels'] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    if cfg.MODE_MASK:
        # Augmentation modifies the polygons in place, so work on a deep copy.
        all_polys = copy.deepcopy(roidb['segmentation'])
        kept_polys = [
            polys for k, polys in enumerate(all_polys) if not is_crowd[k]
        ]
        assert len(kept_polys) == len(boxes)

        # Augment the polygon coordinates and rasterise one image-sized
        # binary mask per box.
        rasterised = []
        for polys in kept_polys:
            moved = [aug.augment_coords(p, params) for p in polys]
            rasterised.append(
                segmentation_to_mask(moved, image.shape[0], image.shape[1]))
        datapoint['gt_masks'] = np.asarray(rasterised, dtype='uint8')  # values in {0, 1}
    return datapoint
def _add_detection_gt(self, img, add_mask):
    """
    Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
    If add_mask is True, also add 'segmentation' in mafat poly format.

    Each annotation bbox is read as (x1, y1, w, h), converted to corner form,
    clipped to the image and kept only if it passes the size checks below.
    Kept annotation dicts are updated in place ('bbox', and 'segmentation'
    when add_mask is set).
    """
    # Direct lookup instead of getAnnIds() + loadAnns().
    objs = self.coco.imgToAnns[img['id']]

    # clean-up boxes
    valid_objs = []
    width = img['width']
    height = img['height']
    for obj in objs:
        if obj.get('ignore', 0) == 1:
            continue
        x1, y1, w, h = obj['bbox']
        # bbox is originally in float
        # x1/y1 means upper-left corner and w/h means true w/h. This can be
        # verified by segmentation pixels.
        # But we do make an assumption here that (0.0, 0.0) is upper-left
        # corner of the first pixel.

        # Take the far corner from the *unclipped* origin: clipping x1/y1
        # first would move the far corner of any box that starts outside
        # the image.
        x2 = float(x1 + w)
        y2 = float(y1 + h)
        x1 = np.clip(float(x1), 0, width)
        y1 = np.clip(float(y1), 0, height)
        x2 = np.clip(x2, 0, width)
        y2 = np.clip(y2, 0, height)
        w = x2 - x1
        h = y2 - y1

        # Require non-zero seg area and more than 1x1 box size
        if obj['area'] > 1 and w > 0 and h > 0 and w * h >= 4:
            obj['bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)

            if add_mask:
                segs = obj['segmentation']
                if not isinstance(segs, list):
                    # Non-list segmentation is only expected on crowd objects.
                    assert obj['iscrowd'] == 1
                    obj['segmentation'] = None
                else:
                    # Polygons need at least three points (six numbers).
                    valid_segs = [
                        np.asarray(p).reshape(-1, 2).astype('float32')
                        for p in segs if len(p) >= 6
                    ]
                    if len(valid_segs) < len(segs):
                        log_once("Image {} has invalid polygons!".format(img['file_name']), 'warn')
                    obj['segmentation'] = valid_segs

    # all geometrically-valid boxes are returned
    boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32')  # (n, 4)
    cls = np.asarray([
        MAFATMeta.category_id_to_class_id[obj['category_id']]
        for obj in valid_objs], dtype='int32')  # (n,)
    is_crowd = np.asarray([obj['iscrowd'] for obj in valid_objs], dtype='int8')

    # add the keys
    img['boxes'] = boxes  # nx4
    img['class'] = cls  # n
    img['is_crowd'] = is_crowd  # n
    if add_mask:
        # also required to be float32
        img['segmentation'] = [
            obj['segmentation'] for obj in valid_objs]
def preprocess(img):
    """Build a list-style training datapoint from an image dict.

    Returns:
        ``[image, *anchor_inputs, gt_boxes, gt_labels]`` with the per-box
        masks appended when the enclosing ``add_mask`` flag is set, or
        ``None`` when the sample raises ``MalformedData``.
    """
    fname, boxes, klass, is_crowd = (
        img['file_name'], img['boxes'], img['class'], img['is_crowd'])

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected as floating point coordinates.
    assert boxes.dtype == np.float32

    image, params = aug.augment_return_params(image)
    boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    try:
        if config.MODE_FPN:
            # Flatten the per-level (labels, boxes) pairs into one sequence.
            anchor_inputs = itertools.chain.from_iterable(
                get_multilevel_rpn_anchor_input(image, boxes, is_crowd))
        else:
            # (anchor_labels, anchor_boxes)
            anchor_inputs = get_rpn_anchor_input(image, boxes, is_crowd)
            assert len(anchor_inputs) == 2

        # Crowd boxes are skipped in the training target.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    datapoint = [image]
    datapoint.extend(anchor_inputs)
    datapoint.extend([boxes, klass])

    # TODO pad im when FPN

    if add_mask:
        # Augmentation modifies the polygons in place, so work on a deep copy.
        all_polys = copy.deepcopy(img['segmentation'])
        kept_polys = [
            polys for k, polys in enumerate(all_polys) if not is_crowd[k]
        ]
        assert len(kept_polys) == len(boxes)

        # Augment the polygon coordinates and rasterise one image-sized
        # binary mask per box.
        rasterised = []
        for polys in kept_polys:
            moved = [aug.augment_coords(p, params) for p in polys]
            rasterised.append(
                segmentation_to_mask(moved, image.shape[0], image.shape[1]))
        datapoint.append(np.asarray(rasterised, dtype='uint8'))  # values in {0, 1}
    return datapoint
def preprocess(img):
    """Build a list-style training datapoint from an image dict.

    Args:
        img: dict with ``'file_name'``, ``'boxes'``, ``'class'``,
            ``'is_crowd'`` and optionally ``'segmentation'``.

    Returns:
        ``[image, fm_labels, fm_boxes, gt_boxes, gt_labels]`` with a uint8
        mask array appended when the dict carries a segmentation, or ``None``
        when the sample raises ``MalformedData``.
    """
    import copy

    fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
        'class'], img['is_crowd']
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32

    # augmentation: image and box corners share the same parameters
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)

    # rpn anchor:
    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

        boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
        klass = klass[is_crowd == 0]

        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once(
            "Input {} is invalid for training: {}".format(fname, str(e)),
            'warn')
        return None

    ret = [im, fm_labels, fm_boxes, boxes, klass]

    # masks
    segmentation = img.get('segmentation', None)
    if segmentation is not None:
        # Augmentation may modify the polygons in place; deep-copy them so the
        # coordinates stored in ``img`` are not transformed again the next
        # time this entry is processed.
        segmentation = copy.deepcopy(segmentation)
        segmentation = [
            segmentation[k] for k in range(len(segmentation))
            if not is_crowd[k]
        ]
        assert len(segmentation) == len(boxes)

        # one image-sized binary mask per box
        masks = []
        for polys in segmentation:
            polys = [aug.augment_coords(p, params) for p in polys]
            masks.append(
                segmentation_to_mask(polys, im.shape[0], im.shape[1]))
        masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
        ret.append(masks)
    return ret
def _add_detection_gt(self, img, add_mask):
    """
    Attach 'boxes', 'class' and 'is_crowd' for this image to ``img``.

    Each annotation bbox is converted from (x, y, w, h) to corner form,
    clipped to the image and kept only if it passes the size checks below.
    Kept annotation dicts are updated in place.
    If add_mask is True, also add 'segmentation' in coco poly format.
    """
    # Direct lookup instead of getAnnIds() + loadAnns().
    annotations = self.coco.imgToAnns[img['id']]

    img_w = img['width']
    img_h = img['height']
    kept = []
    for ann in annotations:
        if ann.get('ignore', 0) == 1:
            continue
        # bbox is stored as float (x1, y1, w, h): x1/y1 is the upper-left
        # corner and w/h the true extent, with (0.0, 0.0) assumed to be the
        # upper-left corner of the first pixel.
        left, top, box_w, box_h = ann['bbox']
        box = FloatBox(float(left), float(top),
                       float(left + box_w), float(top + box_h))
        box.clip_by_shape([img_h, img_w])

        # Require non-zero seg area and more than 1x1 box size.
        is_valid = ann['area'] > 1 and box.is_box() and box.area() >= 4
        if not is_valid:
            continue
        ann['bbox'] = [box.x1, box.y1, box.x2, box.y2]
        kept.append(ann)

        if not add_mask:
            continue
        segs = ann['segmentation']
        if isinstance(segs, list):
            # Polygons need at least three points (six numbers).
            valid_segs = [
                np.asarray(p).reshape(-1, 2).astype('float32')
                for p in segs if len(p) >= 6
            ]
            if len(valid_segs) < len(segs):
                log_once("Image {} has invalid polygons!".format(img['file_name']), 'warn')
            ann['segmentation'] = valid_segs
        else:
            # Non-list segmentation is only expected on crowd annotations.
            assert ann['iscrowd'] == 1
            ann['segmentation'] = None

    # All geometrically-valid boxes are returned.
    boxes = np.asarray([ann['bbox'] for ann in kept], dtype='float32')  # (n, 4)
    cls = np.asarray(
        [COCOMeta.category_id_to_class_id[ann['category_id']] for ann in kept],
        dtype='int32')  # (n,)
    is_crowd = np.asarray([ann['iscrowd'] for ann in kept], dtype='int8')

    img['boxes'] = boxes  # nx4
    img['class'] = cls  # n
    img['is_crowd'] = is_crowd  # n
    if add_mask:
        # also required to be float32
        img['segmentation'] = [ann['segmentation'] for ann in kept]
def preprocess(img):
    """Build a list-style training datapoint from an image dict.

    Returns:
        ``[image, fm_labels, fm_boxes, gt_boxes, gt_labels]`` with the per-box
        masks appended when the enclosing ``add_mask`` flag is set, or
        ``None`` when the sample raises ``MalformedData``.
    """
    fname, boxes, klass, is_crowd = (
        img['file_name'], img['boxes'], img['class'], img['is_crowd'])

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected as floating point coordinates.
    assert boxes.dtype == np.float32

    image, params = aug.augment_return_params(image)
    boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(image, boxes, is_crowd)
        # Crowd boxes are skipped in the training target.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    datapoint = [image, fm_labels, fm_boxes, boxes, klass]

    if add_mask:
        # Augmentation modifies the polygons in place, so work on a deep copy.
        all_polys = copy.deepcopy(img['segmentation'])
        kept_polys = [
            polys for k, polys in enumerate(all_polys) if not is_crowd[k]
        ]
        assert len(kept_polys) == len(boxes)

        # Augment the polygon coordinates and rasterise one image-sized
        # binary mask per box.
        rasterised = []
        for polys in kept_polys:
            moved = [aug.augment_coords(p, params) for p in polys]
            rasterised.append(
                segmentation_to_mask(moved, image.shape[0], image.shape[1]))
        datapoint.append(np.asarray(rasterised, dtype='uint8'))  # values in {0, 1}
    return datapoint
def add_anchor_to_dp(dp):
    """Append RPN anchor targets to a datapoint and drop its crowd boxes.

    Args:
        dp: sequence ``(im, boxes, klass, is_crowd, fname)``.

    Returns:
        ``[im, fm_labels, fm_boxes, boxes, klass]`` or ``None`` when the
        sample raises ``MalformedData``.
    """
    im, boxes, klass, is_crowd, fname = dp
    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)
        # Crowd boxes are skipped in the training target.
        boxes, klass = boxes[is_crowd == 0], klass[is_crowd == 0]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is invalid for training: {}".format(fname, str(err)), 'warn')
        return None
    return [im, fm_labels, fm_boxes, boxes, klass]
def preprocess(img):
    """Build a training datapoint from an instance-mask annotated image.

    The annotation is looked up with ``getAnnotation(df, fname)``; image and
    mask go through ``strong_aug``, a resize and padding before boxes,
    classes and masks are derived from the mask.

    Returns:
        ``[image, fm_labels, fm_boxes, boxes, klass, masks]`` or ``None`` when
        there is no annotation, no usable box, or ``MalformedData`` is raised.
    """
    image_path, fname = img['image_data'], img['id']
    multi_mask = getAnnotation(df, fname)
    if multi_mask is None:
        return None
    image = cv2.imread(image_path)

    # Joint augmentation of image and mask.
    augmented = strong_aug()(image=image, mask=multi_mask)
    image, multi_mask = augmented['image'], augmented['mask']

    # Resize, then pad both to the required factor.
    image, multi_mask = fix_resize_transform_range(image, multi_mask, [768, 2000], 1.0)
    image = pad_to_factor(image)
    multi_mask = pad_to_factor(multi_mask)

    boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
    has_boxes = len(boxes) != 0
    if not has_boxes or np.min(np_area(boxes)) <= 0:
        log_once("Input have zero area box: {}".format(fname), 'warn')
        return None

    # rpn anchor:
    try:
        if config.FPN:
            anchor_targets = get_rpn_anchor_input_FPN(image, boxes, is_crowd)
        else:
            anchor_targets = get_rpn_anchor_input(image, boxes, is_crowd)
        fm_labels, fm_boxes = anchor_targets
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    return [image, fm_labels, fm_boxes, boxes, klass, masks]
def preprocess(img):
    """Build a list-style training datapoint, optionally with dense masks.

    Returns:
        ``[image, fm_labels, fm_boxes, gt_boxes, gt_labels]`` with a list of
        augmented masks appended when the enclosing ``add_mask`` flag is set,
        or ``None`` when the sample raises ``MalformedData``.
    """
    fname, boxes, klass, is_crowd = (
        img['file_name'], img['boxes'], img['class'], img['is_crowd'])

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected as floating point coordinates.
    assert boxes.dtype == np.float32

    image, params = aug.augment_return_params(image)
    boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))

    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(image, boxes, is_crowd)
        # Crowd boxes are skipped in the training target.
        boxes = boxes[is_crowd == 0]
        klass = klass[is_crowd == 0]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(err)),
            'warn')
        return None

    datapoint = [image, fm_labels, fm_boxes, boxes, klass]

    if add_mask:
        # Each mask is stacked to three channels, augmented, and reduced back
        # to its first channel.
        # NOTE(review): ``aug.augment`` is not given ``params``, so it is not
        # evident that masks get the same transform as the image -- confirm.
        masks = [
            aug.augment(np.dstack([m, m, m]))[..., 0] for m in img['masks']
        ]
        assert len(boxes) == np.asarray(masks).shape[0]
        datapoint.append(masks)
    return datapoint
def fp16_getter(getter, *args, **kwargs):
    """Custom variable getter storing fp16 weights as fp32 variables.

    Variables whose name ends in ``/W`` or ``/b`` and that are requested as
    ``tf.float16`` are created as ``tf.float32`` and returned cast to
    ``tf.float16``.  Every other request is forwarded to ``getter`` untouched.
    """
    name = args[0] if len(args) else kwargs['name']

    # Following convention, convolution & fc (W / b) are quantized.
    # BatchNorm (gamma & beta) are not quantized.
    is_weight_or_bias = name.endswith('/W') or name.endswith('/b')
    if not is_weight_or_bias:
        return getter(*args, **kwargs)
    if kwargs['dtype'] != tf.float16:
        return getter(*args, **kwargs)

    # Create the variable in fp32 and hand back an fp16 cast of it.
    kwargs['dtype'] = tf.float32
    variable = tf.cast(getter(*args, **kwargs), tf.float16)
    log_once("Variable {} casted to fp16 ...".format(name))
    return variable
def preprocess(roidb):
    """Build a training datapoint dict from a single-channel image entry.

    The image is loaded with ``imread`` and replicated to three channels.
    Boxes (and polygons, in mask mode) are scaled by the image size when
    ``cfg.DATA.ABSOLUTE_COORD`` is off, then augmented together with the
    image.

    Returns:
        dict with ``'image'``, the anchor targets (``'anchor_labels'`` /
        ``'anchor_boxes'``, or one ``*_lvl{k}`` pair per level starting at
        level 2 when ``cfg.MODE_FPN``), ``'gt_boxes'``, ``'gt_labels'`` and,
        when ``cfg.MODE_MASK``, ``'gt_masks'``.  ``None`` when the sample
        raises ``MalformedData``.
    """
    fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
        'boxes'], roidb['class'], roidb['is_crowd']
    boxes = np.copy(boxes)
    im = imread(fname)
    assert im is not None, fname
    # Replicate the single channel three times.
    im = np.expand_dims(im, axis=2)
    im = np.repeat(im, 3, axis=2)
    im = im.astype('float32')
    # Size of the un-augmented image; needed below to scale relative
    # coordinates (boxes here, polygons in mask mode).
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    if not cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # augmentation: image and box corners share the same parameters
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {'image': im}
    # rpn anchor:
    try:
        if cfg.MODE_FPN:
            multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                im, boxes, is_crowd)
            # Levels are numbered from 2.
            for i, (anchor_labels,
                    anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
        else:
            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret[
                'anchor_boxes'] = get_rpn_anchor_input(
                    im, boxes, is_crowd)

        boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        ret['gt_boxes'] = boxes
        ret['gt_labels'] = klass
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            'warn')
        return None

    if cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb['segmentation'])
        segmentation = [
            segmentation[k] for k in range(len(segmentation))
            if not is_crowd[k]
        ]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        masks = []
        width_height = np.asarray([width, height], dtype=np.float32)
        for polys in segmentation:
            if not cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [aug.augment_coords(p, params) for p in polys]
            masks.append(
                segmentation_to_mask(polys, im.shape[0], im.shape[1]))
        masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
        ret['gt_masks'] = masks
    return ret
def Conv(inputs,
         filters,
         kernel_size,
         strides=(1, 1),
         padding='same',
         data_format='channels_last',
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
         kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         split=1,
         norm=False):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    4. With ``norm=True`` the kernel is softmax-normalised over four input
       channel groups and the bias is disabled.

    Regularizers are not supported and must be left as None.

    Variable Names:

    * ``parseweigth``: weights (exposed as ``ret.variables.W``)
    * ``parsebias``: bias (exposed as ``ret.variables.b`` when used)
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(
                2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    # group conv implementation
    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (
        1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(strides, data_format=data_format)

    kwargs = {"data_format": data_format}
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

    # matching input dtype (ex. tf.float16) since the default dtype of
    # variable is tf.float32
    inputs_dtype = inputs.dtype
    W = tf.get_variable('parseweigth',
                        filter_shape,
                        dtype=inputs_dtype,
                        initializer=kernel_initializer)
    if norm:
        use_bias = False
        # Softmax over four groups of input channels.
        # NOTE(review): the reshape uses ``in_channel`` rather than
        # ``in_channel // split`` and a fixed group count of 4, so it looks
        # valid only for ``split == 1`` and ``in_channel % 4 == 0`` -- confirm.
        W = tf.reshape(W, kernel_shape + [4, in_channel // 4, out_channel])
        W = tf.nn.softmax(W, 2)
        W = tf.reshape(W, filter_shape)

    if use_bias:
        b = tf.get_variable('parsebias', [out_channel],
                            dtype=inputs_dtype,
                            initializer=bias_initializer)

    if split == 1:
        conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
    else:
        try:
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        except ValueError:
            log_once(
                "CUDNN group convolution support is only available with "
                "https://github.com/tensorflow/tensorflow/pull/25818 . "
                "Will fall back to a loop-based slow implementation instead!",
                'warn')
            # The fallback announced above: convolve each input group with
            # its slice of the kernel and concatenate along the channel axis.
            # Without it ``conv`` would be unbound below.
            input_groups = tf.split(inputs, split, channel_axis)
            kernel_groups = tf.split(W, split, 3)
            group_outputs = [
                tf.nn.conv2d(grp_in, grp_k, stride, padding.upper(), **kwargs)
                for grp_in, grp_k in zip(input_groups, kernel_groups)
            ]
            conv = tf.concat(group_outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b,
                         data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
df = pd.read_csv(csv_path, engine="python") df = df.dropna(axis=0) df = df.set_index('ImageId') from tqdm import tqdm for img in tqdm(imgs, total=len(imgs)): im, fname = img['image_data'], img['id'] multi_mask = getAnnotation(df, fname) im = cv2.imread(im) #============================ # Resize augmented = strong_aug()(image=im, mask=multi_mask) im, multi_mask = augmented['image'], augmented['mask'] boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask) if len(boxes) == 0 or np.min(np_area(boxes)) <= 0: log_once("Input have zero area box: {}".format(fname), 'warn') print(boxes) exit() """ from viz import draw_annotation, draw_mask viz = draw_annotation(im, boxes, klass) for ind, mask in enumerate(masks): viz = draw_mask(viz, mask) cv2.imwrite("./eval_gt/{}.jpg".format(fname), viz) """ """ # for each gt, find all those anchors (including ties) that has the max ious with it ANCHOR_SIZES = (32,64,128,256,512) RAIOS = (0.5,1,2) #ANCHOR_SIZES = (16, 32, 64, 128, 256) from tensorpack.dataflow import PrintData
def __call__(self, roidb):
    """Turn one roidb entry into a training datapoint dict.

    The image and boxes first go through the weak augmentation
    (``self.aug_weak``).  Unless ``self.aug_type`` is ``"default"`` the strong
    augmentation (``self.aug_strong``) is tried on top; if anything in that
    step fails, the weakly-augmented image, boxes, classes and crowd flags
    are used instead.

    Returns:
        dict with ``"image"``, the anchor targets (``"anchor_labels"`` /
        ``"anchor_boxes"``, or one ``*_lvl{k}`` pair per level starting at
        level 2 when ``self.cfg.MODE_FPN``), ``"gt_boxes"`` and
        ``"gt_labels"``.  ``None`` when building the targets raises.
    """
    fname, boxes, klass, is_crowd = roidb["file_name"], roidb[
        "boxes"], roidb["class"], roidb["is_crowd"]
    assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"

    if not self.cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    ret = {}

    # Weak augmentation: one transform applied to image and box corners.
    tfms = self.aug_weak.get_transform(im)
    im = tfms.apply_image(im)
    points = box_to_point8(boxes)
    points = tfms.apply_coords(points)
    boxes = point8_to_box(points)
    h, w = im.shape[:2]

    if self.aug_type != "default":
        # Snapshot everything the strong augmentation may replace, so that a
        # failure part-way through cannot leave boxes, classes and crowd
        # flags with different lengths.
        boxes_backup = boxes.copy()
        klass_backup = klass
        is_crowd_backup = is_crowd
        try:
            assert len(boxes) > 0, "boxes after resizing becomes to zero"
            assert np.sum(np_area(boxes)) > 0, "boxes are all zero area!"
            bbs = array_to_bb(boxes)
            images_aug, bbs_aug, _ = self.aug_strong(images=[im],
                                                     bounding_boxes=[bbs],
                                                     n_real_box=len(bbs))

            # convert to gt boxes array and clip to the image
            boxes = bb_to_array(bbs_aug[0])
            boxes[:, 0] = np.clip(boxes[:, 0], 0, w)
            boxes[:, 1] = np.clip(boxes[:, 1], 0, h)
            boxes[:, 2] = np.clip(boxes[:, 2], 0, w)
            boxes[:, 3] = np.clip(boxes[:, 3], 0, h)
            # after affine, some boxes can be zero area. Let's remove them
            # and their corresponding info
            boxes, mask = remove_empty_boxes(boxes)
            klass = klass[mask]
            assert len(
                klass
            ) > 0, "Empty boxes and kclass after removing empty ones"
            is_crowd = np.array(
                [0] * len(klass))  # do not have crowd annotations
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
            im = images_aug[0]
        except Exception as e:
            logger.warn("Error catched " + str(e) +
                        "\n Use non-augmented data.")
            boxes = boxes_backup
            klass = klass_backup
            is_crowd = is_crowd_backup

    ret["image"] = im

    try:
        # Add rpn data to dataflow:
        if self.cfg.MODE_FPN:
            multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
                im, boxes, is_crowd)
            # Levels are numbered from 2.
            for i, (anchor_labels,
                    anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
        else:
            ret["anchor_labels"], ret[
                "anchor_boxes"] = self.get_rpn_anchor_input(
                    im, boxes, is_crowd)

        boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        ret["gt_boxes"] = boxes
        ret["gt_labels"] = klass
    except Exception as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            "warn")
        return None

    return ret
def _preprocess_common(ref_box, target_box, ref_im, target_im, aug):
    """Augment a reference/target image pair and build the target's RPN inputs.

    Args:
        ref_box: box of the object in the reference image.
        target_box: box of the object in the target image.
        ref_im: reference image.
        target_im: target image.
        aug: augmentor providing ``augment_return_params``.

    Returns:
        dict with 'ref_image', 'ref_box', 'image' and, unless
        ``cfg.DATA.DEBUG_VIS`` is set, the RPN anchor inputs plus 'gt_boxes'
        and 'gt_labels'. None if either box is lost during augmentation or
        the sample is rejected as MalformedData.
    """

    def _maybe_motion_blur(image):
        # With probability 0.25, blur with a horizontal-line kernel of random size.
        if np.random.rand() >= 0.25:
            return image
        size = np.random.randint(5, 15)
        kernel = np.zeros((size, size))
        kernel[int((size - 1) / 2), :] = np.ones(size)
        kernel = kernel / size
        return cv2.filter2D(image, -1, kernel)

    def _show_with_box(image, box, plt):
        # Tint channel 2 inside the box and display the image with channels reversed.
        vis = image.copy()
        top, bottom = int(box[1]), int(box[3])
        left, right = int(box[0]), int(box[2])
        vis[top:bottom, left:right, 2] = \
            (0.5 * vis[top:bottom, left:right, 2] + 120).astype(np.uint8)
        plt.imshow(vis[..., ::-1])
        plt.show()

    ref_boxes = np.array([ref_box], dtype=np.float32)
    target_boxes = np.array([target_box], dtype=np.float32)
    klass = np.array([1], dtype=np.int32)

    # augmentation: each image gets its own independently drawn parameters
    target_im, target_params = aug.augment_return_params(target_im)
    ref_im, ref_params = aug.augment_return_params(ref_im)
    ref_boxes = _augment_boxes(ref_boxes, aug, ref_params)
    target_boxes = _augment_boxes(target_boxes, aug, target_params)
    if ref_boxes is None or target_boxes is None:
        return None

    # additional augmentations:
    if cfg.DATA.MOTION_BLUR_AUGMENTATIONS:
        # reference first, then target, each decided independently
        ref_im = _maybe_motion_blur(ref_im)
        target_im = _maybe_motion_blur(target_im)

    if cfg.DATA.GRAYSCALE_AUGMENTATIONS:
        # one draw decides for both images
        if np.random.rand() < 0.25:
            to_gray = imgaug.Grayscale()
            ref_im = np.tile(to_gray.augment(ref_im), [1, 1, 3])
            target_im = np.tile(to_gray.augment(target_im), [1, 1, 3])

    if cfg.DATA.DEBUG_VIS:
        import matplotlib.pyplot as plt
        _show_with_box(ref_im, ref_boxes[0], plt)
        _show_with_box(target_im, target_boxes[0], plt)

    is_crowd = np.array([0], dtype=np.int32)
    ret = {
        'ref_image': ref_im,
        'ref_box': ref_boxes[0],
        'image': target_im,
    }
    if cfg.DATA.DEBUG_VIS:
        # in debug mode the RPN targets are not computed
        return ret

    # rpn anchor:
    try:
        if cfg.MODE_FPN:
            per_level = get_multilevel_rpn_anchor_input(
                target_im, target_boxes, is_crowd)
            for level, (labels, anchors) in enumerate(per_level, start=2):
                ret['anchor_labels_lvl{}'.format(level)] = labels
                ret['anchor_boxes_lvl{}'.format(level)] = anchors
        else:
            labels, anchors = get_rpn_anchor_input(
                target_im, target_boxes, is_crowd)
            ret['anchor_labels'] = labels
            ret['anchor_boxes'] = anchors

        ret['gt_boxes'] = target_boxes
        ret['gt_labels'] = klass
        if not len(target_boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input is filtered for training: {}".format(str(e)), 'warn')
        return None

    return ret
def _maybe_add_hard_example_data(data, ref_fname, vid_name, hard_example_index,
                                 hard_example_names, dataset_name):
    """Attach pre-computed hard-negative (and optionally hard-positive) features.

    Args:
        data: datapoint dict; a shallow copy is extended, the input is not modified.
        ref_fname: path of the reference frame; its basename identifies the frame.
        vid_name: name of the video the reference frame belongs to.
        hard_example_index: nearest-neighbour index exposing ``get_nns_by_item``.
        hard_example_names: dict with an "all" list of "<dataset>/<video>/<frame>"
            names and, per dataset, a list of "<video>/<frame>" names.
            The per-dataset list is searched with bisect, so it is presumably
            sorted -- confirm against the code that builds it.
        dataset_name: one of "GOT10k", "ImageNetVID", "LaSOT", "YouTubeVOS".

    Returns:
        ``data`` unchanged when hard mining is disabled; otherwise a copy with
        'hard_negative_features' and, if enabled, the 'hard_positive_*' entries.
        None when the reference frame is not present in the index.

    Raises:
        AssertionError: for a dataset name without a known file-name format.
    """
    if not cfg.MODE_HARD_MINING:
        return data
    data = data.copy()

    # printf-style pattern of the per-frame feature file name, per dataset
    frame_fmts = {
        "GOT10k": "%08d",
        "ImageNetVID": "%06d",
        "LaSOT": "%08d",
        "YouTubeVOS": "%05d",
    }

    def _frame_fmt(name):
        # Raised explicitly so the check is not stripped under ``python -O``.
        if name not in frame_fmts:
            raise AssertionError(("unknown dataset", name))
        return frame_fmts[name]

    def _load_arrays(path, keys):
        # Close the archive right after reading; a bare np.load keeps it open.
        with np.load(path) as archive:
            return [archive[k] for k in keys]

    this_fmt = _frame_fmt(dataset_name)
    basename = ref_fname.split("/")[-1]
    if dataset_name == "GOT10k":
        # GOT10k index names keep the zero-padded frame string as-is
        frame_key = basename.replace(".jpg", "")
    elif dataset_name == "ImageNetVID":
        frame_key = str(int(basename.replace(".JPEG", "")))
    else:  # LaSOT, YouTubeVOS
        frame_key = str(int(basename.replace(".jpg", "")))
    name_for_idx = dataset_name + "/" + vid_name + "/" + frame_key

    all_names = hard_example_names["all"]
    try:
        idx = all_names.index(name_for_idx)
    except ValueError:
        log_once("Not found in index: {}".format(name_for_idx), 'warn')
        return None

    if dataset_name == "LaSOT":
        nns = hard_example_index.get_nns_by_item(idx,
                                                 cfg.HARD_MINING_KNN_LASOT)
    else:
        nns = hard_example_index.get_nns_by_item(idx, cfg.HARD_MINING_KNN)

    if cfg.MODE_HARD_NEGATIVES_ONLY_CROSSOVER or \
            (cfg.MODE_HARD_NEGATIVES_ONLY_CROSSOVER_YOUTUBEVOS and dataset_name == "YouTubeVOS"):
        # keep only neighbours coming from a different dataset
        nns = [nn for nn in nns
               if all_names[nn].split("/")[0] != dataset_name]
        remove_query = False
    else:
        remove_query = True

    nns = subsample_nns(vid_name, nns, all_names,
                        cfg.N_HARD_NEGATIVES_TO_SAMPLE,
                        remove_query=remove_query)
    feats = []
    for nn in nns:
        sp = all_names[nn].split("/")
        fmt = _frame_fmt(sp[0])
        feat_fn = os.path.join(cfg.HARD_MINING_DATA_PATH, sp[0],
                               "det_feats_compressed", sp[1],
                               fmt % int(sp[2]) + ".npz")
        feat, = _load_arrays(feat_fn, ("f",))
        feats.append(feat)
    feats = np.stack(feats, axis=0)
    data['hard_negative_features'] = feats

    if cfg.MODE_IF_HARD_MINING_THEN_ALSO_POSITIVES:
        hard_example_names_dataset = hard_example_names[dataset_name]
        # Locate the contiguous run of entries starting with vid_name.
        left = right = bisect.bisect_left(hard_example_names_dataset,
                                          vid_name)
        while left > 0 and \
                hard_example_names_dataset[left - 1].startswith(vid_name):
            left -= 1
        while right < len(hard_example_names_dataset) and \
                hard_example_names_dataset[right].startswith(vid_name):
            right += 1
        # slicing copies, so the shuffle below does not touch the source list
        hpens = hard_example_names_dataset[left:right]
        assert len(hpens) > 0, vid_name
        random.shuffle(hpens)
        hpens = hpens[:cfg.N_HARD_POS_TO_SAMPLE]

        feats = []
        ious = []
        gt_boxes = []
        jitter_boxes = []
        for hpen in hpens:
            sp = hpen.split("/")
            feat_fn = os.path.join(cfg.HARD_MINING_DATA_PATH, dataset_name,
                                   "det_feats_compressed", sp[0],
                                   this_fmt % int(sp[1]) + ".npz")
            feat, iou_data = _load_arrays(feat_fn, ("f", "i"))
            feats.append(feat)
            # "i" layout as read here: [0:4] gt box, [4:16] three jitter
            # boxes, last three entries the ious
            ious.append([float(x) for x in iou_data[-3:]])
            gt_boxes.append([float(x) for x in iou_data[:4]])
            jitter_boxes.append(
                np.array([float(x) for x in iou_data[4:16]]).reshape(3, 4))
        feats = np.stack(feats, axis=0)
        # atm just sample from same sequence, does not need to be hard
        data['hard_positive_features'] = feats
        data['hard_positive_ious'] = np.stack(ious, axis=0)
        data['hard_positive_gt_boxes'] = np.stack(gt_boxes, axis=0)
        data['hard_positive_jitter_boxes'] = np.stack(jitter_boxes, axis=0)

    return data
def preprocess(roidb):
    """Build a training datapoint (image, RPN targets, boxes, attributes).

    Args:
        roidb: dict with 'img' (image path), 'bbox' (boxes as x, y, w, h
            columns) and one entry per attribute name listed below.

    Returns:
        dict with 'image', 'anchor_labels', 'anchor_boxes', 'gt_boxes',
        'gt_labels' and every attribute entry copied through unchanged;
        None if the sample is rejected as MalformedData.
    """
    attribute_names = (
        'male', 'longhair', 'sunglass', 'hat', 'tshirt', 'longsleeve',
        'formal', 'shorts', 'jeans', 'longpants', 'skirt', 'facemask',
        'logo', 'stripe',
    )

    fname = roidb['img']
    # convert (x, y, w, h) columns into (x1, y1, x2, y2)
    left, top, box_w, box_h = np.split(roidb['bbox'], 4, axis=1)
    boxes = np.concatenate([left, top, left + box_w, top + box_h], axis=1)
    # every box gets class id 1
    klass = np.ones(len(roidb['bbox']), dtype=np.int32)
    attributes = {name: roidb[name] for name in attribute_names}

    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # augmentation:
    im, params = aug.augment_return_params(im)
    corners = aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {'image': im}
    # rpn anchor:
    try:
        # no box is treated as crowd
        not_crowd = np.zeros(len(boxes), dtype=int)
        anchor_labels, anchor_boxes = get_rpn_anchor_input(
            im, boxes, not_crowd)
        ret['anchor_labels'] = anchor_labels
        ret['anchor_boxes'] = anchor_boxes
        ret['gt_boxes'] = boxes
        ret['gt_labels'] = klass
        ret.update(attributes)
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            'warn')
        return None
    return ret
def _add_detection_gt(self, img, add_mask):
    """
    Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
    If add_mask is True, also add 'segmentation' in coco poly format.

    Annotations kept as valid have their 'bbox' (and, with add_mask, their
    'segmentation') rewritten in place.
    """
    # Direct lookup in the image->annotations map of the COCO object.
    annotations = self.coco.imgToAnns[img['id']]

    img_w = img['width']
    img_h = img['height']
    kept = []
    for ann in annotations:
        # annotations flagged with ignore == 1 are skipped entirely
        if ann.get('ignore', 0) == 1:
            continue

        # bbox is originally in float, as (x1, y1, w, h) with x1/y1 the
        # upper-left corner. (0.0, 0.0) is assumed to be the upper-left
        # corner of the first pixel.
        x1, y1, w, h = ann['bbox']
        box = FloatBox(float(x1), float(y1),
                       float(x1 + w), float(y1 + h))
        box.clip_by_shape([img_h, img_w])

        # Require non-zero seg area and more than 1x1 box size
        if not (ann['area'] > 1 and box.is_box() and box.area() >= 4):
            continue

        # store the clipped box in (x1, y1, x2, y2) form
        ann['bbox'] = [box.x1, box.y1, box.x2, box.y2]
        kept.append(ann)

        if not add_mask:
            continue

        segs = ann['segmentation']
        if not isinstance(segs, list):
            # a non-list segmentation is only expected on crowd annotations
            assert ann['iscrowd'] == 1
            ann['segmentation'] = None
            continue

        # keep polygons with at least 6 coordinates (3 points), as (k, 2) float32
        polygons = [
            np.asarray(p).reshape(-1, 2).astype('float32')
            for p in segs if len(p) >= 6
        ]
        if len(polygons) < len(segs):
            log_once("Image {} has invalid polygons!".format(
                img['file_name']), 'warn')
        ann['segmentation'] = polygons

    # all geometrically-valid boxes are returned
    boxes = np.asarray([ann['bbox'] for ann in kept],
                       dtype='float32')  # (n, 4)
    cls = np.asarray(
        [COCOMeta.category_id_to_class_id[ann['category_id']]
         for ann in kept],
        dtype='int32')  # (n,)
    is_crowd = np.asarray([ann['iscrowd'] for ann in kept], dtype='int8')

    # add the keys
    img['boxes'] = boxes  # nx4
    img['class'] = cls  # n, always >0
    img['is_crowd'] = is_crowd  # n,
    if add_mask:
        # also required to be float32
        img['segmentation'] = [ann['segmentation'] for ann in kept]
def __call__(self, roidb):
    """Build a training datapoint carrying two box sets ("house" and "damage").

    Args:
        roidb: dict with "file_name", "boxes_house", "boxes_damage", "class",
            "is_crowd" and, when ``cfg.MODE_MASK`` is set, "segmentation".

    Returns:
        dict with "image", the RPN anchor inputs for both box sets,
        "gt_boxes_house", "gt_boxes_damage", "gt_labels" and, in mask mode,
        "gt_masks_packed"; None if MalformedData is raised while building
        the RPN targets.
    """
    fname = roidb["file_name"]
    boxes_house = roidb["boxes_house"]
    boxes_damage = roidb["boxes_damage"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]

    assert boxes_damage.ndim == 2 and boxes_damage.shape[
        1] == 4, boxes_damage.shape
    boxes_house = np.copy(boxes_house)
    boxes_damage = np.copy(boxes_damage)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes_damage.dtype == np.float32, "Loader has to return float32 boxes!"

    if not self.cfg.DATA.ABSOLUTE_COORD:
        # relative coordinates: scale x by width, y by height
        for rel_boxes in (boxes_house, boxes_damage):
            rel_boxes[:, 0::2] *= width
            rel_boxes[:, 1::2] *= height

    # augmentation:
    tfms = self.aug.get_transform(im)
    im = tfms.apply_image(im)

    def _transform_and_check(raw_boxes):
        # Apply the image transform to the boxes, then sanity-check non-empty results.
        warped = point4_to_box(tfms.apply_coords(box_to_point4(raw_boxes)))
        if len(warped):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(np_area(warped)) > 0, "Some boxes have zero area!"
        return warped

    boxes_house = _transform_and_check(boxes_house)
    boxes_damage = _transform_and_check(boxes_damage)

    ret = {"image": im}
    # Add rpn data to dataflow:
    try:
        if self.cfg.MODE_FPN:
            house_levels = self.get_multilevel_rpn_anchor_input(
                im, boxes_house, is_crowd)
            for lvl, (labels, anchors) in enumerate(house_levels, start=2):
                ret["anchor_labels_lvl{}_house".format(lvl)] = labels
                ret["anchor_boxes_lvl{}_house".format(lvl)] = anchors

            damage_levels = self.get_multilevel_rpn_anchor_input(
                im, boxes_damage, is_crowd)
            for lvl, (labels, anchors) in enumerate(damage_levels, start=2):
                ret["anchor_labels_lvl{}_damage".format(lvl)] = labels
                ret["anchor_boxes_lvl{}_damage".format(lvl)] = anchors
        else:
            # NOTE(review): both calls write "anchor_labels", so the house
            # labels are overwritten by the damage labels. Unlike the FPN
            # branch there is no per-set label key -- confirm this is intended.
            house_labels, house_anchors = self.get_rpn_anchor_input(
                im, boxes_house, is_crowd)
            ret["anchor_labels"] = house_labels
            ret["anchor_boxes_house"] = house_anchors
            damage_labels, damage_anchors = self.get_rpn_anchor_input(
                im, boxes_damage, is_crowd)
            ret["anchor_labels"] = damage_labels
            ret["anchor_boxes_damage"] = damage_anchors

        # skip crowd boxes in training target; the same is_crowd mask is
        # applied to both box sets and to the labels
        boxes_house = boxes_house[is_crowd == 0]
        boxes_damage = boxes_damage[is_crowd == 0]
        klass = klass[is_crowd == 0]
        ret["gt_boxes_house"] = boxes_house
        ret["gt_boxes_damage"] = boxes_damage
        ret["gt_labels"] = klass

    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            "warn")
        return None

    if self.cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb["segmentation"])
        segmentation = [
            polys for k, polys in enumerate(segmentation)
            if not is_crowd[k]
        ]
        assert len(segmentation) == len(boxes_house)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        width_height = np.asarray([width, height], dtype=np.float32)
        # pad to 8 in order to pack mask into bits
        gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)

        def _rasterize(polys):
            # Scale (if relative) and transform the polygons, then draw them.
            if not self.cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [tfms.apply_coords(p) for p in polys]
            return polygons_to_mask(polys, im.shape[0], gt_mask_width)

        masks = [_rasterize(polys) for polys in segmentation]

        if len(masks):
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            masks = np.packbits(masks, axis=-1)
        else:  # no gt on the image
            masks = np.zeros((0, im.shape[0], gt_mask_width // 8),
                             dtype='uint8')

        ret['gt_masks_packed'] = masks

    return ret
def preprocess(img):
    """Cut one image into SNIPER chips and build per-chip RPN training inputs.

    Args:
        img: dict with 'file_name', 'boxes', 'class' and 'is_crowd'.

    Returns:
        A list with one entry per usable chip, each
        ``[chip_image, anchor_labels, anchor_boxes, gt_boxes, gt_classes]``.
        Chips without boxes, or without non-crowd boxes, are left out.
    """
    fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
        'class'], img['is_crowd']
    img_name = fname.split('/')[-1]
    # the numeric id is taken from the file name: 3-char prefix and
    # 4-char suffix (e.g. ".jpg") are stripped
    img_id = int(img_name[3:-4])

    # Proposals from a pretrained rpn, used for negative chip extraction.
    # Work on a copy: converting in place would modify the shared
    # proposal_pickle entry, so the widths/heights would be added again
    # every time this image is preprocessed.
    proposals = np.array(
        proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)])
    # from [x,y,w,h] to [x1,y1,x2,y2]; indexing the last axis handles both a
    # single box and an (N, >=4) array of boxes
    proposals[..., 2:4] += proposals[..., 0:2]

    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # augmentation:
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    chip_generator = Im2Chip(im,
                             boxes,
                             klass,
                             proposals,
                             cfg.SNIPER.SCALES,
                             cfg.SNIPER.VALID_RANGES,
                             is_crowd=is_crowd,
                             chip_size=cfg.SNIPER.CHIP_SIZE,
                             chip_stride=cfg.SNIPER.CHIP_STRIDE)
    im, boxes, klass, scale_indices, is_crowd = \
        chip_generator.genChipMultiScale()

    rets = []
    for i in range(len(im)):
        try:
            if len(boxes[i]) == 0:
                continue

            # NOTE(review): maxbox is read from index 0 and minbox from
            # index 1 of the valid range -- confirm VALID_RANGES stores its
            # entries in that order.
            maxbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][0]
            minbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][1]
            # -1 means "no limit" on that side
            maxbox = sys.maxsize if maxbox == -1 else maxbox
            minbox = 0 if minbox == -1 else minbox

            # boxes outside the valid size range for this chip's scale
            gt_invalid = []
            for box in boxes[i]:
                w = box[2] - box[0]
                h = box[3] - box[1]
                if w >= maxbox or h >= maxbox or (w < minbox
                                                  and h < minbox):
                    gt_invalid.append(box)

            # anchor_labels, anchor_boxes
            anchor_inputs = get_sniper_rpn_anchor_input(
                im[i], boxes[i], is_crowd[i], gt_invalid)
            assert len(anchor_inputs) == 2

            # skip crowd boxes in training target
            boxes[i] = boxes[i][is_crowd[i] == 0]
            klass[i] = klass[i][is_crowd[i] == 0]

            if not len(boxes[i]):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(
                    fname, str(e)), 'warn')
            continue

        ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]]
        rets.append(ret)
    return rets
def MaskedConv2D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        masking=False):
    """
    A wrapper around `tf.layers.Conv2D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.
    4. Support 'masking': when true, the kernel is passed through
       ``pruning.apply_mask`` before the convolution. Masking is only
       supported with ``split == 1``.

    `tf.layers.Conv2D` is used only when there is no masking, no grouping
    and no dilation; every other case goes through an explicit
    `tf.nn.conv2d` implementation, which does not support regularizers.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if not masking and split == 1 and dilation_rate == [1, 1]:
        # tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797)
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                _reuse=tf.get_variable_scope().reuse)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        if masking:
            assert split == 1, "Pruining group conv is not supported yet"
        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        # Integer division keeps the shape integral under Python 3; it is
        # exact because in_channel % split == 0 was asserted above.
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        if split == 1:
            if masking:
                # from here on W refers to the masked kernel
                W = pruning.apply_mask(W)
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            conv = None
            if get_tf_version_tuple() >= (1, 13):
                # try the native group convolution first
                try:
                    conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
                except ValueError:
                    log_once("CUDNN group convolution support is only available with "
                             "https://github.com/tensorflow/tensorflow/pull/25818 . "
                             "Will fall back to a loop-based slow implementation instead!", 'warn')
            if conv is None:
                # fallback: convolve each group separately and concatenate
                inputs = tf.split(inputs, split, channel_axis)
                kernels = tf.split(W, split, 3)
                outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                           for i, k in zip(inputs, kernels)]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
def __call__(self, roidb):
    """Build one training datapoint from a roidb entry.

    Args:
        roidb: dict with "file_name", "boxes", "class", "is_crowd" and, when
            ``cfg.MODE_MASK`` is set, "segmentation".

    Returns:
        dict with "image", the RPN anchor inputs, "gt_boxes", "gt_labels"
        and, in mask mode, "gt_masks_packed". None when the entry has no
        boxes at all or no non-crowd boxes (logged once as a warning).
    """
    fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    if not self.cfg.DATA.ABSOLUTE_COORD:
        # relative coordinates: scale x by width, y by height
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # augmentation:
    im, params = self.aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = self.aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    # np.min raises ValueError on an empty array, so only check when there
    # are boxes; the empty case is rejected as MalformedData below.
    if len(boxes):
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {"image": im}
    # Add rpn data to dataflow:
    try:
        if not len(boxes):
            # reject before anchor generation instead of crashing on empty input
            raise MalformedData("No valid gt_boxes!")

        if self.cfg.MODE_FPN:
            multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
            for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
        else:
            ret["anchor_labels"], ret["anchor_boxes"] = self.get_rpn_anchor_input(im, boxes, is_crowd)

        boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        ret["gt_boxes"] = boxes
        ret["gt_labels"] = klass
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
        return None

    if self.cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb["segmentation"])
        segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        masks = []
        width_height = np.asarray([width, height], dtype=np.float32)
        # pad to 8 in order to pack mask into bits
        gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)
        for polys in segmentation:
            if not self.cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [self.aug.augment_coords(p, params) for p in polys]
            masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width))
        masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
        masks = np.packbits(masks, axis=-1)
        ret['gt_masks_packed'] = masks

    return ret
def preprocess(roidb_batch):
    """Preprocess a list of roidb entries and collate them into one padded batch.

    Each entry is loaded, augmented and given FPN RPN targets. Images, gt
    boxes, gt labels and (in mask mode) gt masks are then padded to a common
    size and stacked.

    Args:
        roidb_batch: iterable of dicts with 'file_name', 'boxes', 'class',
            'is_crowd' and, when ``cfg.MODE_MASK`` is set, 'segmentation'.

    Returns:
        dict of batched fields: the per-level anchor inputs, "images",
        "orig_image_dims", "orig_gt_counts", "gt_labels", "gt_boxes",
        "filenames" and, in mask mode, "gt_masks". None as soon as any entry
        of the batch is rejected as MalformedData.

    Raises:
        NotImplementedError: when ``cfg.MODE_FPN`` is off.
    """
    datapoint_list = []
    for roidb in roidb_batch:
        fname = roidb['file_name']
        boxes = roidb['boxes']
        klass = roidb['class']
        is_crowd = roidb['is_crowd']

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(boxes), params)
        boxes = point8_to_box(corners)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'images': im}
        # rpn anchor:
        try:
            if not cfg.MODE_FPN:
                raise NotImplementedError("[armand] Batch mode only available for FPN")
            per_level = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
            for level, (labels, anchors) in enumerate(per_level, start=2):
                ret['anchor_labels_lvl{}'.format(level)] = labels
                ret['anchor_boxes_lvl{}'.format(level)] = anchors

            keep = is_crowd == 0  # skip crowd boxes in training target
            boxes = boxes[keep]
            klass = klass[keep]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            ret['filename'] = fname
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            # one rejected image drops the whole batch
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [polys for k, polys in enumerate(segmentation) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                warped = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(warped, im.shape[0], im.shape[1]))
            ret['gt_masks'] = np.asarray(masks, dtype='uint8')  # values in {0, 1}

        datapoint_list.append(ret)

    # ------------------------------------------------------------------
    # Batchify the output
    # ------------------------------------------------------------------
    batched_datapoint = {}

    # Anchor inputs are stacked directly, without padding.
    stackable_fields = (
        "anchor_labels_lvl2", "anchor_boxes_lvl2",
        "anchor_labels_lvl3", "anchor_boxes_lvl3",
        "anchor_labels_lvl4", "anchor_boxes_lvl4",
        "anchor_labels_lvl5", "anchor_boxes_lvl5",
        "anchor_labels_lvl6", "anchor_boxes_lvl6",
    )
    for field in stackable_fields:
        batched_datapoint[field] = np.stack([d[field] for d in datapoint_list])

    # The remaining fields need padding, and their original sizes are stored:
    #   - image     (HxWx3)
    #   - gt_boxes  (?x4)
    #   - gt_labels (?)
    #   - gt_masks  (?xHxW)
    # Find the container size for images (max height x max width) and the
    # maximum number of ground truth boxes; for each image, save the
    # original dimension and pad.
    if cfg.PREPROC.PREDEFINED_PADDING:
        container_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
    else:
        container_shapes = [d["images"].shape for d in datapoint_list]
    max_height = max([shape[0] for shape in container_shapes])
    max_width = max([shape[1] for shape in container_shapes])

    # image: zero-pad at the bottom/right
    padded_images = []
    original_image_dims = []
    for datapoint in datapoint_list:
        image = datapoint["images"]
        original_image_dims.append(image.shape)
        pad_h = max_height - image.shape[0]
        pad_w = max_width - image.shape[1]
        padded_images.append(
            np.pad(image, [[0, pad_h], [0, pad_w], [0, 0]], 'constant'))

    batched_datapoint["images"] = np.stack(padded_images)
    batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)

    # gt_boxes and gt_labels (and gt_masks in mask mode)
    max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

    gt_counts = []
    padded_gt_labels = []
    padded_gt_boxes = []
    padded_gt_masks = []
    for datapoint in datapoint_list:
        num_gt = datapoint["gt_labels"].size
        gt_counts.append(num_gt)
        pad_gt = max_num_gts - num_gt

        # padded label slots are marked with -1, padded boxes are all-zero
        padded_gt_labels.append(
            np.pad(datapoint["gt_labels"], [0, pad_gt], 'constant', constant_values=-1))
        padded_gt_boxes.append(
            np.pad(datapoint["gt_boxes"], [[0, pad_gt], [0, 0]], 'constant'))

        if cfg.MODE_MASK:
            pad_h = max_height - datapoint["images"].shape[0]
            pad_w = max_width - datapoint["images"].shape[1]
            padded_gt_masks.append(
                np.pad(datapoint["gt_masks"], [[0, pad_gt], [0, pad_h], [0, pad_w]], 'constant'))

    batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
    batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
    batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
    batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

    if cfg.MODE_MASK:
        batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)

    return batched_datapoint