def _data_aug_fn(image, ground_truth, augmentor, preprocessor, data_format="channels_first"):
    """Augment one sample and build its pose-proposal training targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"kpt"``, ``"mask"`` and ``"bbx"``.
        augmentor: object with ``process(image=, annos=, mask_valid=, bbxs=)``
            returning the augmented ``(image, annos, mask_valid, bbxs)``.
        preprocessor: object with ``process(annos=, mask_valid=, bbxs=)``
            returning ``(delta, tx, ty, tw, th, te, te_mask)``.
        data_format: ``"channels_first"`` transposes the image to C x H x W;
            any other value leaves it H x W x C.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)``.
    """
    # restore data
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    mask = ground_truth["mask"]
    annos = np.array(ground_truth["kpt"]).astype(np.float32)
    bbxs = np.array(ground_truth["bbx"]).astype(np.float32)

    # decode mask: start with every pixel valid, then clear the pixels
    # covered by any decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # general augmentation process
    image, annos, mask_valid, bbxs = augmentor.process(
        image=image, annos=annos, mask_valid=mask_valid, bbxs=bbxs)

    # generate result which includes proposal region x, y, w, h, edges
    delta, tx, ty, tw, th, te, te_mask = preprocessor.process(
        annos=annos, mask_valid=mask_valid, bbxs=bbxs)

    # zero out the invalid pixels in all three image channels
    img_mask = mask_valid[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
def _data_aug_fn(image, ground_truth, hin, hout, win, wout, parts, limbs, flip_list=None,
                 data_format="channels_first"):
    """Augment one sample and build its heatmap/vectormap training targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        hin, win: network input height and width.
        hout, wout: network output height and width.
        parts, limbs: forwarded to ``get_heatmap`` / ``get_vectormap``.
        flip_list: currently unused; flip augmentation is disabled here.
        data_format: ``"channels_first"`` or anything else for channels last.

    Returns:
        Tuple ``(image, resultmap, mask_miss, labeled)`` where ``resultmap``
        is the heatmap and vectormap concatenated along the channel axis.
    """
    # restore data
    concat_dim = 0 if data_format == "channels_first" else -1
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]

    # decode mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            if bin_mask.shape != mask_miss.shape:
                # segments that do not match the image size are reported and skipped
                print(f"test error mask shape mask_miss:{mask_miss.shape} bin_mask:{bin_mask.shape}")
            else:
                mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # get transform matrix
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-30, 30))  # original paper: -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 0.8))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)

    # apply the same affine transform to image, mask and keypoints
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss, transform_matrix, border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)

    # flip augmentation is temporarily ignored (flip_list is not consulted)
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss, size=(hin, win))

    # generate result which includes keypoints heatmap and vectormap
    height, width, _ = image.shape
    heatmap = get_heatmap(annos, height, width, hout, wout, parts, limbs, data_format=data_format)
    vectormap = get_vectormap(annos, height, width, hout, wout, parts, limbs, data_format=data_format)
    resultmap = np.concatenate((heatmap, vectormap), axis=concat_dim)

    # force the exact network input size (cv2 takes dsize as (width, height))
    image = cv2.resize(image, (win, hin))
    mask_miss = cv2.resize(mask_miss, (win, hin))

    # generate output masked image, result map and masks
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)
    resultmap = np.array(resultmap, dtype=np.float32)
    mask_miss = np.array(
        cv2.resize(mask_miss, (wout, hout), interpolation=cv2.INTER_AREA),
        dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_miss = np.transpose(mask_miss, [2, 0, 1])
    labeled = np.float32(labeled)
    return image, resultmap, mask_miss, labeled
def _data_aug_fn(image, ground_truth):
    """Augment one sample and build its heatmap/PAF targets.

    ``hin``, ``win``, ``hout`` and ``wout`` are read from the enclosing
    module scope.

    Args:
        image: H x W x C image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of encoded masks.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``: the masked float32 image,
        the float32 heatmap/vectormap stack, and the float32 mask resized
        to the output resolution.
    """
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    ## image data augmentation
    # randomly resize height and width independently, scale is changed
    image, annos, mask_miss = keypoint_random_resize(image, annos, mask_miss, zoom_range=(0.8, 1.2))
    # random rotate
    image, annos, mask_miss = keypoint_random_rotate(image, annos, mask_miss, rg=15.0)
    # random left-right flipping
    image, annos, mask_miss = keypoint_random_flip(image, annos, mask_miss, prob=0.5)
    # random resize height and width together
    image, annos, mask_miss = keypoint_random_resize_shortestedge(
        image, annos, mask_miss, min_size=(hin, win), zoom_range=(0.95, 1.6))
    # random crop (with padding)
    image, annos, mask_miss = keypoint_random_crop(image, annos, mask_miss, size=(hin, win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); the previous (hout, wout)
    # order transposed the mask whenever the output was not square
    mask_miss = cv2.resize(mask_miss, (wout, hout), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
def annToMask(ann, height, width):
    """Turn an annotation into a binary mask.

    The annotation is first converted to RLE by ``annToRLE`` (which takes
    the annotation together with the image height and width) and the RLE
    is then decoded.

    :return: the decoded mask as produced by ``maskUtils.decode``
    """
    return maskUtils.decode(annToRLE(ann, height, width))
def poly2mask_single(h, w, poly):
    """Rasterise the polygon(s) ``poly`` into a single mask for an ``h`` x ``w`` canvas.

    The polygons are converted to RLE objects, merged into one RLE and
    decoded.
    """
    # TODO: write test for poly2mask, using mask2poly convert mask to poly', compare poly with poly'
    merged_rle = maskUtils.merge(maskUtils.frPyObjects(poly, h, w))
    return maskUtils.decode(merged_rle)
def _data_aug_fn(image, ground_truth, hin, win, hout, wout, hnei, wnei, parts, limbs, kpt_cvter,
                 data_format="channels_first"):
    """Resize one sample to the network input size and build pose-proposal targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"obj"``, ``"mask"`` and ``"bbx"``.
        hin, win: network input height and width.
        hout, wout, hnei, wnei, parts, limbs: forwarded unchanged to
            ``get_pose_proposals``.
        kpt_cvter: callable applied to every entry of ``ground_truth["obj"]``
            before the entries are stacked into an array.
        data_format: ``"channels_first"`` transposes the image to C x H x W.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)``.
    """
    # restore data
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    mask = ground_truth["mask"]

    # keypoint transform: convert each annotation, then scale to the input size
    img_h, img_w, _ = image.shape
    annos = np.array([kpt_cvter(anno) for anno in ground_truth["obj"]]).astype(np.float32)
    bbxs = np.array(ground_truth["bbx"]).astype(np.float32)
    scale_w = np.float32(win / img_w)
    scale_h = np.float32(hin / img_h)
    annos[:, :, 0] *= scale_w
    annos[:, :, 1] *= scale_h

    # bbx transform: columns 0/2 follow the width scale, columns 1/3 the height scale
    bbxs[:, 0] *= scale_w
    bbxs[:, 1] *= scale_h
    bbxs[:, 2] *= scale_w
    bbxs[:, 3] *= scale_h

    # decode mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # image transform (cv2 takes dsize as (width, height))
    image = cv2.resize(image, dsize=(win, hin))
    mask_miss = cv2.resize(mask_miss, dsize=(win, hin))

    # generate result which includes proposal region x, y, w, h, edges
    delta, tx, ty, tw, th, te, te_mask = get_pose_proposals(
        annos, bbxs, hin, win, hout, wout, hnei, wnei, parts, limbs, mask_miss, data_format)

    # zero out the masked pixels in all three image channels
    img_mask = mask_miss[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
def poly2mask_single(h, w, poly):
    """Build one mask from the polygon(s) in ``poly`` on an ``h`` x ``w`` canvas.

    Steps: polygons -> RLE objects -> one merged RLE -> decoded mask.
    """
    # TODO: write test for poly2mask, using mask2poly convert mask to poly', compare poly with poly'
    rle_objects = maskUtils.frPyObjects(poly, h, w)
    single_rle = maskUtils.merge(rle_objects)
    return maskUtils.decode(single_rle)
def _data_aug_fn(image, ground_truth, augmentor, preprocessor, data_format="channels_first"):
    """Augment one sample and build its heatmap/vectormap training targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        augmentor: object with ``process(image=, annos=, mask_valid=)``
            returning the augmented ``(image, annos, mask_valid)``.
        preprocessor: object exposing ``hin``, ``win``, ``hout``, ``wout``
            and ``process(annos=, mask_valid=)`` returning
            ``(heatmap, vectormap)``.
        data_format: ``"channels_first"`` or anything else for channels last.

    Returns:
        Tuple ``(image, resultmap, mask_valid, labeled)``.
    """
    # restore data
    concat_dim = 0 if data_format == "channels_first" else -1
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]
    hin, win = preprocessor.hin, preprocessor.win
    hout, wout = preprocessor.hout, preprocessor.wout

    # decode mask: 1 = valid pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            if bin_mask.shape != mask_valid.shape:
                # segments that do not match the image size are reported and skipped
                print(f"test error mask shape mask_valid:{mask_valid.shape} bin_mask:{bin_mask.shape}")
            else:
                mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # general augmentation process
    image, annos, mask_valid = augmentor.process(image=image, annos=annos, mask_valid=mask_valid)

    # generate result including heatmap and vectormap
    heatmap, vectormap = preprocessor.process(annos=annos, mask_valid=mask_valid)
    resultmap = np.concatenate((heatmap, vectormap), axis=concat_dim)

    # generate output masked image, result map and masks
    image_mask = mask_valid.reshape(hin, win, 1)
    image = image * np.repeat(image_mask, 3, 2)
    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2 takes dsize as (width, height)
    mask_valid = np.array(
        cv2.resize(mask_valid, (wout, hout), interpolation=cv2.INTER_AREA),
        dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_valid = np.transpose(mask_valid, [2, 0, 1])
    labeled = np.float32(labeled)
    return image, resultmap, mask_valid, labeled
def _data_aug_fn(image, ground_truth):
    """Unpickle the ground truth and build the "miss" mask for ``image``.

    ``ground_truth`` is a pickled sequence; item 0 holds the annotations
    (read but not used here) and item 1 the encoded mask segments. The
    returned mask has the image's height and width, dtype uint8, and is 0
    wherever a decoded segment is non-zero.

    Returns:
        Tuple ``(image, mask_miss)``; ``image`` is passed through untouched.
    """
    unpacked = list(cPickle.loads(ground_truth))
    annos, segments = unpacked[0], unpacked[1]

    rows, cols, _ = np.shape(image)
    mask_miss = np.ones((rows, cols), dtype=np.uint8)
    for segment in segments:
        outside = np.logical_not(maskUtils.decode(segment))
        mask_miss = np.bitwise_and(mask_miss, outside)
    return image, mask_miss
def decode_mask(meta_mask_list):
    """Combine a list of encoded masks into one uint8 mask.

    Every entry is decoded and inverted; the inverted masks are AND-ed
    together, so the result is 1 only where no decoded mask is set.

    Returns:
        ``None`` when ``meta_mask_list`` is not exactly a ``list`` or is
        empty, otherwise the combined mask as ``np.uint8``.
    """
    if type(meta_mask_list) is not list or not meta_mask_list:
        return None

    inverted = [np.logical_not(maskUtils.decode(encoded)) for encoded in meta_mask_list]
    combined = np.ones_like(inverted[0])
    for inv in inverted:
        combined = np.logical_and(combined, inv)
    return combined.astype(np.uint8)
def _data_aug_fn(image, ground_truth, augmentor, preprocessor, data_format="channels_first"):
    """Augment one sample and build its PIF/PAF training targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        augmentor: object with ``process(image=, annos=, mask_valid=)``
            returning the augmented ``(image, annos, mask_valid)``.
        preprocessor: object exposing ``hin``, ``win``, ``hout``, ``wout``
            and ``process(annos=, mask_valid=)`` returning
            ``(pif_maps, paf_maps)`` (a 4-tuple and a 7-tuple).
        data_format: ``"channels_first"`` or anything else for channels last.

    Returns:
        Tuple ``(image, pif_conf, pif_vec, pif_bmin, pif_scale, paf_conf,
        paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin, paf_src_scale,
        paf_dst_scale, mask_valid_out, labeled)``.
    """
    # restore data
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]
    hin, win = preprocessor.hin, preprocessor.win
    hout, wout = preprocessor.hout, preprocessor.wout

    # decode mask: 1 = valid pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # general augmentation process
    image, annos, mask_valid = augmentor.process(image=image, annos=annos, mask_valid=mask_valid)

    # generate result including pif_maps and paf_maps
    pif_maps, paf_maps = preprocessor.process(annos=annos, mask_valid=mask_valid)
    pif_conf, pif_vec, pif_bmin, pif_scale = pif_maps
    (paf_conf, paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin,
     paf_src_scale, paf_dst_scale) = paf_maps

    # generate output masked image and output-resolution mask
    image_mask = mask_valid.reshape(hin, win, 1)
    image = image * np.repeat(image_mask, 3, 2)
    # cv2 takes dsize as (width, height)
    mask_valid_out = np.array(
        cv2.resize(mask_valid, (wout, hout), interpolation=cv2.INTER_AREA),
        dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_valid_out = np.transpose(mask_valid_out, [2, 0, 1])
    labeled = np.float32(labeled)
    return (image, pif_conf, pif_vec, pif_bmin, pif_scale,
            paf_conf, paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin,
            paf_src_scale, paf_dst_scale, mask_valid_out, labeled)
def _mock_data_aug_fn(image, ground_truth):
    """Crop-only variant of the data augmentation function.

    Only the resize + random crop step is applied; sizes come from
    ``config.MODEL`` (``hin``, ``win``, ``hout``, ``wout``).

    Args:
        image: H x W x C image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of encoded masks.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # random crop
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss, size=(config.MODEL.hin, config.MODEL.win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # TODO image has not always the right size if 256 * 384 is requested
    print("image1 ", height, width, _)
    print("mask1 ", mask_miss.shape)

    # mask image in all 3 channels
    img_mask = mask_miss.reshape(height, width, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); the previous (hout, wout)
    # order only produced the right shape for square outputs
    mask_miss = cv2.resize(mask_miss, (config.MODEL.wout, config.MODEL.hout), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
def gen_mask(components):
    """
    Build the "miss" mask for the meta object stored in ``components[0]``.

    When ``meta.masks_segments`` is non-empty, every segment is decoded,
    inverted and AND-ed into an all-ones ``(meta.height, meta.width)`` uint8
    array, which is then stored on ``meta.mask``. Otherwise ``meta`` is left
    untouched.

    :param components: components
    :return: the same ``components`` object
    """
    meta = components[0]
    segments = meta.masks_segments
    if not segments:
        return components

    keep = np.ones((meta.height, meta.width), dtype=np.uint8)
    for segment in segments:
        keep = np.bitwise_and(keep, np.logical_not(maskUtils.decode(segment)))
    meta.mask = keep
    return components
def segm_to_mask(segm, w, h):
    """Convert a segmentation (e.g. polygons) to a binary mask.

    Reference:
        https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py

    Args:
        segm (list<list<int>> or list<int>): A segmentation map which can be polygons.
        w (int): Image width
        h (int): Image height

    Returns:
        mask (np.array(H, W)): A segmentation mask of a particular class.
    """
    return maskUtils.decode(segm_to_rle(segm, w, h))
def __getitem__(self, index):
    """Return a random ``(image_patch, label_patch)`` pair for sample ``index``.

    The image ``<img_dir>/<id>.jpg`` is loaded as an RGB tensor and a label
    map of the same height/width is rasterised from the instance polygons:

    * ``_n_class == 7``: category 8 is relabelled as 5 and only categories
      ``<= 6`` are drawn;
    * ``_n_class == 2``: only category 1 is drawn.

    A patch of ``_infos.patchSize`` is then cropped at a random offset from
    both tensors.

    Raises:
        ValueError: from ``random.randint`` when the image is smaller than
            the patch size.
    """
    info = self._infos.data[index]
    img_file = os.path.join(self._img_dir, str(info.id) + '.jpg')
    img = transforms.ToTensor()(Image.open(img_file).convert('RGB'))
    img_w, img_h = img.size(2), img.size(1)
    target = torch.LongTensor(img_h, img_w).zero_()

    for inst in info.insts:
        # Work on a local copy: the remap below must not rewrite the
        # category stored in the shared dataset record.
        category_idx = inst.category_idx
        selected = False
        # { bg, person, bicycle, car, motorcycle, truck, bus, train }
        if self._n_class == 7:
            if category_idx == 8:
                category_idx = 5
            selected = category_idx <= 6
        # { bg, person }
        elif self._n_class == 2:
            selected = category_idx == 1
        if not selected:
            continue

        polys = [poly.tolist() for poly in inst.seg]
        if polys:
            rles = maskUtils.frPyObjects(polys, img_h, img_w)
            rle = maskUtils.merge(rles)
            mask = maskUtils.decode(rle)
            # masked_fill_ expects a boolean mask; the decoded mask is converted explicitly
            target.masked_fill_(torch.from_numpy(mask).bool(), category_idx)

    p_w = self._infos.patchSize.w
    p_h = self._infos.patchSize.h
    x0 = random.randint(0, (img_w - p_w))
    y0 = random.randint(0, (img_h - p_h))
    img = img[:, y0:y0 + p_h, x0:x0 + p_w]
    target = target[y0:y0 + p_h, x0:x0 + p_w]
    return img, target
def gen_mask(components):
    """
    Build the "miss" mask from the segments stored in ``components[7]``.

    ``components[1]`` and ``components[2]`` give the mask height and width.
    When the segment list is non-empty, each segment is decoded, inverted
    and AND-ed into an all-ones uint8 array that is written to
    ``components[11]``; otherwise ``components`` is returned unchanged.

    :param components: components
    :return: updated components
    """
    segments = components[7]
    height = components[1]
    width = components[2]
    if not segments:
        return components

    keep = np.ones((height, width), dtype=np.uint8)
    for segment in segments:
        keep = np.bitwise_and(keep, np.logical_not(maskUtils.decode(segment)))
    components[11] = keep
    return components
def _data_aug_fn(image, input):
    """Augment one sample and build its heatmap/vectormap targets.

    ``input`` is a pickled sequence: item 0 holds the keypoint annotations,
    item 1 the encoded mask segments. The image is scaled, rotated, flipped
    and cropped, resized to 368 x 368 if needed, and masked; the mask is
    finally resized to 46 x 46.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    payload = list(cPickle.loads(input))
    annos, segments = payload[0], payload[1]

    # mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    rows, cols, _ = np.shape(image)
    mask_miss = np.ones((rows, cols), dtype=np.uint8)
    for segment in segments:
        mask_miss = np.bitwise_and(mask_miss, np.logical_not(maskUtils.decode(segment)))

    # image process
    image, annos, mask_miss = pose_random_scale(image, annos, mask_miss)
    image, annos, mask_miss = pose_rotation(image, annos, mask_miss)
    image, annos, mask_miss = random_flip(image, annos, mask_miss)
    image, annos, mask_miss = crop_meta_image(image, annos, mask_miss)

    cur_h, cur_w, _ = np.shape(image)
    if not (cur_h == 368 and cur_w == 368):
        image, annos, mask_miss = _resize_image(image, annos, mask_miss, 368, 368)

    height, width, _ = np.shape(image)
    resultmap = np.concatenate(
        (get_heatmap(annos, height, width), get_vectormap(annos, height, width)),
        axis=2)

    # zero out the masked pixels in all three channels
    image = np.array(image, dtype=np.float32)
    image = image * np.repeat(mask_miss.reshape(368, 368, 1), 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    small_mask = cv2.resize(mask_miss, (46, 46), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(small_mask, dtype=np.float32)
    return image, resultmap, mask_miss
def main():
    """Draw COCO segmentation annotations onto grayscale copies of the images.

    For every image that contains the selected category, the image is read,
    converted to gray (kept as 3 channels so colour can be drawn on it),
    annotated, and written to the output directory under its original name:

    * polygon segmentations: every vertex is drawn as a small circle;
    * RLE segmentations: the decoded mask is alpha-blended onto the image.

    Red (BGR ``[0, 0, 255]``) is used for ``iscrowd == 0`` and yellow
    (``[0, 255, 255]``) for ``iscrowd == 1``.

    Previously a bare ``exit()`` terminated the whole script as soon as a
    non-polygon segmentation was met; that branch is now rendered instead.
    """
    inputfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/images'
    jsonfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/annotation-small.json'
    outputfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/show'
    mkdir_os(outputfile)

    coco = COCO(jsonfile)
    catIds = coco.getCatIds(catNms=['wires'])  # ids of the categories named 'wires'
    imgIds = coco.getImgIds(catIds=catIds)  # ids of all images containing those categories

    crowd_colors = {0: [0, 0, 255], 1: [0, 255, 255]}  # BGR, keyed by iscrowd
    point_size = 2
    thickness = 2

    for i, img_id in enumerate(imgIds):
        if i % 100 == 0:
            print(i, "/", len(imgIds))

        img = coco.loadImgs(img_id)[0]
        image_path = os.path.join(inputfile, img['file_name'])
        cvImage = cv2.imread(image_path, -1)
        if cvImage is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            print("cannot read image:", image_path)
            continue
        cvImage = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
        cvImage = cv2.cvtColor(cvImage, cv2.COLOR_GRAY2BGR)

        annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
        anns = coco.loadAnns(annIds)

        polygons = []
        color = []
        for ann in anns:
            if 'segmentation' not in ann:
                continue
            segmentation = ann['segmentation']
            if isinstance(segmentation, list):
                # polygon: flat [x0, y0, x1, y1, ...] lists
                for seg in segmentation:
                    poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                    polygons.append(poly.tolist())
                    if ann['iscrowd'] in crowd_colors:
                        color.append(crowd_colors[ann['iscrowd']])
            else:
                # mask (RLE); uncompressed counts are converted first, using
                # the size of the image record loaded above
                if isinstance(segmentation['counts'], list):
                    rle = maskUtils.frPyObjects([segmentation], img['height'], img['width'])
                else:
                    rle = [segmentation]
                m = maskUtils.decode(rle)
                mask = m.astype(bool)
                if mask.ndim == 3:
                    # decoding a list of RLEs yields one channel per RLE;
                    # collapse to a 2-D mask so it can index the image
                    mask = mask.any(axis=2)
                color_mask = np.array(crowd_colors.get(ann['iscrowd'], crowd_colors[0]))
                cvImage[mask] = cvImage[mask] * 0.7 + color_mask * 0.3

        for vertices, cur_color in zip(polygons, color):
            for vertex in vertices:
                cv2.circle(cvImage, (int(vertex[0]), int(vertex[1])), point_size,
                           (cur_color[0], cur_color[1], cur_color[2]), thickness)
        cv2.imwrite(os.path.join(outputfile, img['file_name']), cvImage)
Returns: A Tensor. Has the same type as input. Has the shape of tensor.shape * repeats """ expanded_tensor = tf.expand_dims(tensor, -1) multiples = [1] + repeats tiled_tensor = tf.tile(expanded_tensor, multiples=multiples) repeated_tesnor = tf.reshape(tiled_tensor, tf.shape(tensor) * repeats) return repeated_tesnor if __name__ == '__main__': data_dir = '/Users/Joel/Desktop/coco' data_type = 'val' anno_path = '{}/annotations/person_keypoints_{}2014.json'.format( data_dir, data_type) df_val = PoseInfo(data_dir, data_type, anno_path) for i in range(50): meta = df_val.metas[i] mask_sig = meta.masks print('shape of np mask is ', np.shape(mask_sig), type(mask_sig)) if mask_sig is not []: mask_miss = np.ones((meta.height, meta.width), dtype=np.uint8) for seg in mask_sig: bin_mask = maskUtils.decode(seg) bin_mask = np.logical_not(bin_mask) mask_miss = np.bitwise_and(mask_miss, bin_mask)
def _data_aug_fn(image, ground_truth):
    """Augment one sample with a random affine transform and build its targets.

    ``hin``, ``win``, ``hout`` and ``wout`` are read from the enclosing
    module scope.

    Fixes relative to the previous version:

    * the miss mask is warped with the same affine matrix as the image and
      keypoints (it used to be left in the original frame);
    * the horizontal flip is applied with ``keypoint_random_flip`` after the
      affine step instead of being folded into the affine matrix, where it
      mirrored coordinates without going through the keypoint-aware flip;
    * ``cv2.resize`` receives its size as ``(width, height)``.

    Args:
        image: H x W x C image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of encoded masks.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    ## image data augmentation
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-30, 30))  # -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 0.8))  # 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss, transform_matrix, border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)

    # random left-right flipping of image, keypoints and mask together
    image, annos, mask_miss = tl.prepro.keypoint_random_flip(image, annos, mask_miss, prob=0.5)
    # resize + random crop to the network input size
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(image, annos, mask_miss, size=(hin, win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height)
    mask_miss = cv2.resize(mask_miss, (wout, hout), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
def annToMask(self, ann):
    """Decode the RLE that ``self.annToRLE`` produces for ``ann`` and return the mask."""
    return maskUtils.decode(self.annToRLE(ann))
def _data_aug_fn(image, ground_truth, hin, win, hout, wout, hnei, wnei, parts, limbs,
                 data_format="channels_first"):
    """Apply a random affine augmentation and build pose-proposal targets.

    Args:
        image: object whose ``.numpy()`` yields an H x W x C image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with
            the keys ``"kpt"``, ``"mask"`` and ``"bbx"`` (boxes are read as
            ``x, y, w, h`` columns).
        hin, win: network input height and width.
        hout, wout, hnei, wnei, parts, limbs: forwarded unchanged to
            ``get_pose_proposals``.
        data_format: ``"channels_first"`` transposes the image to C x H x W.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)``.
    """
    # restore data
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    mask = ground_truth["mask"]
    annos = np.array(ground_truth["kpt"]).astype(np.float32)
    bbxs = np.array(ground_truth["bbx"]).astype(np.float32)

    # decode mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    # identity test: `!= None` would be evaluated element-wise on an array
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # prepare transform bbx: express every box by its four corner points so
    # the corners can be pushed through the same affine transform as keypoints
    transform_bbx = np.zeros(shape=(bbxs.shape[0], 4, 2))
    bbxs_x, bbxs_y, bbxs_w, bbxs_h = bbxs[:, 0], bbxs[:, 1], bbxs[:, 2], bbxs[:, 3]
    transform_bbx[:, 0, 0], transform_bbx[:, 0, 1] = bbxs_x, bbxs_y  # left top
    transform_bbx[:, 1, 0], transform_bbx[:, 1, 1] = bbxs_x + bbxs_w, bbxs_y  # right top
    transform_bbx[:, 2, 0], transform_bbx[:, 2, 1] = bbxs_x, bbxs_y + bbxs_h  # left bottom
    transform_bbx[:, 3, 0], transform_bbx[:, 3, 1] = bbxs_x + bbxs_w, bbxs_y + bbxs_h  # right bottom

    # image transform: get transform matrix
    h, w, _ = image.shape
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-30, 30))  # original paper: -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 0.8))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)

    # transform image, mask, keypoints and box corners alike
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss, transform_matrix, border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)
    transform_bbx = tl.prepro.affine_transform_keypoints(transform_bbx, transform_matrix)

    # construct transformed bbx: axis-aligned box enclosing the four moved corners
    transform_bbx = np.array(transform_bbx)
    final_bbxs = np.zeros(shape=bbxs.shape)
    for bbx_id in range(transform_bbx.shape[0]):
        bbx = transform_bbx[bbx_id, :, :]
        bbx_min_x = np.amin(bbx[:, 0])
        bbx_max_x = np.amax(bbx[:, 0])
        bbx_min_y = np.amin(bbx[:, 1])
        bbx_max_y = np.amax(bbx[:, 1])
        final_bbxs[bbx_id, 0] = bbx_min_x
        final_bbxs[bbx_id, 1] = bbx_min_y
        final_bbxs[bbx_id, 2] = bbx_max_x - bbx_min_x
        final_bbxs[bbx_id, 3] = bbx_max_y - bbx_min_y

    # resize crop
    transform_h, transform_w, _ = image.shape
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss, size=(hin, win))
    # follow tl.prepro.keypoint_resize_random_crop: only box width/height are rescaled here
    resize_ratio = max(hin / transform_h, win / transform_w)
    final_bbxs[:, 2] = final_bbxs[:, 2] * resize_ratio
    final_bbxs[:, 3] = final_bbxs[:, 3] * resize_ratio

    # generate result which includes proposal region x, y, w, h, edges
    delta, tx, ty, tw, th, te, te_mask = get_pose_proposals(
        annos, final_bbxs, hin, win, hout, wout, hnei, wnei, parts, limbs, mask_miss, data_format)

    # zero out the masked pixels in all three image channels
    img_mask = mask_miss[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
continue anns = img_dict[f] pack = [(ann["mode"], ann["bbox"], ann["label"], ann["score"], ann["rle"]) for ann in anns] for m, b, l, s, r in pack: b = [math.ceil(coor) for coor in b] if m == "xyxy": pt1 = (b[0], b[1]) pt2 = (b[2], b[3]) else: pt1 = (b[0], b[1]) pt2 = (b[0] + b[2], b[1] + b[3]) if not args.no_mask: decoded_mask = mask_utils.decode(r) decoded_mask_3ch = np.stack( (decoded_mask * args.alpha, ) * 3, axis=-1) green_dropback = np.zeros_like(im) green_dropback[:, :, 1] = 255 foreground = cv2.multiply(decoded_mask_3ch, green_dropback, dtype=cv2.CV_32FC3) background = cv2.multiply(1 - decoded_mask_3ch, im, dtype=cv2.CV_32FC3) im = cv2.add(foreground, background) if not args.no_box:
def _data_aug_fn(image, ground_truth):
    """Augment one sample (rotate, zoom, flip, crop) and build its targets.

    Sizes come from ``config.MODEL`` (``hin``, ``win``, ``hout``, ``wout``).

    Args:
        image: H x W x C image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of encoded masks.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # mask: 1 = usable pixel, 0 = pixel covered by a decoded segment
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    ## image data augmentation
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-180, 180))  # original paper: -40~40
    # the horizontal flip is deliberately NOT part of the affine matrix
    # (that produced keypoint errors); it is applied below instead
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 1.1))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss, transform_matrix, border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)

    # random left-right flipping of image, keypoints and mask together
    image, annos, mask_miss = tl.prepro.keypoint_random_flip(image, annos, mask_miss, prob=0.5)
    # resize + random crop to the network input size
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss, size=(config.MODEL.hin, config.MODEL.win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # TODO image has not always the right size if 256 * 384 is requested
    # mask image in all 3 channels
    img_mask = mask_miss.reshape(height, width, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); the previous (hout, wout)
    # order only produced the right shape for square outputs
    mask_miss = cv2.resize(mask_miss, (config.MODEL.wout, config.MODEL.hout), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss