Exemple #1
0
def _data_aug_fn(image, ground_truth, augmentor, preprocessor, data_format="channels_first"):
    """Decode one sample, augment it and build the pose-proposal targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D (height, width, channel)
            image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"kpt"``, ``"mask"`` and ``"bbx"``.
        augmentor: object whose ``process(image=, annos=, mask_valid=, bbxs=)``
            returns the augmented ``(image, annos, mask_valid, bbxs)``.
        preprocessor: object whose ``process(annos=, mask_valid=, bbxs=)``
            returns ``(delta, tx, ty, tw, th, te, te_mask)``.
        data_format: when ``"channels_first"`` the returned image is
            transposed with axes ``[2, 0, 1]``.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)`` where ``image``
        has been multiplied by the valid-pixel mask.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    mask = ground_truth["mask"]
    bbxs = ground_truth["bbx"]

    # Keypoints and boxes as float32 arrays.
    annos = np.array(annos).astype(np.float32)
    bbxs = np.array(bbxs).astype(np.float32)

    # Decode the segments: start from an all-ones mask and zero every pixel
    # covered by a decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # General augmentation process.
    image, annos, mask_valid, bbxs = augmentor.process(
        image=image, annos=annos, mask_valid=mask_valid, bbxs=bbxs)

    # Generate the targets: proposal region x, y, w, h and edges.
    delta, tx, ty, tw, th, te, te_mask = preprocessor.process(
        annos=annos, mask_valid=mask_valid, bbxs=bbxs)

    # Mask the image on all three channels.
    img_mask = mask_valid[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
Exemple #2
0
def _data_aug_fn(image, ground_truth, hin, hout, win, wout, parts, limbs, flip_list=None, data_format="channels_first"):
    """Decode one sample, apply a random affine augmentation and build targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        hin, win: height and width the image and mask are resized to.
        hout, wout: height and width of the returned mask; also forwarded to
            ``get_heatmap`` / ``get_vectormap``.
        parts, limbs: forwarded to ``get_heatmap`` / ``get_vectormap``.
        flip_list: currently unused; flip augmentation is disabled.
        data_format: ``"channels_first"`` concatenates the maps on axis 0 and
            transposes image and mask with axes ``[2, 0, 1]``; any other value
            concatenates on the last axis.

    Returns:
        Tuple ``(image, resultmap, mask_miss, labeled)``.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    concat_dim = 0 if data_format == "channels_first" else -1
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]

    # Decode the segments: start from all ones and zero each decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            if bin_mask.shape != mask_miss.shape:
                # A segment whose size disagrees with the image is reported
                # and skipped rather than broadcast into the mask.
                print(f"test error mask shape mask_miss:{mask_miss.shape} bin_mask:{bin_mask.shape}")
            else:
                mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # Build the random transform matrix.
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-30, 30))  # original paper: -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 0.8))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)

    # Apply the same transform to image, mask and keypoints.
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss, transform_matrix, border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)
    # Flip augmentation is temporarily disabled, so `flip_list` is not used.
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(image, annos, mask_miss, size=(hin, win))

    # Generate the targets: keypoint heatmap and vectormap.
    height, width, _ = image.shape
    heatmap = get_heatmap(annos, height, width, hout, wout, parts, limbs, data_format=data_format)
    vectormap = get_vectormap(annos, height, width, hout, wout, parts, limbs, data_format=data_format)
    resultmap = np.concatenate((heatmap, vectormap), axis=concat_dim)

    # cv2.resize takes dsize as (width, height).
    image = cv2.resize(image, (win, hin))
    mask_miss = cv2.resize(mask_miss, (win, hin))

    # Mask the image on all three channels and build the output mask.
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)
    resultmap = np.array(resultmap, dtype=np.float32)
    mask_miss = np.array(cv2.resize(mask_miss, (wout, hout), interpolation=cv2.INTER_AREA), dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_miss = np.transpose(mask_miss, [2, 0, 1])
    labeled = np.float32(labeled)
    return image, resultmap, mask_miss, labeled
Exemple #3
0
def _data_aug_fn(image, ground_truth):
    """Decode one sample, run the random keypoint augmentations and build targets.

    Relies on the names ``hin``, ``win``, ``hout`` and ``wout`` being defined
    in the enclosing scope.

    Args:
        image: 3-D (height, width, channel) image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of segments.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # Start from an all-ones mask and zero every decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    ## image data augmentation
    # randomly resize height and width independently, scale is changed
    image, annos, mask_miss = keypoint_random_resize(
        image, annos, mask_miss, zoom_range=(0.8, 1.2))
    # random rotate
    image, annos, mask_miss = keypoint_random_rotate(
        image, annos, mask_miss, rg=15.0)
    # random left-right flipping
    image, annos, mask_miss = keypoint_random_flip(
        image, annos, mask_miss, prob=0.5)
    # random resize height and width together
    image, annos, mask_miss = keypoint_random_resize_shortestedge(
        image, annos, mask_miss, min_size=(hin, win), zoom_range=(0.95, 1.6))
    # random crop (with padding)
    image, annos, mask_miss = keypoint_random_crop(
        image, annos, mask_miss, size=(hin, win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # Mask the image on all three channels.
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); passing (hout, wout) only
    # gave the intended (hout, wout)-shaped mask when the output was square.
    mask_miss = cv2.resize(mask_miss, (wout, hout),
                           interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
Exemple #4
0
def annToMask(ann, height, width):
    """
    Turn an annotation (polygons, uncompressed RLE, or RLE) into a binary mask.

    The annotation is first converted with ``annToRLE`` and the resulting RLE
    is decoded with ``maskUtils.decode``.
    :return: binary mask (numpy 2D array)
    """
    return maskUtils.decode(annToRLE(ann, height, width))
def poly2mask_single(h, w, poly):
    """Rasterise ``poly`` into one mask for an ``h`` x ``w`` canvas.

    The polygons are converted to RLEs, merged into a single RLE and decoded.
    """
    # TODO: write test for poly2mask, using mask2poly convert mask to poly', compare poly with poly'
    merged_rle = maskUtils.merge(maskUtils.frPyObjects(poly, h, w))
    return maskUtils.decode(merged_rle)
Exemple #6
0
def _data_aug_fn(image,
                 ground_truth,
                 hin,
                 win,
                 hout,
                 wout,
                 hnei,
                 wnei,
                 parts,
                 limbs,
                 kpt_cvter,
                 data_format="channels_first"):
    """Decode one sample, rescale it to the input size and build proposal targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"obj"``, ``"mask"`` and ``"bbx"``.
        hin, win: height and width the image and mask are resized to.
        hout, wout, hnei, wnei, parts, limbs: forwarded to
            ``get_pose_proposals``.
        kpt_cvter: callable applied to every entry of ``"obj"`` before the
            entries are stacked into an array.
        data_format: when ``"channels_first"`` the returned image is
            transposed with axes ``[2, 0, 1]``.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)``.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["obj"]
    mask = ground_truth["mask"]
    bbxs = ground_truth["bbx"]

    # Keypoint conversion, then rescale x/y to the network input size.
    img_h, img_w, _ = image.shape
    annos = np.array([kpt_cvter(anno) for anno in annos]).astype(np.float32)
    bbxs = np.array(bbxs).astype(np.float32)
    scale_w = np.float32(win / img_w)
    scale_h = np.float32(hin / img_h)
    annos[:, :, 0] *= scale_w
    annos[:, :, 1] *= scale_h
    # Boxes: columns 0 and 2 scale with the width, 1 and 3 with the height.
    bbxs[:, 0] *= scale_w
    bbxs[:, 1] *= scale_h
    bbxs[:, 2] *= scale_w
    bbxs[:, 3] *= scale_h

    # Decode the segments: start from all ones and zero each decoded segment.
    mask_miss = np.ones((img_h, img_w), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # Image transform; cv2.resize takes dsize as (width, height).
    image = cv2.resize(image, dsize=(win, hin))
    mask_miss = cv2.resize(mask_miss, dsize=(win, hin))

    # Generate the targets: proposal region x, y, w, h and edges.
    delta, tx, ty, tw, th, te, te_mask = get_pose_proposals(
        annos, bbxs, hin, win, hout, wout, hnei, wnei, parts, limbs, mask_miss,
        data_format)

    # Mask the image on all three channels.
    img_mask = mask_miss[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
Exemple #7
0
def poly2mask_single(h, w, poly):
    """Convert ``poly`` to RLEs for an ``h`` x ``w`` canvas, merge them and decode one mask."""
    # TODO: write test for poly2mask, using mask2poly convert mask to poly', compare poly with poly'
    per_polygon_rles = maskUtils.frPyObjects(poly, h, w)
    return maskUtils.decode(maskUtils.merge(per_polygon_rles))
Exemple #8
0
def _data_aug_fn(image,
                 ground_truth,
                 augmentor,
                 preprocessor,
                 data_format="channels_first"):
    """Decode one sample, augment it and build heatmap/vectormap targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        augmentor: object whose ``process(image=, annos=, mask_valid=)``
            returns the augmented ``(image, annos, mask_valid)``.
        preprocessor: object exposing ``hin``, ``win``, ``hout``, ``wout`` and
            a ``process(annos=, mask_valid=)`` method returning
            ``(heatmap, vectormap)``.
        data_format: ``"channels_first"`` concatenates the maps on axis 0 and
            transposes image and mask with axes ``[2, 0, 1]``; any other value
            concatenates on the last axis.

    Returns:
        Tuple ``(image, resultmap, mask_valid, labeled)``.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    concat_dim = 0 if data_format == "channels_first" else -1
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]
    hin, win = preprocessor.hin, preprocessor.win
    hout, wout = preprocessor.hout, preprocessor.wout

    # Decode the segments: start from all ones and zero each decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            if bin_mask.shape != mask_valid.shape:
                # A segment whose size disagrees with the image is reported
                # and skipped rather than broadcast into the mask.
                print(
                    f"test error mask shape mask_valid:{mask_valid.shape} bin_mask:{bin_mask.shape}"
                )
            else:
                mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # General augmentation process.
    image, annos, mask_valid = augmentor.process(image=image,
                                                 annos=annos,
                                                 mask_valid=mask_valid)

    # Generate the targets: heatmap and vectormap.
    heatmap, vectormap = preprocessor.process(annos=annos,
                                              mask_valid=mask_valid)
    resultmap = np.concatenate((heatmap, vectormap), axis=concat_dim)

    # Mask the image on all three channels and build the output mask.
    image_mask = mask_valid.reshape(hin, win, 1)
    image = image * np.repeat(image_mask, 3, 2)
    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height).
    mask_valid = np.array(cv2.resize(mask_valid, (wout, hout),
                                     interpolation=cv2.INTER_AREA),
                          dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_valid = np.transpose(mask_valid, [2, 0, 1])
    labeled = np.float32(labeled)
    return image, resultmap, mask_valid, labeled
Exemple #9
0
    def _data_aug_fn(image, ground_truth):
        """Unpickle ``ground_truth`` and build the mask for ``image``.

        The mask starts as all ones with the image's height and width; every
        segment in the second item of the unpickled sequence is decoded and
        its pixels are cleared. Returns ``(image, mask_miss)``.
        """
        record = list(cPickle.loads(ground_truth))
        annos, segments = record[0], record[1]

        rows, cols, _ = np.shape(image)
        mask_miss = np.ones((rows, cols), dtype=np.uint8)
        for segment in segments:
            keep = np.logical_not(maskUtils.decode(segment))
            mask_miss = np.bitwise_and(mask_miss, keep)
        return image, mask_miss
Exemple #10
0
def decode_mask(meta_mask_list):
    """Combine a list of encoded masks into one uint8 mask.

    Each entry is decoded with ``maskUtils.decode`` and inverted; the result
    is the logical AND of all inverted masks, i.e. 1 where no decoded mask is
    set and 0 elsewhere.

    Args:
        meta_mask_list: list of encoded masks accepted by ``maskUtils.decode``.

    Returns:
        A ``np.uint8`` array, or ``None`` when ``meta_mask_list`` is not a
        list or is empty.
    """
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses; every other input, including an empty list, yields None.
    if not isinstance(meta_mask_list, list) or not meta_mask_list:
        return None
    mask = None
    for meta_mask in meta_mask_list:
        inv_mask = np.logical_not(maskUtils.decode(meta_mask))
        # AND-ing with an all-True seed is the identity, so the first
        # inverted mask can seed the accumulator directly.
        mask = inv_mask if mask is None else np.logical_and(mask, inv_mask)
    return mask.astype(np.uint8)
Exemple #11
0
def _data_aug_fn(image,
                 ground_truth,
                 augmentor,
                 preprocessor,
                 data_format="channels_first"):
    """Decode one sample, augment it and build the pif/paf training targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"kpt"``, ``"labeled"`` and ``"mask"``.
        augmentor: object whose ``process(image=, annos=, mask_valid=)``
            returns the augmented ``(image, annos, mask_valid)``.
        preprocessor: object exposing ``hin``, ``win``, ``hout``, ``wout`` and
            a ``process(annos=, mask_valid=)`` method returning
            ``(pif_maps, paf_maps)`` with 4 and 7 entries respectively.
        data_format: when ``"channels_first"`` image and mask are transposed
            with axes ``[2, 0, 1]``.

    Returns:
        Tuple ``(image, pif_conf, pif_vec, pif_bmin, pif_scale, paf_conf,
        paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin, paf_src_scale,
        paf_dst_scale, mask_valid_out, labeled)``.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    labeled = ground_truth["labeled"]
    mask = ground_truth["mask"]
    hin, win = preprocessor.hin, preprocessor.win
    hout, wout = preprocessor.hout, preprocessor.wout

    # Decode the segments: start from all ones and zero each decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_valid = np.ones((h_mask, w_mask), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_valid = np.bitwise_and(mask_valid, bin_mask)

    # General augmentation process.
    image, annos, mask_valid = augmentor.process(image=image,
                                                 annos=annos,
                                                 mask_valid=mask_valid)

    # Generate the targets: pif_maps and paf_maps.
    pif_maps, paf_maps = preprocessor.process(annos=annos,
                                              mask_valid=mask_valid)
    pif_conf, pif_vec, pif_bmin, pif_scale = pif_maps
    (paf_conf, paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin,
     paf_src_scale, paf_dst_scale) = paf_maps

    # Mask the image on all three channels and build the output mask.
    image_mask = mask_valid.reshape(hin, win, 1)
    image = image * np.repeat(image_mask, 3, 2)
    # cv2.resize takes dsize as (width, height).
    mask_valid_out = np.array(cv2.resize(mask_valid, (wout, hout),
                                         interpolation=cv2.INTER_AREA),
                              dtype=np.float32)[:, :, np.newaxis]
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
        mask_valid_out = np.transpose(mask_valid_out, [2, 0, 1])
    labeled = np.float32(labeled)
    return (image, pif_conf, pif_vec, pif_bmin, pif_scale,
            paf_conf, paf_src_vec, paf_dst_vec, paf_src_bmin, paf_dst_bmin,
            paf_src_scale, paf_dst_scale, mask_valid_out, labeled)
Exemple #12
0
def _mock_data_aug_fn(image, ground_truth):
    """Reduced augmentation: decode the mask, resize-crop and build targets.

    Sizes are read from ``config.MODEL`` (``hin``, ``win``, ``hout``,
    ``wout``).

    Args:
        image: 3-D (height, width, channel) image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of segments.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # Start from an all-ones mask and zero every decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # random crop
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss,
        size=(config.MODEL.hin, config.MODEL.win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)

    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # TODO: the image does not always have the right size if 256 * 384 is requested
    print("image1 ", height, width, _)
    print("mask1 ", mask_miss.shape)

    # mask image in all 3 channels
    img_mask = mask_miss.reshape(height, width, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); the previous (hout, wout)
    # order produced a transposed-size mask for non-square outputs.
    mask_miss = cv2.resize(mask_miss, (config.MODEL.wout, config.MODEL.hout),
                           interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
Exemple #13
0
def gen_mask(components):
    """
    Attach a mask built from the meta object's mask segments.

    ``components[0]`` is the meta object. When its ``masks_segments`` is
    non-empty, an all-ones ``height`` x ``width`` mask is created, every
    decoded segment is cleared from it and the result is stored on
    ``meta.mask``. Otherwise nothing is modified.

    :param components: components
    :return: updated components
    """
    meta = components[0]
    if not meta.masks_segments:
        return components

    remaining = np.ones((meta.height, meta.width), dtype=np.uint8)
    for segment in meta.masks_segments:
        uncovered = np.logical_not(maskUtils.decode(segment))
        remaining = np.bitwise_and(remaining, uncovered)
    meta.mask = remaining
    return components
Exemple #14
0
def segm_to_mask(segm, w, h):
    """
    Turn a segmentation map (which can be polygons) into a binary mask.

    The segmentation is converted to RLE with ``segm_to_rle`` and then
    decoded with ``maskUtils.decode``.
    Reference:
        https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py

    Args:
        segm (list<list<int>> or list<int>):
            A segmentation map which can be polygons.
        w (int): Image width
        h (int): Image height

    Returns:
        mask (np.array(H, W)):
            A segmentation mask of a particular class.
    """
    return maskUtils.decode(segm_to_rle(segm, w, h))
Exemple #15
0
    def __getitem__(self, index):
        """Return a random patch of image ``index`` and its label map.

        The image ``<id>.jpg`` is loaded as an RGB tensor, a label map of the
        same height and width is filled with the category index of every
        kept instance, and both are cropped to the same randomly placed
        ``patchSize`` window.
        """
        entry = self._infos.data[index]
        img_file = os.path.join(self._img_dir, str(entry.id) + '.jpg')

        img = transforms.ToTensor()(Image.open(img_file).convert('RGB'))
        _, img_h, img_w = img.shape

        target = torch.LongTensor(img_h, img_w).zero_()
        for inst in entry.insts:
            if self._n_class == 7:
                # { bg, person, bicycle, car, motorcycle, truck, bus, train }
                # Category 8 is remapped to 5 on the instance itself.
                if inst.category_idx == 8:
                    inst.category_idx = 5
                keep = inst.category_idx <= 6
            elif self._n_class == 2:
                # { bg, person }
                keep = inst.category_idx == 1
            else:
                keep = False

            polys = [poly.tolist() for poly in inst.seg] if keep else []
            if not polys:
                continue

            merged = maskUtils.merge(
                maskUtils.frPyObjects(polys, img_h, img_w))
            mask = maskUtils.decode(merged)
            target.masked_fill_(torch.from_numpy(mask), inst.category_idx)

        patch = self._infos.patchSize
        p_w = patch.w
        p_h = patch.h

        # Draw x before y so the random sequence is consumed in the same order.
        x0 = random.randint(0, (img_w - p_w))
        y0 = random.randint(0, (img_h - p_h))
        rows = slice(y0, y0 + p_h)
        cols = slice(x0, x0 + p_w)

        return img[:, rows, cols], target[rows, cols]
def gen_mask(components):
    """
    Store a mask built from the mask segments in ``components[11]``.

    ``components[7]`` holds the mask segments and ``components[1]`` /
    ``components[2]`` the mask's height and width. When the segments are
    non-empty, an all-ones mask is created, every decoded segment is cleared
    from it and the result is written to ``components[11]``. Otherwise
    nothing is modified.

    :param components: components
    :return: updated components
    """
    segments = components[7]
    rows, cols = components[1], components[2]

    if not segments:
        return components

    remaining = np.ones((rows, cols), dtype=np.uint8)
    for segment in segments:
        uncovered = np.logical_not(maskUtils.decode(segment))
        remaining = np.bitwise_and(remaining, uncovered)
    components[11] = remaining
    return components
Exemple #17
0
def _data_aug_fn(image, input):
    """Decode the mask, augment the sample and build 368x368 training data.

    ``input`` is a pickled sequence whose first item is the keypoint
    annotations and whose second item is the list of segments. Returns
    ``(image, resultmap, mask_miss)`` as float32 arrays, the mask resized
    to 46x46.
    """
    payload = list(cPickle.loads(input))
    annos = payload[0]
    mask = payload[1]

    # Start from an all-ones mask and clear every decoded segment.
    rows, cols, _ = np.shape(image)
    mask_miss = np.ones((rows, cols), dtype=np.uint8)
    for segment in mask:
        uncovered = np.logical_not(maskUtils.decode(segment))
        mask_miss = np.bitwise_and(mask_miss, uncovered)

    # Image processing: scale, rotate, flip, crop.
    image, annos, mask_miss = pose_random_scale(image, annos, mask_miss)
    image, annos, mask_miss = pose_rotation(image, annos, mask_miss)
    image, annos, mask_miss = random_flip(image, annos, mask_miss)
    image, annos, mask_miss = crop_meta_image(image, annos, mask_miss)

    rows, cols, _ = np.shape(image)
    if not (rows == 368 and cols == 368):
        image, annos, mask_miss = _resize_image(image, annos, mask_miss, 368,
                                                368)

    height, width, _ = np.shape(image)
    resultmap = np.concatenate(
        (get_heatmap(annos, height, width),
         get_vectormap(annos, height, width)),
        axis=2)

    # Mask the image on all three channels.
    image = np.array(image, dtype=np.float32)
    image = image * np.repeat(mask_miss.reshape(368, 368, 1), 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    small_mask = cv2.resize(mask_miss, (46, 46), interpolation=cv2.INTER_AREA)
    mask_miss = np.array(small_mask, dtype=np.float32)
    return image, resultmap, mask_miss
def main():
    """Draw the polygon vertices of every 'wires' annotation onto its image.

    For each image that has annotations of the 'wires' category, the image is
    loaded, converted to grey and back to 3 channels, and every polygon vertex
    is drawn as a small circle: yellow ``[0, 255, 255]`` when ``iscrowd`` is 1,
    red ``[0, 0, 255]`` otherwise. The result is written under the same file
    name into the output directory.

    Raises:
        SystemExit: when an annotation carries a non-polygon segmentation,
            which this script does not render.
    """
    inputfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/images'
    jsonfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/annotation-small.json'
    outputfile = '/home/qinjian/Segmentation/地理遥感图像分割/aicrowd房屋分割竞赛/val/show'

    mkdir_os(outputfile)

    coco = COCO(jsonfile)
    catIds = coco.getCatIds(catNms=['wires'])  # category ids matching the name
    imgIds = coco.getImgIds(catIds=catIds)  # ids of all images with that category
    total = len(imgIds)

    point_size = 2
    thickness = 2
    for i, img_id in enumerate(imgIds):
        if i % 100 == 0:
            print(i, "/", total)
        img = coco.loadImgs(img_id)[0]

        cvImage = cv2.imread(os.path.join(inputfile, img['file_name']), -1)
        cvImage = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
        cvImage = cv2.cvtColor(cvImage, cv2.COLOR_GRAY2BGR)

        annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
        anns = coco.loadAnns(annIds)

        # Each polygon is stored together with its colour so the two can
        # never get out of step.
        coloured_polygons = []
        for ann in anns:
            if 'segmentation' not in ann:
                continue
            segmentation = ann['segmentation']
            if not isinstance(segmentation, list):
                # The script used to call exit() here, leaving the mask
                # rendering code after it unreachable. That dead code is
                # removed and the abort is now explicit.
                raise SystemExit(
                    f"Unsupported non-polygon segmentation in annotation "
                    f"{ann.get('id')}; aborting.")
            for seg in segmentation:
                vertices = np.array(seg).reshape(-1, 2).tolist()
                colour = [0, 255, 255] if ann['iscrowd'] == 1 else [0, 0, 255]
                coloured_polygons.append((vertices, colour))

        for vertices, colour in coloured_polygons:
            for x, y in vertices:
                cv2.circle(cvImage, (int(x), int(y)), point_size,
                           (colour[0], colour[1], colour[2]),
                           thickness)
        cv2.imwrite(os.path.join(outputfile, img['file_name']), cvImage)
Exemple #19
0
    Returns:

    A Tensor. Has the same type as input. Has the shape of tensor.shape * repeats
    """

    expanded_tensor = tf.expand_dims(tensor, -1)
    multiples = [1] + repeats
    tiled_tensor = tf.tile(expanded_tensor, multiples=multiples)
    repeated_tesnor = tf.reshape(tiled_tensor, tf.shape(tensor) * repeats)

    return repeated_tesnor


if __name__ == '__main__':
    # Manual check: load the validation keypoint annotations and build the
    # mask for the first 50 entries.
    data_dir = '/Users/Joel/Desktop/coco'
    data_type = 'val'
    anno_path = '{}/annotations/person_keypoints_{}2014.json'.format(
        data_dir, data_type)
    df_val = PoseInfo(data_dir, data_type, anno_path)

    for i in range(50):
        meta = df_val.metas[i]
        mask_sig = meta.masks
        print('shape of np mask is ', np.shape(mask_sig), type(mask_sig))
        # `mask_sig is not []` compared identity against a fresh list and was
        # therefore always true; test for actual content instead.
        if mask_sig is not None and len(mask_sig) > 0:
            mask_miss = np.ones((meta.height, meta.width), dtype=np.uint8)
            for seg in mask_sig:
                bin_mask = np.logical_not(maskUtils.decode(seg))
                mask_miss = np.bitwise_and(mask_miss, bin_mask)
Exemple #20
0
def _data_aug_fn(image, ground_truth):
    """Decode one sample, apply a random affine augmentation and build targets.

    Relies on the names ``hin``, ``win``, ``hout`` and ``wout`` being defined
    in the enclosing scope.

    Args:
        image: 3-D (height, width, channel) image array.
        ground_truth: pickled sequence whose first item is the keypoint
            annotations and whose second item is the list of segments.

    Returns:
        Tuple ``(image, resultmap, mask_miss)``, all float32.
    """
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = list(cPickle.loads(ground_truth))
    annos = ground_truth[0]
    mask = ground_truth[1]

    # Start from an all-ones mask and zero every decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    for seg in mask:
        bin_mask = np.logical_not(maskUtils.decode(seg))
        mask_miss = np.bitwise_and(mask_miss, bin_mask)

    ## image data augmentation
    M_rotate = tl.prepro.affine_rotation_matrix(angle=(-30, 30))  # -40~40
    # NOTE(review): an affine flip mirrors coordinates but does not appear to
    # swap left/right keypoint indices - confirm the labels stay correct.
    M_flip = tl.prepro.affine_horizontal_flip_matrix(prob=0.5)
    M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=(0.5, 0.8))  # 0.5~1.1
    M_combined = M_rotate.dot(M_flip).dot(M_zoom)
    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined,
                                                                x=w,
                                                                y=h)
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    # The mask must follow the same transform as the image and keypoints;
    # previously it was left untouched and no longer lined up with the image.
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss,
                                               transform_matrix,
                                               border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)

    # resize and random crop to the network input size
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(image,
                                                                    annos,
                                                                    mask_miss,
                                                                    size=(hin,
                                                                          win))

    # generate result maps including keypoints heatmap, pafs and mask
    height, width, _ = np.shape(image)
    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # Mask the image on all three channels.
    img_mask = mask_miss.reshape(hin, win, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)
    # cv2.resize takes dsize as (width, height); passing (hout, wout) only
    # gave the intended (hout, wout)-shaped mask when the output was square.
    mask_miss = cv2.resize(mask_miss, (wout, hout),
                           interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss
Exemple #21
0
    def annToMask(self, ann):
        """Convert ``ann`` to RLE via ``self.annToRLE`` and decode it into a mask."""
        return maskUtils.decode(self.annToRLE(ann))
Exemple #22
0
def _data_aug_fn(image,
                 ground_truth,
                 hin,
                 win,
                 hout,
                 wout,
                 hnei,
                 wnei,
                 parts,
                 limbs,
                 data_format="channels_first"):
    """Decode one sample, apply a random affine augmentation and build proposal targets.

    Args:
        image: object whose ``.numpy()`` yields a 3-D image array.
        ground_truth: object whose ``.numpy()`` yields a pickled dict with the
            keys ``"kpt"``, ``"mask"`` and ``"bbx"``; boxes are read as
            ``(x, y, w, h)`` rows.
        hin, win: size passed to the resize-crop step.
        hout, wout, hnei, wnei, parts, limbs: forwarded to
            ``get_pose_proposals``.
        data_format: when ``"channels_first"`` the returned image is
            transposed with axes ``[2, 0, 1]``.

    Returns:
        Tuple ``(image, delta, tx, ty, tw, th, te, te_mask)``.
    """
    # Restore the sample.
    # NOTE: unpickling can execute arbitrary code; feed only trusted records.
    ground_truth = cPickle.loads(ground_truth.numpy())
    image = image.numpy()
    annos = ground_truth["kpt"]
    mask = ground_truth["mask"]
    bbxs = ground_truth["bbx"]
    annos = np.array(annos).astype(np.float32)
    bbxs = np.array(bbxs).astype(np.float32)

    # Decode the segments: start from all ones and zero each decoded segment.
    h_mask, w_mask, _ = np.shape(image)
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    # Identity test: `!= None` would be evaluated element-wise on an array.
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # Expand every box into its four corners so the boxes can go through the
    # same affine transform as the keypoints.
    bbxs_x, bbxs_y = bbxs[:, 0], bbxs[:, 1]
    bbxs_w, bbxs_h = bbxs[:, 2], bbxs[:, 3]
    transform_bbx = np.zeros(shape=(bbxs.shape[0], 4, 2))
    # left top
    transform_bbx[:, 0, 0] = bbxs_x
    transform_bbx[:, 0, 1] = bbxs_y
    # right top
    transform_bbx[:, 1, 0] = bbxs_x + bbxs_w
    transform_bbx[:, 1, 1] = bbxs_y
    # left bottom
    transform_bbx[:, 2, 0] = bbxs_x
    transform_bbx[:, 2, 1] = bbxs_y + bbxs_h
    # right bottom
    transform_bbx[:, 3, 0] = bbxs_x + bbxs_w
    transform_bbx[:, 3, 1] = bbxs_y + bbxs_h

    # Build the random transform matrix.
    h, w, _ = image.shape
    M_rotate = tl.prepro.affine_rotation_matrix(
        angle=(-30, 30))  # original paper: -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(
        zoom_range=(0.5, 0.8))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined,
                                                                x=w,
                                                                y=h)
    # Apply the same transform to image, mask, keypoints and box corners.
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss,
                                               transform_matrix,
                                               border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)
    transform_bbx = tl.prepro.affine_transform_keypoints(
        transform_bbx, transform_matrix)

    # Rebuild axis-aligned (x, y, w, h) boxes around the transformed corners.
    transform_bbx = np.array(transform_bbx)
    final_bbxs = np.zeros(shape=bbxs.shape)
    for bbx_id in range(transform_bbx.shape[0]):
        corners = transform_bbx[bbx_id, :, :]
        bbx_min_x = np.amin(corners[:, 0])
        bbx_max_x = np.amax(corners[:, 0])
        bbx_min_y = np.amin(corners[:, 1])
        bbx_max_y = np.amax(corners[:, 1])
        final_bbxs[bbx_id, 0] = bbx_min_x
        final_bbxs[bbx_id, 1] = bbx_min_y
        final_bbxs[bbx_id, 2] = bbx_max_x - bbx_min_x
        final_bbxs[bbx_id, 3] = bbx_max_y - bbx_min_y

    # Resize-crop to the network input size.
    transform_h, transform_w, _ = image.shape
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(image,
                                                                    annos,
                                                                    mask_miss,
                                                                    size=(hin,
                                                                          win))
    resize_ratio = max(
        hin / transform_h,
        win / transform_w)  # follow tl.prepro.keypoint_resize_random_crop
    # NOTE(review): only the box width/height are rescaled here; the box x/y
    # are not adjusted for the resize or the crop offset - confirm intended.
    final_bbxs[:, 2] = final_bbxs[:, 2] * resize_ratio
    final_bbxs[:, 3] = final_bbxs[:, 3] * resize_ratio

    # Generate the targets: proposal region x, y, w, h and edges.
    delta, tx, ty, tw, th, te, te_mask = get_pose_proposals(
        annos, final_bbxs, hin, win, hout, wout, hnei, wnei, parts, limbs,
        mask_miss, data_format)

    # Mask the image on all three channels.
    img_mask = mask_miss[:, :, np.newaxis]
    image = image * np.repeat(img_mask, 3, 2)
    if data_format == "channels_first":
        image = np.transpose(image, [2, 0, 1])
    return image, delta, tx, ty, tw, th, te, te_mask
Exemple #23
0
                continue

            anns = img_dict[f]
            pack = [(ann["mode"], ann["bbox"], ann["label"], ann["score"],
                     ann["rle"]) for ann in anns]

            for m, b, l, s, r in pack:
                b = [math.ceil(coor) for coor in b]
                if m == "xyxy":
                    pt1 = (b[0], b[1])
                    pt2 = (b[2], b[3])
                else:
                    pt1 = (b[0], b[1])
                    pt2 = (b[0] + b[2], b[1] + b[3])
                if not args.no_mask:
                    decoded_mask = mask_utils.decode(r)
                    decoded_mask_3ch = np.stack(
                        (decoded_mask * args.alpha, ) * 3, axis=-1)
                    green_dropback = np.zeros_like(im)
                    green_dropback[:, :, 1] = 255

                    foreground = cv2.multiply(decoded_mask_3ch,
                                              green_dropback,
                                              dtype=cv2.CV_32FC3)
                    background = cv2.multiply(1 - decoded_mask_3ch,
                                              im,
                                              dtype=cv2.CV_32FC3)

                    im = cv2.add(foreground, background)

                if not args.no_box:
Exemple #24
0
def _data_aug_fn(image, ground_truth):
    """Augment one training sample and build its training targets.

    The sample is randomly rotated and zoomed (one combined affine
    transform), randomly flipped left-right, then resized/cropped to the
    model input size. Keypoint annotations and the "miss" mask are
    transformed together with the image so they stay aligned.

    Args:
        image: H x W x 3 image array.
        ground_truth: pickled sequence whose first item holds the keypoint
            annotations and whose second item holds the segmentation masks
            (decoded with ``maskUtils.decode``) of regions to be ignored.

    Returns:
        A tuple ``(image, resultmap, mask_miss)``:
            image: float32 image with the masked-out regions zeroed in all
                three channels.
            resultmap: float32 array, the heatmap and vector map
                concatenated along axis 2.
            mask_miss: float32 mask resized to the model output size
                (``config.MODEL.hout`` rows by ``config.MODEL.wout`` columns).
    """
    # NOTE(review): unpickling is only safe for trusted, self-generated data.
    ground_truth = list(cPickle.loads(ground_truth))

    annos = ground_truth[0]
    mask = ground_truth[1]
    h_mask, w_mask, _ = np.shape(image)

    # Start from an all-valid mask and clear every annotated segment.
    mask_miss = np.ones((h_mask, w_mask), dtype=np.uint8)
    if mask is not None:
        for seg in mask:
            bin_mask = np.logical_not(maskUtils.decode(seg))
            mask_miss = np.bitwise_and(mask_miss, bin_mask)

    # Random rotation and zoom are folded into a single affine matrix.
    # A flip is deliberately NOT part of this matrix: doing it here was
    # reported to corrupt the keypoints, so it is applied separately below.
    M_rotate = tl.prepro.affine_rotation_matrix(
        angle=(-180, 180))  # original paper: -40~40
    M_zoom = tl.prepro.affine_zoom_matrix(
        zoom_range=(0.5, 1.1))  # original paper: 0.5~1.1
    M_combined = M_rotate.dot(M_zoom)

    h, w, _ = image.shape
    transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined,
                                                                x=w,
                                                                y=h)
    image = tl.prepro.affine_transform_cv2(image, transform_matrix)
    mask_miss = tl.prepro.affine_transform_cv2(mask_miss,
                                               transform_matrix,
                                               border_mode='replicate')
    annos = tl.prepro.affine_transform_keypoints(annos, transform_matrix)

    # Random left-right flip, keeping keypoints and mask consistent.
    image, annos, mask_miss = tl.prepro.keypoint_random_flip(image,
                                                             annos,
                                                             mask_miss,
                                                             prob=0.5)

    # Resize and randomly crop to the model input size.
    # TODO: reported to only work reliably with square dimensions.
    image, annos, mask_miss = tl.prepro.keypoint_resize_random_crop(
        image, annos, mask_miss,
        size=(config.MODEL.hin, config.MODEL.win))

    # Generate result maps: keypoint heatmap and part-affinity vector map.
    height, width, _ = np.shape(image)

    heatmap = get_heatmap(annos, height, width)
    vectormap = get_vectormap(annos, height, width)
    resultmap = np.concatenate((heatmap, vectormap), axis=2)

    image = np.array(image, dtype=np.float32)

    # TODO: image does not always have the right size if 256 * 384 is
    # requested.

    # Zero out masked regions in all 3 channels.
    img_mask = mask_miss.reshape(height, width, 1)
    image = image * np.repeat(img_mask, 3, 2)

    resultmap = np.array(resultmap, dtype=np.float32)

    # cv2.resize expects dsize as (width, height); passing (hout, wout)
    # yields a transposed-size mask whenever the output is not square.
    # NOTE(review): get_heatmap/get_vectormap are defined elsewhere - confirm
    # they also emit hout x wout maps so the mask lines up with resultmap.
    mask_miss = cv2.resize(mask_miss, (config.MODEL.wout, config.MODEL.hout),
                           interpolation=cv2.INTER_AREA)
    mask_miss = np.array(mask_miss, dtype=np.float32)
    return image, resultmap, mask_miss