Code example #1
File: synthgen.py  Project: vietnvri/SynthText
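# NOTE: only the RendererV3 class is reproduced in these listings; the module-level
# imports of synthgen.py are elided. In upstream SynthText the class relies on
# numpy (np), cv2, copy, itertools, traceback, text_utils as tu, synth_utils as su,
# Colorize, the colorize/Color console helpers, a module-level DEBUG flag, and on
# TextRegions, get_text_placement_mask, time_limit and TimeoutException defined
# elsewhere in the same file.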
class RendererV3(object):
    def __init__(self, data_dir, max_time=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        self.min_char_height = 8  # px
        self.min_asp_ratio = 0.4  #
        self.max_text_regions = 7
        self.max_time = max_time

    def filter_regions(self, regions, filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in regions.keys():
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self, xyz, seg, regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks, Hs, Hinvs = [], [], []
        for idx, l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz,
                                          seg == l,
                                          regions['coeff'][idx],
                                          pad=2)
            if res is not None:
                mask, H, Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions, filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs

        return regions

    def warpHomography(self, src_mat, H, dst_size):
        dst_mat = cv2.warpPerspective(src_mat,
                                      H,
                                      dst_size,
                                      flags=cv2.WARP_INVERSE_MAP
                                      | cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix  (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx,dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t, f, n = bbs.shape
        assert (t == 2) and (f == 4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n),
                           order='F')
        if offset is not None:
            bbs_h[:2, :] += np.array(offset)[:, None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2, :] + eps)

        bbs_h = np.reshape(bbs_h, (3, 4, n), order='F')
        return bbs_h[:2, :, :]

    def bb_filter(self, bb0, bb, text):
        """
        Ensure that bounding-boxes are not too distorted
        after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB after perspective
        text: string of text -- for excluding symbols/punctuations.
        """
        h0 = np.linalg.norm(bb0[:, 3, :] - bb0[:, 0, :], axis=0)
        w0 = np.linalg.norm(bb0[:, 1, :] - bb0[:, 0, :], axis=0)
        hw0 = np.c_[h0, w0]

        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)
        w = np.linalg.norm(bb[:, 1, :] - bb[:, 0, :], axis=0)
        hw = np.c_[h, w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum, :]
        hw = hw[alnum, :]

        min_h0, min_h = np.min(hw0[:, 0]), np.min(hw[:, 0])
        asp0, asp = hw0[:, 0] / hw0[:, 1], hw[:, 0] / hw[:, 1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp / asp0
        is_good = (min_h > self.min_char_height
                   and asp_ratio > self.min_asp_ratio
                   and asp_ratio < 1.0 / self.min_asp_ratio)
        return is_good

    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)
        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)

    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15:
            bsz = 0.25
            ksz = 1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask, (ksz, ksz), bsz)

    def place_text(self, rgb, collision_mask, H, Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font, collision_mask)
        if render_res is None:  # rendering not successful
            return  # None
        else:
            text_mask, loc, bb, text = render_res

        # update the collision mask with text:
        collision_mask += (255 * (text_mask > 0)).astype('uint8')

        # warp the object mask back onto the image:
        # text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1])
        bb = self.homographyBB(bb, Hinv)

        if not self.bb_filter(bb_orig, bb, text):
            # warn("bad charBB statistics")
            return  # None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb, text)

        # feathering:
        text_mask = self.feather(text_mask, min_h)
        im_final = self.colorizer.color(rgb, [text_mask], np.array([min_h]))

        return im_final, text, bb, text_mask

    def get_num_text_regions(self, nregions):
        # return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0, 1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level
        bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2, 4, len(wrds)), 'float32')

        for i in range(len(wrds)):
            cc = charBB[:, :, bb_idx[i]:bb_idx[i + 1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]),
                                           axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0, :], cc[-3, :], cc[-2, :], cc[3, :]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(
                    np.linalg.norm(box[perm4[pidx], :] - cc_tblr, axis=1))
                dists.append(d)
            wordBB[:, :, i] = box[perm4[np.argmin(dists)], :].T

        return wordBB

    def render_text(self, rgb, depth, seg, area, label, ninstance=1):
        """
        Render text onto the given image.

        Args:
            rgb   : HxWx3 image rgb values (uint8)
            depth : HxW depth values (float)
            seg   : HxW segmentation region masks
            area  : number of pixels in each region
            label : region labels == unique(seg) / {0}
                   i.e., indices of pixels in SEG which
                   constitute a region mask
            ninstance : number of times image should be
                        used to place text.

        Returns:
            res : a list of dictionaries, one for each of 
                  the image instances.
                  Each dictionary has the following structure:
                      'img'    : rgb-image with text on it.
                      'charBB' : 2x4xn matrix of bounding-boxes
                                 for each character in the image.
                      'wordBB' : 2x4xm matrix of word-level bounding-boxes.
                      'txt'    : a list of strings.
                      'masks'  : a list of masks of text placed on the image.
                                 Shape of each mask is the same as that of the original image.

                  The correspondence b/w charBB and txt is that the
                  i-th non-whitespace character in txt is at charBB[:,:,i].

            If there's an error before text placement, e.g. if there is
            no suitable region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)

            # find text-regions:
            regions = TextRegions.get_regions(xyz, seg, area, label)

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz, seg, regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1:  # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            # import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            # place_masks is a local copy of the list of collision masks; it is updated below but not used further.
            place_masks = copy.deepcopy(regions['place_mask'])

            print(colorize(Color.CYAN, " ** instance # : %d" % i))

            idict = {'img': [], 'charBB': None, 'wordBB': None, 'txt': None}

            m = self.get_num_text_regions(nregions)
            reg_idx = np.arange(min(2 * m, nregions))
            np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []
            masks = []

            # process regions:
            num_txt_regions = len(reg_idx)
            NUM_REP = 5  # re-use each region up to NUM_REP times
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions

            if DEBUG:
                print("    ... try text rendering for %d regions" %
                      len(reg_range))

            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(
                            img, place_masks[ireg],
                            regions['homography'][ireg],
                            regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_text(
                                img, place_masks[ireg],
                                regions['homography'][ireg],
                                regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print(msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue

                if txt_render_res is not None:
                    if DEBUG:
                        print(
                            "    ... text rendering attempt finished successfully"
                        )
                    placed = True
                    img, text, bb, collision_mask = txt_render_res
                    # update the region collision mask:
                    # place_masks[ireg] = collision_mask  # no point of doing that, already updated inside place_text method
                    masks.append(collision_mask)
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)

            if placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['txt'] = itext
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(),
                                                   ' '.join(itext))
                idict['masks'] = masks
                idict['labeled_region'] = regions['label']
                res.append(idict.copy())
        return res
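Between the listings, here is a small self-contained sketch of the homogeneous-coordinate math that homographyBB performs on the 2x4xn corner array (the function name apply_homography_to_bbs and the translation-only homography are illustrative, not part of the project):

import numpy as np

def apply_homography_to_bbs(bbs, H, eps=1e-16):
    # bbs: 2 x 4 x n array of box corners (x, y); H: 3x3 homography matrix.
    t, f, n = bbs.shape
    assert t == 2 and f == 4
    # lift each corner to homogeneous coordinates -> 3 x (4*n), one column per point
    bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n), order='F')
    bbs_h = H.dot(bbs_h)
    bbs_h /= (bbs_h[2, :] + eps)          # divide out the projective scale
    return np.reshape(bbs_h, (3, 4, n), order='F')[:2, :, :]

# one axis-aligned unit square, stored as a 2x4x1 corner array
bb = np.zeros((2, 4, 1))
bb[:, :, 0] = [[0, 1, 1, 0],              # x of the 4 corners
               [0, 0, 1, 1]]              # y of the 4 corners
H = np.array([[1., 0., 5.],
              [0., 1., 3.],
              [0., 0., 1.]])              # pure translation by (5, 3)
print(apply_homography_to_bbs(bb, H)[:, :, 0])
# corners come back shifted to x+5, y+3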
Code example #2
class RendererV3(object):

    def __init__(self, data_dir, max_time=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        #self.colorizerV2 = colorV2.Colorize(data_dir)

        self.min_char_height = 8 #px
        self.min_asp_ratio = 0.4 #

        self.max_text_regions = 7

        self.max_time = max_time

    def filter_regions(self,regions,filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in list(regions.keys()):
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self,xyz,seg,regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks,Hs,Hinvs = [],[], []
        for idx,l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2)
            if res is not None:
                mask,H,Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions,filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs

        return regions

    def warpHomography(self,src_mat,H,dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix  (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx,dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t,f,n = bbs.shape
        assert (t==2) and (f==4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F')
        if offset is not None:
            bbs_h[:2,:] += np.array(offset)[:,None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2,:]+eps)

        bbs_h = np.reshape(bbs_h, (3,4,n), order='F')
        return bbs_h[:2,:,:]

    def bb_filter(self,bb0,bb,text):
        """
        Ensure that bounding-boxes are not too distorted
        after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB after perspective
        text: string of text -- for excluding symbols/punctuations.
        """
        h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0)
        w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0)
        hw0 = np.c_[h0,w0]

        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0)
        hw = np.c_[h,w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum,:]
        hw = hw[alnum,:]

        min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0])
        asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp/asp0
        is_good = ( min_h > self.min_char_height
                    and asp_ratio > self.min_asp_ratio
                    and asp_ratio < 1.0/self.min_asp_ratio)
        return is_good


    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)


    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15 :
            bsz = 0.25
            ksz=1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1*np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5*np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz)

    def place_text(self,rgb,collision_mask,H,Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font,collision_mask)
        if render_res is None: # rendering not successful
            return #None
        else:
            text_mask,loc,bb,text = render_res
            #text=text.decode('utf-8')
            
        
        # update the collision mask with text:
        collision_mask += (255 * (text_mask>0)).astype('uint8')

        # warp the object mask back onto the image:
        text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1])
        bb = self.homographyBB(bb,Hinv)

        if not self.bb_filter(bb_orig,bb,text):
            print(colorize(Color.RED, 'bad charBB statistics'))
            #warn("bad charBB statistics")
            return #None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb,text)

        #feathering:
        text_mask = self.feather(text_mask, min_h)

        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))
        print(colorize(Color.GREEN, 'text in synthgen.py/place_text to return '+text))
        return im_final, text, bb, collision_mask


    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0,1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level
        bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2,4,len(wrds)), 'float32')
        
        for i in range(len(wrds)):
            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0,:],
                            cc[-3,:],
                            cc[-2,:],
                            cc[3,:]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
                dists.append(d)
            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T

        return wordBB


    def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
               i.e., indices of pixels in SEG which
               constitute a region mask
        ninstance : number of times the image should be
                    used to place text.

        @return:
            res : a list of dictionaries, one for each of 
                  the image instances.
                  Each dictionary has the following structure:
                      'img'    : rgb-image with text on it.
                      'charBB' : 2x4xn matrix of bounding-boxes
                                 for each character in the image.
                      'wordBB' : 2x4xm matrix of word-level bounding-boxes.
                      'txt'    : a list of strings.

                  The correspondence b/w charBB and txt is that the
                  i-th non-whitespace character in txt is at charBB[:,:,i].

            If there's an error before text placement, e.g. if there is
            no suitable region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)
            
            # find text-regions:
            regions = TextRegions.get_regions(xyz,seg,area,label)

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz,seg,regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1: # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            #import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])

            print(colorize(Color.CYAN, " ** instance # : %d"%i))

            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None}

            m = self.get_num_text_regions(nregions)#np.arange(nregions)#min(nregions, 5*ninstance*self.max_text_regions))
            reg_idx = np.arange(min(2*m,nregions))
            #np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []

            # process regions: 
            num_txt_regions = len(reg_idx)
            NUM_REP = 5 # re-use each region up to NUM_REP times
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(img,place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_text(img,place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print(msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue

                if txt_render_res is not None:
                    placed = True
                    img,text,bb,collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)
                    print(colorize(Color.GREEN, 'text in synthgen.py/render_text append into itext '+text))

            if  placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['txt'] = itext
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
                print(colorize(Color.GREEN, itext))
                res.append(idict.copy())
                if viz:
                    viz_textbb(1,img, [idict['wordBB']], alpha=1.0)
                    viz_masks(2,img,seg,depth,regions['label'])
                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
                    if i < ninstance-1:
                        input(colorize(Color.BLUE,'continue?',True))                    
        return res
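The only non-obvious step in char2wordBB is aligning the vertices returned by cv2.minAreaRect/cv2.boxPoints (which come back in an arbitrary order) with reference corners taken from the character boxes, by brute-forcing all 4! orderings. A standalone sketch of that step, assuming OpenCV 3+ and synthetic corner points in place of real character boxes:

import itertools
import numpy as np
import cv2

# synthetic "character corner" points for one word (Nx2, float32)
pts = np.array([[10, 10], [60, 12], [62, 30], [12, 28],     # corners of char 1
                [70, 13], [120, 15], [122, 33], [72, 31]],  # corners of char 2
               dtype=np.float32)

rect = cv2.minAreaRect(pts)              # ((cx, cy), (w, h), angle)
box = np.array(cv2.boxPoints(rect))      # 4x2 rectangle vertices, arbitrary order

# reference corners in the order we want: roughly TL of the first char,
# TR and BR of the last char, BL of the first char (mirrors cc_tblr above)
ref = np.array([pts[0], pts[5], pts[6], pts[3]])

perm4 = np.array(list(itertools.permutations(np.arange(4))))
dists = [np.sum(np.linalg.norm(box[p, :] - ref, axis=1)) for p in perm4]
aligned = box[perm4[np.argmin(dists)], :]  # rectangle vertices re-ordered to match ref
print(aligned)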
Code example #3
class RendererV3(object):

    def __init__(self, data_dir, max_time=None, logos=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        #self.colorizerV2 = colorV2.Colorize(data_dir)
        self.logos = logos

        self.min_char_height = 8 #px
        self.min_asp_ratio = 0.4 #

        self.max_text_regions = 7

        self.max_time = max_time

    def filter_regions(self,regions,filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in regions.keys():
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self,xyz,seg,regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks,Hs,Hinvs = [],[], []
        for idx,l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2)
            if res is not None:
                mask,H,Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions,filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs

        return regions

    def warpHomography(self,src_mat,H,dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix  (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx,dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t,f,n = bbs.shape
        assert (t==2) and (f==4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F')
        if offset is not None:
            bbs_h[:2,:] += np.array(offset)[:,None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2,:]+eps)

        bbs_h = np.reshape(bbs_h, (3,4,n), order='F')
        return bbs_h[:2,:,:]

    def bb_filter(self,bb0,bb,text):
        """
        Ensure that bounding-boxes are not too distorted
        after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB after perspective
        text: string of text -- for excluding symbols/punctuations.
        """
        h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0)
        w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0)
        hw0 = np.c_[h0,w0]

        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0)
        hw = np.c_[h,w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum,:]
        hw = hw[alnum,:]

        min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0])
        asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp/asp0
        is_good = ( min_h > self.min_char_height
                    and asp_ratio > self.min_asp_ratio
                    and asp_ratio < 1.0/self.min_asp_ratio)
        return is_good


    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)


    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15 :
            bsz = 0.25
            ksz=1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1*np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5*np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz)


    def robust_HW(self,mask):
        m = mask.copy()
        m = (~mask).astype('float')/255
        rH = np.median(np.sum(m,axis=0))
        rW = np.median(np.sum(m,axis=1))
        return rH,rW
    def sample_font_height_px(self,h_min,h_max):
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(2.0,2.0)
        h_range = h_max - h_min
        f_h = np.floor(h_min + h_range*rnd)
        return f_h
    def bb_xywh2coords(self,bbs):
        """
        Takes an nx4 bounding-box matrix specified in x,y,w,h
        format and outputs a 2x4xn bb-matrix, (4 vertices per bb).
        """
        n,_ = bbs.shape
        coords = np.zeros((2,4,n))
        for i in range(n):
            coords[:,:,i] = bbs[i,:2][:,None]
            coords[0,1,i] += bbs[i,2]
            coords[:,2,i] += bbs[i,2:4]
            coords[1,3,i] += bbs[i,3]
        return coords
    def actually_place_logo(self, text_arrs, back_arr, bbs, logos):
        areas = [-np.prod(ta.shape) for ta in text_arrs]
        order = np.argsort(areas)
        locs = [None for i in range(len(text_arrs))]
        out_arr = np.zeros_like(back_arr)
        logo_out = np.zeros((back_arr.shape[0], back_arr.shape[1], logos[0].shape[2]), dtype=np.float32)
        for i in order:
            ba = np.clip(back_arr.copy().astype(float), 0, 255)
            ta = np.clip(text_arrs[i].copy().astype(float), 0, 255)
            ba[ba > 127] = 1e8
            intersect = ssig.fftconvolve(ba,ta[::-1,::-1],mode='valid')
            safemask = intersect < 1e8
            if not np.any(safemask): # no collision-free position:
                #warn("COLLISION!!!")
                return back_arr,locs[:i],bbs[:i],order[:i], None
            minloc = np.transpose(np.nonzero(safemask))
            loc = minloc[np.random.choice(minloc.shape[0]),:]
            locs[i] = loc
            # update the bounding-boxes:
            bbs[i] = bbs[i] + loc[::-1][:,None,None]
            # blit the text onto the canvas
            w,h = text_arrs[i].shape
            out_arr[loc[0]:loc[0]+w,loc[1]:loc[1]+h] += text_arrs[i]
            logo_out[loc[0]:loc[0]+w,loc[1]:loc[1]+h] += logos[i]
        return out_arr, locs, bbs, order, logo_out

    def pick_random_logos(self):
        im = None
        while im is None:
            name = self.logos[np.random.randint(low=0, high=len(self.logos))]
            im = cv2.imread(name, cv2.IMREAD_UNCHANGED)
            short_name = osp.basename(name)
        return im, short_name
    def actually_render_logo(self, mask):
        """
        Places text in the "collision-free" region as indicated
        in the mask -- 255 for unsafe, 0 for safe.
        The text is rendered using FONT, the text content is TEXT.
        """
        #H,W = mask.shape
        H,W = self.robust_HW(mask)
        # f_asp = self.font_state.get_aspect_ratio(font)
        min_logo_h = 40
        max_logo_h = 300
        max_shrink_trials = 50
        # let's just place one text-instance for now
        ## TODO : change this to allow multiple text instances?
        i = 0
        print ("Attempt")
        while i < max_shrink_trials and max_logo_h > min_logo_h:
            # if i > 0:
            #     print colorize(Color.BLUE, "shrinkage trial : %d"%i, True)
            # sample a random font-height:
            logo_h_px = self.sample_font_height_px(min_logo_h, max_logo_h)
            #print "font-height : %.2f (min: %.2f, max: %.2f)"%(f_h_px, self.min_font_h,max_font_h)
            # update for the loop
            max_logo_h = logo_h_px
            i += 1
            # compute the max-number of lines/chars-per-line:
            # nline, nchar = self.get_nline_nchar(mask.shape[:2],f_h,f_h*f_asp)
            # #print "  > nline = %d, nchar = %d"%(nline, nchar)
            #
            # assert nline >= 1 and nchar >= self.min_nchar
            # sample text:
            # text_type = sample_weighted(self.p_text)
            # text = self.text_source.sample(nline,nchar,text_type)
            # if len(text)==0 or np.any([len(line)==0 for line in text]):
            #     continue
            #print colorize(Color.GREEN, text)
            logo, name = self.pick_random_logos()
            # calculate logo width
            logo_w = logo_h_px * (float(logo.shape[1]) / logo.shape[0])
            print(logo_w, logo_h_px)
            if logo_w >= W:
                continue
            # render the logo
            if len(logo.shape) == 3 and logo.shape[2] == 4:
                logo_arr = logo[:, :, 3]
            else:
                logo_arr = np.ones((logo.shape[0], logo.shape[1]))
            logo_arr = cv2.resize(logo_arr, (int(logo_w), int(logo_h_px)))
            logo = cv2.resize(logo, (int(logo_w), int(logo_h_px)))
            logo_bb = np.array([[0, 0, logo_arr.shape[1], logo_arr.shape[0]]])
            bb = self.bb_xywh2coords(logo_bb)
            # make sure that the text-array is not bigger than mask array:
            if np.any(np.r_[logo_arr.shape[:2]] > np.r_[mask.shape[:2]]):
                # warn("text-array is bigger than mask")
                continue
            # position the text within the mask:
            # logger.debug(VisualRecord("original logo", [logo, logo_arr]))
            logo_mask,loc, bb, _, logo = self.actually_place_logo([logo_arr], mask, [bb], [logo])
            # logger.debug(VisualRecord("positioned logo", [logo, logo_mask]))
            if len(loc) > 0: # successful in placing the text collision-free:
                return logo_mask,loc[0],bb[0], name, logo
        return #None

    def place_logo(self, rgb, collision_mask, H, Hinv):
        # logo = cv2.imread("logos/nike.png", cv2.IMREAD_UNCHANGED)
        # name = "nike"
        render_res = self.actually_render_logo(collision_mask)
        # self.text_renderer.render_sample(font, collision_mask)
        if render_res is None:  # rendering not successful
            return  # None
        else:
            logo_mask, loc, bb, logo_name, logo_detail = render_res
        # update the collision mask with text:
        collision_mask += (255 * (logo_mask > 0)).astype('uint8')
        # warp the object mask back onto the image:
        logo_mask_orig = logo_mask.copy()
        bb_orig = bb.copy()
        # logger.debug(VisualRecord("logo_pre_warp", [logo_mask, logo_detail]))
        logo_mask = self.warpHomography(logo_mask, H, rgb.shape[:2][::-1])
        logo_detail = self.warpHomography(logo_detail, H, rgb.shape[:2][::-1])
        # print "logo mask detail", logo_mask.shape, logo_detail.shape
        bb = self.homographyBB(bb, Hinv)
        # if not self.bb_filter(bb_orig, bb, logo_name):
        #     # warn("bad charBB statistics")
        #     return  # None
        # get the minimum height of the character-BB:
        # min_h = self.get_min_h(bb, [logo_name])
        min_h = np.abs(bb[0, 3, :] - bb[0, 0, :])
        # feathering:
        logo_mask = self.feather(logo_mask, min_h)
        # print "logo mask detail2", logo_mask.shape, logo_detail.shape
        # logger.debug(VisualRecord("logo", [logo_mask, logo_detail]))
        #TODO deal with grayscale/color logos here
        im_final = self.colorizer.color(rgb, [logo_mask], np.array([min_h]), logo_detail=[logo_detail])
        # bbox_image = im_final.copy()
        # for bbox in bb.copy().T:
        #     print "bbox", bbox
        #     for coord in bbox:
        #         cv2.circle(bbox_image, (int(coord[0]), int(coord[1])), 5, (255,0,0), thickness=-1)
            # cv2.circle(bbox_image, (bbox[0], bbox[2]), 5, (255, 0, 0), thickness=-1)
            # cv2.circle(bbox_image, (bbox[3], bbox[2]), 5, (255, 0, 0), thickness=-1)
            # cv2.circle(bbox_image, (bbox[3], bbox[1]), 5, (255, 0, 0), thickness=-1)
        # logger.debug(VisualRecord("bboxes", imgs=[bbox_image]))
        # im_final = rgb.copy()
        return im_final, logo_name, bb, collision_mask



    def place_text(self,rgb,collision_mask,H,Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font,collision_mask)
        if render_res is None: # rendering not successful
            return #None
        else:
            text_mask,loc,bb,text = render_res

        # update the collision mask with text:
        collision_mask += (255 * (text_mask>0)).astype('uint8')

        # warp the object mask back onto the image:
        text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1])
        bb = self.homographyBB(bb,Hinv)

        if not self.bb_filter(bb_orig,bb,text):
            #warn("bad charBB statistics")
            return #None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb,text)

        #feathering:
        text_mask = self.feather(text_mask, min_h)

        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))

        return im_final, text, bb, collision_mask


    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0,1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level
        bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2,4,len(wrds)), 'float32')
        
        for i in range(len(wrds)):
            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0,:],
                            cc[-3,:],
                            cc[-2,:],
                            cc[3,:]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
                dists.append(d)
            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T

        return wordBB


    def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
               i.e., indices of pixels in SEG which
               constitute a region mask
        ninstance : number of times the image should be
                    used to place text.

        @return:
            res : a list of dictionaries, one for each of 
                  the image instances.
                  Each dictionary has the following structure:
                      'img'    : rgb-image with text on it.
                      'charBB' : 2x4xn matrix of bounding-boxes
                                 for each character in the image.
                      'wordBB' : 2x4xm matrix of word-level bounding-boxes.
                      'txt'    : a list of strings.

                  The correspondence b/w charBB and txt is that the
                  i-th non-whitespace character in txt is at charBB[:,:,i].

            If there's an error before text placement, e.g. if there is
            no suitable region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)
            
            # find text-regions:
            regions = TextRegions.get_regions(xyz,seg,area,label)

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz,seg,regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1: # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            #import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])

            print (colorize(Color.CYAN, " ** instance # : %d"%i))

            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None}

            m = self.get_num_text_regions(nregions)#np.arange(nregions)#min(nregions, 5*ninstance*self.max_text_regions))
            reg_idx = np.arange(min(2*m,nregions))
            np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []

            # process regions: 
            num_txt_regions = len(reg_idx)
            NUM_REP = 5 # re-use each region up to NUM_REP times
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(img,place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_text(img,place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print (msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue

                if txt_render_res is not None:
                    placed = True
                    img,text,bb,collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)

            if  placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['txt'] = itext
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
                res.append(idict.copy())
                if viz:
                    viz_textbb(1,img, [idict['wordBB']], alpha=1.0)
                    viz_masks(2,img,seg,depth,regions['label'])
                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
                    if i < ninstance-1:
                        raw_input(colorize(Color.BLUE,'continue?',True))                    
        return res

    def render_logo(self,rgb,depth, seg, area, label, ninstance=1, viz=False):
        print("hold")

        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
               i.e., indices of pixels in SEG which
               constitute a region mask
        ninstance : number of times the image should be
                    used to place text.
                    
        @return:
            res : a list of dictionaries, one for each of
                  the image instances.
                  Each dictionary has the following structure:
                      'img'      : rgb-image with the logo(s) on it.
                      'logoBB'   : 2x4xn matrix of bounding-boxes of the placed logos.
                      'logoName' : a list of logo names.
            If there's an error before placement, e.g. if there is
            no suitable region for logo placement, an empty list is returned.
        """

        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)
            # find text-regions:
            regions = TextRegions.get_regions(xyz, seg, area, label)
            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz, seg, regions)
            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1:  # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            # import traceback
            traceback.print_exc()
            return []
        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])
            print(colorize(Color.CYAN, " ** instance # : %d" % i))
            idict = {'img': [], 'logoBB': None, 'logoName': None}
            m = self.get_num_text_regions(nregions)
            reg_idx = np.arange(min(2 * m, nregions))
            np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]
            placed = False
            img = rgb.copy()
            ilogo = []
            ibb = []
            # process regions:
            num_txt_regions = len(reg_idx)
            NUM_REP = np.random.randint(low=1, high=4)  # re-use each region a random (1-3) number of times
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_logo(img, place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_logo(img, place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print(msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue
                if txt_render_res is not None:
                    placed = True
                    img, logo, bb, collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    ilogo.append(logo)
                    ibb.append(bb)
            if placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['logoName'] = ilogo
                idict['logoBB'] = np.concatenate(ibb, axis=2)
                res.append(idict.copy())
                # if viz:
                #     viz_textbb(1, img, [idict['logobb']], alpha=1.0)
                #     # viz_masks(2, img, seg, depth, regions['label'])
                #     # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
                #     if i < ninstance - 1:
                #         raw_input(colorize(Color.BLUE, 'continue?', True))
        return res
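actually_place_logo above reuses SynthText's FFT-based collision search: occupied background pixels are set to a huge value, the patch is cross-correlated over the background with scipy.signal.fftconvolve, and every output position whose response stays below that huge value is a collision-free top-left corner. A minimal standalone illustration with made-up arrays (the 1e7 threshold here just guards against FFT round-off; the listing compares against 1e8):

import numpy as np
import scipy.signal as ssig

back = np.zeros((40, 40), dtype=float)
back[5:20, 5:20] = 255.0                  # an already-occupied region
patch = np.ones((8, 12), dtype=float)     # mask of the thing we want to place

ba = back.copy()
ba[ba > 127] = 1e8                        # occupied pixels become "infinitely" costly
# correlation via convolution with a flipped kernel; each output value is the
# sum of background cost under one placement of the patch
intersect = ssig.fftconvolve(ba, patch[::-1, ::-1], mode='valid')
safemask = intersect < 1e7                # True where the patch overlaps nothing occupied

free = np.transpose(np.nonzero(safemask))
loc = free[np.random.choice(free.shape[0]), :]
print("collision-free top-left corner (row, col):", tuple(loc))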
Code example #4
File: synthgen.py  Project: kotomiDu/GameSynthText
class RendererV3(object):

    def __init__(self, data_dir, max_time=None, text_number=1, font_color_user=None, font_size_user=18, text_content_user="******"):
        self.text_renderer = tu.RenderFont(data_dir, font_size_user, text_content_user)
        self.colorizer = Colorize(data_dir, font_color= font_color_user)
        self.NUM_REP = text_number

        self.min_char_height = 8 #px
        self.min_asp_ratio = 0.4 #
        self.max_text_regions = 7
        self.max_time = max_time

    def filter_regions(self,regions,filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in regions.keys():
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self,xyz,seg,regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks,Hs,Hinvs = [],[], []
        for idx,l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2)
            if res is not None:
                mask,H,Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions,filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs

        return regions

    def warpHomography(self,src_mat,H,dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix  (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx,dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t,f,n = bbs.shape
        assert (t==2) and (f==4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F')
        if offset is not None:
            bbs_h[:2,:] += np.array(offset)[:,None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2,:]+eps)

        bbs_h = np.reshape(bbs_h, (3,4,n), order='F')
        return bbs_h[:2,:,:]

    def bb_filter(self,bb0,bb,text):
        """
        Ensure that bounding-boxes are not too distorted
        after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB after perspective
        text: string of text -- for excluding symbols/punctuations.
        """
        h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0)
        w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0)
        hw0 = np.c_[h0,w0]

        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0)
        hw = np.c_[h,w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum,:]
        hw = hw[alnum,:]

        min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0])
        asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp/asp0
        is_good = ( min_h > self.min_char_height
                    and asp_ratio > self.min_asp_ratio
                    and asp_ratio < 1.0/self.min_asp_ratio)
        return is_good


    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text)==bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)


    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15 :
            bsz = 0.25
            ksz=1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1*np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5*np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz)

    def place_text(self, rgb, collision_mask):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font,collision_mask)
      
        if render_res is None: # rendering not successful
            return #None
        else:
            text_mask,loc,bb,text = render_res

        # update the collision mask with text:
        kernel = np.ones((32, 32), np.uint8)    # 2*self.min_font_h
        mask_ext = cv2.dilate(text_mask,kernel,iterations = 1)
        collision_mask += (255 * (mask_ext > 0)).astype('uint8')

        # warp the object mask back onto the image:
        # text_mask_orig = text_mask.copy()
        # bb_orig = bb.copy()
        # text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1])
        # bb = self.homographyBB(bb,Hinv)
        #
        # if not self.bb_filter(bb_orig,bb,text):
        #     #warn("bad charBB statistics")
        #     return #None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb,text)

        #feathering:
        # plt.imshow(text_mask)
        # text_mask = self.feather(text_mask, min_h)
        # plt.imshow(text_mask)

        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))

        return im_final, text, bb, collision_mask

    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0,1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level
        bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2,4,len(wrds)), 'float32')
        
        for i in range(len(wrds)):
            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0,:],
                            cc[-3,:],
                            cc[-2,:],
                            cc[3,:]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
                dists.append(d)
            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T

        return wordBB
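        # Worked illustration (hypothetical input): for text = "ab cd",
        # bb_idx = [0, 2, 4], so word 0 is fitted to charBB[:, :, 0:2] and
        # word 1 to charBB[:, :, 2:4]; cv2.minAreaRect then gives the tightest
        # rotated rectangle around each word's character corners.
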
    def char2lineBB(self, charBB, itext, imgsize):
        """
        Converts character bounding-boxes to line-level
        bounding-boxes.

        charBB  : 2x4xn matrix of character BB coordinates
        itext   : list of placed text strings (one per text placement)
        imgsize : (H, W) of the image, used to clip the expanded boxes

        output : (lineBB, txt, letterBB) -- a 2x4xm matrix of line BB
                 coordinates (m == number of lines), the list of line
                 strings, and the integer character corners per line.
        """
        lt = len(itext)
        text = ' '.join(itext)
        lines = text.split('\n')
        lineBB = np.zeros((2, 4, len(lines)+lt-1), 'float32')
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        nlines = 0
        idx1 = 0
        idx2 = 0
        txt = []
        letterBB = []

        for it in range(lt):
            text = itext[it]
            lines = text.split('\n')

            for i in range(len(lines)):
                wrds = lines[i].split()
                lw = len(wrds)

                if lw <= 0 :
                    continue

                idx2 += lw
                cc = charBB[:, :, bb_idx[idx1]:bb_idx[idx2]]
                idx1 = idx2

                #
                cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]), axis=1)).T.astype('float32')
                coor_min = np.min(cc, axis=0)
                coor_max = np.max(cc, axis=0)
                ext = np.clip(np.round(np.random.rand(2) * 10), None, 6)  #not exceed self.min_font_h
                coor_min[0] = np.clip(coor_min[0] - ext[0], 0, None)
                coor_min[1] = np.clip(coor_min[1] - ext[1], 0, None)
                coor_max[0] = np.clip(coor_max[0] + ext[0], None, imgsize[1] - 1)
                coor_max[1] = np.clip(coor_max[1] + ext[1], None, imgsize[0] - 1)
                box = np.array([[coor_min[0], coor_min[1]], [coor_min[0], coor_max[1]], [coor_max[0], coor_max[1]],
                                [coor_max[0], coor_min[1]]])
                lineBB[:, :, nlines] = box.T
                nlines += 1
                linetxt = lines[i].strip()
                linetxt = linetxt.strip('\t')
                txt.append(linetxt)
                letterBB.append(cc.astype(int))

        return lineBB[:, :, 0:nlines], txt, letterBB
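        # Note: each line box is the axis-aligned min/max over its characters'
        # corners, expanded by a random 0-6 px margin and clipped to the image
        # bounds; alongside the boxes it returns the stripped line strings and
        # the integer character corners of every line.
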
    def render_text(self, rgb, place_mask, dir, ninstance=1, viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
               i.e., indices of pixels in SEG which
               constitute a region mask
        ninstance : no of times image should be
                    used to place text.

        """
       
        for i in range(ninstance):
            place_masks = place_mask.copy()
            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None, 'lineBB':None}
            placed = False
            img = rgb.copy()
            itext = []
            ibb = []

         
            for idx in range(self.NUM_REP):            
                try:
                    txt_render_res = self.place_text(img, place_masks)                  
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue

                if txt_render_res is not None:
                    placed = True
                    img,text,bb,collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)

            if placed:
                # at least 1 word was placed in this instance:
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['lineBB'], idict['txt'], letterBB = self.char2lineBB(idict['charBB'].copy(), itext, place_masks.shape)
                outimg = Image.fromarray(img)
                inimgname_prefix = os.path.splitext(dir['inimgname'])[0]
                outimg.save(os.path.join(dir['outimgdir'],inimgname_prefix +'_'+str(i).zfill(3)+'.png'),'png')
                save_label_txt(os.path.join(dir['outtxtdir'],inimgname_prefix +'_'+str(i).zfill(3)+'.txt'), idict['lineBB'], idict['txt'], letterBB)
                
                if viz:
                    viz_textbb(1,img, [idict['lineBB']], alpha=1.0)
                    # viz_masks(2,img,seg,depth,regions['label'])
                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
                    if i < ninstance-1:
                        input(colorize(Color.BLUE,'continue?',True))
            else:
                if global_util.logfile is not None:
                    global_util.logfile.write("{} failed\n".format(os.path.join(dir['outimgdir'],dir['inimgname'])))
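
A hypothetical driver for the renderer above, shown only as a sketch: the 'data'
directory, the image path, the output directories, the empty placement mask and
the NUM_REP value are placeholders / assumptions (render_text reads self.NUM_REP,
which is not defined in the snippet's __init__, so the sketch sets it explicitly).

import os
import cv2
import numpy as np

renderer = RendererV3('data', max_time=5)   # 'data' holds the fonts / colour models
renderer.NUM_REP = 3                        # assumed; render_text expects this attribute

rgb = cv2.imread('scene.jpg')[:, :, ::-1]              # placeholder path; BGR -> RGB
place_mask = np.zeros(rgb.shape[:2], dtype='uint8')    # empty collision mask
out_dirs = {'inimgname': 'scene.jpg',
            'outimgdir': 'out/images',
            'outtxtdir': 'out/labels'}
os.makedirs(out_dirs['outimgdir'], exist_ok=True)
os.makedirs(out_dirs['outtxtdir'], exist_ok=True)
renderer.render_text(rgb, place_mask, out_dirs, ninstance=2, viz=False)
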
Code example #5
0
File: synthgen.py Project: ustczhouyu/OCR-On-the-go
class RendererV3(object):

    def __init__(self, data_dir, max_time=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        #self.colorizerV2 = colorV2.Colorize(data_dir)

        self.min_char_height = 8   # px
        self.min_asp_ratio = 0.4

        self.max_text_regions = 1  # Rohit changed from 7 to 1

        self.max_time = max_time

    def filter_regions(self,regions,filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in regions.keys():
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self,xyz,seg,regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks,Hs,Hinvs = [],[], []
        for idx,l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2)
            if res is not None:
                mask,H,Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions,filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs

        return regions

    def warpHomography(self,src_mat,H,dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix  (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx,dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t,f,n = bbs.shape
        assert (t==2) and (f==4)

        # append 1 for homogenous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F')
        if offset is not None:
            bbs_h[:2,:] += np.array(offset)[:,None]

        # perspective:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2,:]+eps)

        bbs_h = np.reshape(bbs_h, (3,4,n), order='F')
        return bbs_h[:2,:,:]

    def bb_filter(self,bb0,bb,text):
        """
        Ensure that bounding-boxes are not too distorted
        after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB after perspective
        text: string of text -- for excluding symbols/punctuations.
        """
        # return True
        h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0)
        w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0)
        hw0 = np.c_[h0,w0]

        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0)
        hw = np.c_[h,w]
        
        if np.sum(hw0[:,1] == 0) > 0 or np.sum(hw[:,1] == 0) > 0:
            return False
        # remove newlines and spaces:
        # text = ''.join(text.split())
        # assert len(text)==bb.shape[-1]

        # alnum = np.array([ch.isalnum() for ch in text])
        # hw0 = hw0[alnum,:]
        # hw = hw[alnum,:]

        min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0])
        asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp/asp0
        is_good = ( min_h > self.min_char_height
                    and asp_ratio > self.min_asp_ratio
                    and asp_ratio < 1.0/self.min_asp_ratio)
        return is_good


    def get_min_h(self, bb, text):
        # return True
        # find min-height:
        h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0)
        # remove newlines and spaces:
        text = ''.join(text.split())
        # assert len(text)==bb.shape[-1]

        # alnum = np.array([ch.isalnum() for ch in text])
        # h = h[alnum]
        return np.min(h)


    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        bsz = 0.5 * np.random.rand()
        ksz = np.random.choice([1,3,5])
        # if min_h <= 15 :
        #     bsz = 0.25
        #     ksz=1
        # elif 15 < min_h < 30:
        #     bsz = max(0.30, 0.5 + 0.1*np.random.randn())
        #     ksz = 3
        # else:
        #     bsz = max(0.5, 1.5 + 0.5*np.random.randn())
        #     ksz = 5
        return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz)

    def place_text(self,rgb,collision_mask,H,Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)
        # print("syngthgen #510")

        render_res = self.text_renderer.render_sample(font,collision_mask)
        # print("syngthgen #513")
        if render_res is None: # rendering not successful
            return #None
        else:
            text_mask,loc,bb,text = render_res

        # update the collision mask with text:
        collision_mask += (255 * (text_mask>0)).astype('uint8')

        # warp the object mask back onto the image:
        text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1])
        bb = self.homographyBB(bb,Hinv)

        if not self.bb_filter(bb_orig,bb,text):
            #warn("bad charBB statistics")
            return #None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb,text)

        #feathering:
        text_mask = self.feather(text_mask, min_h)

        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))

        return im_final, text, bb, collision_mask
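    # Pipeline summary for place_text above: sample and initialise a font, render
    # a text mask in the rectified (fronto-parallel) frame, update the collision
    # mask, warp the mask and character boxes back into the image frame via H and
    # Hinv, reject overly distorted placements with bb_filter, then feather the
    # mask and colorize the text onto the image.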


    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0,1.0)
        return 1  # int(np.ceil(nmax * rnd))  # ROHIT CHANGED THIS

    def check_word_scriptDev(self, word_text):
        #devchars= 'ाीिुूेैंोौृःॄॅअआइईउऊऋएऐओऔअंअःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवषशसहक्षत्रज्ञड़ढ़।॥'
        devFlag = 0
        #print(word_text)
        for chrs in word_text:
            if chrs in self.devchars:
                devFlag = 1
                break
        return devFlag
    def aksharaSplit(self, word_text):
        prevWasHalanta = False
        word_text_new = []
        for chr1 in word_text:
            if chr1 in 'ाीिुूेैंोौृःॄॅ':
                prev_char = word_text_new[-1]
                word_text_new.pop()
                word_text_new.append(prev_char + chr1)
                prevWasHalanta = False
            elif chr1 in "्":
                prevWasHalanta = True
            else:
                if(prevWasHalanta):
                    prev_char = word_text_new[-1]
                    word_text_new.pop()
                    word_text_new.append(prev_char + "्" + chr1) 
                else:
                    word_text_new.append(chr1)
                prevWasHalanta = False
        return word_text_new
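        # Illustration (hypothetical words): aksharaSplit("सीता") -> ['सी', 'ता']
        # and aksharaSplit("क्षेत्र") -> ['क्षे', 'त्र']: dependent vowel signs are
        # merged into the preceding consonant, and a halanta joins the previous
        # and next consonants into a single akshara unit.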

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level
        bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(self.aksharaSplit(w)) for w in wrds])]
        wordBB = np.zeros((2,4,len(wrds)), 'float32')
        #print("wrds", wrds)        
        for i in range(len(wrds)):
            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0,:],
                            cc[-3,:],
                            cc[-2,:],
                            cc[3,:]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
                dists.append(d)
            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T

        return wordBB

    def word2lineBB(self, wordBB, text):
        """
        Converts word bounding-boxes to line-level
        bounding-boxes.

        wordBB : 2x4xn matrix of word BB coordinates
        text   : the full text string (newline-separated lines)

        output : 2x4xm matrix of BB coordinates,
                 where, m == number of lines.
        """
        print("text", text)        
        lines = text.split("\n")
        bb_idx = np.r_[0, np.cumsum([len(l.split()) for l in lines])]
        lineBB = np.zeros((2,4,len(lines)), 'float32')
        print("lines", lines)        
        for i in range(len(lines)):
            cc = wordBB[:,:,bb_idx[i]:bb_idx[i+1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb.
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0,:],
                            cc[-3,:],
                            cc[-2,:],
                            cc[3,:]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
                dists.append(d)
            lineBB[:,:,i] = box[perm4[np.argmin(dists)],:].T

        return lineBB


    def render_text(self, rgb, depth, seg, area, label, ninstance=1, viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
               i.e., indices of pixels in SEG which
               constitute a region mask
        ninstance : no of times image should be
                    used to place text.

        @return:
            res : a list of dictionaries, one for each of 
                  the image instances.
                  Each dictionary has the following structure:
                      'img' : rgb-image with text on it.
                      'bb'  : 2x4xn matrix of bounding-boxes
                              for each character in the image.
                      'txt' : a list of strings.

                  The correspondence b/w bb and txt is that
                  i-th non-space white-character in txt is at bb[:,:,i].
            
            If there's an error in pre-text placement, for e.g. if there's 
            no suitable region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)
            
            # find text-regions:
            regions = TextRegions.get_regions(xyz,seg,area,label)  # takes depth-derived xyz, segmentation, areas and labels and returns candidate text regions
            #print("herE", regions)
            #regions = [regions[0]]

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz,seg,regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1: # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            #import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])

            # print (colorize(Color.CYAN, " ** instance # : %d"%i))

            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None}

            m = self.get_num_text_regions(nregions)  # np.arange(nregions)  # min(nregions, 5*ninstance*self.max_text_regions)
            reg_idx = np.arange(min(1, nregions))  # Rohit changed 2*m to 1
            np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []
            #print (colorize(Color.CYAN, " ** 2instance # : %d"%i))
            # process regions: 
            num_txt_regions = len(reg_idx)
            NUM_REP = 1  # ROHIT CHANGED TO 2 FROM 5 (now 1); each region is re-used NUM_REP times
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            # print("reg_range is ", reg_range)
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(img,place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):  # SECS_PER_IMG = 5: max time per image in seconds
                            txt_render_res = self.place_text(img,place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print (msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue
                

                if txt_render_res is not None:
                    placed = True
                    img,text,bb,collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)

            if placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['txt'] = itext
                #print("itext", itext)
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
                idict['lineBB'] = self.word2lineBB(idict['wordBB'].copy(), ' '.join(itext))
                res.append(idict.copy())
                if viz:
                    viz_textbbchar(1,img, [idict['charBB']], alpha=1.0)
                    viz_textbb(1,img, [idict['wordBB']], alpha=1.0)
                    viz_textbb(1,img, [idict['lineBB']], alpha=1.0)
                    viz_masks(2,img,seg,depth,regions['label'])
                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
                    if i < ninstance-1:
                        input(colorize(Color.BLUE,'continue?',True))                    
        return res
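
A hypothetical driver for the renderer above; this is only a sketch: the 'data'
directory and the placeholder arrays are assumptions (in practice depth,
segmentation, area and label come from SynthText-style scene databases), and the
snippet's module-level helpers (su, TextRegions, viz_*, etc.) must be importable
alongside the class.

import numpy as np

H, W = 480, 640
rgb   = np.zeros((H, W, 3), dtype='uint8')   # placeholder scene image
depth = np.ones((H, W), dtype='float32')     # placeholder depth map
seg   = np.ones((H, W), dtype='float32')     # a single region labelled 1
area  = np.array([H * W])                    # pixel count per region
label = np.array([1])                        # region labels

renderer = RendererV3('data', max_time=5)    # 'data' holds the fonts / colour models
res = renderer.render_text(rgb, depth, seg, area, label, ninstance=1, viz=False)
for inst in res:
    print(inst['txt'])                       # list of placed strings
    print(inst['charBB'].shape)              # 2 x 4 x n_characters
    print(inst['wordBB'].shape)              # 2 x 4 x n_words
    print(inst['lineBB'].shape)              # 2 x 4 x n_lines
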
Code example #6
0
if __name__ == '__main__':
    # testing:
    Renderer = WordRenderer(is_debug=True, Language="Vietnamese")
    for i in range(40):
        print(f"{i}--------")
        # Img[HxWx4], Texts(List[str]), CBOX[nx4x2], BBOX[nx4x2] (N_box, corner, x/y)
        bg_img = cv2.imread("img_2.jpg")
        img, texts, cbox, bbox = Renderer.RenderWordImageA(
            660, 1000, (30, 30, 30, 240))
        text_mask = img[:, :, 3]

        print(text_mask.shape)
        print(bg_img.shape)
        bg_img = cv2.resize(bg_img, (text_mask.shape[1], text_mask.shape[0]))
        Colorizer = Colorize()
        img = Colorizer.color(bg_img, np.array([text_mask]), np.array([30]))
        print(texts)
        # print(img.shape)

        # visualize boxes
        img_b = img.copy()
        for box in bbox:
            img_b[box[0, 1]:box[1, 1] + 2,
                  box[0, 0]:box[1, 0] + 2] = [255, 0, 0, 255]
            img_b[box[2, 1]:box[3, 1] + 2,
                  box[3, 0]:box[2, 0] + 2] = [0, 255, 0, 255]
            img_b[box[0, 1]:box[3, 1] + 2,
                  box[0, 0]:box[3, 0] + 2] = [255, 0, 255, 255]
            img_b[box[1, 1]:box[2, 1] + 2,
                  box[1, 0]:box[2, 0] + 2] = [0, 255, 255, 255]
        img_c = img.copy()