Exemple #1
0
 def __init__(self):
     '''
     Create the Faker instance and the weighted chooser of time patterns.

     Two patterns are registered with equal weight (0.5 each): one built on
     %H and one built on %I plus %p.

     NOTE(review): both patterns contain regex-like fragments such as
     "(:%S)?" and "[ ]?" -- confirm which component is expected to expand
     them, since they are handed to Faker as-is.
     '''
     patterns = ["%H:%M(:%S)?", "%I:%M(:%S)[ ]?%p"]
     weights = [0.5, 0.5]
     self.faker = Faker()
     self.dateformat = ListGenWithProb(patterns, weights)
Exemple #2
0
def gen_compare(sameheight, dsttxt, comparewith):
    """Display a reference image next to freshly generated text images.

    The reference image is shown once (grayscale and heat-map versions);
    then up to 2999 samples are generated and each one is shown the same
    way. The function blocks on a key press after every generated sample.

    Args:
        sameheight: height passed to resizeToHeight for every image shown.
        dsttxt: text to render on every iteration; when None a new text is
            drawn from the name/district/trash generator mix each time.
        comparewith: path of the reference image to load with cv2.imread.

    Raises:
        IOError: if the reference image cannot be read.
    """
    dstimg = cv2.imread(comparewith)
    if dstimg is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read reference image: %s' % comparewith)
    # Collapse axis 2 (the colour channels) into a single 8-bit plane.
    dstimg = np.mean(dstimg, axis=2).astype(np.uint8)
    dstimg_cl = gray2heatmap(dstimg)
    cv2.imshow('dst', resizeToHeight(dstimg, sameheight))
    cv2.imshow('dst-heatmap', resizeToHeight(dstimg_cl, sameheight))

    pipe_name = CMNDPipeName()
    hotengen = HoTenGen(os.path.join(RESOURCE_PATH, 'hovaten.csv'))
    trashgen = Trash(os.path.join(RESOURCE_PATH, 'hovaten.csv'), 0.5)
    quanhuyengen = QuanHuyenGen(os.path.join(RESOURCE_PATH, 'tinhtp.csv'), number=2)

    # Close the parameter file deterministically instead of leaking the handle.
    with codecs.open(TEMPORARY_PATH + '4/aligned.txt', encoding='utf8') as aligned_file:
        filecontent = aligned_file.read()
    # The file uses single quotes; swap them so json.loads accepts it.
    jsonStr = filecontent.replace('\'', '"')
    jsonObj = json.loads(jsonStr)

    namegen = ListGenWithProb([hotengen, quanhuyengen, trashgen], [0.4, 0.3, 0.3])
    for i in range(1, 3000):
        # Single-argument form prints the same text on Python 2 and 3.
        print('%d -----------------------' % i)
        if dsttxt is not None:
            pipe_name.txt = dsttxt
        else:
            pipe_name.txt = namegen.gen()
        pipe_name.p.reset(jsonObj)
        rs, _, txt = pipe_name.gen()
        if rs is None:
            # Generation produced no image for this text; try the next one.
            continue
        txt = txt.strip()
        print('@@@' + txt + '@@@')
        rs_cl = gray2heatmap(rs)
        cv2.imshow('gen', resizeToHeight(rs, sameheight))
        cv2.imshow('gen-heatmap', resizeToHeight(rs_cl, sameheight))
        # Wait for a key press before moving on to the next sample.
        cv2.waitKey(-1)
Exemple #3
0
def to_weinman(root, preview=True):
    """Generate 32-pixel-high text-line images (and optionally annotations).

    For i in 1..2999 a text is drawn from the name/district/trash generator
    mix, stripped of digits, rendered through CMNDPipeName, converted with
    UnicodeUtil.to_vni, resized to height 32 and written to
    ``root + str(i) + '.jpg'``.

    Args:
        root: path prefix for the images and for 'anno-train.txt' /
            'anno-test.txt'; it is concatenated directly, so it should end
            with a path separator.
        preview: when True (the default, matching the historical behaviour)
            every image is shown in a window, the function waits for a key
            press, and NO annotation line is written. When False the
            preview is skipped and a line ``./<i>.jpg <text>`` is appended
            to the train file for i < 2900 and to the test file otherwise.
            Previously the annotation branch sat behind an unconditional
            ``continue`` and could never run.
    """
    pipe_name = CMNDPipeName()
    hotengen = HoTenGen(os.path.join(RESOURCE_PATH, 'hovaten.csv'))
    quanhuyengen = QuanHuyenGen(os.path.join(RESOURCE_PATH, 'tinhtp.csv'), number=2)
    trashgen = Trash(os.path.join(RESOURCE_PATH, 'hovaten.csv'), 0.5)

    # Close the parameter file deterministically instead of leaking the handle.
    with codecs.open(TEMPORARY_PATH + '4/aligned.txt', encoding='utf8') as aligned_file:
        filecontent = aligned_file.read()
    # The file uses single quotes; swap them so json.loads accepts it.
    jsonStr = filecontent.replace('\'', '"')
    jsonObj = json.loads(jsonStr)

    namegen = ListGenWithProb([hotengen, quanhuyengen, trashgen], [0.4, 0.3, 0.3])
    unicodeutil = UnicodeUtil(RESOURCE_PATH + 'diacritics2.csv')

    with codecs.open(root + 'anno-train.txt', 'a', encoding='utf8') as annotation_train, \
            codecs.open(root + 'anno-test.txt', 'a', encoding='utf8') as annotation_test:
        for i in range(1, 3000):
            # Single-argument form prints the same text on Python 2 and 3.
            print('%d -----------------------' % i)
            # The value is unused; the draw is kept so the global NumPy
            # random stream advances exactly as it did before.
            np.random.rand()
            txt = namegen.gen()
            txt = re.sub(r'\d', '', txt)
            pipe_name.txt = txt
            pipe_name.p.reset(jsonObj)
            rs, _, txt = pipe_name.gen()
            if rs is None:
                # Generation produced no image for this text; skip the index.
                continue
            txt = txt.strip()
            txt = unicodeutil.to_vni(txt)
            # Scale to a fixed height of 32 while keeping the aspect ratio.
            newwidth = rs.shape[1] * 32.0 / rs.shape[0]
            rs = cv2.resize(rs, (int(newwidth), 32))
            cv2.imwrite(root + str(i) + '.jpg', rs)
            print('@@@' + txt + '@@@')
            if preview:
                cv2.imshow('hihi', rs)
                cv2.waitKey(-1)
                continue
            line = './' + str(i) + '.jpg ' + txt + '\n'
            if i < 2900:
                annotation_train.write(line)
            else:
                annotation_test.write(line)
Exemple #4
0
 def __init__(self, afonts):
     '''
     Build one AFont per entry of afonts and a weighted chooser over them.

     afonts is an iterable of (p, fontname, w2h, s2h) tuples. Each font is
     loaded from FONT_PATH + fontname, recorded in self.allfonts under
     that full path, and appended to self.fonts; p is also used as the
     font's weight in self.fontgen.
     '''
     self.fonts = []
     weights = []
     for weight, filename, w2h, s2h in afonts:
         fullpath = FONT_PATH + filename
         loaded = AFont(fullpath, weight, w2h, s2h)
         self.allfonts[fullpath] = loaded
         self.fonts.append(loaded)
         weights.append(weight)
     self.fontgen = ListGenWithProb(self.fonts, weights)
Exemple #5
0
 def __init__(self, afonts=()):
     '''
     Initialise pygame, the font renderer and the weighted font pool.

     afonts is an iterable of (p, fontname, w2h, s2h) tuples; each one
     becomes an AFont loaded from FONT_PATH + fontname, with p used as its
     weight in self.fontgen. The default is an immutable empty tuple so no
     mutable object is shared between calls.

     self.fontgen is always defined: it is None when no fonts were given.
     '''
     pygame.init()
     self.renderfont = RenderFont(DATA_PATH)
     self.height = 100
     self.pad_for_cut = 10
     self.rel_line_spacing = 1.0
     self.loitgfonts = []
     self.fonts = []
     probs = []
     for p, fontname, w2h, s2h in afonts:
         self.fonts.append(AFont(FONT_PATH + fontname, p, w2h, s2h))
         probs.append(p)
     # A weighted chooser is only built when there is something to choose.
     self.fontgen = ListGenWithProb(self.fonts, probs) if self.fonts else None
Exemple #6
0
 def __init__(self, fromdate=None, todate=None, dateformat=None):
     '''
     Configure the date range and the weighted chooser of output formats.

     fromdate / todate: bounds as "YYYY-MM-DD" strings (parsed with
         "%Y-%m-%d"); each one defaults to the current date and time when
         None.
     dateformat: positional arguments for ListGenWithProb, unpacked with
         "*" -- presumably (list_of_formats, list_of_probabilities);
         confirm against ListGenWithProb. When None, a single
         "%Y-%m-%d" format with probability 1.0 is used; previously the
         None default raised TypeError on unpacking.

     Raises ValueError (from datetime.strptime) if a bound does not match
     "%Y-%m-%d".
     '''
     self.faker = Faker('en_US')
     if dateformat is None:
         dateformat = (["%Y-%m-%d"], [1.0])
     self.dateformat = ListGenWithProb(*dateformat)
     if fromdate is not None:
         self.fromdate = datetime.strptime(fromdate, "%Y-%m-%d")
     else:
         self.fromdate = datetime.now()
     if todate is not None:
         self.todate = datetime.strptime(todate, "%Y-%m-%d")
     else:
         self.todate = datetime.now()
Exemple #7
0
class Fonts(object):
    """Weighted pool of AFont objects with lookup by file name."""

    # Class-level registry shared by every Fonts instance; maps
    # FONT_PATH + fontname to the AFont created for it.
    allfonts = {}

    def __init__(self, afonts):
        """Load the fonts described by afonts and build the weighted chooser.

        Args:
            afonts: iterable of (p, fontname, w2h, s2h) tuples. Each font is
                loaded from FONT_PATH + fontname and p is used as its weight.
        """
        self.fonts = []
        probs = []
        for p, fontname, w2h, s2h in afonts:
            font = AFont(FONT_PATH + fontname, p, w2h, s2h)
            self.allfonts[FONT_PATH + fontname] = font
            self.fonts.append(font)
            probs.append(p)
        self.fontgen = ListGenWithProb(self.fonts, probs)

    def genRandom(self):
        """Return one font drawn from the weighted chooser."""
        return self.fontgen.gen()

    def genByName(self, query):
        """Return the registered font whose file name contains query.

        The match is case-insensitive and only looks at the last
        '/'-separated component of the registered path. The search covers
        the shared class-level registry, not just this instance's fonts.

        Returns:
            The matching font when exactly one matches; None when nothing
            matches or when the query is ambiguous.
        """
        needle = query.lower()
        # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        matches = [font for path, font in self.allfonts.items()
                   if needle in path.split('/')[-1].lower()]
        if len(matches) == 1:
            return matches[0]
        return None
Exemple #8
0
class DateGen(Gen):
    '''
    Generator of randomly formatted date strings within a date range.
    '''
    def __init__(self, fromdate=None, todate=None, dateformat=None):
        '''
        Configure the date range and the weighted chooser of output formats.

        fromdate / todate: bounds as "YYYY-MM-DD" strings (parsed with
            "%Y-%m-%d"); each one defaults to the current date and time
            when None.
        dateformat: positional arguments for ListGenWithProb, unpacked with
            "*" -- presumably (list_of_formats, list_of_probabilities);
            confirm against ListGenWithProb. When None, a single
            "%Y-%m-%d" format with probability 1.0 is used; previously the
            None default raised TypeError on unpacking.

        Raises ValueError (from datetime.strptime) if a bound does not
        match "%Y-%m-%d".
        '''
        self.faker = Faker('en_US')
        if dateformat is None:
            dateformat = (["%Y-%m-%d"], [1.0])
        self.dateformat = ListGenWithProb(*dateformat)
        if fromdate is not None:
            self.fromdate = datetime.strptime(fromdate, "%Y-%m-%d")
        else:
            self.fromdate = datetime.now()
        if todate is not None:
            self.todate = datetime.strptime(todate, "%Y-%m-%d")
        else:
            self.todate = datetime.now()

    def gen(self):
        '''
        Return one random date between fromdate and todate as a string.

        The date is formatted with a format drawn from self.dateformat,
        then passed through replaceZeroWithProb (probability argument 0.15)
        and changeCaseWithProb (upper=0.5, lower=0.0).
        '''
        d = self.faker.date_time_between_dates(datetime_start=self.fromdate,
                                               datetime_end=self.todate,
                                               tzinfo=None).date()
        fm = self.dateformat.gen()
        d = d.strftime(fm)
        d = replaceZeroWithProb(d, 0.15)
        d = changeCaseWithProb(d, upper=0.5, lower=0.0)
        return d
Exemple #9
0
class TimeGen(Gen):
    '''
    Generator of randomly formatted time-of-day strings.
    '''
    def __init__(self):
        '''
        Create the Faker instance and the weighted chooser of time patterns.

        Two patterns are registered with equal weight (0.5 each): one built
        on %H and one built on %I plus %p.

        NOTE(review): both patterns contain regex-like fragments such as
        "(:%S)?" and "[ ]?" -- confirm which component is expected to expand
        them, since gen() hands them to Faker as-is.
        '''
        patterns = ["%H:%M(:%S)?", "%I:%M(:%S)[ ]?%p"]
        weights = [0.5, 0.5]
        self.faker = Faker()
        self.dateformat = ListGenWithProb(patterns, weights)

    def gen(self):
        '''
        Return one random time string.

        A pattern is drawn from self.dateformat and given to Faker's time();
        the result goes through replaceZeroWithProb and then
        changeCaseWithProb (upper=0.5, lower=0.1).
        '''
        pattern = self.dateformat.gen()
        raw = self.faker.time(pattern=pattern, end_datetime=None)
        zero_swapped = replaceZeroWithProb(raw)
        return changeCaseWithProb(zero_swapped, upper=0.5, lower=0.1)
Exemple #10
0
class RenderText(object):
    """Render text strings into image arrays using a weighted pool of fonts."""

    # Font-specific special cases used by toMask. The paths are compared
    # verbatim against loitgfont.fontpath.
    _PKMN_FONT = '/home/loitg/Downloads/fonts/fontss/receipts/westgate/PKMN-Mystery-Dungeon.ttf'
    _PKMN_REJECTED_CHARS = ['<', '>', ';', '!', '$', '#', '&', '/', '`', '~', '@', '%', '^', '*', '.']
    _LEFFC2_FONT = '/home/loitg/Downloads/fonts/fontss/receipts/general_fairprice/LEFFC2.TTF'
    _UPPERCASE_FONTS = (
        '/home/loitg/Downloads/fonts/fontss/receipts/general_fairprice/PRINTF Regular.ttf',
        '/home/loitg/Downloads/fonts/fontss/receipts/dotted/fake receipt.ttf',
    )
    # Alphabet for the random filler lines rendered above and below the text.
    _FILLER_ALPHABET = 'ABC0123456789abcdef '

    def __init__(self, afonts=()):
        """Initialise pygame, the font renderer and the weighted font pool.

        Args:
            afonts: iterable of (p, fontname, w2h, s2h) tuples; each becomes
                an AFont loaded from FONT_PATH + fontname, with p used as
                its weight. The default is an immutable empty tuple so no
                mutable object is shared between calls.

        self.fontgen is always defined: it is None when no fonts were given.
        """
        pygame.init()
        self.renderfont = RenderFont(DATA_PATH)
        self.height = 100
        self.pad_for_cut = 10
        self.rel_line_spacing = 1.0
        self.loitgfonts = []
        self.fonts = []
        probs = []
        for p, fontname, w2h, s2h in afonts:
            self.fonts.append(AFont(FONT_PATH + fontname, p, w2h, s2h))
            probs.append(p)
        # A weighted chooser is only built when there is something to choose.
        self.fontgen = ListGenWithProb(self.fonts, probs) if self.fonts else None

    def init_font(self, fontpath, spacing):
        """Load FONT_PATH + fontpath at size self.height with plain styling.

        Underline, strong and oblique are switched off, antialiasing and
        origin mode are switched on. As a side effect self.spacing is set
        to spacing.

        Returns:
            The configured freetype.Font.
        """
        font = freetype.Font(FONT_PATH + fontpath, size=self.height)
        font.underline = False
        font.strong = False
        font.oblique = False
        font.strength = False
        font.antialiased = True
        font.origin = True
        self.spacing = spacing
        return font

    def genFont(self):
        """Return a font drawn from the weighted pool, or None if it is empty."""
        if len(self.fonts) > 0:
            return self.fontgen.gen()
        else:
            return None

    def toMask2(self, afont, txt):
        """Render txt on a single line with afont and stretch it horizontally.

        s2h and w2h are each drawn uniformly from the two-element ranges
        afont.s2h and afont.w2h; both are passed to the renderer and w2h is
        then also applied as the horizontal scale factor.

        Returns:
            (image array, txt).
        """
        # Draw order (s2h first, then w2h) is kept so seeded runs reproduce.
        s2h = random.uniform(afont.s2h[0], afont.s2h[1])
        w2h = random.uniform(afont.w2h[0], afont.w2h[1])

        txt_arr, _, _ = self.renderfont.render_singleline(afont.font, txt, w2h, s2h)
        newwidth = int(txt_arr.shape[1] * w2h)
        txt_arr = cv2.resize(txt_arr, (newwidth, txt_arr.shape[0]))
        return txt_arr, txt

    def toMask(self, loitgfont, txt, otherlines=False):
        """Render txt between two random filler lines and distort the result.

        Args:
            loitgfont: font object exposing fontpath, font and getRatio().
            txt: the text to render on the middle line.
            otherlines: selects which set of numeric arguments is passed to
                render_multiline (0.3, 0.2, 1 when True; 0.05, 0.5, 1
                otherwise).

        Returns:
            (image array, text actually rendered) -- the text is upper-cased
            for two specific fonts -- or (None, None) when the font/text
            combination is rejected.
        """
        fontpath = loitgfont.fontpath
        # Rejections: presumably these fonts lack the glyphs -- TODO confirm.
        if fontpath == self._PKMN_FONT and any(c in txt for c in self._PKMN_REJECTED_CHARS):
            return None, None
        if fontpath == self._LEFFC2_FONT and any(c.isdigit() for c in txt):
            return None, None
        if fontpath in self._UPPERCASE_FONTS:
            txt = txt.upper()

        # Random filler lines of the same length as txt, above and below it.
        above = rstr.rstr(self._FILLER_ALPHABET, len(txt))
        below = rstr.rstr(self._FILLER_ALPHABET, len(txt))
        multilines = above + '\n' + txt + '\n' + below
        if otherlines:
            txt_arr, _, _ = self.renderfont.render_multiline(loitgfont.font, multilines, 0.3, 0.2, 1)
        else:
            txt_arr, _, _ = self.renderfont.render_multiline(loitgfont.font, multilines, 0.05, 0.5, 1)

        # The angle is always drawn (before the coin flip) so the random
        # stream is consumed in the same order whether or not we rotate.
        angle = np.random.randn() * 3
        if np.random.rand() < 0.4:
            txt_arr = rotate_bound(txt_arr, angle)

        newwidth = int(txt_arr.shape[1] * loitgfont.getRatio())
        if np.random.rand() < 0.4:
            # With probability 0.4, double the width of lines that contain a
            # d.dd number or one of the TOTAL_KEYS (as-is or upper-cased).
            found = re.search(r'\d\.\d\d', txt)
            if found:
                newwidth *= 2
            elif any(c in txt for c in receiptgenline.TOTAL_KEYS) or \
                    any(c.upper() in txt for c in receiptgenline.TOTAL_KEYS):
                newwidth *= 2
        txt_arr = cv2.resize(txt_arr, (newwidth, txt_arr.shape[0]))
        return txt_arr, txt