def __init__(self):
    '''
    Create the Faker source and the weighted pool of time patterns.
    '''
    # First pattern uses the 24-hour %H directive, the second the 12-hour
    # %I directive followed by %p (AM/PM); each is paired with weight 0.5.
    # NOTE(review): "(:%S)?" and "[ ]?" look like regex syntax rather than
    # strftime directives -- confirm how the consumer interprets them.
    patterns = ["%H:%M(:%S)?", "%I:%M(:%S)[ ]?%p"]
    weights = [0.5, 0.5]
    self.faker = Faker()
    self.dateformat = ListGenWithProb(patterns, weights)
def gen_compare(sameheight, dsttxt, comparewith):
    """Show a reference image, then step through generated samples.

    The reference image and every generated sample are displayed both as a
    grayscale image and as a heatmap, all resized with ``resizeToHeight`` so
    they can be compared by eye.  The function blocks on a key press after
    each generated sample.

    Args:
        sameheight: height handed to ``resizeToHeight`` for every window.
        dsttxt: text to render on every iteration; when ``None`` a new text
            is drawn from the name generator each time.
        comparewith: path of the reference image, read with ``cv2.imread``.
    """
    # Reference image: average the colour channels down to one uint8 plane.
    dstimg = cv2.imread(comparewith)
    dstimg = np.mean(dstimg, axis=2).astype(np.uint8)
    dstimg_cl = gray2heatmap(dstimg)
    cv2.imshow('dst', resizeToHeight(dstimg, sameheight))
    cv2.imshow('dst-heatmap', resizeToHeight(dstimg_cl, sameheight))

    pipe_name = CMNDPipeName()
    hotengen = HoTenGen(os.path.join(RESOURCE_PATH, 'hovaten.csv'))
    trashgen = Trash(os.path.join(RESOURCE_PATH, 'hovaten.csv'), 0.5)
    quanhuyengen = QuanHuyenGen(os.path.join(RESOURCE_PATH, 'tinhtp.csv'), number=2)

    # Read the pipeline parameters inside a ``with`` block so the handle is
    # closed deterministically instead of being left to the garbage collector.
    with codecs.open(TEMPORARY_PATH + '4/aligned.txt', encoding='utf8') as aligned_file:
        filecontent = aligned_file.read()
    # The file is written with single quotes; swap them so it parses as JSON.
    jsonStr = filecontent.replace('\'', '"')
    jsonObj = json.loads(jsonStr)

    namegen = ListGenWithProb([hotengen, quanhuyengen, trashgen], [0.4, 0.3, 0.3])
    for i in range(1, 3000):
        # Single-argument print() emits the same text on Python 2 and 3.
        print('%s -----------------------' % i)
        if dsttxt is not None:
            pipe_name.txt = dsttxt
        else:
            pipe_name.txt = namegen.gen()
        pipe_name.p.reset(jsonObj)
        rs, _, txt = pipe_name.gen()
        if rs is None:
            # Generation produced no image for this text; try the next one.
            continue
        txt = txt.strip()
        print('@@@' + txt + '@@@')
        rs_cl = gray2heatmap(rs)
        cv2.imshow('gen', resizeToHeight(rs, sameheight))
        cv2.imshow('gen-heatmap', resizeToHeight(rs_cl, sameheight))
        # Block until a key is pressed before generating the next sample.
        cv2.waitKey(-1)
def to_weinman(root, preview=True):
    """Generate text-line images under ``root`` and, optionally, annotations.

    Each generated image is resized to a height of 32 pixels (width scaled
    proportionally) and written to ``root + '<i>.jpg'``.

    Args:
        root: path prefix that file names are concatenated onto (images,
            ``anno-train.txt`` and ``anno-test.txt``).
        preview: when true (the default, matching the historical behaviour)
            every sample is shown in a window, the function waits for a key
            press, and no annotation line is written.  When false, nothing is
            displayed and one annotation line per image is appended:
            indices below 2900 go to ``anno-train.txt``, the rest to
            ``anno-test.txt``.
    """
    pipe_name = CMNDPipeName()
    hotengen = HoTenGen(os.path.join(RESOURCE_PATH, 'hovaten.csv'))
    quanhuyengen = QuanHuyenGen(os.path.join(RESOURCE_PATH, 'tinhtp.csv'), number=2)
    trashgen = Trash(os.path.join(RESOURCE_PATH, 'hovaten.csv'), 0.5)

    # Close the parameter file deterministically rather than leaking the handle.
    with codecs.open(TEMPORARY_PATH + '4/aligned.txt', encoding='utf8') as aligned_file:
        filecontent = aligned_file.read()
    # The file is written with single quotes; swap them so it parses as JSON.
    jsonStr = filecontent.replace('\'', '"')
    jsonObj = json.loads(jsonStr)

    namegen = ListGenWithProb([hotengen, quanhuyengen, trashgen], [0.4, 0.3, 0.3])
    unicodeutil = UnicodeUtil(RESOURCE_PATH + 'diacritics2.csv')

    with codecs.open(root + 'anno-train.txt', 'a', encoding='utf8') as annotation_train, \
            codecs.open(root + 'anno-test.txt', 'a', encoding='utf8') as annotation_test:
        for i in range(1, 3000):
            # Single-argument print() emits the same text on Python 2 and 3.
            print('%s -----------------------' % i)
            # The drawn value was never used; the call is kept only so the
            # global NumPy random stream advances exactly as it did before.
            np.random.rand()
            txt = namegen.gen()
            txt = re.sub(r'\d', '', txt)  # strip digits from the source text
            pipe_name.txt = txt
            pipe_name.p.reset(jsonObj)
            rs, _, txt = pipe_name.gen()
            if rs is None:
                continue
            txt = txt.strip()
            txt = unicodeutil.to_vni(txt)
            # Scale to a fixed height of 32 while keeping the aspect ratio.
            newwidth = rs.shape[1] * 32.0 / rs.shape[0]
            rs = cv2.resize(rs, (int(newwidth), 32))
            cv2.imwrite(root + str(i) + '.jpg', rs)
            print('@@@' + txt + '@@@')
            if preview:
                # Interactive inspection mode: show the sample, wait for a
                # key, and skip the annotation (the original behaviour).
                cv2.imshow('hihi', rs)
                cv2.waitKey(-1)
                continue
            target = annotation_train if i < 2900 else annotation_test
            target.write('./' + str(i) + '.jpg ' + txt + '\n')
def __init__(self, afonts):
    """Load each font spec, register it by full path, and build the picker.

    ``afonts`` is iterated as ``(p, fontname, w2h, s2h)`` tuples; ``p`` is
    passed both to ``AFont`` and, as the matching weight, to
    ``ListGenWithProb``.
    """
    self.fonts = []
    weights = []
    for spec in afonts:
        prob, filename, w2h, s2h = spec
        fullpath = FONT_PATH + filename
        loaded = AFont(fullpath, prob, w2h, s2h)
        self.fonts.append(loaded)
        # ``allfonts`` is not created here; it must already exist on the
        # instance or its class.
        self.allfonts[fullpath] = loaded
        weights.append(prob)
    self.fontgen = ListGenWithProb(self.fonts, weights)
def __init__(self, afonts=None):
    """Initialise pygame, the font renderer and the optional font pool.

    Args:
        afonts: optional iterable of ``(p, fontname, w2h, s2h)`` tuples.
            Each entry becomes an ``AFont`` loaded from
            ``FONT_PATH + fontname``; ``p`` is also used as that font's
            weight.  ``None`` (the default) means no fonts.

    ``self.fontgen`` is only created when at least one font was supplied.
    """
    # Avoid a mutable default argument: normalise None to an empty sequence.
    if afonts is None:
        afonts = ()
    pygame.init()
    self.renderfont = RenderFont(DATA_PATH)
    self.height = 100
    self.pad_for_cut = 10
    self.rel_line_spacing = 1.0
    self.loitgfonts = []
    self.fonts = []
    probs = []
    for p, fontname, w2h, s2h in afonts:
        self.fonts.append(AFont(FONT_PATH + fontname, p, w2h, s2h))
        probs.append(p)
    if self.fonts:
        self.fontgen = ListGenWithProb(self.fonts, probs)
def __init__(self, fromdate=None, todate=None, dateformat=None):
    '''
    Constructor

    fromdate / todate: optional "%Y-%m-%d" strings bounding the generated
        dates; each falls back to the current time when omitted.
    dateformat: optional sequence of positional arguments for
        ListGenWithProb (elsewhere it is called with a list of items and a
        list of weights).  When omitted, a single ISO "%Y-%m-%d" format is
        used.
    '''
    self.faker = Faker('en_US')
    if dateformat is None:
        # Unpacking None raised TypeError, so the declared default could
        # never be used; fall back to the same ISO layout that the
        # fromdate/todate arguments are parsed with.
        dateformat = (["%Y-%m-%d"], [1.0])
    self.dateformat = ListGenWithProb(*dateformat)
    if fromdate is not None:
        self.fromdate = datetime.strptime(fromdate, "%Y-%m-%d")
    else:
        self.fromdate = datetime.now()
    if todate is not None:
        self.todate = datetime.strptime(todate, "%Y-%m-%d")
    else:
        self.todate = datetime.now()
class Fonts(object):
    """Weighted pool of fonts with random selection and lookup by file name."""

    # Class-level registry shared by every Fonts instance:
    # full font path -> AFont object.
    allfonts = {}

    def __init__(self, afonts):
        """Load the given font specs and register them.

        ``afonts`` is iterated as ``(p, fontname, w2h, s2h)`` tuples; ``p``
        is passed to ``AFont`` and also used as the selection weight.
        """
        self.fonts = []
        probs = []
        for p, fontname, w2h, s2h in afonts:
            fontpath = FONT_PATH + fontname
            font = AFont(fontpath, p, w2h, s2h)
            self.allfonts[fontpath] = font
            self.fonts.append(font)
            probs.append(p)
        self.fontgen = ListGenWithProb(self.fonts, probs)

    def genRandom(self):
        """Return whatever the weighted generator picks from this pool."""
        return self.fontgen.gen()

    def genByName(self, query):
        """Return the single registered font whose file name contains ``query``.

        The comparison is case-insensitive and only looks at the part of the
        registered path after the last ``/``.  Returns ``None`` when no font
        matches or when the match is ambiguous (more than one font).
        """
        needle = query.lower()
        # items() works on both Python 2 and 3; iteritems() exists only on 2.
        matches = [
            font
            for path, font in self.allfonts.items()
            if needle in path.split('/')[-1].lower()
        ]
        if len(matches) == 1:
            return matches[0]
        return None
class DateGen(Gen):
    '''
    Date

    Generates a date between two bounds, formatted with a randomly chosen
    strftime pattern and then passed through replaceZeroWithProb and
    changeCaseWithProb.
    '''
    def __init__(self, fromdate=None, todate=None, dateformat=None):
        '''
        Constructor

        fromdate / todate: optional "%Y-%m-%d" strings bounding the
            generated dates; each falls back to the current time when
            omitted.
        dateformat: optional sequence of positional arguments for
            ListGenWithProb (elsewhere it is called with a list of items and
            a list of weights).  When omitted, a single ISO "%Y-%m-%d"
            format is used.
        '''
        self.faker = Faker('en_US')
        if dateformat is None:
            # Unpacking None raised TypeError, so the declared default could
            # never be used; fall back to the same ISO layout that the
            # fromdate/todate arguments are parsed with.
            dateformat = (["%Y-%m-%d"], [1.0])
        self.dateformat = ListGenWithProb(*dateformat)
        if fromdate is not None:
            self.fromdate = datetime.strptime(fromdate, "%Y-%m-%d")
        else:
            self.fromdate = datetime.now()
        if todate is not None:
            self.todate = datetime.strptime(todate, "%Y-%m-%d")
        else:
            self.todate = datetime.now()

    def gen(self):
        '''
        Return one formatted date string.
        '''
#         d = self.faker.date_between(start_date="-365d", end_date="-60d")
        d = self.faker.date_time_between_dates(datetime_start=self.fromdate,
                                               datetime_end=self.todate,
                                               tzinfo=None).date()
        fm = self.dateformat.gen()
        d = d.strftime(fm)
        # Post-processing helpers are defined elsewhere; presumably they
        # perturb zeros and letter case with the given probabilities.
        d = replaceZeroWithProb(d, 0.15)
        d = changeCaseWithProb(d, upper=0.5, lower=0.0)
        return d
class TimeGen(Gen):
    '''
    Time

    Produces a time string from Faker using a randomly chosen pattern, then
    passes it through replaceZeroWithProb and changeCaseWithProb.
    '''
    def __init__(self):
        '''
        Create the Faker source and the weighted pool of time patterns.
        '''
        # First pattern uses the 24-hour %H directive, the second the
        # 12-hour %I directive followed by %p; each is paired with weight 0.5.
        # NOTE(review): "(:%S)?" and "[ ]?" look like regex syntax rather
        # than strftime directives -- confirm how they are meant to be used.
        patterns = ["%H:%M(:%S)?", "%I:%M(:%S)[ ]?%p"]
        weights = [0.5, 0.5]
        self.faker = Faker()
        self.dateformat = ListGenWithProb(patterns, weights)

    def gen(self):
        '''
        Return one generated time string.
        '''
        pattern = self.dateformat.gen()
        raw = self.faker.time(pattern=pattern, end_datetime=None)
        zero_swapped = replaceZeroWithProb(raw)
        return changeCaseWithProb(zero_swapped, upper=0.5, lower=0.1)
class RenderText(object):
    """Renders text strings into image arrays using a pool of fonts.

    Rendering itself is delegated to a ``RenderFont`` instance; this class
    picks fonts, applies font-specific restrictions and rescales the result.
    """

    def __init__(self, afonts=[]):
        """Initialise pygame, the renderer and the optional font pool.

        ``afonts`` is iterated as ``(p, fontname, w2h, s2h)`` tuples; each
        becomes an ``AFont`` loaded from ``FONT_PATH + fontname`` and ``p``
        is also used as its weight.  The default list is only iterated,
        never mutated, inside this method.
        """
        pygame.init()
        self.renderfont = RenderFont(DATA_PATH)
        self.height = 100
        self.pad_for_cut = 10
        self.rel_line_spacing = 1.0
        self.loitgfonts = []
        self.fonts = []
        probs = []
        for p, fontname, w2h, s2h in afonts:
            self.fonts.append(AFont(FONT_PATH+fontname, p, w2h, s2h))
            probs.append(p)
        # self.fontgen only exists when at least one font was supplied;
        # genFont() guards against the empty case.
        if len(self.fonts) > 0:
            self.fontgen = ListGenWithProb(self.fonts, probs)

    def init_font(self, fontpath, spacing):
        """Create a freetype font of size ``self.height`` with styling off.

        Side effect: stores ``spacing`` on ``self.spacing``.
        Returns the configured font object.
        """
        font = freetype.Font(FONT_PATH + fontpath, size=self.height)
        font.underline = False
        font.strong = False
        font.oblique = False
        font.strength = False
        font.antialiased = True
        font.origin = True
        self.spacing = spacing
        return font

    def genFont(self):
        """Return a font from the weighted pool, or None if the pool is empty."""
        if len(self.fonts) > 0:
            return self.fontgen.gen()
        else:
            return None

    def toMask2(self, afont, txt):
        """Render ``txt`` on a single line with ``afont``.

        Two values are drawn uniformly from the font's ``s2h`` and ``w2h``
        ranges and passed to the renderer; the rendered array is then
        resized horizontally by the drawn ``w2h`` factor.
        Returns ``(txt_arr, txt)``.
        """
        ###
        s2h = random.uniform(afont.s2h[0], afont.s2h[1])
        w2h = random.uniform(afont.w2h[0], afont.w2h[1])
        ###
        # bbs (third value returned by the renderer) is not used here.
        txt_arr, _, bbs = self.renderfont.render_singleline(afont.font, txt , w2h, s2h)
        newwidth = int(txt_arr.shape[1]*w2h)
        txt_arr = cv2.resize(txt_arr,(newwidth, txt_arr.shape[0]))
        return txt_arr, txt

    def toMask(self, loitgfont, txt, otherlines=False):
        """Render ``txt`` between two random filler lines with ``loitgfont``.

        Returns ``(None, None)`` when the font/text combination is rejected
        by the font-specific checks below; otherwise returns
        ``(txt_arr, txt)`` where ``txt`` may have been upper-cased.
        ``otherlines`` only selects which numeric arguments are passed to
        ``render_multiline``; the three-line text is rendered either way.
        """
        # Font-specific rejections, keyed on hard-coded absolute font paths:
        # this font is skipped when the text contains any listed symbol.
        if loitgfont.fontpath == '/home/loitg/Downloads/fonts/fontss/receipts/westgate/PKMN-Mystery-Dungeon.ttf' and any(c in txt for c in ['<','>',';','!','$','#','&','/','`','~','@','%','^','*','.']):
            return None, None
        # This font is skipped when the text contains any digit.
        if loitgfont.fontpath == '/home/loitg/Downloads/fonts/fontss/receipts/general_fairprice/LEFFC2.TTF' and any(c.isdigit() for c in txt):
            return None, None
        # For these two fonts the text is converted to upper case first.
        if (loitgfont.fontpath == '/home/loitg/Downloads/fonts/fontss/receipts/general_fairprice/PRINTF Regular.ttf')\
            or (loitgfont.fontpath == '/home/loitg/Downloads/fonts/fontss/receipts/dotted/fake receipt.ttf'):
            txt = txt.upper()
        # Random filler lines of the same length as txt, placed above and below it.
        above = rstr.rstr('ABC0123456789abcdef ', len(txt))
        below = rstr.rstr('ABC0123456789abcdef ', len(txt))
        multilines = above + '\n' + txt + '\n' + below
        if otherlines:
            txt_arr, _, bbs = self.renderfont.render_multiline(loitgfont.font, multilines , 0.3, 0.2, 1)
#             angle = np.random.rand() * 3 + 3
#             if np.random.rand() < 0.5: angle = -angle
#             txt_arr= rotate_bound(txt_arr,angle)
        else:
            txt_arr, _, bbs = self.renderfont.render_multiline(loitgfont.font, multilines , 0.05, 0.5, 1)
#             angle = np.random.randn() * 2
#             if np.random.rand() < 0.5 and abs(angle) > 1 and abs(angle) < 10:
#                 txt_arr= rotate_bound(txt_arr,angle)
        # NOTE(review): confirm whether this rotation is meant to apply to
        # both render branches or only to the else branch above.
        # The angle is drawn from a normal distribution scaled by 3 and the
        # rotation is applied when a uniform draw falls below 0.4.
        angle = np.random.randn() * 3
        if np.random.rand() < 0.4:
            txt_arr= rotate_bound(txt_arr,angle)
        # Horizontal rescale by the font's ratio.
        newwidth = int(txt_arr.shape[1]*loitgfont.getRatio())
        # When a uniform draw falls below 0.4, double the width if the text
        # contains a digit-dot-two-digits pattern or any entry of
        # receiptgenline.TOTAL_KEYS (as given or upper-cased).
        if np.random.rand() < 0.4:
            found = re.search( r'\d\.\d\d', txt)
            if found:
                newwidth *= 2
            elif any(c in txt for c in receiptgenline.TOTAL_KEYS) or \
                any(c.upper() in txt for c in receiptgenline.TOTAL_KEYS):
                newwidth *= 2
        txt_arr = cv2.resize(txt_arr,(newwidth, txt_arr.shape[0]))
        return txt_arr, txt