def __init__(self, hashes, CHARS_PER_LINE):
    """Initialise the parser and its backtick <-> alignment lookup tables.

    ``hashes`` is forwarded untouched to ``LineParser.__init__``;
    ``CHARS_PER_LINE`` is stored on the instance as-is.
    """
    LineParser.__init__(self, hashes)
    self.CHARS_PER_LINE = CHARS_PER_LINE

    # Backtick marker -> alignment code (a single backtick maps to None).
    self.keys_alignment = {
        '`': None,
        '``': 1,
        '```': 2,
        '````': 3,
    }

    # Alignment code -> marker followed by one space.  Both None and 0
    # map to the single-backtick marker.
    self.alignment_keys = {
        None: '` ',
        0: '` ',
        1: '`` ',
        2: '``` ',
        3: '```` ',
    }
def crop_line2png(img_tages_dict, data_root, save_root):
    """Crop annotated text lines out of images to build a CRNN training set.

    Three artifacts are written under ``save_root``:

    1. ``images/`` -- one tight crop per text line, named
       ``<stem>_<j>.<ext>``, plus five randomly shifted, enlarged crops of
       the same line, named ``<stem>_<j>_<k>.<ext>``;
    2. ``label.txt`` -- one ``<crop filename> <label>`` row per tight crop
       (the enlarged crops are not listed);
    3. ``map.txt`` -- one ``<index> <character>`` row per distinct
       character seen in the labels.

    Files of ``data_root`` are visited in shuffled order.  A file is
    skipped when it has no entry in ``img_tages_dict``, when OpenCV cannot
    read it, or when its shorter side is below 400 pixels.
    """
    image_dir = os.path.join(save_root, 'images')
    label_path = os.path.join(save_root, 'label.txt')
    map_path = os.path.join(save_root, 'map.txt')

    file_list = os.listdir(data_root)
    random.shuffle(file_list)
    print('processing images number :%d' % len(file_list))

    charset = set()
    counter = 0
    with open(label_path, 'w') as label_file:
        for i, img_name in enumerate(file_list):
            if img_name not in img_tages_dict:
                continue
            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None or np.min(im.shape[:2]) < 400:
                continue

            line_parse = LineParser(img_tages_dict[img_name], im.shape)
            lines, lines_label = line_parse.gen_line()
            if Config.DEBUG:
                char_boxes, char_labels = line_parse.get_all_boxes(
                    line_parse.tags, decode_utf8=Config.decode_utf8)
                line_parse.display_boxes(im, char_boxes, char_labels)
                line_parse.display_boxes(im, lines, lines_label)

            if not os.path.exists(image_dir):
                os.makedirs(image_dir)

            for j, line in enumerate(lines):
                name_parts = img_name.rsplit('.', 1)
                crop_name = '{0}_{1}.{2}'.format(
                    name_parts[0], j, name_parts[1])
                charset.update(lines_label[j])
                label_file.write(
                    '%s %s\n' % (crop_name, lines_label[j].encode('utf-8')))

                # Tight crop; line is indexed as (x0, y0, x1, y1).
                top, bottom = int(line[1]), int(line[3])
                left, right = int(line[0]), int(line[2])
                cv2.imwrite(os.path.join(image_dir, crop_name),
                            im[top:bottom, left:right])

                # Five extra crops, each enlarged by 0.4 * height in both
                # axes; the random shift decides how that margin is split
                # between the two sides.  Clamped to the image bounds.
                height = int(line[3]) - int(line[1])
                for k in range(5):
                    shift_x = random.uniform(0, 0.4)
                    shift_y = random.uniform(0, 0.4)
                    row_start = max(0, int(line[1] - shift_y * height))
                    row_stop = min(
                        int(line[3] + (0.4 - shift_y) * height), im.shape[0])
                    col_start = max(0, int(line[0] - shift_x * height))
                    col_stop = min(
                        int(line[2] + (0.4 - shift_x) * height), im.shape[1])
                    shifted_name = '{0}_{1}_{2}.{3}'.format(
                        name_parts[0], j, k, name_parts[1])
                    cv2.imwrite(
                        os.path.join(image_dir, shifted_name),
                        im[row_start:row_stop, col_start:col_stop])

                # NOTE(review): counted once per labelled line, not per
                # written file -- confirm against the intended nesting.
                counter += 1

            if i % 1000 == 0:
                print('proceeded %d' % i)

    print('gen line images : %d' % counter)

    with open(map_path, 'w') as map_file:
        for ind, label in enumerate(charset):
            map_file.write('%d %s\n' % (ind, label.encode('utf-8')))
class TelemHandler(object):
    """Pull lines from a serial manager, parse them and feed a plot cache."""

    def __init__(self):
        """Create the serial manager, the line parser and the plot cache."""
        self.man = SerialManager()
        self.par = LineParser()
        self.cache = PlotCache()

    def get_data(self):
        """Poll the serial manager and return the cache's current contents.

        Newly received lines, if any, are parsed one by one and handed to
        the cache as a single list before the cache is read.
        """
        self.man.update()
        pending = self.man.getLines()
        if len(pending) == 0:
            # Nothing new arrived: hand back whatever the cache holds.
            return self.cache.get()

        parsed = []
        for raw in pending:
            parsed.append(self.par.parseLine(raw))
        self.cache.process_data(parsed)
        return self.cache.get()
def save_line_annos(img_tages_dict, data_root, save_root):
    """Dump the text lines derived from per-character annotations.

    Meant for re-checking how the original single-character annotations
    were grouped into lines.  Writes ``<save_root>/line_annos.txt`` with
    one row per image that yielded at least one line::

        IMAGE_NAME,x0:y0:x1:y1:label;x0:y0:x1:y1:label;...;

    for example::

        1477568421447_wzrceuthja20s1gm.jpeg,1:3:567:36:#;6:46:49:77:#;

    Per the original author's note, labels are expected to hold only
    Chinese characters, English letters, digits and ``*`` -- none of the
    separators ``,``, ``:`` or ``;``.  The code does not enforce this.

    Files without an entry in ``img_tages_dict`` or unreadable by OpenCV
    are skipped.
    """
    file_list = os.listdir(data_root)
    print('processing images number :%d' % len(file_list))

    counter = 0
    counter_image = 0
    with open(os.path.join(save_root, 'line_annos.txt'), 'w') as line_file:
        for i, img_name in enumerate(file_list):
            if img_name not in img_tages_dict:
                continue
            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None:
                continue

            line_parse = LineParser(img_tages_dict[img_name], im.shape)
            lines, lines_label = line_parse.gen_line()
            if Config.DEBUG:
                char_boxes, char_labels = line_parse.get_all_boxes(
                    line_parse.tags, decode_utf8=Config.decode_utf8)
                line_parse.display_boxes(im, char_boxes, char_labels)
                line_parse.display_boxes(im, lines, lines_label)

            if len(lines) > 0:
                line_file.write('%s,' % img_name)

            last_index = len(lines) - 1
            for j, line in enumerate(lines):
                # Only the final record of a row ends with a newline.
                terminator = ';\n' if j == last_index else ';'
                record = '%d:%d:%d:%d:%s' % (
                    int(line[0]), int(line[1]), int(line[2]), int(line[3]),
                    lines_label[j].encode('utf-8'))
                line_file.write(record + terminator)
                counter += 1

            # NOTE(review): counted for every parsed image, including ones
            # that produced no lines -- confirm against the intended nesting.
            counter_image += 1
            if i % 1000 == 0:
                print('proceeded %d' % i)

    print('image source number %d, contained image %d, gen line images : %d'
          % (len(file_list), counter_image, counter))
def get_parser(filename):
    """Return the first parser that claims ``filename``, or None.

    Candidates are built and asked in a fixed order: PlaintextParser,
    LineParser, XMLParser, CtmParser.  A LineParser whose construction
    raises ``ValueError`` is simply left out of the candidates.
    """
    candidates = [PlaintextParser(filename)]
    try:
        candidates.append(LineParser(filename))
    except ValueError:
        # LineParser rejected the file at construction time; go on
        # without it.
        pass
    candidates += [XMLParser(filename), CtmParser(filename)]

    return next(
        (candidate for candidate in candidates
         if candidate.wants_this_file()),
        None)
def solution():
    """Apply every instruction in ``input_data`` to a 1000x1000 light grid.

    Each input line is parsed with ``LineParser.parse_line``; the parsed
    object supplies an instruction (``TURN_ON``, ``TURN_OFF`` or
    ``TOGGLE``) and a selector used to index the grid.  Lines carrying
    any other instruction leave the grid untouched.

    Returns:
        int: the number of cells that are on once all lines are applied.
    """
    # Cells hold +1 (on) or -1 (off), so a toggle is a sign flip.
    grid_mx = np.ones(shape=(1000, 1000)) * -1

    for line in input_data:
        line_parser = LineParser.parse_line(line)
        instruction = line_parser.get_instruction()
        selector = line_parser.get_selector()
        if instruction == TURN_ON:
            grid_mx[selector] = 1
        elif instruction == TURN_OFF:
            grid_mx[selector] = -1
        elif instruction == TOGGLE:
            grid_mx[selector] *= -1

    # Count in NumPy rather than flattening a million cells into a Python
    # list just to call list.count on it.
    return int(np.count_nonzero(grid_mx == 1))
import sys
from math import sin

from line_parser import LineParser

if __name__ == '__main__':
    # Usage: <script> <rules file>
    # Fail with a readable message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' <rules file>')

    with open(sys.argv[1], 'r') as f:
        # Stream the file; line numbers in messages are 1-based.
        for line_number, line in enumerate(f, 1):
            # Only the text before the first ';' is handed to the parser.
            parse_line = LineParser(line.split(';')[0])
            parse_line.get_rule()

            if parse_line.error == 0:
                # Nothing to do for this line.
                continue
            if parse_line.error == 1:
                parse_line.implement_rule()
            else:
                sys.exit('line ' + str(line_number) + ' from file '
                         + sys.argv[1] + ' has multiple commands !!!')
def crop_line2png(img_tages_dict, data_root, save_root):
    """Crop annotated text lines out of images to build a CRNN training set.

    Three artifacts are written under ``save_root``:

    1. ``images/<stem>_<j>.<ext>`` -- one crop per annotated line, padded
       on every side by 0.2 * line height and clamped to the image;
    2. ``label.txt`` -- one ``<crop filename> <label>`` row per crop;
    3. ``map.txt`` -- one ``<index> <character>`` row per distinct
       character seen in the labels.

    Handling of each annotated line:

    * a single-character label other than ``*`` is cropped from the
      unrotated image;
    * a label of two or more characters (or ``*``) whose first box edge is
      shorter than its fourth is skipped;
    * anything else is cropped from a copy of the image rotated by the
      angle of the longer of the two box edges meeting at the lowest
      corner.

    GIF files, files without an extension, files OpenCV cannot read and
    files for which ``parse_tags`` returns None are skipped.
    """
    # Names without a '.' used to raise IndexError here; they are skipped
    # because the output name (and cv2.imwrite) needs the extension.
    file_list = [
        file_name for file_name in os.listdir(data_root)
        if '.' in file_name and not file_name.endswith('.gif')
    ]
    print('processing images number :%d' % len(file_list))

    s = set()
    counter = 0
    expand_ratio = 0.2
    image_dir = os.path.join(save_root, 'images')

    with open(os.path.join(save_root, 'label.txt'), 'w') as label_file:
        # Create the output directory once rather than checking per line.
        if not os.path.exists(image_dir):
            os.mkdir(image_dir)

        for i, img_name in enumerate(file_list):
            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None:
                continue
            gts_line = parse_tags(img_tages_dict, img_name,
                                  Config.decode_utf8)
            if gts_line is None:
                continue

            stem, ext = img_name.rsplit('.', 1)
            for j, gt_line in enumerate(gts_line):
                label = gt_line.label
                rect = cv2.minAreaRect(gt_line.crds)
                box = cv2.cv.BoxPoints(rect)
                bbox = np.int0(box)

                if len(label) == 1 and label != '*':
                    quad = bbox.reshape((4, 2))
                    im_new = im
                elif (np.linalg.norm(gt_line.crds[0] - gt_line.crds[1])
                        < np.linalg.norm(gt_line.crds[0] - gt_line.crds[3])
                        and (len(label) >= 2 or label == '*')):
                    # NOTE(review): presumably vertical text -- confirm.
                    continue
                else:
                    im_copy = im.copy()
                    poly = bbox.reshape((4, 2))
                    # Corner with the largest y and its two neighbours in
                    # the box's point order.
                    p_lowest = np.argmax(poly[:, 1])
                    p_lowest_right = (p_lowest - 1) % 4
                    p_lowest_left = (p_lowest + 1) % 4
                    # Take the longer of the two edges at that corner.
                    if (np.linalg.norm(poly[p_lowest] - poly[p_lowest_right])
                            > np.linalg.norm(
                                poly[p_lowest] - poly[p_lowest_left])):
                        start_pt = p_lowest
                        end_pt = p_lowest_right
                    else:
                        start_pt = p_lowest_left
                        end_pt = p_lowest
                    angle = np.rad2deg(np.arctan(
                        (poly[start_pt][1] - poly[end_pt][1]) * 1.0
                        / (poly[start_pt][0] - poly[end_pt][0])))
                    im_new = rotate_image(im_copy, angle)
                    crds = list(bbox.reshape((-1)))
                    quad = rotate_xml(im_copy, crds, angle)
                    quad = quad.reshape((4, 2))

                # Axis-aligned bounds of the (possibly rotated) box.
                x0 = np.min(quad[:, 0])
                y0 = np.min(quad[:, 1])
                x1 = np.max(quad[:, 0])
                y1 = np.max(quad[:, 1])
                height = y1 - y0

                new_image_name = '{0}_{1}.{2}'.format(stem, j, ext)
                s.update(label)
                label_file.write(
                    '%s %s\n' % (new_image_name, label.encode('utf-8')))

                # The margin is fractional, so truncate to int before
                # slicing: current NumPy rejects float slice indices.
                margin = expand_ratio * height
                expand_y0 = max(0, int(int(y0) - margin))
                expand_y1 = min(im_new.shape[0], int(int(y1) + margin))
                expand_x0 = max(0, int(int(x0) - margin))
                expand_x1 = min(im_new.shape[1], int(int(x1) + margin))
                line_image = im_new[expand_y0:expand_y1, expand_x0:expand_x1]

                # NOTE(review): the flag is read from `cfg` while the rest
                # of this function reads `Config` -- confirm `cfg` exists.
                if cfg.DEBUG:
                    print(label.encode('utf-8'))
                    cv2.imshow('', line_image)
                    cv2.waitKey(0)

                cv2.imwrite(os.path.join(image_dir, new_image_name),
                            line_image)
                counter += 1

            if i % 1000 == 0:
                print('proceeded %d' % i)

    print('gen line images : %d' % counter)

    # Both files are closed by their `with` blocks; no explicit close().
    with open(os.path.join(save_root, 'map.txt'), 'w') as map_file:
        for ind, label in enumerate(s):
            map_file.write('%d %s\n' % (ind, label.encode('utf-8')))
def __init__(self):
    """Create the three collaborators this object holds.

    They are built in this order: a SerialManager (``self.man``), a
    LineParser (``self.par``) and a PlotCache (``self.cache``).
    """
    self.man = SerialManager()
    self.par = LineParser()
    self.cache = PlotCache()