Beispiel #1
0
    def __init__(self, hashes, CHARS_PER_LINE):
        """Initialise the parser and its two backtick-marker lookup tables.

        Args:
            hashes: Forwarded unchanged to ``LineParser.__init__``.
            CHARS_PER_LINE: Stored on the instance as ``CHARS_PER_LINE``.
        """
        LineParser.__init__(self, hashes)
        self.CHARS_PER_LINE = CHARS_PER_LINE

        # Backtick marker -> alignment value.  Note that a single backtick
        # maps to None, not 0.
        self.keys_alignment = {
            '`': None,
            '``': 1,
            '```': 2,
            '````': 3,
        }
        # Alignment value -> marker text (each with a trailing space).  Both
        # None and 0 resolve to the single-backtick marker.
        self.alignment_keys = dict([
            (None, '` '),
            (0, '` '),
            (1, '`` '),
            (2, '``` '),
            (3, '```` '),
        ])
Beispiel #2
0
def crop_line2png(img_tages_dict, data_root, save_root):
    """Crop annotated text lines out of images to build a CRNN training set.

    For every image in ``data_root`` that has an entry in ``img_tages_dict``,
    can be read by OpenCV and whose shorter side is at least 400 px:

    1. each line box from ``LineParser.gen_line()`` is cropped and saved as
       ``<save_root>/images/<stem>_<j>.<ext>``;
    2. ``"<crop name> <label>"`` is appended to ``<save_root>/label.txt``;
    3. five randomly shifted, enlarged crops of the same line are saved as
       ``<stem>_<j>_<k>.<ext>`` (these are *not* listed in ``label.txt``).

    Afterwards every distinct character seen in the labels is written to
    ``<save_root>/map.txt`` as ``"<index> <char>"``.

    Args:
        img_tages_dict: Mapping from image file name to the tag data handed
            to ``LineParser``.
        data_root: Directory holding the source images.
        save_root: Output directory; it and its ``images`` sub-directory are
            created if missing.

    NOTE: formatting ``label.encode('utf-8')`` with ``%s`` relies on Python 2
    byte strings, so the function as a whole still targets Python 2.
    """
    file_list = os.listdir(data_root)
    random.shuffle(file_list)
    print('processing images number :%d' % len(file_list))

    # Create the output directory once instead of re-checking it per image.
    images_dir = os.path.join(save_root, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    charset = set()
    counter = 0
    with open(os.path.join(save_root, 'label.txt'), 'w') as label_file:
        for i, img_name in enumerate(file_list):
            if img_name not in img_tages_dict:
                continue
            name_parts = img_name.rsplit('.', 1)
            if len(name_parts) != 2:
                # cv2.imwrite picks the encoder from the file extension, so
                # crops of an extension-less image could not be saved.
                continue
            stem, ext = name_parts

            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None:
                continue
            if np.min(im.shape[:2]) < 400:
                continue

            line_parse = LineParser(img_tages_dict[img_name], im.shape)
            lines, lines_label = line_parse.gen_line()
            if Config.DEBUG:
                char_boxes, char_labels = line_parse.get_all_boxes(
                    line_parse.tags, decode_utf8=Config.decode_utf8)
                line_parse.display_boxes(im, char_boxes, char_labels)
                line_parse.display_boxes(im, lines, lines_label)

            for j, line in enumerate(lines):
                label = lines_label[j]
                charset.update(label)

                # Box layout is (x0, y0, x1, y1).
                x0, y0 = int(line[0]), int(line[1])
                x1, y1 = int(line[2]), int(line[3])

                new_image_name = '{0}_{1}.{2}'.format(stem, j, ext)
                label_file.write(
                    '%s %s\n' % (new_image_name, label.encode('utf-8')))
                cv2.imwrite(os.path.join(images_dir, new_image_name),
                            im[y0:y1, x0:x1])

                # Extra crops: the box grows by 0.4 * height in total along
                # each axis, with a random split of that margin between the
                # two sides.  The horizontal margin is also based on the
                # line *height*, as in the original code.
                height = y1 - y0
                for k in range(5):
                    shift_x = random.uniform(0, 0.4)
                    shift_y = random.uniform(0, 0.4)
                    top = max(0, int(line[1] - shift_y * height))
                    bottom = min(int(line[3] + (0.4 - shift_y) * height),
                                 im.shape[0])
                    left = max(0, int(line[0] - shift_x * height))
                    right = min(int(line[2] + (0.4 - shift_x) * height),
                                im.shape[1])
                    shifted_name = '{0}_{1}_{2}.{3}'.format(stem, j, k, ext)
                    cv2.imwrite(os.path.join(images_dir, shifted_name),
                                im[top:bottom, left:right])
                counter += 1
            if i % 1000 == 0:
                print('proceeded %d' % i)

    print('gen line images : %d' % counter)
    with open(os.path.join(save_root, 'map.txt'), 'w') as map_file:
        for ind, label in enumerate(charset):
            map_file.write('%d %s\n' % (ind, label.encode('utf-8')))
Beispiel #3
0
class TelemHandler(object):
    """Reads lines from a serial manager, parses them and feeds a plot cache."""

    def __init__(self):
        self.man = SerialManager()
        self.par = LineParser()
        self.cache = PlotCache()

    def get_data(self):
        """Poll for new lines and return the current cache contents.

        The serial manager is updated first.  If it reports any lines, each
        one is parsed and the resulting list is handed to the cache in a
        single ``process_data`` call.  The value of ``self.cache.get()`` is
        returned in either case.
        """
        self.man.update()
        pending = self.man.getLines()
        if len(pending) == 0:
            return self.cache.get()

        parsed = []
        for raw in pending:
            parsed.append(self.par.parseLine(raw))
        self.cache.process_data(parsed)
        return self.cache.get()
Beispiel #4
0
def save_line_annos(img_tages_dict, data_root, save_root):
    """Dump the line boxes derived from per-character annotations to a file.

    This is meant for re-checking how the single-character annotations were
    merged into lines.  For every image in ``data_root`` that has an entry in
    ``img_tages_dict`` and can be read by OpenCV, the lines returned by
    ``LineParser.gen_line()`` are written to ``<save_root>/line_annos.txt``,
    one image per row:

        IMAGE_NAME,x0:y0:x1:y1:label;x0:y0:x1:y1:label;...;

    for example:

        1477568421447_wzrceuthja20s1gm.jpeg,1:3:567:36:#;6:46:49:77:#;

    Images that yield no lines produce no row.  Labels are expected not to
    contain the separator characters ``,``, ``:`` or ``;`` (only Chinese
    characters, English letters, digits and ``*``).

    Args:
        img_tages_dict: Mapping from image file name to the tag data handed
            to ``LineParser``.
        data_root: Directory holding the source images.
        save_root: Existing directory that receives ``line_annos.txt``.

    NOTE: formatting ``label.encode('utf-8')`` with ``%s`` relies on Python 2
    byte strings, so the function as a whole still targets Python 2.
    """
    file_list = os.listdir(data_root)
    print('processing images number :%d' % len(file_list))
    counter = 0        # number of line boxes written
    counter_image = 0  # number of images that were annotated and readable
    with open(os.path.join(save_root, 'line_annos.txt'), 'w') as line_file:
        for i, img_name in enumerate(file_list):
            if img_name not in img_tages_dict:
                continue
            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None:
                continue

            line_parse = LineParser(img_tages_dict[img_name], im.shape)
            lines, lines_label = line_parse.gen_line()
            if Config.DEBUG:
                char_boxes, char_labels = line_parse.get_all_boxes(
                    line_parse.tags, decode_utf8=Config.decode_utf8)
                line_parse.display_boxes(im, char_boxes, char_labels)
                line_parse.display_boxes(im, lines, lines_label)

            if len(lines) > 0:
                entries = [
                    '%d:%d:%d:%d:%s;' % (int(line[0]), int(line[1]),
                                         int(line[2]), int(line[3]),
                                         lines_label[j].encode('utf-8'))
                    for j, line in enumerate(lines)
                ]
                # The name and the entries are written separately so the
                # image name is never %-formatted together with the UTF-8
                # encoded labels.
                line_file.write('%s,' % img_name)
                line_file.write(''.join(entries) + '\n')
                counter += len(entries)
            counter_image += 1
            if i % 1000 == 0:
                print('proceeded %d' % i)
    print('image source number %d, contained image %d, gen line images : %d'
          % (len(file_list), counter_image, counter))
Beispiel #5
0
def get_parser(filename):
    """Return the first parser that claims *filename*, or None.

    Candidates are constructed up front in the order PlaintextParser,
    LineParser, XMLParser, CtmParser; a LineParser whose constructor raises
    ValueError is simply left out.  Each candidate is then asked
    ``wants_this_file()`` in that order and the first one answering truthy
    is returned.
    """
    candidates = [PlaintextParser(filename)]
    try:
        line_parser = LineParser(filename)
    except ValueError:
        pass
    else:
        candidates.append(line_parser)
    candidates.extend([XMLParser(filename), CtmParser(filename)])

    chosen = None
    for candidate in candidates:
        if candidate.wants_this_file():
            chosen = candidate
            break
    return chosen
def solution():
    """Apply every instruction in ``input_data`` and count the cells left on.

    A 1000x1000 grid starts with every cell at -1.  Each input line is parsed
    into an instruction and a selector used to index the grid: TURN_ON sets
    the selected cells to 1, TURN_OFF sets them to -1 and TOGGLE flips their
    sign.  Returns the number of cells equal to 1 as a plain int.
    """
    grid_mx = -np.ones(shape=(1000, 1000))

    for raw_line in input_data:
        parsed = LineParser.parse_line(raw_line)
        instruction = parsed.get_instruction()
        selector = parsed.get_selector()

        if instruction == TURN_ON:
            grid_mx[selector] = 1
        elif instruction == TURN_OFF:
            grid_mx[selector] = -1
        elif instruction == TOGGLE:
            grid_mx[selector] *= -1

    return int(np.count_nonzero(grid_mx == 1))
Beispiel #7
0
import sys
from math import sin

from line_parser import LineParser

if __name__ == '__main__':
    # Usage: <script> <file>.  Each line of the file is cut at its first ';'
    # and the leading part is handed to LineParser.
    source_path = sys.argv[1]

    with open(source_path, 'r') as f:
        for line_no, raw in enumerate(f.readlines(), 1):
            parse_line = LineParser(raw.split(';')[0])
            parse_line.get_rule()
            status = parse_line.error
            if status == 0:
                # Nothing to do for this line.
                continue
            if status == 1:
                parse_line.implement_rule()
                continue
            # Any other error value aborts the run with the 1-based line number.
            sys.exit('line ' + str(line_no) + ' from file ' + source_path + ' has multiple commands !!!')
Beispiel #8
0
def crop_line2png(img_tages_dict, data_root, save_root):
    """Crop (and deskew) annotated text lines to build a CRNN training set.

    For every non-GIF entry of ``data_root`` that OpenCV can read and for
    which ``parse_tags`` returns ground-truth lines:

    1. the minimum-area rectangle of each line is computed; unless the label
       is a single character, the image is rotated so the rectangle's longer
       bottom edge becomes horizontal;
    2. the rectangle's bounding box, enlarged on every side by 20% of its
       height, is cropped and saved as
       ``<save_root>/images/<stem>_<j>.<ext>``;
    3. ``"<crop name> <label>"`` is appended to ``<save_root>/label.txt``.

    Afterwards every distinct character seen in the labels is written to
    ``<save_root>/map.txt`` as ``"<index> <char>"``.

    Args:
        img_tages_dict: Annotation data passed through to ``parse_tags``.
        data_root: Directory holding the source images.
        save_root: Output directory; it and its ``images`` sub-directory are
            created if missing.

    NOTE: formatting ``label.encode('utf-8')`` with ``%s`` relies on Python 2
    byte strings, so the function as a whole still targets Python 2.
    """
    # os.listdir() also returns sub-directories and extension-less files;
    # [-1] copes with names that contain no dot, whereas [1] raised
    # IndexError on them.
    file_list = [
        file_name for file_name in os.listdir(data_root)
        if file_name.rsplit('.', 1)[-1] != 'gif'
    ]
    print('processing images number :%d' % len(file_list))

    # Create the output directory once instead of re-checking it per crop.
    images_dir = os.path.join(save_root, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # OpenCV 2.x exposes cv2.cv.BoxPoints; OpenCV 3+ renamed it to
    # cv2.boxPoints and dropped the cv2.cv module.
    box_points = getattr(cv2, 'boxPoints', None) or cv2.cv.BoxPoints

    expand_ratio = 0.2
    s = set()
    counter = 0
    with open(os.path.join(save_root, 'label.txt'), 'w') as label_file:
        for i, img_name in enumerate(file_list):
            im = cv2.imread(os.path.join(data_root, img_name))
            if im is None:
                continue
            gts_line = parse_tags(img_tages_dict, img_name, Config.decode_utf8)
            if gts_line is None:
                continue
            name_parts = img_name.rsplit('.', 1)
            if len(name_parts) != 2:
                # cv2.imwrite picks the encoder from the file extension, so
                # crops of an extension-less image could not be saved.
                continue
            stem, ext = name_parts

            for j, gt_line in enumerate(gts_line):
                rect = cv2.minAreaRect(gt_line.crds)
                box = box_points(rect)
                bbox = np.int0(box)
                if len(gt_line.label) == 1 and gt_line.label != '*':
                    # Single character: use the box as-is, no rotation.
                    quad = bbox.reshape((4, 2))
                    im_new = im
                elif (np.linalg.norm(gt_line.crds[0] - gt_line.crds[1]) <
                      np.linalg.norm(gt_line.crds[0] - gt_line.crds[3])
                      and (len(gt_line.label) >= 2 or gt_line.label == '*')):
                    # Edge 0-1 is shorter than edge 0-3 (presumably vertical
                    # text): such lines are skipped.
                    continue
                else:
                    im_copy = im.copy()
                    poly = bbox.reshape((4, 2))
                    # Of the two box edges that meet at the lowest corner
                    # (largest y), the longer one defines the text direction.
                    p_lowest = np.argmax(poly[:, 1])
                    p_lowest_right = (p_lowest - 1) % 4
                    p_lowest_left = (p_lowest + 1) % 4
                    if (np.linalg.norm(poly[p_lowest] - poly[p_lowest_right]) >
                            np.linalg.norm(poly[p_lowest] - poly[p_lowest_left])):
                        start_pt = p_lowest
                        end_pt = p_lowest_right
                    else:
                        start_pt = p_lowest_left
                        end_pt = p_lowest

                    angle = np.rad2deg(
                        np.arctan((poly[start_pt][1] - poly[end_pt][1]) * 1.0 /
                                  (poly[start_pt][0] - poly[end_pt][0])))
                    im_new = rotate_image(im_copy, angle)
                    crds = list(bbox.reshape((-1)))
                    quad = rotate_xml(im_copy, crds, angle)
                    quad = quad.reshape((4, 2))
                x0 = np.min(quad[:, 0])
                y0 = np.min(quad[:, 1])
                x1 = np.max(quad[:, 0])
                y1 = np.max(quad[:, 1])
                height = y1 - y0

                new_image_name = '{0}_{1}.{2}'.format(stem, j, ext)
                s.update(gt_line.label)
                label_file.write(
                    '%s %s\n' %
                    (new_image_name, gt_line.label.encode('utf-8')))

                # The margin is fractional, so the bounds are truncated back
                # to int: NumPy rejects float slice indices.  The horizontal
                # margin is also based on the box *height*, as before.
                margin = expand_ratio * height
                expand_y0 = int(max(0, int(y0) - margin))
                expand_y1 = int(min(im_new.shape[0], int(y1) + margin))
                expand_x0 = int(max(0, int(x0) - margin))
                expand_x1 = int(min(im_new.shape[1], int(x1) + margin))
                line_image = im_new[expand_y0:expand_y1, expand_x0:expand_x1]
                # NOTE(review): this function reads both ``Config`` and
                # ``cfg``; kept as in the original - confirm both exist.
                if cfg.DEBUG:
                    print(gt_line.label.encode('utf-8'))
                    cv2.imshow('', line_image)
                    cv2.waitKey(0)
                cv2.imwrite(os.path.join(images_dir, new_image_name),
                            line_image)
                counter += 1
            if i % 1000 == 0:
                print('proceeded %d' % i)

    print('gen line images : %d' % counter)
    with open(os.path.join(save_root, 'map.txt'), 'w') as map_file:
        for (ind, label) in enumerate(s):
            map_file.write('%d %s\n' % (ind, label.encode('utf-8')))
Beispiel #9
0
 def __init__(self):
     """Create the helper objects this instance works with.

     A SerialManager, a LineParser and a PlotCache are instantiated, in that
     order, and stored as ``man``, ``par`` and ``cache``.
     """
     self.man = SerialManager()
     self.par = LineParser()
     self.cache = PlotCache()