예제 #1
0
def main():
    """Write a page XML file for every supported image in the input directory.

    Reads ``input_path``, ``output_path``, ``model`` and ``n`` from the parsed
    command-line arguments. Each directory entry whose lower-cased extension
    is in ``allowed_extensions`` is passed to
    ``DocumentAnalyzer.get_document_paragraphs``; the result is serialized
    with ``HelperMethods.create_page_xml`` and written to
    ``<output_path>/<image name without extension>.xml``.

    Returns:
        0 once every selected image has been processed.
    """
    args = parse_arguments()
    input_path = args.input_path
    output_path = args.output_path
    scale = 0.23

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    analyzer = DocumentAnalyzer(model=args.model)

    # Select the images up front so the progress total only counts files that
    # are actually processed (other directory entries are ignored).
    images = []
    for name in os.listdir(input_path):
        stem, extension = os.path.splitext(name)
        if extension.lower() in allowed_extensions:
            images.append((name, stem))

    total = len(images)
    for done, (name, stem) in enumerate(images, start=1):
        # NOTE(review): this assignment is outside any class body, so no name
        # mangling applies and it sets an attribute literally named
        # ``__scale``. If DocumentAnalyzer keeps its scale in a private
        # ``self.__scale`` (stored as ``_DocumentAnalyzer__scale``), this line
        # does not affect it -- confirm against the class.
        analyzer.__scale = scale

        # os.path.join works whether or not input_path ends with a separator.
        coordinates, img_height, img_width = analyzer.get_document_paragraphs(
            os.path.join(input_path, name), no_layout=args.n)
        xml_string = HelperMethods.create_page_xml(
            coordinates, img_width, img_height, stem)

        # Use the stem as-is: stripping the extension a second time would
        # turn "a.b.jpg" into "a.xml" and let multi-dot names overwrite each
        # other.
        with open(os.path.join(output_path, stem + '.xml'), 'wb') as xml_file:
            xml_file.write(xml_string)

        print('Completed: {}/{}'.format(done, total))
    return 0
def main():
    """Detect paragraphs in every ``.jpg`` image and save XML plus a preview.

    Reads ``input_path``, ``output_path`` and ``model`` from the parsed
    command-line arguments. For each ``.jpg`` file in the input directory the
    line coordinates are loaded from the XML file with the same base name in
    that directory, ``DocumentAnalyzer.get_document_paragraphs`` is run, and
    the output of ``HelperMethods.create_page_xml`` is written to
    ``<output_path>/<base name>.xml``. While ``show_img`` is true, an image
    rendered from ``get_img_coords`` is also saved as
    ``<output_path>/<base name>.jpg``.

    Returns:
        0 once every image has been processed.
    """
    args = parse_arguments()
    input_path = args.input_path
    output_path = args.output_path
    analyzer = DocumentAnalyzer(model=args.model)
    show_img = True

    images_names = [
        name for name in os.listdir(input_path) if name.endswith('.jpg')
    ]

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    total = len(images_names)
    for done, img in enumerate(images_names, start=1):
        # Strip only the trailing extension; str.replace would also remove
        # any other ".jpg" occurrence inside the name.
        filename = os.path.splitext(img)[0]
        # os.path.join works whether or not input_path ends with a separator.
        img_path = os.path.join(input_path, img)
        line_xml_path = os.path.join(input_path, filename + '.xml')

        coordinates, img_height, img_width = analyzer.get_document_paragraphs(
            img_path,
            line_coords=HelperMethods.get_line_coords(line_xml_path))
        xml_string = HelperMethods.create_page_xml(coordinates, img_width,
                                                   img_height, filename)

        if show_img:
            # NOTE(review): `misc` is presumably scipy.misc, whose imread was
            # removed in newer SciPy releases -- confirm the pinned version.
            in_img = misc.imread(img_path, mode="RGB")
            res = get_img_coords(in_img, coordinates)
            # Create exactly one figure and always close it; a separate
            # plt.figure() call would leave the plotted figure open and leak
            # one high-dpi figure per image.
            fig, axarr = plt.subplots(1, 1, dpi=1000)
            try:
                axarr.axis('off')
                axarr.imshow(res)
                # If output_path equals input_path this overwrites the
                # source image, since the preview reuses its name.
                fig.savefig(os.path.join(output_path, filename + '.jpg'),
                            bbox_inches='tight')
            finally:
                plt.close(fig)

        with open(os.path.join(output_path, filename + '.xml'), 'wb') as xml_file:
            xml_file.write(xml_string)

        print('Completed: {}/{}'.format(done, total))
    return 0