def main():
    """Run paragraph detection on every supported image in the input folder.

    Reads ``input_path``, ``output_path``, ``model`` and ``n`` from the parsed
    command-line arguments. For every directory entry whose extension
    (case-insensitive) is in ``allowed_extensions``, the image is passed to
    ``DocumentAnalyzer.get_document_paragraphs`` and the result of
    ``HelperMethods.create_page_xml`` is written to ``<output_path>/<stem>.xml``.
    The output directory is created when it does not exist.

    Returns:
        int: always 0.
    """
    args = parse_arguments()
    input_path = args.input_path
    output_path = args.output_path
    scale = 0.23

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    analyzer = DocumentAnalyzer(model=args.model)

    # Filter first so the progress total matches the number of files that are
    # actually processed (other directory entries are ignored).
    images = [
        name for name in os.listdir(input_path)
        if os.path.splitext(name)[1].lower() in allowed_extensions
    ]
    length = len(images)

    for count, img in enumerate(images, 1):
        # Stem of the image file; used for both the XML content and file name.
        filename = os.path.splitext(img)[0]

        # NOTE(review): this assignment happens outside the class body, so no
        # name mangling applies and the attribute is literally ``__scale``.
        # If DocumentAnalyzer keeps its scale in a private ``self.__scale``
        # this will not reach it -- confirm against the class.
        analyzer.__scale = scale

        # os.path.join works whether or not input_path has a trailing slash.
        coordinates, img_height, img_width = analyzer.get_document_paragraphs(
            os.path.join(input_path, img), no_layout=args.n)
        xml_string = HelperMethods.create_page_xml(
            coordinates, img_width, img_height, filename)

        # Use the stem as-is: stripping a second "extension" would make
        # e.g. "page.1.jpg" and "page.2.jpg" overwrite the same "page.xml".
        with open(os.path.join(output_path, filename + '.xml'), 'wb') as f:
            f.write(xml_string)

        print('Completed: {}/{}'.format(count, length))

    return 0
def _save_overlay(image_path, coordinates, target_path):
    """Save the ``get_img_coords`` result for one image as a figure file.

    Args:
        image_path: path of the source image, read in RGB mode.
        coordinates: value forwarded unchanged to ``get_img_coords``.
        target_path: file the figure is saved to.
    """
    # NOTE(review): ``misc`` is presumably ``scipy.misc``; its ``imread`` was
    # removed in newer SciPy releases -- confirm the pinned version.
    in_img = misc.imread(image_path, mode="RGB")
    res = get_img_coords(in_img, coordinates)

    # Create exactly one figure and always close it, so that figures do not
    # pile up in memory while looping over many images.
    fig, axarr = plt.subplots(1, 1, dpi=1000)
    try:
        axarr.axis('off')
        axarr.imshow(res)
        fig.savefig(target_path, bbox_inches='tight')
    finally:
        plt.close(fig)


def main():
    """Run paragraph detection on every ``.jpg`` image in the input folder.

    Reads ``input_path``, ``output_path`` and ``model`` from the parsed
    command-line arguments. For each ``.jpg`` file, an ``.xml`` file with the
    same stem in the input folder is passed to
    ``HelperMethods.get_line_coords``. Its result is passed as ``line_coords``
    to ``DocumentAnalyzer.get_document_paragraphs``. The result of
    ``HelperMethods.create_page_xml`` is written to
    ``<output_path>/<stem>.xml``, and, while ``show_img`` is true, a figure is
    saved to ``<output_path>/<stem>.jpg``. The output directory is created when
    it does not exist.

    Returns:
        int: always 0.
    """
    args = parse_arguments()
    input_path = args.input_path
    output_path = args.output_path
    analyzer = DocumentAnalyzer(model=args.model)
    show_img = True

    images_names = [
        name for name in os.listdir(input_path) if name.endswith('.jpg')
    ]

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    length = len(images_names)

    for count, img in enumerate(images_names, 1):
        # Strip only the trailing extension; str.replace would also remove
        # any ".jpg" occurring in the middle of the file name.
        filename = os.path.splitext(img)[0]
        image_path = os.path.join(input_path, img)
        xml_path = os.path.join(input_path, filename + '.xml')

        coordinates, img_height, img_width = analyzer.get_document_paragraphs(
            image_path, line_coords=HelperMethods.get_line_coords(xml_path))
        xml_string = HelperMethods.create_page_xml(
            coordinates, img_width, img_height, filename)

        if show_img:
            _save_overlay(image_path, coordinates,
                          os.path.join(output_path, filename + '.jpg'))

        with open(os.path.join(output_path, filename + '.xml'), 'wb') as f:
            f.write(xml_string)

        print('Completed: {}/{}'.format(count, length))

    return 0