def import_folder(bookpath, bookname="", pages="*.xml"):
    """
    Import PageXML files from a folder into the database as pages of a book.

    bookpath is the folder containing the PageXML files.
    bookname is the name of the book; when empty it is derived from the
    last non-empty component of bookpath.
    pages is a glob pattern (relative to bookpath) selecting the files to
    import. The book's total page count is always taken from all "*.xml"
    files in bookpath, independent of this pattern.

    An existing book or page with the same name is updated, otherwise a
    new one is created. Progress is printed to stdout and everything is
    committed once at the end.
    """
    if not bookname:
        # A trailing slash makes the tail empty; fall back to the last
        # real path component so the book never gets an empty name.
        head, tail = path.split(bookpath)
        bookname = tail or path.basename(head)

    no_pages_total = len(glob(bookpath + "/*.xml"))
    try:
        book = Book.query.filter_by(name=bookname).one()
    except NoResultFound:
        book = Book(name=bookname, no_pages_total=no_pages_total)
    book.no_pages_total = no_pages_total

    print('Importing book "{}"...'.format(bookname))
    cnt = 0
    for xmlfile in sorted(glob(bookpath + "/" + pages)):
        # Page name is the file name up to its first dot.
        pagename = path.split(xmlfile)[1].split(".")[0]
        print("Importing page {}...".format(pagename))
        try:
            page = Page.query.filter_by(book_id=book.id, name=pagename).one()
        except NoResultFound:
            page = Page(book=book, name=pagename)

        root = etree.parse(xmlfile).getroot()
        # All XPath queries below address the document's default namespace.
        ns = {"ns": root.nsmap[None]}

        # convert point notation from pagexml version 2013:
        # <Point x= y=> children become a single "points" attribute
        for coords in root.xpath("//ns:Coords[not(@points)]", namespaces=ns):
            converted = []
            for point in coords.xpath("./ns:Point", namespaces=ns):
                converted.append(point.attrib["x"] + "," + point.attrib["y"])
                coords.remove(point)
            coords.attrib["points"] = " ".join(converted)

        # Number of text lines in the page.
        page.no_lines_segm = int(
            root.xpath("count(//ns:TextLine)", namespaces=ns))
        # Number of TextEquiv elements with index 0 directly below a line.
        page.no_lines_gt = int(
            root.xpath('count(//ns:TextLine/ns:TextEquiv[@index="0"])',
                       namespaces=ns))
        # Number of lines having at least one TextEquiv with index > 0.
        page.no_lines_ocr = int(
            root.xpath(
                'count(//ns:TextLine[count'
                '(./ns:TextEquiv[@index>0])>0])',
                namespaces=ns))

        # Store the serialized document, rewriting the 2010 schema
        # namespace URI to the 2017 one.
        page.data = etree.tounicode(root.getroottree()).replace(
            "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19",
            "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
        cnt += 1

    db_session.add(book)
    db_session.commit()
    print('{} pages imported for book {}.'.format(cnt, bookname))
def scan_bookfolder(bookfolder, img_subdir):
    """
    Scan bookfolder and write book info to database.

    Every sub-directory of bookfolder is treated as one book named after
    that directory. Its page count is the number of distinct PNG images
    matched by "<book>/<img_subdir>*.png", where files that share the
    file-name part before the first dot count as one page.

    Existing books get their page count updated, unknown books are added.
    All changes are committed once after the scan.
    """
    for bookpath in glob(bookfolder + "//*/"):
        # The glob pattern ends in a separator, so drop the last character
        # before taking the final path component.
        bookname = path.split(bookpath[:-1])[1]

        # Cut at the first dot of the *file name* only. Splitting the whole
        # path would truncate at any dot in a directory name (e.g. "./books")
        # and collapse all images of a book into a single page.
        page_keys = {
            (folder, filename.split(".")[0])
            for folder, filename in map(
                path.split, glob(bookpath + img_subdir + "*.png"))
        }
        no_pages_total = len(page_keys)

        book = Book.query.filter_by(name=bookname).first()
        if not book:
            book = Book(name=bookname, no_pages_total=no_pages_total)
            db_session.add(book)
        else:
            book.no_pages_total = no_pages_total
    db_session.commit()
def add_page(book, xmlfile, commit=True, scale=None):
    """
    Attach the page described by a PageXML file to a book.

    The page is named after the XML file name up to its first dot; a page
    of that name already belonging to the book is reused, otherwise a new
    one is created. Page data and the number of segmented lines are taken
    from pagexmllineseg, which is given the XML file, its folder, the text
    direction and scale.

    Text direction is 'horizontal-rl' for books whose name ends in "_ar"
    and 'horizontal-lr' for all others.

    scale may either be int, {"rtype": int, ... "other": int} or None

    With commit set, the book is added to the session and committed.
    Returns the page's number of segmented lines.

    TODO: add reading order
    """
    if book.name.endswith("_ar"):
        text_direction = 'horizontal-rl'
    else:
        text_direction = 'horizontal-lr'

    bookpath, xmlname = path.split(xmlfile)
    pagename = xmlname.split(".")[0]

    page = (Page.query.filter_by(book_id=book.id, name=pagename).first()
            or Page(book=book, name=pagename))

    segmentation = pagexmllineseg(
        xmlfile,
        bookpath,
        text_direction=text_direction,
        scale=scale,
    )
    page.data, page.no_lines_segm = segmentation

    if commit:
        db_session.add(book)
        db_session.commit()
    return page.no_lines_segm
def editorsettings():
    """
    Load or store the editor settings of the current user.

    Settings are keyed by e-mail address; anonymous users share the
    address "user@nashi".

    GET: responds with status "success" and the stored settings decoded
    from JSON, or status "fail" when nothing is stored for the address.
    POST: stores the request's JSON body (creating the record if needed),
    commits, and responds with status "success".
    """
    email = "user@nashi" if current_user.is_anonymous else current_user.email
    method = request.method

    if method == "GET":
        try:
            stored = EditorSettings.query.filter_by(email=email).one()
        except NoResultFound:
            return jsonify(status="fail")
        else:
            return jsonify(status="success",
                           settings=json.loads(stored.settings))

    if method == "POST":
        try:
            stored = EditorSettings.query.filter_by(email=email).one()
        except NoResultFound:
            stored = EditorSettings(email=email)
        stored.settings = json.dumps(request.get_json())
        db_session.add(stored)
        db_session.commit()
        return jsonify(status="success")
""" text_direction = 'horizontal-rl'\ if book.name.endswith("_ar") else 'horizontal-lr' bookpath = path.split(xmlfile)[0] pagename = path.split(xmlfile)[1].split(".")[0] page = Page.query.filter_by(book_id=book.id, name=pagename).first() if not page: page = Page(book=book, name=pagename) page.data, page.no_lines_segm = \ pagexmllineseg(xmlfile, bookpath, text_direction=text_direction, scale=scale) if commit: db_session.add(book) db_session.commit() return page.no_lines_segm if __name__ == '__main__': for bookpath in [ x for x in glob(app.config["LAREX_DIR"] + "/*") if path.isdir(x) ]: book = create_book(bookpath) for xmlfile in sorted(glob(bookpath + "/*.xml")): add_page(book, xmlfile, commit=False) db_session.add(book) db_session.commit()