Exemple #1
0
def import_folder(bookpath, bookname="", pages="*.xml"):
    """ Import PageXML files from a folder into the database.

    bookpath: folder that contains the PageXML files.
    bookname: name of the book; if empty, the last component of bookpath
        is used.
    pages: glob pattern, relative to bookpath, selecting the files to import.

    The book and each page are looked up by name and created when no
    matching row exists. For every imported file the line counts and the
    serialized XML are stored on the page. Everything is committed once
    at the end.
    """
    if not bookname:
        bookname = path.split(bookpath)[1]
    # The total always counts every XML file, independent of `pages`.
    no_pages_total = len(glob(bookpath + "/*.xml"))
    try:
        book = Book.query.filter_by(name=bookname).one()
    except NoResultFound:
        # NOTE(review): no_pages_total is only set for newly created books;
        # an already existing book keeps its stored value -- confirm that
        # this is intended.
        book = Book(name=bookname, no_pages_total=no_pages_total)

    print('Importing book "{}"...'.format(bookname))
    cnt = 0
    for xmlfile in sorted(glob(bookpath + "/" + pages)):
        pagename = path.split(xmlfile)[1].split(".")[0]
        print("Importing page {}...".format(pagename))

        try:
            page = Page.query.filter_by(book_id=book.id, name=pagename).one()
        except NoResultFound:
            page = Page(book=book, name=pagename)

        root = etree.parse(xmlfile).getroot()
        # All XPath queries below address the document's default namespace.
        ns = {"ns": root.nsmap[None]}

        # Coords elements without a "points" attribute list their points as
        # child Point elements; fold those into a single "points" attribute.
        for coords in root.xpath("//ns:Coords[not(@points)]", namespaces=ns):
            points = coords.xpath("./ns:Point", namespaces=ns)
            coords.attrib["points"] = " ".join(
                p.attrib["x"] + "," + p.attrib["y"] for p in points)
            for p in points:
                coords.remove(p)

        # Number of TextLine elements.
        page.no_lines_segm = int(
            root.xpath("count(//ns:TextLine)", namespaces=ns))
        # Number of TextEquiv children with index "0" below TextLines.
        page.no_lines_gt = int(
            root.xpath('count(//ns:TextLine/ns:TextEquiv[@index="0"])',
                       namespaces=ns))
        # Number of TextLines with at least one TextEquiv of index > 0.
        page.no_lines_ocr = int(
            root.xpath(
                'count(//ns:TextLine[count'
                '(./ns:TextEquiv[@index>0])>0])',
                namespaces=ns))
        # Store the XML with the 2010-03-19 namespace URI rewritten to
        # 2017-07-15.
        page.data = etree.tounicode(root.getroottree()).replace(
            "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19",
            "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
        cnt += 1

    db_session.add(book)
    db_session.commit()
    print('{} pages imported for book {}.'.format(cnt, bookname))
Exemple #2
0
def scan_bookfolder(bookfolder, img_subdir):
    """ Scan bookfolder and write book info to database.

    bookfolder: folder whose direct subdirectories are the books; each
        book is named after its directory.
    img_subdir: path fragment appended to the book directory to locate the
        PNG images (concatenated as-is in front of "*.png").

    For every book, no_pages_total is set to the number of distinct image
    names (file name up to its first dot). Books that are not yet in the
    database are created. All changes are committed once at the end.
    """
    books = glob(bookfolder + "//*/")
    for bookpath in books:
        # bookpath ends with a separator; strip it to get the directory name.
        bookname = path.split(bookpath[:-1])[1]
        # Cut at the first dot of the *file name* only. Splitting the whole
        # path would truncate every entry to the same prefix as soon as any
        # directory component contains a dot (e.g. "./books"), making the
        # count collapse to 1.
        pagenames = {path.basename(f).split(".")[0]
                     for f in glob(bookpath + img_subdir + "*.png")}
        no_pages_total = len(pagenames)
        book = Book.query.filter_by(name=bookname).first()
        if not book:
            book = Book(name=bookname, no_pages_total=no_pages_total)
            db_session.add(book)
        else:
            book.no_pages_total = no_pages_total
    db_session.commit()
Exemple #3
0
def add_page(book, xmlfile, commit=True, scale=None):
    """ Create or update the page of `book` that belongs to a PageXML file.

        The page is identified by the file name up to its first dot. The
        file is passed to pagexmllineseg together with its folder, and the
        returned page data and line count are stored on the page.
        Text direction is 'horizontal-rl' for books whose name ends with
        "_ar", otherwise 'horizontal-lr'.
        scale may either be int, {"rtype": int, ... "other": int} or None
        With commit=False the caller has to add and commit the book itself.
        Returns the number of segmented lines.
        TODO: add reading order
    """
    if book.name.endswith("_ar"):
        text_direction = 'horizontal-rl'
    else:
        text_direction = 'horizontal-lr'

    bookpath, filename = path.split(xmlfile)
    pagename = filename.split(".")[0]

    page = Page.query.filter_by(book_id=book.id, name=pagename).first()
    if not page:
        page = Page(book=book, name=pagename)

    data, no_lines = pagexmllineseg(
        xmlfile, bookpath, text_direction=text_direction, scale=scale)
    page.data = data
    page.no_lines_segm = no_lines

    if commit:
        db_session.add(book)
        db_session.commit()
    return page.no_lines_segm
Exemple #4
0
def editorsettings():
    """ Load (GET) or store (POST) the editor settings of the current user.

    Settings are keyed by e-mail address; anonymous users all share the
    entry stored under "user@nashi".

    GET: responds with status "success" and the decoded settings, or with
        status "fail" when nothing is stored for this address.
    POST: stores the JSON request body as the new settings, creating the
        entry if necessary, and responds with status "success".
    """
    email = "user@nashi" if current_user.is_anonymous else current_user.email
    method = request.method

    if method == "GET":
        try:
            stored = EditorSettings.query.filter_by(email=email).one()
        except NoResultFound:
            return jsonify(status="fail")
        else:
            return jsonify(status="success",
                           settings=json.loads(stored.settings))

    if method == "POST":
        try:
            stored = EditorSettings.query.filter_by(email=email).one()
        except NoResultFound:
            # First save for this address: start a fresh entry.
            stored = EditorSettings(email=email)
        stored.settings = json.dumps(request.get_json())
        db_session.add(stored)
        db_session.commit()
        return jsonify(status="success")
Exemple #5
0
    """
    text_direction = 'horizontal-rl'\
        if book.name.endswith("_ar") else 'horizontal-lr'
    bookpath = path.split(xmlfile)[0]
    pagename = path.split(xmlfile)[1].split(".")[0]
    page = Page.query.filter_by(book_id=book.id, name=pagename).first()
    if not page:
        page = Page(book=book, name=pagename)

    page.data, page.no_lines_segm = \
        pagexmllineseg(xmlfile, bookpath, text_direction=text_direction,
                       scale=scale)
    if commit:
        db_session.add(book)
        db_session.commit()
    return page.no_lines_segm


if __name__ == '__main__':
    # Treat every directory directly below LAREX_DIR as one book and add
    # all of its XML files as pages, committing once per book.
    book_dirs = list(filter(path.isdir,
                            glob(app.config["LAREX_DIR"] + "/*")))
    for bookpath in book_dirs:
        book = create_book(bookpath)

        xmlfiles = glob(bookpath + "/*.xml")
        xmlfiles.sort()
        for xmlfile in xmlfiles:
            # Defer the commit: the whole book is committed below.
            add_page(book, xmlfile, commit=False)

        db_session.add(book)
        db_session.commit()