def test_parse():
    """Check that a claim string is parsed into its five numeric fields."""
    claim = tools.parse('#1 @ 2,3: 5x4')

    # Checked in the same order as the fields appear in the claim string.
    expected_fields = (
        ("id", 1),
        ("left", 2),
        ("top", 3),
        ("width", 5),
        ("height", 4),
    )
    for field, wanted in expected_fields:
        assert getattr(claim, field) == wanted
def test_sample():
    """Smoke test: tally the cells of three sample claims and print the grid.

    Every ``(x, y)`` pair yielded by iterating a parsed claim bumps a counter
    in ``mapa``; the resulting mapping is handed to ``tools.print_mapa``.
    """
    samples = (
        '#1 @ 1,3: 4x4',
        '#2 @ 3,1: 4x4',
        '#3 @ 5,5: 2x2',
    )

    mapa = defaultdict(int)
    # Generator expression keeps parsing interleaved with counting.
    for claim in (tools.parse(text) for text in samples):
        for col, row in claim:
            mapa[col, row] += 1

    tools.print_mapa(mapa)
def BookParse(bookid, pages=None, exclude=None):
    """Parse a PDF book page by page and store the results in the DB.

    The book is read from ``data/book<bookid>.pdf`` (e.g. ``data/book12.pdf``
    for the book whose DB id is 12). For every selected page a page entry is
    inserted, the page is recognized and parsed, the parsed items are
    bulk-inserted, and finally the page is converted to HTML and the page
    entry is updated with it. Failures on a single page are logged and do not
    abort the whole book.

    Args:
        bookid: Id of the book in the DB; also selects the file name.
        pages: Optional collection of 1-based page numbers to parse. ``None``
            or an empty collection means "all pages".
        exclude: Optional collection of 1-based page numbers to skip.

    NOTE(review): earlier documentation mentions a range format such as
    ``1,2,3-8,15``. This function only performs membership tests on ``pages``
    and ``exclude``, so that format is presumably expanded by the caller --
    confirm.

    Raises:
        FileNotFoundError: The book file does not exist (logged, re-raised).
        PDFTextExtractionNotAllowed: The document is not extractable.
    """
    if pages is None:
        pages = set()
    if exclude is None:
        exclude = set()

    try:
        bookfile = open("data/book" + str(bookid) + ".pdf", "rb")
    except FileNotFoundError as e:
        exception_msg(lg, e,
                      level="ERR",
                      text="No such book (id=%s) in data dir." % str(bookid))
        raise

    # The parser is built on this handle and pages are consumed inside the
    # loop, so the file stays open for the whole run and is closed on every
    # exit path (normal completion as well as any exception).
    with bookfile:
        mineparser = PDFParser(bookfile)
        document = PDFDocument(mineparser)
        if not document.is_extractable:
            lg.error("PDF text extraction is not allowed.")
            raise PDFTextExtractionNotAllowed

        db = DBManager()
        # Page numbers reported and stored are 1-based.
        for realnum, page in enumerate(PDFPage.create_pages(document), 1):
            lg.info("Working on page %s (bookid=%s)", realnum, bookid)
            if (pages and realnum not in pages) or realnum in exclude:
                lg.info("Page %s (bookid=%s) excluded.", realnum, bookid)
                continue

            # Insert page entry to db, no HTML
            db.insert_page(bookid, realnum)

            lg.info("Recognizing (pagenum=%s) of book (id=%s).",
                    realnum, bookid)
            pagetype = recognize(bookid, page)
            if pagetype == -1:
                lg.warning(
                    "Can't recognize page (pagenum=%s) in book (id=%s).",
                    realnum, bookid)
                lg.info("Page %s (bookid=%s) skipped.", realnum, bookid)
                continue

            lg.info(
                "Parsing (pagenum=%s) of book (id=%s). Type (pagetype=%s).",
                realnum, bookid, pagetype)
            try:
                data = parse(bookid, page, pagetype)
            except Exception as e:
                # A page that cannot be parsed is skipped entirely
                # (no items and no HTML are stored for it).
                exception_msg(lg, e,
                              level="WARN",
                              text="Errors while parsing."
                                   " Skip (pagenum=%s) of book (id=%s)"
                                   % (str(realnum), str(bookid)))
                continue

            lg.info(
                "Inserting items to DB."
                " (pagenum=%s) of book (id=%s). Type (pagetype=%s).",
                realnum, bookid, pagetype)
            try:
                db.bulk_insert(bookid, data, pnum=realnum)
            except Exception as e:
                # Best effort: a failed insert is logged, and the HTML
                # conversion below is still attempted.
                exception_msg(lg, e,
                              level="ERR",
                              text="Errors during inserting data into DB."
                                   " Maybe you should check the parser")

            # Update page entry with parsed HTML
            lg.info("Parsing to HTML (pagenum=%s) of book (id=%s).",
                    realnum, bookid)
            try:
                html = pdftohtml(page)
            except Exception as e:
                exception_msg(lg, e,
                              text="Cannot convert PDF to HTML."
                                   " (pagenum=%s) of book (id=%s)"
                                   % (str(realnum), str(bookid)))
            else:
                lg.info(
                    "Inserting HTML to DB."
                    " (pagenum=%s) of book (id=%s). Type (pagetype=%s).",
                    realnum, bookid, pagetype)
                db.insert_page(bookid, realnum, data=html)

            lg.info(
                "Done with page."
                " (pagenum=%s) of book (id=%s). Type (pagetype=%s).",
                realnum, bookid, pagetype)
import os user = getpass.getuser() import socket computer_name = socket.gethostname() print ''' #-----------------------------------------------------------------------------# # Shell4Win # # # # This is an open source shell interpreter # # it is made for system administrators who are used to shell syntax and # # need to deal with Windows from now and then. it enables you to use shell # # commands and scripts under Windows environment. # # # # author:log4leo https://github.com/log4leo/Shell4Win # # license: BSD # # # #-----------------------------------------------------------------------------# ''' print "[Current directory]" + os.getcwd() while 1: try: s = raw_input('[' + user + '@' + computer_name + ']#') except EOFError: break if s.startswith('#'): continue if not s: continue tools.parse(s)
def sh(fn):
    """Run a script file by feeding each of its lines to ``tools.parse``.

    Every line is stripped of leading and trailing whitespace before it is
    passed on. Lines are processed in file order.

    Args:
        fn: Path of the script file to read (opened in text mode).

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    # The context manager closes the script even when a command raises;
    # previously the handle was never closed.
    with open(fn, "r") as script:
        for line in script:
            tools.parse(line.strip())
def test_print_mapa():
    """Smoke test: count the cells of one parsed claim and print the grid."""
    mapa = defaultdict(int)

    parsed = tools.parse('#1 @ 2,3: 5x4')
    # Each (x, y) pair yielded by the claim increments its counter once.
    for col, row in parsed:
        mapa[col, row] += 1

    tools.print_mapa(mapa)
def test_iterator():
    """Smoke test: iterate a parsed claim and print every item it yields."""
    parsed = tools.parse('#1 @ 2,3: 5x4')
    for cell in parsed:
        print(cell)