def method(self, polfile, conn, cur):
    """Extract and normalise the common invoice fields of one document.

    Reads the values found at the ``fs.common_invoice`` bounding boxes in
    the document's HTML file (``st.basepath + 'html/' + docid + '.html'``),
    keys them by each spec's ``'field'`` name, and then splits the combined
    fields (invoice period, flight dates, page counter) into separate
    entries that are passed through ``util.checkDates`` / ``util.checkInts``.

    Args:
        polfile: Document record; only ``polfile.docid`` is read here.
        conn: Not used in the visible body (presumably a DB connection --
            confirm against callers).
        cur: Not used in the visible body (presumably a DB cursor --
            confirm against callers).

    Returns:
        None. If any munging step raises, the exception and the partially
        processed ``data`` dict are written with ``osutil.print_stderr``
        and the method returns early.
    """
    values = util.dataAtHocrBboxes(
        fs.common_invoice,
        st.basepath + 'html/' + polfile.docid + '.html')
    columns_raw = [f['field'] for f in fs.common_invoice]
    data = dict(zip(columns_raw, values))
    data['id'] = polfile.docid
    del data['header']

    # munging
    # Each combined field is deleted only after its split succeeded, so the
    # stderr dump on failure still contains the offending raw value.
    try:
        (data['invoice_from'], data['invoice_to']) = util.checkDates(
            util.fuzzySplit(data['invoice_period'], '-'))
        del data['invoice_period']

        (data['flight_from'], data['flight_to']) = util.checkDates(
            util.fuzzySplit(data['flight_dates'], '-'))
        del data['flight_dates']

        # 'Page' is stripped before splitting on 'of'.
        (data['page'], data['total_pages']) = util.checkInts(
            util.fuzzySplit(data['page_of'].replace('Page', ''), 'of'))
        del data['page_of']

        data['invoice_date'] = util.checkDates([data['invoice_date']])[0]

        # An empty estimate number is stored as the project's NULL marker.
        if data['estimate_no'] == '':
            data['estimate_no'] = st.NULL
    except Exception as e:  # 'as' form is valid on Python 2.6+ and 3.x
        osutil.print_stderr(e)
        osutil.print_stderr(data)
        return
def method(self, polfile, conn, cur, abortOnError=True):
    """Extract and normalise the common contract fields of one document.

    Reads the values found at the ``fs.common_contract`` bounding boxes in
    the document's HTML file (``st.basepath + 'html/' + docid + '.html'``),
    keys them by each spec's ``'field'`` name, and then splits the combined
    fields (contract dates, original/revision date, page counter) into
    separate entries that are passed through ``util.checkDates`` /
    ``util.checkInts``.

    Args:
        polfile: Document record; only ``polfile.docid`` is read here.
        conn: Not used in the visible body (presumably a DB connection --
            confirm against callers).
        cur: Not used in the visible body (presumably a DB cursor --
            confirm against callers).
        abortOnError: Accepted for interface compatibility; the visible
            body does not consult it.

    Returns:
        None. If any munging step raises, the exception and the partially
        processed ``data`` dict are written with ``osutil.print_stderr``
        and the method returns early.
    """
    values = util.dataAtHocrBboxes(
        fs.common_contract,
        st.basepath + 'html/' + polfile.docid + '.html')
    columns_raw = [f['field'] for f in fs.common_contract]
    data = dict(zip(columns_raw, values))
    data['id'] = polfile.docid
    del data['header']

    # munging
    # Each combined field is deleted only after its split succeeded, so the
    # stderr dump on failure still contains the offending raw value.
    try:
        (data['contract_from'], data['contract_to']) = util.checkDates(
            util.fuzzySplit(data['contract_dates'], ' - '))
        del data['contract_dates']

        (data['original_date'], data['revision_date']) = util.checkDates(
            util.fuzzySplit(data['original_date_revision'], ' / '))
        del data['original_date_revision']

        data['print_date'] = util.checkDates([data['print_date']])[0]

        # 'Page' is stripped before splitting on 'of'.
        (data['page'], data['total_pages']) = util.checkInts(
            util.fuzzySplit(data['page_from_to'].replace('Page', ''), 'of'))
        del data['page_from_to']

        # An empty estimate number is stored as the project's NULL marker.
        if data['estimate_no'] == '':
            data['estimate_no'] = st.NULL
    except Exception as e:  # 'as' form is valid on Python 2.6+ and 3.x
        osutil.print_stderr(e)
        osutil.print_stderr(data)
        return
def method(self, polfile, conn, cur):
    """Probe a document's HTML for doctype text and, failing that, any text.

    Looks up the ``fs.doctype`` bounding boxes in ``polfile.htmlpath``. When
    the first returned value is empty, the ``fs.fullpage`` boxes are queried
    as well (with ``returnFirstWord=True``) to find out whether the page
    holds any text at all.

    Args:
        polfile: Document record; only ``polfile.htmlpath`` is read here.
        conn: Not used in the visible body (presumably a DB connection --
            confirm against callers).
        cur: Not used in the visible body (presumably a DB cursor --
            confirm against callers).

    Returns:
        None.

    Raises:
        Whatever ``util.dataAtHocrBboxes`` raises; nothing is caught here.
    """
    # No try/except: a handler that only re-raises adds nothing, and the
    # ``raise e`` form discards the original traceback on Python 2.
    doctype = util.dataAtHocrBboxes(fs.doctype, polfile.htmlpath)

    # NOTE(review): docformat_sql and anytext are not read in the visible
    # portion of this method; presumably consumed by code outside this
    # view -- confirm before removing.
    docformat_sql = None

    # Don't check for text if text already found in doctype.
    if len(doctype[0]) > 0:
        anytext = ['text found']
    else:
        anytext = util.dataAtHocrBboxes(
            fs.fullpage, polfile.htmlpath, returnFirstWord=True)
    return
def method(self, polfile, conn, cur):
    """Extract and normalise the common order fields of one document.

    Reads the values found at the ``fs.common_order`` bounding boxes in the
    document's HTML file (``st.basepath + 'html/' + docid + '.html'``), keys
    them by each spec's ``'field'`` name, and then splits the combined
    fields (original/revision date, flight dates, page counter) into
    separate entries that are passed through ``util.checkDates`` /
    ``util.checkInts``.

    Args:
        polfile: Document record; only ``polfile.docid`` is read here.
        conn: Not used in the visible body (presumably a DB connection --
            confirm against callers).
        cur: Not used in the visible body (presumably a DB cursor --
            confirm against callers).

    Returns:
        None. If any munging step raises, the exception and the partially
        processed ``data`` dict are written with ``osutil.print_stderr``
        and the method returns early.
    """
    values = util.dataAtHocrBboxes(
        fs.common_order,
        st.basepath + 'html/' + polfile.docid + '.html')
    columns_raw = [f['field'] for f in fs.common_order]
    data = dict(zip(columns_raw, values))
    # The stored id uses ':' where the docid (and the file name) uses '_'.
    data['id'] = polfile.docid.replace('_', ':')
    del data['header']

    # munging
    # Each combined field is deleted only after its split succeeded, so the
    # stderr dump on failure still contains the offending raw value.
    try:
        (data['original_date'], data['revision_date']) = util.checkDates(
            util.fuzzySplit(data['original_date_revision'], ' / '))
        del data['original_date_revision']

        (data['flight_from'], data['flight_to']) = util.checkDates(
            util.fuzzySplit(data['flight_dates'], '-'))
        del data['flight_dates']

        # 'Page' is stripped before splitting on 'of'.
        (data['page'], data['total_pages']) = util.checkInts(
            util.fuzzySplit(data['page_of'].replace('Page', ''), 'of'))
        del data['page_of']
    except Exception as e:  # 'as' form is valid on Python 2.6+ and 3.x
        osutil.print_stderr(e)
        osutil.print_stderr(data)
        return