def method(self, polfile, conn, cur):
        """Collect and normalise the common invoice fields of ``polfile``.

        The values returned by ``util.dataAtHocrBboxes`` for the
        ``fs.common_invoice`` field specs are paired positionally with the
        ``'field'`` names of those specs.  Combined fields (invoice period,
        flight dates, page counter) are then split into separate entries.

        Args:
            polfile: object exposing ``docid``; the id selects the HTML file
                under ``st.basepath + 'html/'`` and is stored as ``data['id']``.
            conn: not used within this block.
            cur: not used within this block.

        Returns:
            None.  When any munging step raises, the exception and the
            partially processed record are written with
            ``osutil.print_stderr`` and the method returns early.

        Raises:
            KeyError: if the extracted record has no ``'header'`` entry
                (that deletion happens outside the ``try``).
        """
        html_path = st.basepath + 'html/' + polfile.docid + '.html'
        values = util.dataAtHocrBboxes(fs.common_invoice, html_path)
        columns_raw = [f['field'] for f in fs.common_invoice]
        data = dict(zip(columns_raw, values))
        data['id'] = polfile.docid

        # The header entry is dropped before munging.
        del data['header']

        # munging: every combined field is replaced by its two components
        try:
            (data['invoice_from'], data['invoice_to']) = util.checkDates(
                util.fuzzySplit(data['invoice_period'], '-'))
            del data['invoice_period']

            (data['flight_from'], data['flight_to']) = util.checkDates(
                util.fuzzySplit(data['flight_dates'], '-'))
            del data['flight_dates']

            # Strip the literal 'Page' before splitting "<n> of <m>".
            page_text = data['page_of'].replace('Page', '')
            (data['page'], data['total_pages']) = util.checkInts(
                util.fuzzySplit(page_text, 'of'))
            del data['page_of']

            data['invoice_date'] = util.checkDates([data['invoice_date']])[0]

            # An empty estimate number is stored as the project's NULL marker.
            if data['estimate_no'] == '':
                data['estimate_no'] = st.NULL

        # 'as' form is valid on Python 2.6+ and 3.x; the old comma form is a
        # SyntaxError on Python 3.
        except Exception as e:
            # Best effort: report the error and the record state, then give up
            # on this document instead of propagating.
            osutil.print_stderr(e)
            osutil.print_stderr(data)
            return
    def method(self, polfile, conn, cur, abortOnError=True):
        """Collect and normalise the common contract fields of ``polfile``.

        The values returned by ``util.dataAtHocrBboxes`` for the
        ``fs.common_contract`` field specs are paired positionally with the
        ``'field'`` names of those specs.  Combined fields (contract dates,
        original/revision date, page counter) are split into separate
        entries.

        Args:
            polfile: object exposing ``docid``; the id selects the HTML file
                under ``st.basepath + 'html/'`` and is stored as ``data['id']``.
            conn: not used within this block.
            cur: not used within this block.
            abortOnError: accepted but not consulted anywhere in this block.

        Returns:
            None.  When any munging step raises, the exception and the
            partially processed record are written with
            ``osutil.print_stderr`` and the method returns early.

        Raises:
            KeyError: if the extracted record has no ``'header'`` entry
                (that deletion happens outside the ``try``).
        """
        html_path = st.basepath + 'html/' + polfile.docid + '.html'
        values = util.dataAtHocrBboxes(fs.common_contract, html_path)

        columns_raw = [f['field'] for f in fs.common_contract]
        data = dict(zip(columns_raw, values))

        data['id'] = polfile.docid

        # The header entry is dropped before munging.
        del data['header']

        # munging: every combined field is replaced by its two components
        try:
            (data['contract_from'], data['contract_to']) = util.checkDates(
                util.fuzzySplit(data['contract_dates'], ' - '))
            del data['contract_dates']

            (data['original_date'], data['revision_date']) = util.checkDates(
                util.fuzzySplit(data['original_date_revision'], ' / '))
            del data['original_date_revision']

            data['print_date'] = util.checkDates([data['print_date']])[0]

            # Strip the literal 'Page' before splitting "<n> of <m>".
            page_text = data['page_from_to'].replace('Page', '')
            (data['page'], data['total_pages']) = util.checkInts(
                util.fuzzySplit(page_text, 'of'))
            del data['page_from_to']

            # An empty estimate number is stored as the project's NULL marker.
            if data['estimate_no'] == '':
                data['estimate_no'] = st.NULL
        # 'as' form is valid on Python 2.6+ and 3.x; the old comma form is a
        # SyntaxError on Python 3.
        except Exception as e:
            # Best effort: report the error and the record state, then give up
            # on this document instead of propagating.
            osutil.print_stderr(e)
            osutil.print_stderr(data)
            return
    def method(self, polfile, conn, cur):
        """Probe ``polfile``'s HTML for a doctype and for any text at all.

        Looks up the ``fs.doctype`` boxes first.  Only when the first
        doctype value is empty is the ``fs.fullpage`` lookup (first word
        only) performed.

        Args:
            polfile: object exposing ``htmlpath``, the file handed to
                ``util.dataAtHocrBboxes``.
            conn: not used within this block.
            cur: not used within this block.

        Returns:
            None.

        Raises:
            Whatever ``util.dataAtHocrBboxes`` or the indexing of its result
            raises.  Errors propagate unchanged with their original
            traceback; the previous ``except Exception, e: raise e`` wrapper
            only re-raised (resetting the traceback on Python 2) and was
            followed by an unreachable ``return``.
        """
        doctype = util.dataAtHocrBboxes(fs.doctype, polfile.htmlpath)
        # NOTE(review): docformat_sql and anytext are not read again within
        # this block; kept so the visible behaviour stays the same.
        docformat_sql = None

        # Don't check for text if text already
        # found in doctype.
        if len(doctype[0]) > 0:
            anytext = ['text found']
        else:
            anytext = util.dataAtHocrBboxes(
                fs.fullpage, polfile.htmlpath, returnFirstWord=True)
    def method(self, polfile, conn, cur):
        """Collect and normalise the common order fields of ``polfile``.

        The values returned by ``util.dataAtHocrBboxes`` for the
        ``fs.common_order`` field specs are paired positionally with the
        ``'field'`` names of those specs.  Combined fields
        (original/revision date, flight dates, page counter) are split into
        separate entries.

        Args:
            polfile: object exposing ``docid``; the id selects the HTML file
                under ``st.basepath + 'html/'``.  It is stored as
                ``data['id']`` with every ``'_'`` replaced by ``':'``.
            conn: not used within this block.
            cur: not used within this block.

        Returns:
            None.  When any munging step raises, the exception and the
            partially processed record are written with
            ``osutil.print_stderr`` and the method returns early.

        Raises:
            KeyError: if the extracted record has no ``'header'`` entry
                (that deletion happens outside the ``try``).
        """
        html_path = st.basepath + 'html/' + polfile.docid + '.html'
        values = util.dataAtHocrBboxes(fs.common_order, html_path)
        columns_raw = [f['field'] for f in fs.common_order]
        data = dict(zip(columns_raw, values))

        # The file name uses the raw docid; the stored id uses ':' for '_'.
        data['id'] = polfile.docid.replace('_', ':')

        # The header entry is dropped before munging.
        del data['header']

        # munging: every combined field is replaced by its two components
        try:
            (data['original_date'], data['revision_date']) = util.checkDates(
                util.fuzzySplit(data['original_date_revision'], ' / '))
            del data['original_date_revision']

            (data['flight_from'], data['flight_to']) = util.checkDates(
                util.fuzzySplit(data['flight_dates'], '-'))
            del data['flight_dates']

            # Strip the literal 'Page' before splitting "<n> of <m>".
            page_text = data['page_of'].replace('Page', '')
            (data['page'], data['total_pages']) = util.checkInts(
                util.fuzzySplit(page_text, 'of'))
            del data['page_of']

        # 'as' form is valid on Python 2.6+ and 3.x; the old comma form is a
        # SyntaxError on Python 3.
        except Exception as e:
            # Best effort: report the error and the record state, then give up
            # on this document instead of propagating.
            osutil.print_stderr(e)
            osutil.print_stderr(data)
            return