예제 #1
0
def processPage(page, abbyyParsed):
    """Run the pending ABBYY and contrast steps for a single page record.

    The page record is updated in place and nothing is returned.  A step is
    only attempted when its ``*_complete`` flag is exactly ``False``.  After
    both steps have been attempted the processing lock is released and
    ``processing_error`` is set when either step is still incomplete.

    Args:
        page: Mutable mapping describing the page, or ``None`` when there is
            nothing to process.  The keys read here are ``scan_id``,
            ``ia_page_num``, ``scandata_index``, ``abbyy_complete``,
            ``contrast_complete``, ``compression_complete`` and the nested
            ``has_illustration`` mapping.
        abbyyParsed: Sequence of per-page ABBYY results, indexed by
            ``page['scandata_index']``.  An entry of ``False`` is treated as
            "no result" and leaves the ABBYY step incomplete.
    """
    if page is None:
        helper.log.debug('No pages for processing')
        return

    startTime = time()
    helper.log.debug('Starting Processing for scan_id: %s' %
                     (page['scan_id']))

    if page['abbyy_complete'] is False:
        # Valid positions are 0 .. len(abbyyParsed) - 1.  The comparison has
        # to be strict: an index equal to the length is already out of range
        # and must be recorded as an error instead of raising IndexError.
        if page['scandata_index'] < len(abbyyParsed):
            result = abbyyParsed[page['scandata_index']]
            if result is not False:
                page['abbyy'] = result
                page['has_illustration']['abbyy'] = result['image_detected']
                page['abbyy_complete'] = True
                # Durations are measured from the start of this call.
                page['abbyy_processing_duration'] = time() - startTime
                helper.log.debug('ABBYY Processing duration: %s' %
                                 (page['abbyy_processing_duration']))
        else:
            page['abbyy_error'] = 'out of range %s' % (
                page['scandata_index'])

    # The compression step is disabled; 'compression_complete' is only
    # reported in the summary log below and does not affect the error flag.

    if page['contrast_complete'] is False:
        result = contrast.processImage(page)
        if result is not False:
            # The contrast result is merged into the page record wholesale.
            page.update(result)
            page['has_illustration']['contrast'] = result['image_detected']
            page['contrast_complete'] = True
            page['contrast_processing_duration'] = time() - startTime
            helper.log.debug('Contrast Processing duration: %s' %
                             (page['contrast_processing_duration']))

    # Release the lock whether or not the steps succeeded.
    page['processing_lock'] = False
    page['processing_lock_end'] = time()

    if (page['abbyy_complete'] is False
            or page['contrast_complete'] is False):
        page['processing_error'] = True

    helper.log.debug(
        'Complete: %s|%s: abbyy: %s, compression: %s, contrast: %s' %
        (page['scan_id'], page['ia_page_num'], page['abbyy_complete'],
         page['compression_complete'], page['contrast_complete']))
    helper.log.debug('Processing duration: %s' % (time() - startTime))
예제 #2
0
def processPage(page, abbyyParsed):
    """Apply the outstanding ABBYY and contrast processing to one page.

    Mutates ``page`` in place and returns ``None``.  Each step runs only when
    its ``*_complete`` flag is exactly ``False``.  Once both steps have been
    attempted the processing lock is cleared, and ``processing_error`` is set
    if either step did not complete.

    Args:
        page: Mutable mapping for the page, or ``None`` when no page is
            available.  Reads ``scan_id``, ``ia_page_num``,
            ``scandata_index``, ``abbyy_complete``, ``contrast_complete``,
            ``compression_complete`` and the nested ``has_illustration``
            mapping.
        abbyyParsed: Sequence of ABBYY results looked up by
            ``page['scandata_index']``; a ``False`` entry means no usable
            result for that page.
    """
    if page is None:
        helper.log.debug('No pages for processing')
        return

    startTime = time()
    helper.log.debug('Starting Processing for scan_id: %s' % (page['scan_id']))

    if page['abbyy_complete'] is False:
        index = page['scandata_index']
        # Strict comparison: index == len(abbyyParsed) is one past the last
        # element, so it is reported as out of range rather than being
        # allowed to raise IndexError on the lookup.
        if index < len(abbyyParsed):
            result = abbyyParsed[index]
            if result is not False:
                page['abbyy'] = result
                page['has_illustration']['abbyy'] = result['image_detected']
                page['abbyy_complete'] = True
                # Elapsed time is counted from the start of this call.
                page['abbyy_processing_duration'] = time() - startTime
                helper.log.debug('ABBYY Processing duration: %s' % (page['abbyy_processing_duration']))
        else:
            page['abbyy_error'] = 'out of range %s' % (index)

    # Compression processing is switched off; 'compression_complete' is still
    # read for the summary log but is not part of the error check.

    if page['contrast_complete'] is False:
        result = contrast.processImage(page)
        if result is not False:
            # Every key of the contrast result is copied onto the page.
            page.update(result)
            page['has_illustration']['contrast'] = result['image_detected']
            page['contrast_complete'] = True
            page['contrast_processing_duration'] = time() - startTime
            helper.log.debug('Contrast Processing duration: %s' % (page['contrast_processing_duration']))

    # The lock is released regardless of the outcome of the steps above.
    page['processing_lock'] = False
    page['processing_lock_end'] = time()

    if page['abbyy_complete'] is False or page['contrast_complete'] is False:
        page['processing_error'] = True

    helper.log.debug('Complete: %s|%s: abbyy: %s, compression: %s, contrast: %s' %
        (page['scan_id'], page['ia_page_num'], page['abbyy_complete'], page['compression_complete'], page['contrast_complete']))
    helper.log.debug('Processing duration: %s' % (time() - startTime))
예제 #3
0
    print len(page_data), 'pages'


import compression
import contrast
import abbyy

# Fetch ABBYY file
# Downloads the scan's ABBYY output from archive.org and parses it as XML.
# `args` and `abbyy_filename` are defined outside this excerpt.
if (args.v):
    print 'Fetching ABBYY...'
abbyy_file = urllib2.urlopen("http://archive.org/download/%(scan)s/%(file)s" % {'scan': args.scan, 'file': abbyy_filename})
# wbits = 15 + 32 tells zlib to auto-detect a gzip or zlib header.
abbyy_data = ET.fromstring(zlib.decompress(abbyy_file.read(), 15 + 32))
# Collect the `page` elements in the FineReader6 namespace.
# NOTE(review): ET is presumably xml.etree.ElementTree, in which case this
# matches direct children of the root only - confirm against the imports.
abbyy_pages = abbyy_data.findall('{http://www.abbyy.com/FineReader_xml/FineReader6-schema-v1.xml}page')

# Process each page
# NOTE: the unconditional `break` at the end of the loop body means only the
# first entry of page_data is processed.
for page in page_data:

    # Download the page image and keep its bytes in an in-memory file object.
    url = 'http://www.archive.org/download/%s/page/n%s' % (page['scan_id'], page['ia_page_num'])
    img_file = StringIO(urllib2.urlopen(url).read())
    image = Image.open(img_file)

    # Run each analysis step and print whatever it returns.
    print compression.processImage(img_file, image)
    print contrast.processImage(image, page['scan_id'], page['ia_page_num'])
    # The ABBYY element for this page is selected by its scandata_index.
    print abbyy.processABBYY(abbyy_pages[page['scandata_index']])

    break


# Process metadata