Ejemplo n.º 1
0
def processPages(pages, collection):
    """Process every page of a scan, save each one, then clean up its images.

    The ABBYY data is parsed once, using the ``scan_id`` of the first page,
    and reused for all remaining pages. Each page is handed to
    ``processPage`` and then written back with ``collection.save``.

    Args:
        pages: Iterable of page documents (dict-like, each with a
            ``'scan_id'`` key).
        collection: Object exposing ``save(page)``; presumably a MongoDB
            collection -- confirm against the caller.

    Returns:
        None.
    """
    abbyyParsed = None
    scanId = None

    for page in pages:
        # Parse lazily so nothing is fetched when there are no pages.
        if abbyyParsed is None:
            abbyyParsed = abbyy.parseABBYY(page['scan_id'])
        processPage(page, abbyyParsed)
        saveId = collection.save(page)
        scanId = page['scan_id']
        # Wrap in a 1-tuple so a tuple-valued id cannot break the formatting.
        helper.log.debug('Save id: %s' % (saveId,))

    # Only clean up when at least one page was seen; with an empty `pages`
    # there is no scan id and the helper would be called with None.
    if scanId is not None:
        helper.removeIAImages(scanId)
def processPages(pages, collection):
    """Process every page of a scan, save each one, then clear the scan lock.

    The ABBYY data is parsed once and the scan images are fetched once,
    both using the ``scan_id`` of the first page. Before the first page is
    processed, the function checks whether the ``..._0000.jp2`` image exists
    on disk; if it does not, ``pageShift`` is set to 1 and passed to
    ``processPage`` (the on-disk files then start counting at 1).

    Any exception raised while processing is logged, and the page being
    handled at that moment is saved with ``processing_error = True``. The
    lock is NOT cleared in that case.

    Args:
        pages: Iterable of page documents (dict-like, each with a
            ``'scan_id'`` key).
        collection: Object exposing ``save`` and ``update``; presumably a
            MongoDB collection -- confirm against the caller.

    Returns:
        None.
    """
    abbyyParsed = None
    imagesDownloaded = None
    pageShift = 0
    shiftChecked = False
    # Last page seen; stays None when `pages` is empty so that the code
    # after the loop and the error handler never touch an unbound name.
    page = None

    try:
        for page in pages:
            scanId = page['scan_id']

            if abbyyParsed is None:
                abbyyParsed = abbyy.parseABBYY(scanId)

            if imagesDownloaded is None:
                imagesDownloaded = helper.fetchAllImages(scanId)

            if imagesDownloaded is False:
                helper.log.debug('Images Downloaded False')
                # Caught by the handler below, which flags the page.
                raise NameError('NoImages')

            # Our data starts counting at 0, but sometimes the page files
            # start counting at 1. Check this only once per call.
            if not shiftChecked:
                imgPath = '%s/%s/%s_jp2/%s_%s.jp2' % (
                    helper.base_path, scanId, scanId, scanId, '0000')
                if not os.path.exists(imgPath):
                    # No page-0 file: shift page numbers when referencing
                    # files on disk.
                    pageShift = 1
                # Mark the check as done whether or not a shift is needed.
                shiftChecked = True

            processPage(page, abbyyParsed, pageShift)
            saveId = collection.save(page)
            helper.log.debug('Save id: %s' % (saveId,))

        if page is None:
            # No pages at all: there is no scan id, so no lock to clear.
            return

        # Now that all pages are processed, clear the lock
        collection.update({'scan_id': page['scan_id']}, {
            '$set': {
                'processing_lock': False,
                'processing_lock_end': time()
            }
        },
                          multi=True)

    except Exception as e:
        helper.log.debug('Error processing pages:' + str(e))
        # `page` is None if the failure happened before any page was read.
        if page is not None:
            page['processing_error'] = True
            collection.save(page)
def processPages(pages, collection):
    """Process every page of a scan, save each one, then clear the scan lock.

    ABBYY parsing and image fetching each happen once, keyed on the
    ``scan_id`` of the first page. On the first page only, the function
    looks for the ``..._0000.jp2`` image on disk; when it is missing,
    ``pageShift`` becomes 1 and is forwarded to ``processPage`` (the files
    on disk then start counting at 1 rather than 0).

    If anything raises, the error is logged and the page that was being
    handled is saved with ``processing_error = True``; the lock is left in
    place in that case.

    Args:
        pages: Iterable of page documents (dict-like, each with a
            ``'scan_id'`` key).
        collection: Object exposing ``save`` and ``update``; presumably a
            MongoDB collection -- confirm against the caller.

    Returns:
        None.
    """
    abbyyParsed = None
    imagesDownloaded = None
    pageShift = 0
    shiftChecked = False
    # Last page seen. Kept as None for an empty `pages` so neither the
    # lock-clearing code nor the error handler hits an unbound name.
    page = None

    try:
        for page in pages:
            scanId = page['scan_id']

            if abbyyParsed is None:
                abbyyParsed = abbyy.parseABBYY(scanId)

            if imagesDownloaded is None:
                imagesDownloaded = helper.fetchAllImages(scanId)

            if imagesDownloaded is False:
                helper.log.debug('Images Downloaded False')
                # Handled by the except clause below, which flags the page.
                raise NameError('NoImages')

            # Determine if the page image exists. Our data starts counting at 0. Sometimes the pages start counting at 1.
            if not shiftChecked: # we don't want to do this every time we loop
                # Does the page 0 file exist?
                imgPath = '%s/%s/%s_jp2/%s_%s.jp2' % (helper.base_path, scanId, scanId, scanId, '0000')
                if not os.path.exists(imgPath):
                    # No, we need to shift pages when we reference files on disk
                    pageShift = 1
                # Record that the check ran, regardless of its outcome.
                shiftChecked = True

            processPage(page, abbyyParsed, pageShift)
            saveId = collection.save(page)
            helper.log.debug('Save id: %s' % (saveId,))

        if page is None:
            # Nothing was processed, so there is no scan whose lock to clear.
            return

        # Now that all pages are processed, clear the lock
        collection.update({'scan_id': page['scan_id']}, {'$set': {'processing_lock': False, 'processing_lock_end': time()}}, multi=True)

    except Exception as e:
        helper.log.debug('Error processing pages:' + str(e))
        # `page` is still None when the failure came before the first page.
        if page is not None:
            page['processing_error'] = True
            collection.save(page)