def processPages(pages, collection):
    """Process and save every page, then remove the scan's downloaded images.

    The ABBYY data is parsed once, using the ``scan_id`` of the first page,
    and that same parsed result is handed to ``processPage`` for every page.
    NOTE(review): this presumes all pages belong to one scan -- confirm
    against the caller.

    Args:
        pages: iterable of page dicts; each must have a ``'scan_id'`` key.
        collection: object whose ``save(page)`` persists a page and returns
            an id (logged at debug level).

    Returns:
        None.
    """
    abbyyParsed = None
    scanId = None
    for page in pages:
        # Parse lazily so nothing is parsed when there are no pages.
        if abbyyParsed is None:
            abbyyParsed = abbyy.parseABBYY(page['scan_id'])
        processPage(page, abbyyParsed)
        saveId = collection.save(page)
        scanId = page['scan_id']
        # One-element tuple so a tuple-valued id cannot break the format.
        helper.log.debug('Save id: %s' % (saveId,))
    # Only clean up when at least one page was handled; with an empty
    # iterable there is no scan id and nothing to remove.
    if scanId is not None:
        helper.removeIAImages(scanId)
def processPages(pages, collection):
    """Process and save every page of a scan, then clear its processing lock.

    For the first page the ABBYY data is parsed and the scan's images are
    fetched; both results are reused for the remaining pages.
    NOTE(review): this presumes all pages share one ``scan_id`` -- confirm
    against the caller.

    Page numbering in the data starts at 0, but the image files on disk
    sometimes start at 1. This is detected once by checking whether the
    ``..._0000.jp2`` file exists; if it does not, a shift of 1 is passed to
    ``processPage``.

    Any exception is logged and the page being handled at that moment is
    saved with ``processing_error = True``. In that case the lock is NOT
    cleared, because the lock-clearing update only runs after a fully
    successful loop.

    Args:
        pages: iterable of page dicts; each must have a ``'scan_id'`` key.
        collection: object providing ``save(page)`` and
            ``update(spec, document, multi=True)``.

    Returns:
        None.
    """
    abbyyParsed = None
    imagesDownloaded = None
    pageShift = 0
    shiftChecked = False
    # Bound up front so the code after the loop and the error handler can
    # tell "no page was ever seen" apart from a real page.
    page = None
    try:
        for page in pages:
            scanId = page['scan_id']
            if abbyyParsed is None:
                abbyyParsed = abbyy.parseABBYY(scanId)
            if imagesDownloaded is None:
                imagesDownloaded = helper.fetchAllImages(scanId)
            if imagesDownloaded is False:
                helper.log.debug('Images Downloaded False')
                # Never leaves this function: handled by the except below.
                raise RuntimeError('NoImages')

            # Determine whether the on-disk images start at 0 or at 1.
            # Checked only once; the answer is the same for every page.
            if not shiftChecked:
                imgPath = '%s/%s/%s_jp2/%s_%s.jp2' % (
                    helper.base_path, scanId, scanId, scanId, '0000')
                if not os.path.exists(imgPath):
                    # No page-0 file: shift page numbers when referencing
                    # files on disk.
                    pageShift = 1
                shiftChecked = True

            processPage(page, abbyyParsed, pageShift)
            saveId = collection.save(page)
            helper.log.debug('Save id: %s' % (saveId,))

        # Now that all pages are processed, clear the lock. Skipped when
        # there were no pages, since there is no scan id to match on.
        if page is not None:
            collection.update(
                {'scan_id': page['scan_id']},
                {'$set': {'processing_lock': False,
                          'processing_lock_end': time()}},
                multi=True)
    except Exception as e:
        helper.log.debug('Error processing pages:' + str(e))
        # The failure may have happened before any page was bound (e.g.
        # while starting iteration); then there is nothing to flag.
        if page is not None:
            page['processing_error'] = True
            collection.save(page)
def processPages(pages, collection):
    """Process and save every page of a scan, then clear its processing lock.

    For the first page the ABBYY data is parsed and the scan's images are
    fetched; both results are reused for the remaining pages.
    NOTE(review): this presumes all pages share one ``scan_id`` -- confirm
    against the caller.

    Page numbering in the data starts at 0, but the image files on disk
    sometimes start at 1. This is detected once by checking whether the
    ``..._0000.jp2`` file exists; if it does not, a shift of 1 is passed to
    ``processPage``.

    Any exception is logged and the page being handled at that moment is
    saved with ``processing_error = True``. In that case the lock is NOT
    cleared, because the lock-clearing update only runs after a fully
    successful loop.

    Args:
        pages: iterable of page dicts; each must have a ``'scan_id'`` key.
        collection: object providing ``save(page)`` and
            ``update(spec, document, multi=True)``.

    Returns:
        None.
    """
    abbyyParsed = None
    imagesDownloaded = None
    pageShift = 0
    shiftChecked = False
    # Bound up front so the code after the loop and the error handler can
    # tell "no page was ever seen" apart from a real page.
    page = None
    try:
        for page in pages:
            scanId = page['scan_id']
            if abbyyParsed is None:
                abbyyParsed = abbyy.parseABBYY(scanId)
            if imagesDownloaded is None:
                imagesDownloaded = helper.fetchAllImages(scanId)
            if imagesDownloaded is False:
                helper.log.debug('Images Downloaded False')
                # Never leaves this function: handled by the except below.
                raise RuntimeError('NoImages')

            # Determine whether the on-disk images start at 0 or at 1.
            # Checked only once; the answer is the same for every page.
            if not shiftChecked:
                imgPath = '%s/%s/%s_jp2/%s_%s.jp2' % (
                    helper.base_path, scanId, scanId, scanId, '0000')
                if not os.path.exists(imgPath):
                    # No page-0 file: shift page numbers when referencing
                    # files on disk.
                    pageShift = 1
                shiftChecked = True

            processPage(page, abbyyParsed, pageShift)
            saveId = collection.save(page)
            helper.log.debug('Save id: %s' % (saveId,))

        # Now that all pages are processed, clear the lock. Skipped when
        # there were no pages, since there is no scan id to match on.
        if page is not None:
            collection.update(
                {'scan_id': page['scan_id']},
                {'$set': {'processing_lock': False,
                          'processing_lock_end': time()}},
                multi=True)
    except Exception as e:
        helper.log.debug('Error processing pages:' + str(e))
        # The failure may have happened before any page was bound (e.g.
        # while starting iteration); then there is nothing to flag.
        if page is not None:
            page['processing_error'] = True
            collection.save(page)