def init_pid_file(self):
    Logger.info('PID Init: Begin')
    config_dir = scribe_globals.CONFIG_DIR
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)
    if not os.access(config_dir, os.W_OK | os.X_OK):
        raise ScribeException('Config dir "{}" not writable'
                              .format(config_dir))
    # Check to see if another copy of the app is running
    # and if needed, remove stale pidfiles
    path = os.path.join(config_dir, 'scribe_pid')
    Logger.info('PID Init: Looking for pidfile at {}'.format(path))
    if os.path.exists(path):
        with open(path) as f:
            old_pid = f.read().strip()
        pid_dir = os.path.join('/proc', old_pid)
        if os.path.exists(pid_dir):
            Logger.error('There seems to be a pid file at {}. Try '
                         'removing it and relaunching '
                         'the application.'.format(pid_dir))
            raise ScribeException('Another copy of the Scribe application '
                                  'is running!')
        else:
            os.unlink(path)
    pid = os.getpid()
    with open(path, 'w') as f:
        f.write(str(pid))
    Logger.info('PID Init: End')

def get_cluster_status(self, book):
    Logger = self.logger
    status_msg = 'Getting cluster status for ' + book['identifier']
    Logger.info(status_msg)
    Clock.schedule_once(partial(self.set_status_callback, str(status_msg)))
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'
                  .format(id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception:
        Logger.exception('Get cluster status: Error retrieving metadata')
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if md is None or 'result' not in md:
            raise ScribeException('Could not query metadata')
        repub_state = md['result'].get('repub_state')
        status_msg = ('Repub state for {} is {}'
                      .format(book['identifier'], repub_state))
        Logger.info('Check QA status: ' + status_msg)
        # Clock.schedule_once(partial(self.set_prop_callback,
        #                             self.status_label,
        #                             status_msg))
        Clock.schedule_once(partial(self.set_status_callback,
                                    str(status_msg)))
        return int(repub_state)
    except Exception:
        return None

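# A rough sketch of the shape get_cluster_status() expects back from the
# metadata endpoint (field values below are illustrative, not taken from a
# real item):
#
#   GET https://archive.org/metadata/<identifier>/metadata
#   {"result": {"identifier": "<identifier>",
#               "repub_state": "4",
#               ...}}
#
# repub_state arrives as a string, which is why the function casts it with
# int() before returning, and returns None if the lookup or cast fails.
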
def _check_preimage_is_valid(book):
    zip_path = join(book['path'],
                    '{id}_preimage.zip'.format(id=book['identifier']))
    if not os.path.exists(zip_path):
        raise ScribeException('Could not find preimage.zip in book folder.')
    elif os.path.getsize(zip_path) <= 532:  # size of an empty zip
        raise ScribeException('preimage.zip is empty!')
    return zip_path

def create_preimage_zip(book):
    logger = book.logger
    if book['status'] >= UploadStatus.uploaded.value:
        return
    logger.info('Package book: Creating preimage.zip')
    # Clock.schedule_once(partial(self.set_status_callback,
    #     'Now creating book upload bundle for {}'
    #     .format(book.get('identifier', ''))))
    try:
        zip_path = join(book['path'],
                        '{id}_preimage.zip'.format(id=book['identifier']))
        compression = zipfile.ZIP_STORED
        allow_zip64 = True
        target = book.get_imagestack()
        if not target:
            raise ScribeException('Could not find jpegs to compress.')
        with zipfile.ZipFile(zip_path, 'w', compression,
                             allow_zip64) as preimage_zip:
            for jpeg in target:
                logger.debug('adding ' + jpeg + ' to ' + zip_path)
                arcname = ('{id}_preimage/{j}'
                           .format(id=book['identifier'],
                                   j=os.path.basename(jpeg)))
                preimage_zip.write(jpeg, arcname)
            scandata = join(book['path'], 'scandata.json')
            if os.path.exists(scandata):
                arcname = ('{id}_preimage/scandata.json'
                           .format(id=book['identifier']))
                preimage_zip.write(scandata, arcname)
        book.do_finish_preimage_zip()
    except Exception as e:
        book.error = e
        book.logger.error(traceback.format_exc())
        book.do_error_preimage_zip()
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e),
        }
        push_event('tts-book-packaging-exception', payload,
                   'book', book['identifier'])
        raise ScribeException('Could not create preimage.zip - {}'
                              .format(str(e)))

def _change_repub_state(book_item, target_repub_state):
    res_repub_state_change = book_item.modify_metadata(
        {'repub_state': target_repub_state})
    if (res_repub_state_change is None
            or res_repub_state_change.status_code != 200):
        raise ScribeException('Received erroneous response: {}'
                              .format(res_repub_state_change))
    if res_repub_state_change.text:
        response = json.loads(res_repub_state_change.text)
        if 'error' in response:
            raise ScribeException('Metadata API courteously '
                                  'tucked an error in a valid response.'
                                  '\nWhat seems to have gone wrong is {}'
                                  .format(response['error']))

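# A minimal usage sketch (the identifier below is an illustrative assumption;
# 43 is the target repub_state used by upload_book_foldouts() further down):
#
#   ia_session = get_ia_session()
#   book_item = ia_session.get_item('example-identifier-0001')
#   _change_repub_state(book_item, 43)
#
# modify_metadata() returns a requests.Response-like object, so a None result
# or a non-200 status code is treated as a hard failure here, and a 200
# response is still inspected for an embedded 'error' key.
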
def delete_unfinished_book(book):
    '''Delete a book without checking for remote state

    Called in worker thread
    '''
    try:
        book.logger.debug('Deleting unfinished book')
        payload = {
            'function': 'delete_unfinished_book',
            'local_id': book.uuid,
            'status': book.status,
        }
        shutil.rmtree(book.path)
        book.delete_callback(book)
        push_event('tts-book-deleted', payload, 'book', book['path'])
    except ScribeException:
        raise
    except OSError:
        return
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book!')

def _save(self):
    try:
        with open(self.config_file_path, 'w+') as f:
            yaml.safe_dump(self.configuration, f, default_flow_style=False)
        Logger.debug('Scribe3Configuration: Saved:\n{}'
                     .format(pformat(self.configuration)))
    except Exception:
        raise ScribeException('Could not save config to {}'
                              .format(self.config_file_path))

def _ensure_file_exists(self):
    if not os.path.exists(self.config_file_path):
        config_dir = scribe_globals.CONFIG_DIR
        if not os.path.exists(config_dir):
            os.makedirs(config_dir)
        if not os.access(config_dir, os.W_OK | os.X_OK):
            raise ScribeException('Config dir "{}" not writable'
                                  .format(config_dir))
        os.mknod(self.config_file_path)
    if os.stat(self.config_file_path).st_size == 0:
        self._initialize()

def _check_preconditions(book, book_item, Logger):
    if not os.path.exists(os.path.join(book['path'], 'downloaded')):
        raise ScribeException('_check_preconditions: Cannot use this upload '
                              'function on a book that was not downloaded')
    if book_item.metadata['repub_state'] not in ['32', '42']:
        raise ScribeException('_check_preconditions: Book is not in correct '
                              'remote repub_state: aborting upload.')
    outstanding_catalog_tasks, outstanding_catalog_tasks_list = \
        get_pending_catalog_tasks(book_item.identifier)
    if outstanding_catalog_tasks > 0:
        msg = ('Refusing to upload: item {} has {} outstanding (running or '
               'pending) catalog book_tasks\n{}'.format(
                   book_item.identifier,
                   outstanding_catalog_tasks,
                   ', '.join(['{} -> {} ({})'.format(x['task_id'],
                                                     x['cmd'],
                                                     x['status'])
                              for x in outstanding_catalog_tasks_list])))
        Logger.error(msg)
        raise ScribeException(msg)

def _check_repub_state_is_correct(item):
    if not item.exists:
        return
    if item.metadata['mediatype'] == 'audio':
        return
    if item.metadata['repub_state'] not in ['-1', '-2']:
        raise ScribeException('Book is not in correct remote repub_state '
                              '(is {}): aborting upload.'
                              .format(item.metadata['repub_state']))

def id_available(identifier):
    '''Query the upload_api to see if an identifier is available.'''
    try:
        url = 'https://archive.org/upload/app/upload_api.php'
        params = {'name': 'identifierAvailable', 'identifier': identifier}
        r = requests.get(url, params=params)
        ret = r.json()
        success = ret.get('success', False)
    except Exception:
        print(traceback.format_exc())
        raise ScribeException(
            'Could not query upload_api for identifierAvailable')
    return bool(success)

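# A minimal sketch of how id_available() might be used before minting a new
# identifier (the identifier string below is an illustrative assumption):
#
#   candidate = 'examplebook0001'
#   if id_available(candidate):
#       ...  # safe to create the item under this identifier
#   else:
#       ...  # identifier is taken; derive a new candidate and retry
#
# Note that a network or JSON failure raises ScribeException rather than
# silently returning False.
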
def archive_logout(self):
    try:
        config.delete('s3')
        config.delete('email')
        config.delete('cookie')
        popup_success = Popup(
            title='Logged out',
            content=Label(text='You have logged out.'
                               '\n\nRestarting the application...'),
            auto_dismiss=True,
            size_hint=(None, None),
            size=(400, 200))
        popup_success.open()
        restart_app()
    except Exception:
        # if the keys didn't exist in the first place, a field was missing
        # or whatever, cry.
        raise ScribeException('There was an error logging out.')

def new_get_identifier(book):
    book.logger.debug('Getting identifier from path')
    id_file = os.path.join(book.path, 'identifier.txt')
    if book.has_identifier() and not os.path.exists(id_file):
        book.logger.debug('Using previously generated identifier {}'
                          .format(book.identifier))
        return
    if os.path.exists(id_file):
        try:
            with open(id_file) as f:
                identifier = f.read().strip()
            book.logger.debug('Using preset identifier ' + identifier)
            if hasattr(book, 'identifier'):
                if identifier != book['identifier']:
                    book.logger.warn(
                        'RESETTING IDENTIFIER from {old} to {new}'
                        .format(old=book.identifier, new=identifier))
        except Exception:
            raise ScribeException('File identifier.txt exists but cannot '
                                  'be read')
    else:
        return None
    return identifier

def upload_book(book):
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])

    _check_preconditons(book)
    # book.do_book_upload_begin()
    _set_upload_lock_file(book, Logger)

    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])
        zip_path = _check_preimage_is_valid(book)
        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))

        if not book.force_upload:
            _check_remote_preconditons(item, Logger)

        encoded_md = _prepare_metadata(book, item, Logger)
        metasource_file_location, metasource_file_upload_name = \
            _generate_metasource(book, Logger)

        responses = []
        book_upload_phase_start = time.time()
        needs_metadata_pushed = item.exists
        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))

        book_preimage_upload_start, \
            book_preimage_upload_end, \
            sizes_dict = _upload_book_files(
                zip_path, book, encoded_md, item, responses,
                metasource_file_location, metasource_file_upload_name,
                Logger)

        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)
        book_upload_phase_end = time.time()

        _upload_logs(book=book, item=item, responses=responses)

        _verify_responses(responses, Logger)
        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))

        book.do_upload_book_end()

        _push_metrics(book, scandata, encoded_md, sizes_dict,
                      doing_foldouts, responses, responses_dict,
                      book_upload_phase_start, book_upload_phase_end,
                      book_upload_total_start,
                      book_preimage_upload_start, book_preimage_upload_end)

        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message='{} has been successfully uploaded.'.format(
                    book['identifier']),
                book=book)

        Logger.debug('Finished upload for ' + book['identifier'])
        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api

    except requests.ConnectionError as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e),
        }
        push_event('tts-book-failed-upload', payload,
                   'book', book['identifier'])
        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e),
        }
        push_event('tts-book-upload-exception', payload,
                   'book', book['identifier'])
        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        book.force_upload = False
        Logger.info('Removing upload lock file at {}'.format(
            join(book['path'], 'upload_lock')))
        os.remove(join(book['path'], 'upload_lock'))

def item_ready_for_upload(book):
    '''Book items might have already been preloaded with metadata in the IA
    scan process. However, prevent uploading to ia items which already have
    images uploaded.

    Called in worker thread.
    '''
    try:
        session = get_ia_session()
        item = session.get_item(book.identifier)
        if not item.exists:
            if book:
                preloaded_path = os.path.join(book.path, 'preloaded')
                if os.path.exists(preloaded_path):
                    # This item was created in offline mode, but the
                    # identifier doesn't exist
                    book.logger.error(
                        'Item {0} is tagged as preloaded, but '
                        'the identifier does not exist. Aborting '
                        'upload and reverting to scribing '
                        'status.'.format(book.identifier))
                    return False
                else:
                    book.logger.info(
                        'Item does not exist and user wants to '
                        'upload to item {0}. Ok\'ing that'.format(
                            book.identifier))
            # no existing item, so safe to use this identifier
            return True

        allowed_formats = {
            'Metadata', 'MARC', 'MARC Source', 'MARC Binary',
            'Dublin Core', 'Archive BitTorrent', 'Web ARChive GZ',
            'Web ARChive', 'Log', 'OCLC xISBN JSON',
            'Internet Archive ARC', 'Internet Archive ARC GZ',
            'CDX Index', 'Item CDX Index', 'Item CDX Meta-Index',
            'WARC CDX Index', 'Metadata Log',
        }
        ALLOWED_ITEM_FILE_NAMES = [
            '{}_{}'.format(book.identifier, x)
            for x in ALLOWED_VARIABLE_FILE_NAMES
        ]
        for item_file_metadata in item.files:
            if item_file_metadata['format'] not in allowed_formats:
                # Ignore new style in-item thumb files
                if item_file_metadata['name'] in ALLOWED_FILE_NAMES:
                    book.logger.info(
                        'File {} ({}) is present in '
                        'remote item and allowed: continuing...'.format(
                            item_file_metadata['name'],
                            item_file_metadata['format']))
                    continue
                elif item_file_metadata['name'] in ALLOWED_ITEM_FILE_NAMES:
                    continue
                # files have already been uploaded to this item
                book.logger.error(
                    'File {} in item {} is blocking upload.'.format(
                        item_file_metadata, item.identifier))
                return False
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not check status of IA item {}'.format(
            book.identifier))
    return True

def _upload_book_files(zip_path, book, encoded_md, item, responses,
                       metasource_file_location,
                       metasource_file_upload_name, Logger):
    sizes_dict = {
        'preimage': os.path.getsize(zip_path),
    }
    book_preimage_upload_start = time.time()
    m = 'Uploading preimage.zip to {}'.format(book['identifier'])
    Logger.debug(m)
    # Clock.schedule_once(partial(self.set_status_callback, m))
    book.update_message('Upload | Images file')
    upload_response = item.upload(zip_path,
                                  metadata=encoded_md,
                                  retries=10,
                                  retries_sleep=60,
                                  queue_derive=False,
                                  verbose=True,
                                  verify=True,
                                  headers={'x-archive-keep-old-version': '1'})
    if (len(upload_response) == 0
            or getattr(upload_response[0], 'url', None) is None):
        if not book.force_upload:
            raise ScribeException(
                'No response was returned by IA upon upload of preimage.zip. '
                'This could mean the file has already been uploaded, the item '
                'is not available, or your cookie has expired. '
                'Refer to the documentation for further guidance.')
    responses.append(upload_response)
    url_to_status_code = \
        {r.request.url: r.status_code for r in upload_response}
    book.logger.debug('Response from upload: {} | {}'.format(
        upload_response, url_to_status_code))
    book_preimage_upload_end = time.time()

    book.update_message('Upload | Metasource')
    if metasource_file_location is not None:
        book.logger.debug('Uploading metasource file {0} as {1}'.format(
            metasource_file_location, metasource_file_upload_name))
        response = item.upload(
            {metasource_file_upload_name: metasource_file_location},
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    book.update_message('Upload | MARCs')
    if os.path.exists(os.path.join(book['path'], 'marc.bin')):
        book.logger.debug('Uploading MARC Binary file {}'.format(
            join(book['path'], book['identifier'] + '_marc.bin')))
        upload_name_mapping = \
            {book['identifier'] + '_meta.mrc': join(book['path'], 'marc.bin')}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    if os.path.exists(os.path.join(book['path'], 'marc.xml')):
        book.logger.debug('Uploading MARCXML file {} to {}'.format(
            join(book['path'], 'marc.xml'),
            book['identifier'] + '_marc.xml'))
        upload_name_mapping = \
            {book['identifier'] + '_marc.xml': join(book['path'], 'marc.xml')}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    send_to_station_file = os.path.join(book['path'], 'send_to_station')
    if os.path.exists(send_to_station_file):
        target_scanner = None
        with open(send_to_station_file, 'r') as f:
            target_scanner = f.read()
        assert target_scanner is not None
        book.update_message(
            'Upload | Sending foldouts to {}'.format(target_scanner))
        Logger.info(
            'Book uploader: found instructions to send {} to {}'.format(
                book['identifier'], target_scanner))
        book.logger.debug('Uploading send-to-scribe.txt file {} to {}'.format(
            send_to_station_file, book['identifier']))
        upload_name_mapping = \
            {book['identifier'] + '_sent_to.txt': send_to_station_file}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    return book_preimage_upload_start, book_preimage_upload_end, sizes_dict

def delete_finished_book(book):
    '''Called from worker thread.'''
    # if book['status'] < UploadStatus.done.value:
    #     return
    book.logger.debug('Checking repub_state for {}'.format(book))
    repub_state = None
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'.format(
            id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if md is None or 'result' not in md:
            print('No repub state or MDAPI unavailable. '
                  'Continuing with deletion.')
        else:
            repub_state = md['result'].get('repub_state')
            if repub_state is None:
                book.logger.warning('Repub state not found for {}'.format(
                    book['identifier']))
                return
        if repub_state:
            if int(repub_state) in (RepubState.done.value,
                                    RepubState.uploaded.value,
                                    RepubState.post_autocropped.value):
                if os.path.exists(book.path):
                    # User may have already deleted local copy of this book
                    book.logger.info('Deleting {}'.format(book))
                    payload = {
                        'function': 'delete_finished_book',
                        'local_id': book.uuid,
                        'status': book.status,
                    }
                    push_event('tts-book-deleted', payload,
                               'book', book['identifier'])
                    shutil.rmtree(book['path'])
                    book.delete_callback(book)
                else:
                    book.logger.error('UploadWidget: Book not found '
                                      '(could be deleted): {}'.format(
                                          book['path']))
            else:
                book.logger.info('Not deleting {} | repub_state={}'.format(
                    book['path'], repub_state))
        else:
            if os.path.exists(book.path):
                # User may have already deleted local copy of this book
                book.logger.info('Deleting {}'.format(book))
                payload = {
                    'function': 'delete_finished_book',
                    'local_id': book.uuid,
                    'status': book.status,
                }
                push_event('tts-book-deleted', payload,
                           'book', book['identifier'])
                shutil.rmtree(book['path'])
                book.delete_callback(book)
    except ScribeException:
        raise
    except Exception as e:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book! {}'.format(e))

def upload_book_foldouts(book):
    try:
        Logger = book.logger
        Logger.info('upload_book_foldouts: Uploading foldouts for book '
                    '{}'.format(book))
        ia_session = get_ia_session()
        book_item = ia_session.get_item(book['identifier'])

        _check_preconditions(book, book_item, Logger)

        book_folder = 'foldouts'
        cdic, tdic, rdic, rtdic = _create_scandata(book, book_folder,
                                                   True, Logger)
        responses = []

        # Upload the pictures
        Logger.debug('upload_book_foldouts: Uploading pics')
        book.update_message('Foldouts upload | Images')
        if cdic != {}:
            res = book_item.upload(cdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)
        if tdic != {}:
            res = book_item.upload(tdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)
        try:
            if rdic != {}:
                res = book_item.upload(rdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)
            if rtdic != {}:
                res = book_item.upload(rtdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)
        except requests.exceptions.ConnectionError as e:
            Logger.error(
                'upload_book_foldouts: Connection exception {} '
                'has occurred at rdic upload time; aborting!'.format(str(e)))
            raise e
        except Exception as e:
            Logger.error('upload_book_foldouts: Error {} has occurred '
                         'at rdic upload time'.format(e))
            raise e

        Logger.debug('upload_book_foldouts: Done. Uploading scandata...')

        # Upload the scandata
        target_scandata = 'scandata.json'
        book.update_message('Foldouts upload | Scandata')
        scandata = join(book['path'], 'scandata_rerepublished.json')
        upload_res = book_item.upload({target_scandata: scandata},
                                      retries=10,
                                      retries_sleep=60,
                                      queue_derive=False,
                                      verify=True)

        if os.path.exists(os.path.join(book['path'], 'scanning.log')):
            book.update_message('Foldouts upload | Log')
            book.logger.debug('Uploading Scanning log file')
            upload_name_mapping = \
                {'logs/' + book['identifier'] +
                 '_scanning_{:%Y-%m-%d%H:%M:%S}.log'.format(datetime.now()):
                 join(book['path'], 'scanning.log')}
            response = book_item.upload(upload_name_mapping,
                                        retries=10,
                                        retries_sleep=60,
                                        queue_derive=False,
                                        verbose=True,
                                        verify=True)
            responses.append(response)
            url_to_status_code = \
                {r.request.url: r.status_code for r in response}
            book.logger.debug('Response from upload: {} | {}'
                              .format(response, url_to_status_code))

        responses.append(upload_res)
        # corrections_uploaded
        # flatten responses list:
        flat_responses = [y for x in responses for y in x]
        for response in flat_responses:
            Logger.info('{} returned {}'.format(response.url,
                                                response.status_code))
            if response.status_code != 200:
                raise Exception(
                    'upload_book_foldouts: Response code {} {} - {} from '
                    'cluster. URL was: {} | content: {} '
                    'This is an error. Upload will be attempted again.'
                    .format(response.status_code, response.reason,
                            getattr(response, 'text', ''),
                            response.url, response.content))

        Logger.debug('Done. Changing repub state...')
        _change_repub_state(book_item, 43)
        _remove_book_from_btserver_item(book, Logger)
        book.do_upload_foldouts_done()

        payload = {
            'repub_state': 43,
            'responses': flat_responses,
        }
        push_event('tts-book-corrections-sent', payload,
                   'book', book['identifier'])
        Logger.debug('All done.')
        return
    except requests.ConnectionError as e:
        raise ScribeException('Upload Failed. Please check network and '
                              'S3 Keys (Error was: {})'.format(e))
    except Exception as e:
        book.do_upload_foldouts_fail()
        book.raise_exception(e)

def _create_scandata(book, book_folder, foldouts, Logger):
    '''This subprogram uploads a corrected book back to republisher. It:

    - Verifies that the book was downloaded.
    - Gets ahold of the tts identifier to later remove the book from the
      item's "books" list.
    - Constructs and maintains four dictionaries: new pages (cdic), new
      pages thumbs (tdic), reshot pages (rdic), reshot pages thumbs (rtdic)
      that will later become the scandata.
      -- Looks for new pages (spreads insertions and/or appends) and their
         thumbs
      -- Adds republisher tags (that's what post-processing would do)
      -- Looks for replacements (in bookpath/reshooting) if present
      -- Saves a scandata_rerepublished.json
    - Uploads the pictures and scandatas
    - Updates tts item, repub state and metrics
    '''
    scandata = json.loads(ScanData(book['path']).dump())
    cdic = {}
    tdic = {}
    rdic = {}
    rtdic = {}

    # Here we add the new pages
    # cdic is the corrections dictionary, and it contains entries in the
    # form:
    #
    # {item path : local path} - for example:
    # {'corrections/0044.jpg': '~/scribe_books/1234/0022.jpg'}
    try:
        cdic = {book_folder + '/' + k: os.path.join(book['path'], k)
                for k in next(os.walk(book['path']))[2]
                if re.match(r'\d{4}\.jpg$', os.path.basename(k))}
        # And the thumbs from the new pages
        # REMOVE THUMB FROM OS WALK PATH
        tdic = {book_folder + '/thumbnails/' + k:
                os.path.join(book['path'], 'thumbnails', k)
                for k in next(os.walk(join(book['path'])))[2]
                if re.match(r'\d{4}\.jpg$', os.path.basename(k))}
    except Exception:
        Logger.error('_create_scandata: No corrections found.')

    # Ensure the scandata has the appropriate tags for re-republishing

    # NEW PAGES DICT
    Logger.debug('_create_scandata: Processing new pages...')
    for k in cdic:
        page_num = str(int(k.split('.jpg')[0].split('/')[1]))
        Logger.debug('_create_scandata: Processing page {}'.format(page_num))
        try:
            page_data_exists = scandata['pageData'][page_num] is not None
            Logger.debug('_create_scandata: Page data for page {} exists in '
                         'scandata'.format(page_num))
        except Exception as e:
            raise ScribeException(e)
        # Rotate images
        im = Image.open(cdic[k])
        # im = im.rotate(
        #     int(scandata['pageData'][page_num]['rotateDegree'])
        # )
        width, height = im.size
        # scandata['pageData'][page_num]['rotateDegree'] = 0
        if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]:
            scandata['pageData'][page_num]['origWidth'] = str(width)
            scandata['pageData'][page_num]['origHeight'] = str(height)
        elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]:
            scandata['pageData'][page_num]['origWidth'] = str(height)
            scandata['pageData'][page_num]['origHeight'] = str(width)
        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - PAGE INSERT - '
                     'please report this \n\n')
        Logger.debug(
            'rotatedegree={2}, origWidth={0}, height={1}'
            .format(scandata['pageData'][page_num]['origWidth'],
                    scandata['pageData'][page_num]['origHeight'],
                    scandata['pageData'][page_num]['rotateDegree']))
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')
        scandata['pageData'][page_num]['origFileName'] = k.split('/')[1]
        scandata['pageData'][page_num]['sourceFileName'] = k
        scandata['pageData'][page_num]['proxyFullFileName'] = k
        if not foldouts:
            scandata['pageData'][page_num]['correctionType'] = 'INSERT'
        scandata['pageData'][page_num]['TTSflag'] = 0
        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - please report '
                     'this \n\n')
        Logger.debug('\n' + str(scandata['pageData'][page_num]))
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')

    # THUMBS FOR NEW PAGES
    for k in tdic:
        page_num = str(int(k.split('.jpg')[0].split('/')[2]))
        scandata['pageData'][page_num]['proxyFileName'] = k

    Logger.debug('_create_scandata: Processed {} new images.'
                 .format(len(cdic)))

    try:
        # here we add the reshot images
        rdic = {
            book_folder + '/' + k: join(book['path'], 'reshooting', k)
            for k in next(os.walk(join(book['path'], 'reshooting')))[2]
            if re.match(r'\d{4}\.jpg$', os.path.basename(k))
        }
        # RESHOT IMAGES DICT
        for k in rdic:
            page_num = str(int(k.split('.jpg')[0].split('/')[1]))
            # rotate images
            im = Image.open(rdic[k])
            # im = im.rotate(
            #     int(scandata['pageData'][page_num]['rotateDegree'])
            # )
            width, height = im.size
            # im.save(rdic[k])
            # scandata['pageData'][page_num]['rotateDegree'] = 0
            if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]:
                scandata['pageData'][page_num]['origWidth'] = str(width)
                scandata['pageData'][page_num]['origHeight'] = str(height)
            elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]:
                scandata['pageData'][page_num]['origWidth'] = str(height)
                scandata['pageData'][page_num]['origHeight'] = str(width)
            Logger.debug('---->>> CORRECTIONS DEBUG - PAGE RESHOOT')
            Logger.debug(
                'rotatedegree is {2}, origWidth = {0}, height= {1}'
                .format(scandata['pageData'][page_num]['origWidth'],
                        scandata['pageData'][page_num]['origHeight'],
                        scandata['pageData'][page_num]['rotateDegree']))
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - - -')
            scandata['pageData'][page_num]['origFileName'] = k.split('/')[1]
            scandata['pageData'][page_num]['sourceFileName'] = k
            scandata['pageData'][page_num]['correctionType'] = 'REPLACE'
            scandata['pageData'][page_num]['proxyFullFileName'] = k
            scandata['pageData'][page_num]['TTSflag'] = 0
            Logger.debug('---->>> CORRECTIONS DEBUG - please report this')
            Logger.debug('\n' + str(scandata['pageData'][page_num]))
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -')

        # here we add the thumbs from the reshooting
        rtdic = {
            book_folder + '/thumbnails/' + k:
                join(book['path'], 'reshooting', 'thumbnails', k)
            for k in next(os.walk(join(book['path'], 'reshooting',
                                       'thumbnails')))[2]
            if re.match(r'\d{4}\.jpg$', os.path.basename(k))
        }
        # THUMBS FOR RESHOT IMAGES
        for k in rtdic:
            page_num = str(int(k.split('.jpg')[0].split('/')[2]))
            scandata['pageData'][page_num]['proxyFileName'] = k
        Logger.debug('_create_scandata: Processed {} reshot images.'
                     .format(len(rdic)))
    except Exception:
        Logger.exception('_create_scandata: No reshot pages found')

    # Super Solenoid Scandata from disk (page info)
    sss = {int(k): v for k, v in list(scandata['pageData'].items())}

    # Now we want our own piece of memory for this one
    new_scandata = copy.deepcopy(scandata)
    new_scandata['pageData'] = {}
    new_scandata['pageData']['page'] = []

    # Rewrite pages section
    Logger.debug('_create_scandata: Adding all computed pages to new '
                 'scandata...')
    for page in sorted(sss):
        Logger.debug('_create_scandata: {}'.format(page))
        sss[page]['leafNum'] = page
        try:
            pnum = sss[page]['pageNumber']['num']
            sss[page]['pageNumber'] = pnum
        except Exception:
            pass
        new_scandata['pageData']['page'].append(sss[page])

    # Rewrite assertions to be compatible with republisher
    try:
        Logger.debug('\nNow rewriting page assertions for repub '
                     'compatibility if present')
        temp_pageNumData = copy.deepcopy(scandata['bookData']['pageNumData'])
        temp_pageNumData['assertion'] = []
        for entry in scandata['bookData']['pageNumData']:
            if entry.isdigit():
                del temp_pageNumData[entry]
        for assertion in scandata['bookData']['pageNumData'].items():
            temp_assertion = {'leafNum': str(assertion[0]),
                              'pageNum': str(assertion[1])}
            temp_pageNumData['assertion'].append(temp_assertion)
        Logger.debug('_create_scandata: OK done. New pageNumData block: {}'
                     .format(temp_pageNumData))
        new_scandata['bookData']['pageNumData'] = temp_pageNumData
    except Exception as e:
        Logger.exception('_create_scandata: No pageNumData block found or '
                         'error processing it: {}'.format(e))

    # Write it all to file
    with open(join(book['path'], 'scandata_rerepublished.json'),
              'w+') as outfile:
        json.dump(new_scandata, outfile)
    Logger.debug('_create_scandata: Done constructing scandata.')
    return cdic, tdic, rdic, rtdic

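# A sketch of the pageNumData rewrite above, with illustrative values only
# (assuming the block maps leaf numbers to asserted page numbers):
#
#   before: {'1': '3', '2': '4'}
#   after:  {'assertion': [{'leafNum': '1', 'pageNum': '3'},
#                          {'leafNum': '2', 'pageNum': '4'}]}
#
# Digit keys are dropped from the copied block and re-expressed as a
# republisher-style 'assertion' list of {leafNum, pageNum} pairs before the
# rebuilt scandata is written to scandata_rerepublished.json.
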