def delete_unfinished_book(book):
    """Delete a book's local folder without checking remote state.

    Called in a worker thread. Pushes a tts-book-deleted event after the
    folder is removed.

    Raises:
        ScribeException: if deletion fails for a non-OS reason.
    """
    try:
        book.logger.debug('Deleting unfinished book')
        payload = {
            'function': 'delete_unfinished_book',
            'local_id': book.uuid,
            'status': book.status
        }
        shutil.rmtree(book.path)
        book.delete_callback(book)
        push_event('tts-book-deleted', payload, 'book', book['path'])
    except ScribeException:
        # Bare raise preserves the original traceback (was `raise e`).
        raise
    except OSError:
        # Best-effort: the folder may already be gone (e.g. removed by the
        # user); treat that as success.
        return
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book!')
def bt_server_register():
    """Register this scribe station with the TTS service.

    Returns an error message string, or '' on success.
    """
    message = ''
    #config = Scribe3Configuration()
    Logger.info('bt_server_register: Registering scribe')
    try:
        # Keep only metadata entries with truthy values.
        metadata = {k: v for k, v
                    in get_metadata(scribe_globals.CONFIG_DIR).items() if v}
        metadata['books'] = '[]'
        tts = TTSServices(metadata)
        success, tts_id = tts.register_tts(tts_id=metadata['scanner'],
                                           metadata=metadata)
        if success:
            #config.set('identifier', str(tts_id))
            push_event('tts-register', metadata, 'tts', tts_id)
            Logger.info(
                'bt_server_register: Registered scribe: {}'.format(tts_id))
        else:
            message = 'bt_server_register: Could not register this '\
                      'scribe with Archive.org or scribe already '\
                      'registered'
            Logger.info(message)
    except Exception:
        message = 'bt_server_register: Failed to register'
        Logger.exception(message)
    return message
def _update_set_book_repub_state(self):
    """Set the item's repub_state to -2 on the cluster, retrying on
    transient errors.

    On total failure, logs the last error and pushes a
    tts-task-exception event to iabdash.
    """
    state = -2
    errors = []
    self.dispatch_progress('Setting republisher state to {}'.format(state))
    # `_` was used as a referenced loop variable; give it a real name.
    for attempt in range(scribe_globals.TASK_DEFAULT_MAX_RETRIES):
        try:
            self.dispatch_progress(
                '[{}/{}] Setting republisher state to {}'.format(
                    attempt + 1, scribe_globals.TASK_DEFAULT_MAX_RETRIES,
                    state))
            ia_item = get_ia_session().get_item(self.identifier)
            resp = ia_item.modify_metadata({'repub_state': state})
            self.logger.info('Response from cluster: {} | '
                             'Headers {}'.format(resp.text, resp.headers))
        except Exception as e:
            self.logger.error(
                '[{}/{}]Transient error {} while setting repub_state to {}.'
                .format(attempt + 1, scribe_globals.TASK_DEFAULT_MAX_RETRIES,
                        e, state))
            errors.append(e)
            continue
        else:
            break
    else:
        # for/else: reached only when every attempt raised, so `errors`
        # is guaranteed non-empty here.
        self.logger.error(
            'Could not set repub_state to {} because {}'.format(
                state, errors[-1]))
        # NOTE(review): task_type/selector look copied from the MARC/DWWI
        # task -- confirm this payload is intended for this task.
        payload = {
            'task_type': 'MARCMetadataViaDWWITask',
            'selector': self.isbn,
            'type': 'isbn',
            'errors': json.dumps([str(x) for x in errors]),
            'attempts': len(errors),
        }
        push_event('tts-task-exception', payload)
def on_book_rejected(self, task):
    """Push a tts-book-dwwi-reject event for this book to iabdash.

    Reads the reject reason/error/comment from the rejecting task's slip
    metadata. Failures are logged and swallowed.
    """
    try:
        slip_metadata = task._slip_metadata
        # Stringify every metadata value so the payload serializes.
        # (The previous utf-8 encode/decode round-trip was a no-op in
        # Python 3 and has been removed.)
        cleaned_md = {
            k: '{}'.format(v)
            for k, v in self.book_obj.metadata.items()
        }
        payload = {
            'reason': slip_metadata['reason'],
            'error': slip_metadata.get('error', ''),
            'comment': slip_metadata.get('comment', ''),
            'book_metadata': cleaned_md,
        }
        push_event('tts-book-dwwi-reject', payload)
        self.logger.info('CaptureScreen: Pushed event tts-book-reject to '
                         'iabdash with payload: {}'.format(payload))
    except Exception as e:
        self.logger.exception(
            'CaptureScreen: Failed to push tts-book-reject '
            'event because: {}'.format(e))
def create_preimage_zip(book):
    """Bundle the book's JPEG image stack (plus scandata.json if present)
    into <identifier>_preimage.zip inside the book folder.

    No-op for books already at/after the 'uploaded' status.

    Raises:
        ScribeException: on any failure, after pushing a
            tts-book-packaging-exception event.
    """
    logger = book.logger
    if book['status'] >= UploadStatus.uploaded.value:
        return
    logger.info('Package book: Creating preimage.zip')
    try:
        zip_path = join(book['path'],
                        '{id}_preimage.zip'.format(id=book['identifier']))
        # JPEGs are already compressed, so store rather than deflate.
        compression = zipfile.ZIP_STORED
        allow_zip64 = True
        target = book.get_imagestack()
        # Was `target == None or len(target) == 0`; truthiness covers both.
        if not target:
            raise ScribeException('Could not find jpegs to compress.')
        with zipfile.ZipFile(zip_path, 'w', compression,
                             allow_zip64) as preimage_zip:
            for jpeg in target:
                logger.debug('adding ' + jpeg + ' to ' + zip_path)
                arcname = ('{id}_preimage/{j}'.format(
                    id=book['identifier'], j=os.path.basename(jpeg)))
                preimage_zip.write(jpeg, arcname)
            scandata = join(book['path'], 'scandata.json')
            if os.path.exists(scandata):
                arcname = ('{id}_preimage/scandata.json'.format(
                    id=book['identifier']))
                preimage_zip.write(scandata, arcname)
        book.do_finish_preimage_zip()
    except Exception as e:
        book.error = e
        book.logger.error(traceback.format_exc())
        book.do_error_preimage_zip()
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }
        push_event('tts-book-packaging-exception', payload, 'book',
                   book['identifier'])
        raise ScribeException('Could not create preimage.zip - {}'.format(
            str(e)))
def _send_stats(self):
    """Report the completed CD download to iabdash and surface an operator
    notification."""
    self.dispatch_progress('Notifying iabdash')
    payload = {'files': self._files, 'total_time': self.total_time}
    log_path = os.path.join(self._cd.path, "iabdash.log")
    push_event('tts-cd-downloaded', payload, 'cd', self.identifier,
               log_path)
    self.notifications_manager.add_notification(
        title='Downloaded',
        message="CD {} has been downloaded.".format(self.identifier),
        show_system_tile=False,
        book=self._cd)
def _notify_iabdash(self):
    """Push a task-success event for MetadataViaIdentifierTask.

    Failures are logged and swallowed.
    """
    self.dispatch_progress('Notifying iabdash')
    payload = dict(
        task_type='MetadataViaIdentifierTask',
        selector=self.identifier,
        type='identifier',
    )
    try:
        push_event('tts-task-success', payload)
        self.logger.info(
            'MetadataViaIdentifierTask: Pushed event tts-task-success to '
            'iabdash with payload: {}'.format(payload))
    except Exception:
        self.logger.exception(
            'MetadataViaIdentifierTask: Failed to push tts-task-success '
            'event with payload: {}'.format(payload))
def _send_stats(self):
    """Report the finished book download to iabdash and notify the
    operator that the book is ready."""
    self.dispatch_progress('Notifying iabdash')
    payload = {
        'repub_state': self._book_final_repub_state,
        'checkout_info': self._checkout_info,
        'claimer': self._claimer,
        'files': self._files,
    }
    log_path = os.path.join(self._book.path, "iabdash.log")
    push_event('tts-book-downloaded', payload, 'book', self.identifier,
               log_path)
    ready_message = "{} has been downloaded and is ready for {}.".format(
        self.identifier, self._download_type)
    self.notifications_manager.add_notification(
        title='Downloaded',
        message=ready_message,
        show_system_tile=False,
        book=self._book)
def _notify_iabdash(self):
    """Push a task-success event and a metric for the MARC/DWWI metadata
    task. Failures are logged and swallowed."""
    self.dispatch_progress('Notifying iabdash')
    payload = dict(
        task_type='MARCMetadataViaDWWITask',
        search_id=self.metadata['scribe3_search_id'],
        catalog=self.metadata['scribe3_search_catalog'],
        identifier=self.book.identifier,
    )
    try:
        push_event('tts-task-success', payload)
        put_metric('scribe3.tasks.metadata.dwwi', '1', payload)
        self.logger.info(
            'MARCMetadataViaDWWITask: Pushed event tts-task-success to '
            'iabdash with payload: {}'.format(payload))
    except Exception:
        self.logger.exception(
            'MARCMetadataViaDWWITask: Failed to push tts-task-success '
            'event with payload: {}'.format(payload))
def _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                  responses, responses_dict, book_upload_phase_start,
                  book_upload_phase_end, book_upload_total_start,
                  book_preimage_upload_start, book_preimage_upload_end):
    """Assemble timing/response statistics for a finished upload and push
    a tts-book-uploaded event.

    Mutates responses_dict in place with url -> status_code entries.
    """
    # Total active scanning time, if the station kept a time.log.
    if os.path.exists(join(book['path'], 'time.log')):
        with open(join(book['path'], 'time.log'), 'r') as fp:
            global_time_open = float(fp.readline())
    else:
        global_time_open = None
    for item in responses:
        try:
            for r in item:
                responses_dict[str(r.request.url)] = r.status_code
        # BUG FIX: was a bare `except:`, which would also swallow
        # SystemExit/KeyboardInterrupt. Keep the best-effort behavior,
        # but only for ordinary exceptions.
        except Exception:
            responses_dict[item] = 'Error'
    book_upload_total_end = time.time()
    times_dict = {
        'upload_phase': book_upload_phase_end - book_upload_phase_start,
        'total': book_upload_total_end - book_upload_total_start,
        'preimage': book_preimage_upload_end - book_preimage_upload_start,
    }
    payload = {
        'local_id': book['uuid'],
        'status': book['status'],
        'activeTime': global_time_open,
        'leafNum': scandata.count_pages(),
        'metadata': encoded_md,
        'responses': responses_dict,
        'times': times_dict,
        'sizes': sizes_dict,
        'foldouts': doing_foldouts,
    }
    push_event('tts-book-uploaded', payload, 'book', book['identifier'])
def upload_book(book):
    """Upload a packaged book (preimage.zip, metadata, metasource, logs)
    to its archive.org item.

    Pushes success metrics or failure events to iabdash. The upload lock
    file is always removed on exit.

    Raises:
        ScribeException: on connection failures or any other upload error.
    """
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])
    _check_preconditons(book)
    #book.do_book_upload_begin()
    # Lock file prevents concurrent uploads of the same book folder.
    _set_upload_lock_file(book, Logger)
    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])
        zip_path = _check_preimage_is_valid(book)
        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))
        # force_upload bypasses remote-state checks (cleared in finally).
        if not book.force_upload:
            _check_remote_preconditons(item, Logger)
        encoded_md = _prepare_metadata(book, item, Logger)
        metasource_file_location, metasource_file_upload_name = _generate_metasource(
            book, Logger)
        responses = []
        book_upload_phase_start = time.time()
        # If the item already exists, metadata must be pushed in a
        # separate call after the file uploads.
        needs_metadata_pushed = item.exists
        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))
        book_preimage_upload_start, \
        book_preimage_upload_end, \
        sizes_dict = _upload_book_files(
            zip_path, book, encoded_md, item,
            responses, metasource_file_location,
            metasource_file_upload_name, Logger)
        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)
        book_upload_phase_end = time.time()
        _upload_logs(book=book, item=item, responses=responses)
        # Raises if any response came back non-OK.
        _verify_responses(responses, Logger)
        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))
        book.do_upload_book_end()
        _push_metrics(book, scandata, encoded_md, sizes_dict,
                      doing_foldouts, responses, responses_dict,
                      book_upload_phase_start, book_upload_phase_end,
                      book_upload_total_start, book_preimage_upload_start,
                      book_preimage_upload_end)
        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message="{} has been successfully uploaded.".format(
                    book['identifier']),
                book=book)
        Logger.debug('Finished upload for ' + book['identifier'])
        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api
    except requests.ConnectionError as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }
        push_event('tts-book-failed-upload', payload, 'book',
                   book['identifier'])
        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e)
        }
        push_event('tts-book-upload-exception', payload, 'book',
                   book['identifier'])
        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        # Always clear the force flag and release the lock so the book
        # can be retried.
        book.force_upload = False
        Logger.info("Removing upload lock file at {}".format(
            join(book['path'], "upload_lock")))
        os.remove(join(book['path'], "upload_lock"))
def delete_finished_book(book):
    """Delete the local copy of a book once archive.org reports it in a
    repub_state that means the upload was fully processed.

    Called from worker thread. If MDAPI is unavailable or returns no
    repub_state field, deletion proceeds anyway (matching the original
    "Continuing with deletion" behavior).

    Raises:
        ScribeException: if the metadata query or the deletion fails.
    """
    # if book['status'] < UploadStatus.done.value:
    #     return
    book.logger.debug('Checking repub_state for {}'.format(book))
    repub_state = None
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'.format(
            id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if md is None or 'result' not in md:
            print(
                "No repub state or MDAPI unavailable. Continuing with deletion."
            )
        else:
            repub_state = md['result'].get('repub_state')
            if repub_state is None:
                book.logger.warning('Repub state not found for {}'.format(
                    book['identifier']))
                return
        if repub_state:
            # BUG FIX: the original tested
            #   int(repub_state) == RepubState.done.value or
            #       RepubState.uploaded.value or ...
            # which is always true because the trailing enum values are
            # truthy on their own. Use a membership test instead.
            deletable_states = (RepubState.done.value,
                                RepubState.uploaded.value,
                                RepubState.post_autocropped.value)
            if int(repub_state) in deletable_states:
                _delete_local_book_copy(book, 'delete_finished_book')
            else:
                book.logger.info('Not deleting {} | repub_state={}'.format(
                    book['path'], repub_state))
        else:
            # MDAPI had no usable repub_state: proceed with deletion.
            _delete_local_book_copy(book, 'delete_finished_book')
    except ScribeException:
        raise
    except Exception as e:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book! {}'.format(e))


def _delete_local_book_copy(book, function_name):
    """Remove the book folder (if still present) and push a
    tts-book-deleted event. Shared by both deletion branches above."""
    if not os.path.exists(book.path):
        # User may have already deleted local copy of this book
        book.logger.error('UploadWidget: Book not found '
                          '(could be deleted): {}'.format(book['path']))
        return
    book.logger.info('Deleting {}'.format(book))
    payload = {
        'function': function_name,
        'local_id': book.uuid,
        'status': book.status
    }
    push_event('tts-book-deleted', payload, 'book', book['identifier'])
    shutil.rmtree(book['path'])
    book.delete_callback(book)
def _send_telemetry(self):
    """Emit a tts-book-reset event carrying the book's full state."""
    self.dispatch_progress('Sending telemetry')
    book_snapshot = self.book.as_dict()
    push_event('tts-book-reset', {'book': book_snapshot}, 'book',
               self.book.uuid)
def upload_book_foldouts(book,):
    """Upload foldout images (plus rerepublished scandata and the
    scanning log) for a book returned from the foldout station, then
    advance the item's repub_state and detach the book from the
    btserver item.

    Raises:
        ScribeException: on connection errors; other failures are
            re-raised via book.raise_exception after marking the upload
            failed.
    """
    try:
        Logger = book.logger
        Logger.info('upload_book_foldouts: Uploading foldouts for book '
                    '{}'.format(book))
        ia_session = get_ia_session()
        book_item = ia_session.get_item(book['identifier'], )
        _check_preconditions(book, book_item, Logger)
        book_folder = 'foldouts'
        # Four upload dicts from the scandata builder; presumably
        # captures/thumbs and their republished counterparts -- TODO
        # confirm against _create_scandata.
        cdic, tdic, rdic, rtdic = _create_scandata(book, book_folder,
                                                   True, Logger)
        responses = []
        # Upload the pictures
        Logger.debug('upload_book_foldouts: Uploading pics')
        book.update_message('Foldouts upload | Images')
        if cdic != {}:
            res = book_item.upload(cdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)
        if tdic != {}:
            res = book_item.upload(tdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)
        try:
            if rdic != {}:
                res = book_item.upload(rdic, retries=10, verify=True,
                                       retries_sleep=60,
                                       queue_derive=False)
                responses.append(res)
            if rtdic != {}:
                res = book_item.upload(rtdic, retries=10, verify=True,
                                       retries_sleep=60,
                                       queue_derive=False)
                responses.append(res)
        except requests.exceptions.ConnectionError as e:
            Logger.error(
                'upload_book_foldouts: Connection exception {} '
                'has occurred at rdic upload time; aborting!'.format(str(e)))
            raise e
        except Exception as e:
            Logger.error('upload_book_foldouts: Error {} has occurred at rdic upload time'.format(e))
            raise e
        Logger.debug('upload_book_foldouts: Done. Uploading scandata...')
        # Upload the scandata
        target_scandata = 'scandata.json'
        book.update_message('Foldouts upload | Scandata')
        scandata = join(book['path'], 'scandata_rerepublished.json')
        upload_res = book_item.upload({target_scandata: scandata},
                                      retries=10, retries_sleep=60,
                                      queue_derive=False, verify=True,)
        if os.path.exists(os.path.join(book['path'], 'scanning.log')):
            book.update_message('Foldouts upload | Log')
            book.logger.debug(
                'Uploading Scanning log file'
            )
            # Timestamped remote name so successive uploads don't clobber
            # earlier logs.
            upload_name_mapping = \
                {'logs/' + book['identifier'] + '_scanning_{:%Y-%m-%d%H:%M:%S}.log'.format(datetime.now()):
                     join(book['path'], 'scanning.log')}
            response = book_item.upload(upload_name_mapping, retries=10,
                                        retries_sleep=60,
                                        queue_derive=False,
                                        verbose=True,verify=True, )
            responses.append(response)
            url_to_status_code = \
                {r.request.url: r.status_code for r in response}
            book.logger.debug('Response from upload: {} | {}'
                              .format(response, url_to_status_code))
        responses.append(upload_res)
        # corrections_uploaded
        # flatten responses list:
        flat_responses = [y for x in responses for y in x]
        for response in flat_responses:
            Logger.info('{} returned {}'.format(response.url,
                                                response.status_code))
            if response.status_code != 200:
                # NOTE(review): `'text' in response` tests the response
                # *body* for the substring 'text', not attribute
                # presence -- confirm intent (hasattr was likely meant).
                raise Exception('upload_book_foldouts: Response code {} {} - {} from cluster. '
                                'URL was: {} | content: {}'
                                'This is an error. Upload will be attempted again.'
                                .format(response.status_code,
                                        response.reason,
                                        response.text if 'text' in response else "",
                                        response.url, response.content))
        Logger.debug('Done. Changing repub state...')
        # 43: repub_state used for "foldouts/corrections received" --
        # TODO confirm against the RepubState enum.
        _change_repub_state(book_item, 43)
        _remove_book_from_btserver_item(book, Logger)
        book.do_upload_foldouts_done()
        payload = {
            'repub_state': 43,
            'responses': flat_responses,
        }
        push_event('tts-book-corrections-sent', payload, 'book',
                   book['identifier'])
        Logger.debug('All done.')
        return
    except requests.ConnectionError as e:
        raise ScribeException('Upload Failed. Please check network and '
                              'S3 Keys (Error was: {})'.format(e))
    except Exception as e:
        book.do_upload_foldouts_fail()
        book.raise_exception(e)
def _upload_payload(self):
    """Push the heartbeat payload to iabdash, then log it for debugging."""
    self.dispatch_progress('Uploading')
    snapshot = self._payload.copy()
    push_event('tts-heartbeat', snapshot)
    self.logger.debug(
        '{}: Payload:\n{}'.format(self.name, pformat(self._payload)))