class ReScribeScreen(TooltipScreen, Screen): cover_image = StringProperty(MISSING_IMAGE) book = ObjectProperty(None) scandata = ObjectProperty(None) scribe_widget = ObjectProperty(None) screen_manager = ObjectProperty(None) def __init__(self, **kwargs): self._note_leafs = [] self.book_obj = None super(ReScribeScreen, self).__init__(**kwargs) Clock.schedule_once(self._postponed_init) def _postponed_init(self, *args): menu = self.ids.menu_bar menu.fbind(menu.EVENT_OPTION_SELECT, self.on_menu_bar_option_select) view = self.ids.note_leafs_view view.fbind(view.EVENT_LEAF_SELECT, self.on_note_leaf_select) self._books_db = Library() def on_pre_enter(self): self.load_scandata() self.load_note_leafs() self.setup_menu_bar() self.setup_book_info_panel() self.setup_note_leafs_view() def load_scandata(self): book_path = self.book['path'] book_uuid = basename(book_path) self.book_obj = self._books_db.get_book(book_uuid) self.scandata = ScanData(book_path, downloaded=True) Logger.info( 'ReScribeScreen: Loaded scandata from directory: {}'.format( book_path)) def load_note_leafs(self): leafs = self._note_leafs del leafs[:] scandata = self.scandata book_path = self.book['path'] original_path = join(book_path, 'thumbnails') reshoot_path = join(book_path, 'reshooting', 'thumbnails') for note_leaf in scandata.iter_flagged_leafs(): leaf_data = scandata.get_page_data(note_leaf) image_name = '{:04d}.jpg'.format(note_leaf) reshoot_image_path = join(reshoot_path, image_name) page_number = leaf_data.get('pageNumber', None) new_leaf_data = { 'original_image': join(original_path, image_name), 'reshoot_image': reshoot_image_path, 'leaf_number': note_leaf, 'page_number': self._get_page_number(page_number), 'page_type': leaf_data['pageType'], 'note': leaf_data.get('note', None) or u'', 'status': 1 if exists(reshoot_image_path) else 0 } leafs.append(new_leaf_data) def _get_page_number(self, page_number_data): # TODO: Remove this method when scandata structure becomes the same # for reshooting mode and otherwise if page_number_data: if isinstance(page_number_data, dict): page_number = page_number_data.get('num', None) return None if page_number is None else int(page_number) elif isinstance(page_number_data, str): return int(page_number_data) return None def setup_menu_bar(self): menu = self.ids.menu_bar menu.identifier = self.book['identifier'] #menu.upload_button_disabled = not self.is_rescribing_complete() menu.reshooting_button_disabled = not bool(self._note_leafs) def setup_book_info_panel(self): panel = self.ids.book_info_panel panel.scroll_y = 1.0 cover_image = join(self.book['path'], 'thumbnails', '0001.jpg') if not exists(cover_image): cover_image = MISSING_IMAGE panel.cover_image = cover_image panel.claimer = self.get_claimer() panel.update_from_metadata(get_metadata(self.book['path'])) def get_claimer(self): path = join(expanduser(self.book['path']), 'claimer') if exists(path): with open(path, 'r') as f: return f.read() or NONE_STR return NONE_STR def setup_note_leafs_view(self): view = self.ids.note_leafs_view view.leafs[:] = self._note_leafs view.refresh_views() def popup_dismiss_to_home(self, popup, *args): popup.dismiss(animation=False) self.go_to_home() def go_to_home(self, *args, **kwargs): self.screen_manager.transition.direction = 'left' self.screen_manager.current = 'upload_screen' def on_menu_bar_option_select(self, menu, option): if option == menu.OPTION_UPLOAD: self.package_and_schedule_for_upload() elif option == menu.OPTION_FIRST_LEAF: leaf_number = self.find_first_non_reshoot_leaf_number() if 
leaf_number: self.open_book_at_leaf(leaf_number) elif option == menu.OPTION_PUBLIC_NOTES: metadata = get_metadata(self.book['path']) notes = metadata.get('notes', None) or '' popup = BookNotesPopup(title='Edit public book notes', notes=notes) popup.bind(on_submit=self.on_book_notes_submit) popup.open() elif option == menu.OPTION_INTERNAL_NOTES: internal_notes = self.scandata.get_internal_book_notes() or '' popup = BookNotesPopup(title='Edit internal book notes', notes=internal_notes) popup.bind(on_submit=self.on_internal_book_notes_submit) popup.open() def on_book_notes_submit(self, popup, notes): metadata = get_metadata(self.book['path']) metadata_notes = metadata.get('notes', None) or '' notes = notes.strip() if metadata_notes != notes: if notes: metadata['notes'] = notes message = 'Saved public book notes: %s' \ % ('\n%s' % notes if '\n' in notes else notes) else: metadata.pop('notes', None) message = 'Removed public book notes' set_metadata(metadata, self.book['path']) Logger.info('ReScribeScreen: %s' % message) self.book_obj.reload_metadata() def on_internal_book_notes_submit(self, popup, notes): scandata = self.scandata internal_notes = scandata.get_internal_book_notes() or '' notes = notes.strip() if internal_notes != notes: scandata.set_internal_book_notes(notes) scandata.save() if notes: message = 'Saved internal book notes: %s' \ % ('\n%s' % notes if '\n' in notes else notes) else: message = 'Removed internal book notes' Logger.info('ReScribeScreen: %s' % message) self.book_obj.reload_scandata() def on_note_leaf_select(self, note_leafs_view, note_leaf): self.open_book_at_leaf(note_leaf['leaf_number']) def find_first_non_reshoot_leaf_number(self): for note_leaf_data in self._note_leafs: if note_leaf_data['status'] == 0: return note_leaf_data['leaf_number'] try: ret = self._note_leafs[0]['leaf_number'] return ret except: return None def open_book_at_leaf(self, leaf_number): Logger.debug('ReScribeScreen: Trying to open book with id: {}'.format( self.book['identifier'])) screen_name = 'reshoot_screen' try: capture_screen = self.screen_manager.get_screen(screen_name) except Exception: capture_screen = ReShootScreen(name=screen_name) self.screen_manager.add_widget(capture_screen) capture_screen.pos = self.screen_manager.pos capture_screen.book = self.book capture_screen.reopen_at = leaf_number capture_screen.scandata = self.scandata capture_screen.screen_manager = self.screen_manager capture_screen.scribe_widget = self.scribe_widget ''' target_screen = screen_name models, ports = self.scribe_widget.cameras.get_cameras() camera_ports = self.scribe_widget.cameras.camera_ports if camera_ports['left'] not in ports: target_screen = 'calibration_screen' if camera_ports['right'] not in ports: target_screen = 'calibration_screen' foldout_port = camera_ports['foldout'] if foldout_port is not None and foldout_port not in ports: target_screen = 'calibration_screen' if target_screen == 'calibration_screen': screen = self.screen_manager.get_screen('calibration_screen') screen.target_screen = 'reshoot_screen' self.screen_manager.transition.direction = 'left' self.screen_manager.current = target_screen else: ''' self.screen_manager.transition.direction = 'left' self.screen_manager.current = screen_name def is_rescribing_complete(self): if not self._note_leafs: return False for leaf_data in self._note_leafs: if not exists(leaf_data['reshoot_image']): return False return True def package_and_schedule_for_upload(self): if self.is_rescribing_complete(): self.action = UploadCorrectionsBookActionMixin( 
book=self.book_obj, task_scheduler=self.scribe_widget.task_scheduler, done_action_callback=self.go_to_home) self.action.display() else: msg = 'ReScribeScreen: Book is not done rescribing' popup = InfoPopup(title='Error', message=msg, auto_dismiss=False) popup.bind(on_submit=popup.dismiss) popup.open() Logger.error(msg)
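# Illustrative sketch (not part of the class above): the note-leaf records built in
# load_note_leafs() follow a simple path convention. A minimal, self-contained version
# of that convention, assuming only a book path on disk; names here are hypothetical.
from os.path import exists, join

def reshoot_status(book_path, leaf_number):
    """Mirror load_note_leafs(): thumbnail paths and 0/1 reshoot status for one leaf."""
    image_name = '{:04d}.jpg'.format(leaf_number)               # e.g. 17 -> '0017.jpg'
    original_image = join(book_path, 'thumbnails', image_name)
    reshoot_image = join(book_path, 'reshooting', 'thumbnails', image_name)
    status = 1 if exists(reshoot_image) else 0                  # 1 = already reshot
    return original_image, reshoot_image, status

# Hypothetical usage: reshoot_status('/home/scribe/scribe_books/1234', 17)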
def test__init_ppi_for_bookdata(self):
    scandata = ScanData(self.path)
    self.assertIsNotNone(scandata.get_bookdata('ppi'))
class Book(Scribe3Item): state_machine = book_state_machine def __init__(self, book_dict, callback=None, delete_callback=None): print("[Book::init()] Creating book object from ->", book_dict) super(Book, self).__init__(book_dict, callback, delete_callback) self.scandata = ScanData(self.path) self.leafs = self.scandata.count_pages() self.notes_count = self.scandata.count_notes() self.creator = book_dict[ 'creator'] if 'creator' in book_dict else self.metadata[ 'creator'] if 'creator' in self.metadata else None self.volume = book_dict[ 'volume'] if 'volume' in book_dict else self.metadata[ 'volume'] if 'volume' in self.metadata else None self.shiptracking_id = book_dict[ 'shiptracking'] if 'shiptracking' in book_dict else self.metadata[ 'shiptracking'] if 'shiptracking' in self.metadata else None self.boxid = book_dict[ 'boxid'] if 'boxid' in book_dict else self.metadata[ 'boxid'] if 'boxid' in self.metadata else None def __repr__(self): ret = '<{} is {} ({}-{})'.format( self.uuid, status_human_readable.get(self.status), self.status, UploadStatus[self.status].value, ) if self.has_identifier(): ret = ret + '| {}'.format(self.identifier) ret = ret + '>' return ret def reload_metadata(self): self.metadata = get_metadata(self.path) #self.notes = self.metadata['notes'] if 'notes' in self.metadata else None self.shiptracking_id = self.metadata[ 'shiptracking'] if 'shiptracking' in self.metadata else None self.boxid = self.metadata[ 'boxid'] if 'boxid' in self.metadata else None self.volume = self.metadata[ 'volume'] if 'volume' in self.metadata else None super(Book, self).reload_metadata() def reload_scandata(self): self.scandata = ScanData(self.path) self.leafs = self.scandata.count_pages() self.notes_count = self.scandata.count_notes() self.date_last_updated = self.load_last_modified_from_disk( including_move_along=True) self.date_last_modified = self.load_last_modified_from_disk() self.date_created = self.load_date_created_from_disk() if self.notify: self.notify('reloaded_scandata') def get_claimer(self): path = join(os.path.expanduser(self.path), 'claimer') if exists(path): with open(path, 'r') as f: return f.read() or 'None' return 'None' def get_scandata(self): return self.scandata.dump_raw() def as_dict(self): ret = super(Book, self).as_dict() ret.update({ 'volume': self.volume, 'notes_count': self.notes_count, 'leafs': self.leafs, 'shiptracking': self.shiptracking_id if self.shiptracking_id else '', 'boxid': self.boxid if self.boxid else '', }) return ret def has_minimal_metadata(self, exclude_catalog=False): result = [] for combination in MINIMAL_METADATA: if combination == ('scribe3_search_catalog', 'scribe3_search_id'): if exclude_catalog: break is_combination_satisfied = True for really_important_field in combination: if really_important_field not in self.metadata: is_combination_satisfied = False break if self.metadata[really_important_field] == '': is_combination_satisfied = False break result.append(is_combination_satisfied) ret = reduce(lambda x, y: x or y, result) return ret def is_downloaded(self): ret = os.path.exists(os.path.join(self.path, 'downloaded')) return ret def is_modern_book(self): return 'scribe3_search_id' in self.metadata def is_preloaded(self): return exists(join(self.path, 'preloaded')) def has_rcs(self): REQUIRED_FIELDS_FOR_NATIVE_BOOK = [ 'sponsor', 'contributor', 'collection', 'rcs_key' ] for field in REQUIRED_FIELDS_FOR_NATIVE_BOOK: if field not in self.metadata: return False return True def has_rcs_if_required(self): if self.is_preloaded(): return True if 
self.is_downloaded(): return True # RCS is required return self.has_rcs() def get_slip(self, full_path=False): sliplike_files = glob.glob(os.path.join(self.path, '*slip.png')) if len(sliplike_files) == 0: return None slip_paths = [x for x in sliplike_files] slip_paths.sort(key=os.path.getmtime, reverse=True) functor = lambda x: x if not full_path: functor = os.path.basename slip_files = [functor(x) for x in slip_paths] slip_filename = slip_files[0] return slip_filename def has_slip(self): return self.get_slip() is not None def has_slip_if_required(self): if self.is_modern_book(): return self.has_slip() not in [False, None] return True def get_slip_type(self): from ia_scribe.tasks.print_slip import SLIP_IMAGE_NAMES filename_to_slip_type = {v: k for k, v in SLIP_IMAGE_NAMES.items()} slip_filename = self.get_slip() if not slip_filename: return None slip_file = slip_filename.replace('.png', '') if slip_file in list(filename_to_slip_type.keys()): return filename_to_slip_type[slip_file] else: return None def get_slip_metadata_file_path(self): return join(self.path, SLIP_METADATA_FILENAME) def has_slip_metadata_file(self): ret = False path = self.get_slip_metadata_file_path() if exists(path): ret = True return ret def get_slip_metadata(self): ret = None if self.has_slip() or self.has_slip_metadata_file(): path = self.get_slip_metadata_file_path() with open(path, 'r') as f: ret = json.load(f) return ret def set_slip_metadata(self, type, metadata): metadata['type'] = type dt = metadata.get('datetime') metadata['datetime'] = dt.strftime('%Y%m%d%H%M%S') slip_metadata = json.dumps(metadata, indent=4, sort_keys=True) with open(self.get_slip_metadata_file_path(), 'w+') as f: f.write(slip_metadata) def has_full_image_stack(self): return has_full_imgstack(self) def has_foldout_target_selected(self): path = join(self.path, 'send_to_station') return os.path.exists(path) def get_foldout_target(self): if self.has_foldout_target_selected(): with open(join(self.path, 'send_to_station'), 'r') as f: return f.read() else: return None def set_foldout_target(self, scanner): with open(join(self.path, 'send_to_station'), 'w+') as f: f.write(scanner) def has_full_image_stack_wrapper(self, e): self.logger.info('checking that {} has a full imgstack...'.format( self.identifier)) ret, msg = has_full_imgstack(self) self.logger.info('Result is {} {}'.format(ret, msg)) if ret == False: self.raise_exception('has_full_imgstack_wrapper', msg) return ret def item_clear_for_upload_wrapper(self, e): self.logger.info('checking that {} is clear for upload'.format( self.identifier)) ret = item_ready_for_upload(self) self.logger.info('Result is {}'.format(ret)) return ret def was_image_stack_processed_wrapper(self, e): self.logger.info('checking that imagestack was formed properly') ret = was_image_stack_processed(self) self.logger.info('Result is {}'.format(ret)) return ret def has_valid_preimage_zip_wrapper(self, e): self.logger.info( 'checking that preimage.zip archive was built properly') ret = has_valid_preimage_zip(self) self.logger.info('Result is {}'.format(ret)) return ret def has_slip_if_required_wrapper(self, e): self.logger.info('checking that a slip is present if required') ret = self.has_slip_if_required() self.logger.info('Result is {}'.format(ret)) return ret def has_rcs_if_required_wrapper(self, e): self.logger.info( 'checking that a collection string was set for the book') ret = self.has_rcs_if_required() self.logger.info('Result is {}'.format(ret)) return ret def ok_to_delete(self, e): self.logger.info('Checking 
whether it is safe to delete this book') if self.is_downloaded(): self.logger.info('Book was downloaded, we can go ahead.') return True else: self.logger.info( 'Book was scanned on this station. Verifying with cluster...') return verify_uploaded(self) def get_jpegs(self): jpegs = sorted( glob.glob(os.path.join(self.path, '[0-9][0-9][0-9][0-9].jpg'))) return jpegs def get_thumb_jpegs(self): jpegs = sorted( glob.glob( os.path.join(self.path, 'thumbnails', '[0-9][0-9][0-9][0-9].jpg'))) return jpegs def get_imagestack(self): jp2s = sorted( glob.glob(os.path.join(self.path, '[0-9][0-9][0-9][0-9].jp2'))) if len(jp2s) == 0: jp2s = self.get_jpegs() return jp2s def get_path_to_upload(self, human_readable=False): return_value = [] if self.get_numeric_status() >= 888: return return_value if self.get_numeric_status() < 797: return_value = path_to_success(self.status) else: return_value = path_to_success_corrections(self.status) if human_readable: return_value = self.humanify(return_value) return return_value def set_force_upload(self): self.force_upload = True def build_callbacks(self): ret = { 'onchangestate': self._on_change_state, 'onbeforedo_create_identifier': get_identifier_fysom, # 'onbeforedo_queue_processing': partial(self.set_checkpoint, 'processing_queued'), # 'onbeforedo_queue_for_upload': partial(self.set_checkpoint, 'upload_queued'), # 'onbeforedo_move_to_trash': partial(self.set_checkpoint, 'delete_queued'), # 'create_metadata': partial(make_identifier, self, self) # 'ondo_begin_packaging': package_book, # 'ondo_create_image_stack': create_imagestack, # 'ondo_finish_image_stack': create_preimage_zip, # 'ondo_queue_for_upload': upload_book, # 'onafterdo_upload_book_done': verify_uploaded, } return ret def is_folio(self): ret = self.metadata.get('source') == 'folio' return ret def get_cover_image(self): if self.is_folio(): ret = os.path.join(self.path, 'thumbnails', '0002.jpg') else: ret = os.path.join(self.path, 'thumbnails', '0001.jpg') return ret GSM = FysomGlobal( cfg=book_state_machine, state_field='status', )
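# Illustrative sketch: has_minimal_metadata() above accepts a book when at least one
# field combination is fully present and non-empty. Standalone version of that rule;
# the combinations below are hypothetical, the real MINIMAL_METADATA lives elsewhere.
HYPOTHETICAL_MINIMAL_METADATA = [
    ('identifier',),
    ('title', 'creator'),
    ('scribe3_search_catalog', 'scribe3_search_id'),
]

def has_minimal_metadata_sketch(metadata, exclude_catalog=False):
    """True if any combination has every field present and non-empty."""
    results = []
    for combination in HYPOTHETICAL_MINIMAL_METADATA:
        if exclude_catalog and combination == ('scribe3_search_catalog',
                                               'scribe3_search_id'):
            continue  # the original breaks out of the loop here instead
        results.append(all(metadata.get(field, '') != '' for field in combination))
    return any(results)

# has_minimal_metadata_sketch({'title': 'A Book', 'creator': 'Someone'})  -> True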
def _create_scandata(book, book_folder, foldouts, Logger): scandata = json.loads(ScanData(book['path']).dump()) cdic = {} tdic = {} rdic = {} rtdic = {} ''' This subprogram uploads a corrected book back to republisher. It: - Verifies that the book was downloaded. - Gets ahold of the tts identifier to later remove the book from the item's "books" list. - Constructs and maintains four dictionaries: new pages (cdic), new pages thumbs (tdic), reshot pages(rdic), reshot pages thumbs (rtdic) that will later become the scandata. -- Looks for new pages (spreads insertions and/or appends) and their thumbs -- Add republisher tags (that's what post-processing would do) -- Looks for replacements (in bookpath/reshooting) if present -- saves a scandata_rerepublished.json - Uploads the pictures and scandatas - Updates tts item, repub state and metrics ''' # Here we add the new pages # cdic is the corrections dictionary, and it contains entries in the # form: # # {item path : local path } - for example: # {'corrections/0044.jpg' : '~/scribe_books/1234/0022.jpg'} try: cdic = {book_folder + '/' + k: os.path.join(book['path'], k) for k in next(os.walk(book['path']))[2] if re.match('\d{4}\.jpg$', os.path.basename(k))} # And the thumbs from the new pages # REMOVE THUMB FROM OS WALK PATH tdic = {book_folder + '/thumbnails/' + k: os.path.join(book['path'], 'thumbnails', k) for k in next(os.walk(join(book['path'])))[2] if re.match('\d{4}\.jpg$', os.path.basename(k))} except Exception: Logger.error('_create_scandata: No corrections found.') # Ensure the scandata has the appropriate tags for re-republishing # NEW PAGES DICT Logger.debug('_create_scandata: Processing new pages...') for k in cdic: page_num = str(int(k.split('.jpg')[0].split('/')[1])) Logger.debug('_create_scandata: Processing page {}'.format(page_num)) try: page_data_exists = scandata['pageData'][page_num] is not None Logger.debug('_create_scandata: Page data for page {} exists in ' 'scandata'.format(page_num)) except Exception as e: raise ScribeException(e) # Rotate images im = Image.open(cdic[k]) # im = im.rotate( # int(scandata['pageData'][page_num]['rotateDegree']) # ) width, height = im.size # scandata['pageData'][page_num]['rotateDegree'] = 0 if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]: scandata['pageData'][page_num]['origWidth'] = str(width) scandata['pageData'][page_num]['origHeight'] = str(height) elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]: scandata['pageData'][page_num]['origWidth'] = str(height) scandata['pageData'][page_num]['origHeight'] = str(width) Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - PAGE INSERT- ' 'please report this \n\n') Logger.debug( 'rotatedegree={2}, origWidth={0}, height={1}' .format(scandata['pageData'][page_num]['origWidth'], scandata['pageData'][page_num]['origHeight'], scandata['pageData'][page_num]['rotateDegree']) ) Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n') scandata['pageData'][page_num]['origFileName'] = k.split('/')[1] scandata['pageData'][page_num]['sourceFileName'] = k scandata['pageData'][page_num]['proxyFullFileName'] = k if not foldouts: scandata['pageData'][page_num]['correctionType'] = 'INSERT' scandata['pageData'][page_num]['TTSflag'] = 0 Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - please report ' 'this \n\n') Logger.debug('\n' + str(scandata['pageData'][page_num])) Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n') # THUMBS FOR NEW PAGES for k in tdic: page_num = 
str(int(k.split('.jpg')[0].split('/')[2])) scandata['pageData'][page_num]['proxyFileName'] = k Logger.debug('_create_scandata: Processed {} new images.'.format(len(cdic))) try: # here we add the reshot images rdic = { book_folder + '/' + k: join(book['path'], 'reshooting', k) for k in next(os.walk(join(book['path'], 'reshooting')))[2] if re.match('\d{4}\.jpg$', os.path.basename(k)) } # RESHOT IMAGES DICT for k in rdic: page_num = str(int(k.split('.jpg')[0].split('/')[1])) # rotate images im = Image.open(rdic[k]) # im = im.rotate( # int(scandata['pageData'][page_num]['rotateDegree']) # ) width, height = im.size # im.save(rdic[k]) # scandata['pageData'][page_num]['rotateDegree'] = 0 if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]: scandata['pageData'][page_num]['origWidth'] = str(width) scandata['pageData'][page_num]['origHeight'] = str(height) elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]: scandata['pageData'][page_num]['origWidth'] = str(height) scandata['pageData'][page_num]['origHeight'] = str(width) Logger.debug('---->>> CORRECTIONS DEBUG - PAGE RESHOOT') Logger.debug( 'rotatedegree is {2}, origWidth = {0}, height= {1}' .format(scandata['pageData'][page_num]['origWidth'], scandata['pageData'][page_num]['origHeight'], scandata['pageData'][page_num]['rotateDegree']) ) Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - - -') scandata['pageData'][page_num]['origFileName'] = k.split('/')[1] scandata['pageData'][page_num]['sourceFileName'] = k scandata['pageData'][page_num]['correctionType'] = 'REPLACE' scandata['pageData'][page_num]['proxyFullFileName'] = k scandata['pageData'][page_num]['TTSflag'] = 0 Logger.debug('---->>> CORRECTIONS DEBUG - please report this') Logger.debug('\n' + str(scandata['pageData'][page_num])) Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -') # here we add the thumbs from the reshooting rtdic = { book_folder + '/thumbnails/' + k: join(book['path'], 'reshooting', 'thumbnails', k) for k in next(os.walk(join(book['path'], 'reshooting', 'thumbnails')))[2] if re.match('\d{4}\.jpg$', os.path.basename(k)) } # THUMBS FOR RESHOT IMAGES for k in rtdic: page_num = str(int(k.split('.jpg')[0].split('/')[2])) scandata['pageData'][page_num]['proxyFileName'] = k Logger.debug('_create_scandata: Processed {} reshot images.'.format(len(rdic))) except Exception as e: Logger.exception('_create_scandata: No reshot pages found') # Super Solenoid Scandata from disk (page info) sss = {int(k): v for k, v in list(scandata['pageData'].items())} # Now we want our own piece of memory for this one new_scandata = copy.deepcopy(scandata) new_scandata['pageData'] = {} new_scandata['pageData']['page'] = [] # Rewrite pages section Logger.debug('_create_scandata: Adding all computed pages to new scandata...') for page in sorted(sss): Logger.debug('_create_scandata: {}'.format(page)) sss[page]['leafNum'] = page try: pnum = sss[page]['pageNumber']['num'] sss[page]['pageNumber'] = pnum except Exception: pass new_scandata['pageData']['page'].append(sss[page]) # Rewrite assertions to be compatible with republisher try: Logger.debug('\nNow rewriting page assertions for repub compatibility ' 'if present') temp_pageNumData = copy.deepcopy(scandata['bookData']['pageNumData']) temp_pageNumData['assertion'] = [] for entry in scandata['bookData']['pageNumData']: if entry.isdigit(): del temp_pageNumData[entry] for assertion in scandata['bookData']['pageNumData'].items(): temp_assertion = {'leafNum': str(assertion[0]), 'pageNum': 
str(assertion[1])} temp_pageNumData['assertion'].append(temp_assertion) Logger.debug('_create_scandata: OK done. New pageNumData block: {}' .format(temp_pageNumData)) new_scandata['bookData']['pageNumData'] = temp_pageNumData except Exception as e: Logger.exception('_create_scandata: No pageNumData block found or error processing ' 'it.: '.format(e)) # Write it all to file with open(join(book['path'], 'scandata_rerepublished.json'), 'w+') as outfile: json.dump(new_scandata, outfile) Logger.debug('_create_scandata: Done constructing scandata.') return cdic, tdic, rdic, rtdic
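# Illustrative sketch: the pageData keys above are derived from item-side file names,
# e.g. 'corrections/0044.jpg' -> scandata page key '44'. Same split logic as the
# loops above, written as a standalone helper (hypothetical name, stdlib only).
import re
from os.path import basename, join

def corrections_entry(book_folder, book_path, filename):
    """Build one {item_path: local_path} entry plus its scandata page key."""
    assert re.match(r'\d{4}\.jpg$', basename(filename))
    item_path = book_folder + '/' + filename       # e.g. 'corrections/0044.jpg'
    local_path = join(book_path, filename)         # e.g. '~/scribe_books/1234/0044.jpg'
    page_key = str(int(item_path.split('.jpg')[0].split('/')[1]))  # '0044' -> '44'
    return item_path, local_path, page_key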
class DownloadBookTask(TaskBase):

    def __init__(self, **kwargs):
        super(DownloadBookTask, self).__init__(**kwargs)
        self._book = None
        self._priority = 'medium'
        self._library = kwargs['library']
        self.identifier = kwargs['identifier']
        self.logger.info('Download books: Downloading {}'.format(
            self.identifier))
        self.notifications_manager = NotificationManager()
        self._download_type = None

    def create_pipeline(self):
        return [
            self._get_ia_session,
            self._load_item,
            self._validate_repub_state,
            self._create_stub_book,
            self._load_book_metadata,
            self._create_files,
            self._create_scandata,
            self._get_checkout_information,
            self._write_claimer_file,
            self._download_proxies,
            self._set_states,
            self._release_lock,
            self._send_stats,
        ]

    def handle_event(self, event_name, *args, **kwargs):
        if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR:
            if self._book:
                self._book.do_move_to_trash()
                self._book.do_delete_anyway()

    def _get_ia_session(self):
        self.dispatch_progress('Getting IA session')
        self._ia_session = get_ia_session()

    def _load_item(self):
        self.dispatch_progress('Loading item')
        try:
            self.item = self._ia_session.get_item(self.identifier)
            assert self.item.metadata['repub_state'] is not None
        except Exception as e:
            self.logger.error('No repub_state or item darkened. Skipping...')
            raise e
        self.logger.info(
            'Download book: target item: {} (repub_state = {})'.format(
                self.item.identifier, self.item.metadata['repub_state']))

    def _validate_repub_state(self):
        self.dispatch_progress('Validating repub state')
        is_repub_state_valid = lambda x: int(x.metadata['repub_state']) \
            in scribe_globals.ALLOWED_DOWNLOAD_REPUB_STATES
        self.logger.info('Validating repub_state {}'.format(
            int(self.item.metadata['repub_state'])))
        if not is_repub_state_valid(self.item):
            msg = 'Download book: Repub state is not 31 or 34 or 41 ' \
                  '(is {1}), refusing to download item {0}' \
                .format(self.item.identifier,
                        self.item.metadata['repub_state'])
            self.logger.error(msg)
            raise Exception(msg)

    def _create_stub_book(self):
        self.dispatch_progress('Creating local book')
        message = ('This book is being downloaded and no actions are '
                   'available just yet.')
        book_id = str(uuid4())
        self._book = self._library.new_book(book_id,
                                            status='download_incomplete',
                                            error=message)
        self._book.set_lock()
        self._book.logger.info('Download book: Created stub book {}'.format(
            self._book))

    def _load_book_metadata(self):
        self.dispatch_progress('Loading metadata')
        md_url = ('https://{}/RePublisher/RePublisher-viewScanData.php'
                  '?id={}'.format(self.item.d1, self.identifier))
        self._md = requests.get(md_url, timeout=5)
        self._book.logger.info(
            'Download book: Fetch scandata from cluster: {}'.format(
                self._md.status_code))

    def _create_files(self):
        self.dispatch_progress('Downloading files')
        ret = []
        with open(os.path.join(self._book.path, 'identifier.txt'), 'w+') as fp:
            fp.write(self.item.identifier)
            ret.append(fp.name)
        self._book.logger.info('Download book: Created {}'.format(fp.name))
        with open(os.path.join(self._book.path, 'downloaded'), 'w+') as fp:
            fp.write('True')
            ret.append(fp.name)
        self._book.logger.info('Download book: Created {}'.format(fp.name))
        with open(os.path.join(self._book.path, 'uuid'), 'w+') as fp:
            fp.write(self._book.uuid)
            ret.append(fp.name)
        self._book.logger.info('Download book: Created {}'.format(fp.name))
        with open(os.path.join(self._book.path, 'scandata.xml'), 'w+') as fp:
            fp.write(self._md.content.decode())
            ret.append(fp.name)
        self._book.logger.info('Download book: Created {}'.format(fp.name))
        self.item.get_file(self.item.identifier + '_meta.xml') \
            .download(file_path=self._book.path + '/metadata.xml')
        ret.append('{}'.format(self._book.path + '/metadata.xml'))
        self._book.logger.info('Download book: Created metadata.xml')
        self._book.reload_metadata()
        if not os.path.exists(os.path.join(self._book.path, 'reshooting')):
            os.makedirs(os.path.join(self._book.path, 'reshooting'))
            ret.append('{}'.format(self._book.path + '/reshooting'))
        self._book.logger.info('Download book: Created reshooting directory')
        self._files = ret
        self._book.logger.info('Download book: Created files, now converting '
                               'scandata from RePublisher XML to Scribe3 JSON')

    def _create_scandata(self):
        self.dispatch_progress('Creating scandata')
        sc_path = os.path.join(self._book.path, 'scandata.xml')
        tree = book_helpers.validate_scandata_xml(sc_path, self._book)
        scandata_xml = book_helpers.create_normalized_scandata(
            tree, self._book)
        json_data = book_helpers.convert_normalized_scandata_to_json(
            scandata_xml)
        json_new = {}
        self._book.logger.info('Download book: Now converting to Scribe3 JSON')
        json_new['bookData'] = book_helpers.build_bookdata(
            json_data, self._book)
        json_new['pageData'] = book_helpers.build_pagedata(
            json_data, self._book)
        with open(os.path.join(self._book.path, 'scandata.json'), 'w') as outfile:
            json.dump(json_new, outfile)
        self._book.logger.info('Download book: Created {}'.format(
            outfile.name))
        self._scandata = ScanData(self._book.path)
        self._scandata.save()
        self._book.reload_scandata()
        self._book.logger.info('Download book: Created scandata.')

    def _get_checkout_information(self):
        self.dispatch_progress('Pulling checkout information')
        book_checkout_url = ('https://{}/RePublisher/RePublisher-'
                             'checkoutBook.php?peek=true&id={}'.format(
                                 self.item.d1, self._book.identifier))
        self._book.logger.info(
            'Getting checkout information from {}'.format(book_checkout_url))
        ret = self._ia_session.get(book_checkout_url)
        self._book.logger.info('Got {} ({})'.format(ret.text, ret.status_code))
        self._checkout_info = json.loads(ret.text)

    def _write_claimer_file(self):
        self.dispatch_progress('Writing claimer file')
        if 'claimed_by' in self._checkout_info \
                and self._checkout_info['claimed_by'] != False:
            claimer = self._checkout_info['claimed_by']
        else:
            claimer = '-'
        with open(os.path.join(self._book.path, 'claimer'), 'w+') as fp:
            fp.write(claimer)
        self._claimer = claimer
        self._book.logger.info('This book was claimed by {}'.format(claimer))

    def _download_proxies(self):
        self.dispatch_progress('Downloading proxies')
        all_ok = True
        counter = 0
        page_data = self._scandata.dump_raw()['pageData']
        for i, page in enumerate(page_data):
            self.dispatch_progress('Downloading proxies [{}/{}]'.format(
                i, len(page_data)))
            if int(page) != i:
                self._book.logger.error('Download book: Download Proxies: '
                                        'CRITICAL MISMATCH')
                break
            short_msg = 'Download pics | {percent:.1f}% | {n}/{total}'.format(
                percent=i * 100 / len(page_data),
                n=i,
                total=len(page_data),
            )
            self._book.update_message(short_msg)
            url = book_helpers.get_cluster_proxy_url_by_leaf(
                self._scandata, self.item, page)
            res = self.download_proxy_image(page, self._book, url)
            all_ok = all_ok and res
            counter += 1
            if res:
                self._book.logger.debug(
                    'Download book: Got proxies for leaf #{0}'.format(page))
            else:
                self._book.logger.error(
                    'Download book: Error downloading leaf #{0}'.format(page))
            try:
                leafnr = self._scandata.get_page_num(page)['num']
            except Exception:
                pass
        self._book.logger.info(
            'Download book: Downloaded {} proxy images.'.format(counter))
        return all_ok

    def _set_states(self):
        self.dispatch_progress('Setting states')
        self._book.error = None
        if int(self.item.metadata['repub_state']) == 31:
            book_final_repub_state = 32
            self._download_type = 'corrections'
            self._book.do_end_download_correction()
        elif int(self.item.metadata['repub_state']) == 41:
            book_final_repub_state = 42
            self._download_type = 'foldouts'
            self._book.do_end_download_foldout()
        else:
            self._book.logger.error(
                'Error while processing item in repub_state {}'.format(
                    self.item.metadata['repub_state']))
            raise Exception(
                'remote repub state is inconsistent with book download')
        self._book.logger.info(
            'Setting remote repub_state to {}'.format(book_final_repub_state))
        mdapi_response = self.item.modify_metadata(
            {'repub_state': book_final_repub_state})
        self._book.logger.info(
            'Response from MDAPI: {}'.format(mdapi_response))
        if mdapi_response:
            self._mdapi_response_text = mdapi_response.text
            self._book.logger.info('Body of MDAPI: {}'.format(
                self._mdapi_response_text))
            if mdapi_response.status_code != 200:
                raise Exception(
                    'MDAPI response was not OK! - Got this instead: {} - {}'.format(
                        mdapi_response.status_code, mdapi_response.text))
            self._book.logger.info(
                'Download book: Set book repub_state to {}'.format(
                    book_final_repub_state))
            self._book_final_repub_state = book_final_repub_state
        else:
            raise Exception('No response from MDAPI. Aborting download.')

    def _send_stats(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'repub_state': self._book_final_repub_state,
            'checkout_info': self._checkout_info,
            'claimer': self._claimer,
            'files': self._files,
        }
        push_event('tts-book-downloaded', payload, 'book', self.identifier,
                   os.path.join(self._book.path, "iabdash.log"))
        self.notifications_manager.add_notification(
            title='Downloaded',
            message="{} has been downloaded and is ready for {}.".format(
                self.identifier, self._download_type),
            show_system_tile=False,
            book=self._book)

    def _release_lock(self):
        total_time = 100
        self._book.logger.info('Download book: ------ DONE. Downloaded {0} in '
                               '{1}s ----------'.format(self.identifier,
                                                        total_time))
        self._book.release_lock()

    def download_proxy_image(self, page, book, url):

        def is_proxy_valid(proxy_path):
            return True

        file_target = '{n:04d}.jpg'.format(n=int(page))
        dest = os.path.join(book.path, "thumbnails", file_target)
        if url is not None:
            image = self._ia_session.get(url).content
            with open(dest, 'wb+') as proxy:
                book.logger.debug('Writing {}'.format(dest))
                proxy.write(image)
                book.logger.info('Download book: Written {}'.format(
                    proxy.name))
        else:
            import shutil
            book.logger.debug('Page {} has no proxy, adding missing '
                              'image at {}'.format(page, dest))
            shutil.copyfile(scribe_globals.MISSING_IMAGE, dest)
        ret = is_proxy_valid(dest)
        return ret
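# Illustrative sketch: create_pipeline() returns a list of bound methods, and the
# TaskBase runner presumably calls them in order. Minimal stand-in for that pattern;
# MiniTask is hypothetical and ignores the state/error handling the real base class has.
class MiniTask:

    def create_pipeline(self):
        return [self._step_one, self._step_two]

    def dispatch_progress(self, message):
        print('progress:', message)

    def run(self):
        for step in self.create_pipeline():
            step()  # an exception aborts the remaining steps, like a failed task

    def _step_one(self):
        self.dispatch_progress('step one')

    def _step_two(self):
        self.dispatch_progress('step two')

# MiniTask().run()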
class DownloadCDTask(TaskBase):

    def __init__(self, **kwargs):
        super(DownloadCDTask, self).__init__(**kwargs)
        self._cd = None
        self._priority = 'medium'
        self._library = kwargs['library']
        self.identifier = kwargs['identifier']
        self.logger.info('Download CD: Downloading {}'.format(self.identifier))
        self.notifications_manager = NotificationManager()
        self._download_type = None
        self.start_time = None
        self.total_time = None

    def create_pipeline(self):
        return [
            self._begin,
            self._get_ia_session,
            self._load_item,
            self._verify_is_CD,
            self._verify_is_stub_item,
            self._create_stub_CD,
            self._create_files,
            self._create_scandata,
            self._set_states,
            self._release_lock,
            self._send_stats,
        ]

    def _begin(self):
        self.start_time = time.time()

    def handle_event(self, event_name, *args, **kwargs):
        if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR:
            if self._cd:
                self._cd.do_move_to_trash()
                self._cd.do_delete_anyway()

    def _get_ia_session(self):
        self.dispatch_progress('Getting IA session')
        self._ia_session = get_ia_session()

    def _load_item(self):
        self.dispatch_progress('Loading item')
        self.item = self._ia_session.get_item(self.identifier)
        self.logger.info('Download CD: target item: {}'.format(
            self.item.identifier))

    def _verify_is_CD(self):
        self.dispatch_progress('Verifying this is an ArchiveCD item')
        mediatype = self.item.metadata.get('mediatype')
        software_version = self.item.metadata.get('software_version')
        assert mediatype == 'audio', \
            'This is not an audio item. It is {}.'.format(mediatype)
        assert software_version is not None, \
            'This item was not created with ArchiveCD'
        assert 'ArchiveCD' in software_version, \
            'This item was not created with ArchiveCD'

    def _verify_is_stub_item(self):
        self.dispatch_progress('Verifying this is a stub item')
        stub_file = self.item.get_file('stub.txt')
        # if not stub_file.exists:
        #     raise Exception('No stub file found!')

    def _create_stub_CD(self):
        self.dispatch_progress('Creating local CD')
        message = ('This CD is being downloaded and no actions are '
                   'available just yet.')
        cd_id = str(uuid4())
        self._cd = self._library.new_cd(cd_id,
                                        status='download_incomplete',
                                        error=message)
        self._cd.set_lock()
        self._cd.logger.info('Download CD: Created stub CD {}'.format(
            self._cd))

    def _create_files(self):
        self.dispatch_progress('Downloading files')
        ret = []
        with open(os.path.join(self._cd.path, 'identifier.txt'), 'w+') as fp:
            fp.write(self.item.identifier)
            ret.append(fp.name)
        self._cd.logger.info('Download CD: Created {}'.format(fp.name))
        with open(os.path.join(self._cd.path, 'downloaded'), 'w+') as fp:
            fp.write('True')
            ret.append(fp.name)
        self._cd.logger.info('Download CD: Created {}'.format(fp.name))
        with open(os.path.join(self._cd.path, 'uuid'), 'w+') as fp:
            fp.write(self._cd.uuid)
            ret.append(fp.name)
        self._cd.logger.info('Download CD: Created {}'.format(fp.name))
        self.item.get_file(self.item.identifier + '_meta.xml') \
            .download(file_path=self._cd.path + '/metadata.xml')
        ret.append('{}'.format(self._cd.path + '/metadata.xml'))
        self._cd.logger.info('Download CD: Created metadata.xml')
        self._cd.reload_metadata()
        if not os.path.exists(os.path.join(self._cd.path, 'thumbs')):
            os.makedirs(os.path.join(self._cd.path, 'thumbs'))
            ret.append('{}'.format(self._cd.path + '/thumbs'))
        self._cd.logger.info('Download CD: Created thumbs directory')
        self.item.get_file(self.item.identifier + '_itemimage.png') \
            .download(file_path=self._cd.path + '/cover.png')
        ret.append('{}'.format(self._cd.path + '/cover.png'))
        self._cd.logger.info('Download CD: Downloaded cover')
        self._files = ret
        self._cd.logger.info('Download CD: Created files.')

    def _create_scandata(self):
        self.dispatch_progress('Creating scandata')
        self._scandata = ScanData(self._cd.path)
        self._scandata.save()
        self._cd.reload_scandata()
        self._cd.logger.info('Download CD: Created scandata.')

    def _set_states(self):
        self.dispatch_progress('Setting states')
        self._cd.do_finish_download()

    def _send_stats(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'files': self._files,
            'total_time': self.total_time,
        }
        push_event('tts-cd-downloaded', payload, 'cd', self.identifier,
                   os.path.join(self._cd.path, "iabdash.log"))
        self.notifications_manager.add_notification(
            title='Downloaded',
            message="CD {} has been downloaded.".format(self.identifier),
            show_system_tile=False,
            book=self._cd)

    def _release_lock(self):
        self.total_time = time.time() - self.start_time
        self._cd.logger.info('Download CD: ------ DONE. Downloaded {0} in '
                             '{1}s ----------'.format(self.identifier,
                                                      self.total_time))
        self._cd.release_lock()
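# Illustrative sketch: _create_files() above leaves a predictable folder layout behind.
# A small check for it, using the file names from the code above (the helper itself is
# hypothetical and not part of the task).
import os

EXPECTED_CD_FILES = ['identifier.txt', 'downloaded', 'uuid', 'metadata.xml', 'cover.png']

def looks_like_downloaded_cd(cd_path):
    """True if a folder contains the files DownloadCDTask creates."""
    files_ok = all(os.path.exists(os.path.join(cd_path, name))
                   for name in EXPECTED_CD_FILES)
    return files_ok and os.path.isdir(os.path.join(cd_path, 'thumbs'))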
class ReShootScreenBackend(WidgetBackend): EVENT_CAPTURE_LEAF = 'on_capture_leaf' EVENT_CURRENT_LEAF = 'on_current_leaf' EVENT_ROTATE_LEAF = 'on_rotate_leaf' EVENT_PAGE_TYPE = 'on_page_type' EVENT_SHOW_ORIGINAL_FILE = 'on_show_original_file' EVENT_SHOW_RESHOOT_FILE = 'on_show_reshoot_file' EVENT_SHOW_PAGE_TYPE_FORM_POPUP = 'on_show_page_type_form_popup' EVENT_GO_BACK = 'on_go_back' __events__ = (EVENT_CAPTURE_LEAF, EVENT_CURRENT_LEAF, EVENT_ROTATE_LEAF, EVENT_PAGE_TYPE, EVENT_GO_BACK, EVENT_SHOW_ORIGINAL_FILE, EVENT_SHOW_RESHOOT_FILE, EVENT_SHOW_PAGE_TYPE_FORM_POPUP) def __init__(self, **kwargs): super(ReShootScreenBackend, self).__init__(**kwargs) self._note_leafs = [] self._reverse_cams = False self._cameras_count = 0 self._current_leaf_index = 0 self._capture_running = False self._keyboard_action_handler = ReShootScreenKeyboardHandler(self) self.keyboard_detector = None self.book = None self.reopen_at = 0 self.scandata = None self.camera_system = None self.window = None def init(self): if not self.scandata: self.scandata = ScanData(self.book['path'], downloaded=True) self._note_leafs[:] = self.scandata.iter_flagged_leafs() try: leaf_index = self._note_leafs.index(self.reopen_at) except ValueError: leaf_index = 0 self.set_current_leaf_index(leaf_index) if not self.keyboard_detector: detector = ReShootActionDetector(RESHOOT_ACTION_BINDINGS) self.keyboard_detector = detector self._keyboard_action_handler.detector = self.keyboard_detector self._cameras_count = self.camera_system.cameras.get_num_cameras() self._capture_running = False self._reverse_cams = False self.config = Scribe3Configuration() super(ReShootScreenBackend, self).init() def reset(self): self.book = None self.reopen_at = None self.scandata = None self.camera_system = None self.window = None del self._note_leafs[:] self._current_leaf_index = 0 self._reverse_cams = False self._cameras_count = 0 self._capture_running = False super(ReShootScreenBackend, self).reset() def is_capture_running(self): return self._capture_running def is_reshoot_leaf_ready(self): path, thumb_path = self.get_current_reshoot_paths() return exists(path) and exists(thumb_path) def can_switch_cameras(self): return self._cameras_count > 1 def can_capture_spread(self): return cradle_closed and not self._capture_running and self._cameras_count > 0 def get_current_leaf_number(self): return self._note_leafs[self._current_leaf_index] def get_current_leaf_index(self): return self._current_leaf_index def set_current_leaf_index(self, index): max_index = max(0, len(self._note_leafs) - 1) if 0 <= index <= max_index and self._current_leaf_index != index: self._current_leaf_index = index self.dispatch(self.EVENT_CURRENT_LEAF) def get_leafs_count(self): return len(self._note_leafs) def get_book_metadata(self): md = get_metadata(self.book['path']) return { 'identifier': self.book.get('identifier', None), 'path': self.book['path'], 'title': md.get('title', None), 'creator': md.get('creator', md.get('author', None)), 'language': md.get('language', None) } def get_leaf_data(self): leaf_number = self.get_current_leaf_number() leaf_data = self.scandata.get_page_data(leaf_number) page_number_data = leaf_data.get('pageNumber', None) page_number = self._get_page_number(page_number_data) return { 'hand_side': leaf_data.get('handSide', None), 'page_number': page_number, 'page_type': leaf_data['pageType'], 'note': leaf_data.get('note', None) } def get_current_reshoot_paths(self): leaf_number = self.get_current_leaf_number() image_name = '{:04d}.jpg'.format(leaf_number) book_path = 
self.book['path'] path = join(book_path, 'reshooting', image_name) thumb_path = join(book_path, 'reshooting', 'thumbnails', image_name) ensure_dir_exists(join(book_path, 'reshooting')) ensure_dir_exists(join(book_path, 'reshooting', 'thumbnails')) return path, thumb_path def get_current_original_paths(self): leaf_number = self.get_current_leaf_number() image_name = '{:04d}.jpg'.format(leaf_number) book_path = self.book['path'] path = join(book_path, image_name) thumb_path = join(book_path, 'thumbnails', image_name) return path, thumb_path def _get_page_number(self, page_number_data): # TODO: Remove this method when scandata structure becomes the same # for reshooting mode and otherwise if page_number_data: if isinstance(page_number_data, dict): page_number = page_number_data.get('num', None) return None if page_number is None else int(page_number) elif isinstance(page_number_data, str): return int(page_number_data) return None def goto_previous_leaf(self, *args): self.set_current_leaf_index(self._current_leaf_index - 1) def goto_next_leaf(self, *args): self.set_current_leaf_index(self._current_leaf_index + 1) def goto_first_leaf(self, *args): self.set_current_leaf_index(0) def goto_last_leaf(self, *args): max_index = max(0, len(self._note_leafs) - 1) self.set_current_leaf_index(max_index) def goto_rescribe_screen(self, *args): self.dispatch(self.EVENT_GO_BACK) def show_original_file(self, *args): self.dispatch(self.EVENT_SHOW_ORIGINAL_FILE) def show_reshoot_file(self, *args): if self.is_reshoot_leaf_ready(): self.dispatch(self.EVENT_SHOW_RESHOOT_FILE) def show_page_type_form_popup(self, *args): if self.is_reshoot_leaf_ready(): self.dispatch(self.EVENT_SHOW_PAGE_TYPE_FORM_POPUP) def save_leaf_note(self, note): scandata = self.scandata leaf_number = self.get_current_leaf_number() if scandata.get_note(leaf_number) != note: scandata.set_note(leaf_number, note) scandata.save() if note: self.logger.info( 'ReShootScreenBackend: Updated leaf %d with note: %s' % (leaf_number, '\n%s' % note if '\n' in note else note)) else: self.logger.info( 'ReShootScreenBackend: Removed note from leaf {}'.format( leaf_number)) def update_page_type(self, leaf_number, page_type): scandata = self.scandata scandata.update_page_type(leaf_number, page_type) scandata.save() self.dispatch(self.EVENT_PAGE_TYPE, page_type) def update_leaf_rotation_if_necessary(self, leaf_number): if self._cameras_count == 1: self.logger.info( 'ReShootScreenBackend: Reshooting in single-camera mode, will ' 'rotate by system default of {} degrees'.format( self.config.get_integer('default_single_camera_rotation', 180))) new_degree = self.config.get_integer( 'default_single_camera_rotation', 180) self.scandata.update_rotate_degree(leaf_number, new_degree) self.scandata.save() self.logger.info( 'ReShootScreenBackend: Set leaf {} rotation to {} degree(s)'. 
format(leaf_number, new_degree)) def enable_keyboard_actions(self, *args): self._keyboard_action_handler.enable() def disable_keyboard_actions(self, *args): self._keyboard_action_handler.disable() def are_keyboard_actions_enabled(self): return self._keyboard_action_handler.is_enabled() def switch_cameras(self, *args): if not self._capture_running and self.can_switch_cameras(): self._reverse_cams = not self._reverse_cams self.logger.info('ReShootScreen: Switched cameras') self.capture_spread() def are_cameras_switched(self): return self._reverse_cams def capture_spread(self, *args): if not self.can_capture_spread(): return if not has_free_disk_space(self.book['path']): self.logger.info('capture_spread: the disk is full!') report = {camera_system.KEY_ERROR: DiskFullError()} self.dispatch(self.EVENT_CAPTURE_LEAF, report) return leaf_number = self.get_current_leaf_number() side = self._get_capture_camera_side(leaf_number) path, thumb_path = self.get_current_reshoot_paths() camera_kwargs = self._create_camera_kwargs(side, leaf_number) self.logger.info( 'ReShootScreen: Capturing new image for leaf {}, camera side {}, ' '{}using reversed cameras'.format( leaf_number, side, '' if self._reverse_cams else 'not ')) self._capture_running = True self.delete_current_spread() self.update_leaf_rotation_if_necessary(leaf_number) report = {camera_system.KEY_CAPTURE_START: True} self.dispatch(self.EVENT_CAPTURE_LEAF, report) self.camera_system.left_queue.put(camera_kwargs) def _get_capture_camera_side(self, leaf_number): if self._cameras_count == 1: camera_side = 'foldout' else: camera_side = 'left' if leaf_number % 2 == 0 else 'right' if self._reverse_cams: camera_side = 'left' if camera_side == 'right' else 'right' return camera_side def _capture_spread_end(self, report, *args): self._capture_running = False report[camera_system.KEY_CAPTURE_END] = True if self.is_initialized(): stats = report[camera_system.KEY_STATS] leaf_number = report[camera_system.KEY_EXTRA]['leaf_number'] self.scandata.set_capture_time(leaf_number, stats['capture_time']) self.dispatch(self.EVENT_CAPTURE_LEAF, report) def delete_current_spread(self, *args): path, thumb_path = self.get_current_reshoot_paths() self._delete_file(path) self._delete_file(thumb_path) def _delete_file(self, path): if exists(path): os.remove(path) self.logger.info('ReShootScreenBackend: Removed: {}'.format(path)) def rotate_reshoot_leaf(self, *args): scandata_rotation_angle = 90 path, thumb_path = self.get_current_reshoot_paths() if not self.is_reshoot_leaf_ready(): self.logger.error( 'ReShootScreen: Failed to rotate. 
Image not found: {}'.format( thumb_path)) return leaf_number = self.get_current_leaf_number() leaf_data = self.scandata.get_page_data(leaf_number) current_degree = int(leaf_data.get('rotateDegree', 0)) new_degree = (current_degree + scandata_rotation_angle) % 360 self.scandata.update_rotate_degree(leaf_number, new_degree) self.scandata.save() rotate_by = convert_scandata_angle_to_thumbs_rotation( new_degree, scandata_rotation_angle) image = Image.open(path) size = (1500, 1000) # (6000,4000)/4 image.thumbnail(size) image = image.rotate(rotate_by, expand=True) image.save(thumb_path, 'JPEG', quality=90) self.logger.info( 'ReShootScreenBackend: Set leaf {} rotation to {} degree(s) in scandata ( {} thumbs-equivalent) from {}' .format(leaf_number, new_degree, rotate_by, current_degree)) self.dispatch(self.EVENT_ROTATE_LEAF) def _create_camera_kwargs(self, camera_side, leaf_number): path, thumb_path = self.get_current_reshoot_paths() return { camera_system.KEY_CALLBACK: self._capture_spread_end, camera_system.KEY_SIDE: camera_side, camera_system.KEY_PATH: path, camera_system.KEY_THUMB_PATH: thumb_path, camera_system.KEY_EXTRA: { 'leaf_number': leaf_number } } def on_capture_leaf(self, report): pass def on_current_leaf(self, *args): pass def on_rotate_leaf(self, *args): pass def on_page_type(self, *args): pass def on_show_original_file(self, *args): pass def on_show_reshoot_file(self, *args): pass def on_show_page_type_form_popup(self, *args): pass def on_go_back(self, *args): pass
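# Illustrative sketch: rotation handling above keeps the scandata angle in 90-degree
# steps, and single-camera reshoots fall back to a configured default (180 unless
# overridden). Standalone versions of those two rules; function names are hypothetical.
def next_rotation(current_degree, step=90):
    """Angle written back to scandata after one rotate_reshoot_leaf() action."""
    return (int(current_degree) + step) % 360

def reshoot_rotation(cameras_count, configured_default=180):
    """Rotation forced at capture time; only applies in single-camera mode."""
    return configured_default if cameras_count == 1 else None

# next_rotation(270) -> 0 ; reshoot_rotation(1) -> 180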
class ImportFolderTask(TaskBase): def __init__(self, **kwargs): super(ImportFolderTask, self).__init__(**kwargs) self.source_path = kwargs['path'] self.library = kwargs['library'] self.book_obj = None self.image_stack = None self.scandata = None self.metadata = None self.DEFAULT_FIELDS_AND_VALUES = [ ('operator', get_sc_metadata()['operator']), ('scanningcenter', get_sc_metadata()['scanningcenter']), ('ppi', Scribe3Configuration().get_numeric_or_none('ppi')), ] self.do_not_rotate = True def create_pipeline(self): return [ self._load_directory, self._verify_preconditions, self._make_book_object, self._load_metadata, self._augment_metadata, self._write_metadata, self._load_image_stack, self._make_scandata, self._check_for_missing_images, self._move_image_stack, self._generate_thumbs, ] def handle_event(self, event_name, *args, **kwargs): if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR: if self.book_obj: self.book_obj.do_move_to_trash() self.book_obj.do_delete_anyway() def _load_directory(self): self.dispatch_progress('Loading directory') if not [ f for f in os.listdir(self.source_path) if not f.startswith('.') ]: raise Exception('The folder you selected is empty') self.directory_list = list(os.walk(os.path.join(self.source_path)))[0] def _verify_preconditions(self): self.dispatch_progress('Verifying preconditions') if '0000.jpg' not in self.directory_list[2]: raise Exception('No image stack provided') def _make_book_object(self): self.dispatch_progress('Making book object') generated_uuid = str(uuid4()) self.book_obj = self.library.new_book(generated_uuid) def _load_metadata(self): self.dispatch_progress('Loading metadata') if 'metadata.xml' in self.directory_list[2]: self.metadata = get_metadata(self.source_path) else: self.metadata = {} def _augment_metadata(self): for field, default_value in self.DEFAULT_FIELDS_AND_VALUES: if field not in self.metadata and default_value is not None: self.metadata[field] = default_value def _load_image_stack(self): self.dispatch_progress('Loading image stack') self.image_stack = sorted([ k for k in self.directory_list[2] if re.match('\d{4}\.jpg$', os.path.basename(k)) ]) # consider .*[^\d]\d{4}.jpg def _make_scandata(self): self.dispatch_progress('Generating scandata') self.scandata = ScanData(self.book_obj.path) for image in self.image_stack: if image == '0000.jpg': leaf_number = 0 else: leaf_number = self.__extract_number_from_file(image) side = 'left' if leaf_number % 2 == 0 else 'right' page_type = 'Normal' if image == '0000.jpg': page_type = 'Color Card' elif image == '0001.jpg': page_type = 'Cover' elif leaf_number == len(self.image_stack) - 1: page_type = 'Color Card' self.scandata.insert(leaf_number, side, page_type) if self.do_not_rotate: self.scandata.update_rotate_degree(leaf_number, 0) def _check_for_missing_images(self): self.dispatch_progress('Checking for image stack integrity') if not (self.source_path and self.scandata): raise Exception('Cover image is missing!') max_leaf_number = self.scandata.get_max_leaf_number() if max_leaf_number is None or max_leaf_number < 1: raise Exception('Cover image is missing!') for leaf_number in range(max_leaf_number + 1): leaf_data = self.scandata.get_page_data(leaf_number) image_path = os.path.join(self.source_path, '{:04d}.jpg'.format(leaf_number)) if not (leaf_data and os.path.exists(image_path)): if leaf_number == 0 or leaf_number == 1: raise Exception('Cover image is missing!') raise Exception('Image #{} is missing'.format(leaf_number)) self.scandata.save() 
self.book_obj.reload_scandata() def _write_metadata(self): self.dispatch_progress('Writing metadata') set_metadata(self.metadata, self.book_obj.path) self.book_obj.reload_metadata() self.book_obj.do_create_metadata() def _move_image_stack(self): self.dispatch_progress('Relocating image stack') for image in self.image_stack: source = os.path.join(self.source_path, image) destination = os.path.join(self.book_obj.path, image) shutil.copy(source, destination) def _generate_thumbs(self): self.dispatch_progress('Generating thumbs') for n, image in enumerate(self.image_stack): self.dispatch_progress('Generating thumbs [{}/{}]'.format( n, len(self.image_stack))) source_image = os.path.join(self.book_obj.path, image) target_image = os.path.join(self.book_obj.path, 'thumbnails', image) current_degree = int( self.scandata.get_page_data(n).get('rotateDegree', 0)) rotate_by = convert_scandata_angle_to_thumbs_rotation( current_degree, None) thumbnail_size = (1500, 1000) if Scribe3Configuration().is_true('low_res_proxies'): thumbnail_size = (750, 500) image = Image.open(source_image) image.thumbnail(thumbnail_size) image = image.rotate(rotate_by, expand=True) image.save(target_image, 'JPEG', quality=90) @staticmethod def __extract_number_from_file(filename): number = filename.split('.jpg')[0] ret = number.lstrip('0') return int(ret)
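# Illustrative sketch: _make_scandata() above assigns leaf number, hand side and page
# type purely from the file name and its position in the stack. The same rules as a
# standalone helper (hypothetical name, behaviour mirrored from the code above).
def classify_leaf(filename, stack_size):
    leaf_number = int(filename.split('.jpg')[0].lstrip('0') or '0')  # '0000.jpg' -> 0
    side = 'left' if leaf_number % 2 == 0 else 'right'
    if leaf_number == 0:
        page_type = 'Color Card'
    elif leaf_number == 1:
        page_type = 'Cover'
    elif leaf_number == stack_size - 1:
        page_type = 'Color Card'
    else:
        page_type = 'Normal'
    return leaf_number, side, page_type

# classify_leaf('0001.jpg', 10) -> (1, 'right', 'Cover')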
def upload_book(book):
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])
    _check_preconditons(book)
    # book.do_book_upload_begin()
    _set_upload_lock_file(book, Logger)
    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])
        zip_path = _check_preimage_is_valid(book)
        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))
        if not book.force_upload:
            _check_remote_preconditons(item, Logger)
        encoded_md = _prepare_metadata(book, item, Logger)
        metasource_file_location, metasource_file_upload_name = \
            _generate_metasource(book, Logger)
        responses = []
        book_upload_phase_start = time.time()
        needs_metadata_pushed = item.exists
        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))
        book_preimage_upload_start, \
            book_preimage_upload_end, \
            sizes_dict = _upload_book_files(zip_path, book, encoded_md, item,
                                            responses,
                                            metasource_file_location,
                                            metasource_file_upload_name,
                                            Logger)
        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)
        book_upload_phase_end = time.time()
        _upload_logs(book=book, item=item, responses=responses)
        _verify_responses(responses, Logger)
        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))
        book.do_upload_book_end()
        _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                      responses, responses_dict, book_upload_phase_start,
                      book_upload_phase_end, book_upload_total_start,
                      book_preimage_upload_start, book_preimage_upload_end)
        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message="{} has been successfully uploaded.".format(
                    book['identifier']),
                book=book)
        Logger.debug('Finished upload for ' + book['identifier'])
        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api
    except requests.ConnectionError as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }
        push_event('tts-book-failed-upload', payload, 'book',
                   book['identifier'])
        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e)
        }
        push_event('tts-book-upload-exception', payload, 'book',
                   book['identifier'])
        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        book.force_upload = False
        Logger.info("Removing upload lock file at {}".format(
            join(book['path'], "upload_lock")))
        os.remove(join(book['path'], "upload_lock"))
def verify_uploaded(book):
    ia_session = get_ia_session()
    book.logger.info(
        'verify_uploaded: Verifying {} was uploaded to the cluster.'.format(
            book))
    # We do have the identifier in the book dictionary, but we only trust
    # what's on the drive for this one.
    identifier = book.identifier
    if not identifier:
        book.logger.info(
            'verify_uploaded: No identifier.txt. Assuming empty book and '
            'deleting. {}'.format(book))
        return True
    book.logger.info(
        'verify_uploaded: Read {} from identifier.txt.'.format(book))

    # Gather data
    i = ia_session.get_item(identifier)
    repub_state = int(i.metadata['repub_state']) \
        if 'repub_state' in i.metadata else None
    book.logger.info('verify_uploaded: repub_state {}'.format(repub_state))
    scandate = datetime.strptime(i.metadata['scandate'], '%Y%m%d%H%M%S') \
        if 'scandate' in i.metadata else None
    book.logger.info('verify_uploaded: scandate {}'.format(scandate))
    # scanner = i.metadata['scanner'] if 'scanner' in i.metadata else None
    # book.logger.info('verify_uploaded: scanner {}'.format(scanner))
    # this_scanner = config.get('identifier', 0)
    tasks_running, tasks_list = get_pending_catalog_tasks(i)
    book.logger.info(
        'verify_uploaded: pending book_tasks {}'.format(tasks_running))
    local_imgcount = int(ScanData(book.path).count_pages())
    remote_imgcount = int(i.metadata['imagecount']) \
        if 'imagecount' in i.metadata else None
    book.logger.info('verify_uploaded: local pages: {} '
                     '| remote pages: {}'.format(local_imgcount,
                                                 remote_imgcount))

    # These are here so you can bypass one easily by setting it to True
    scandate_ok = False
    repub_state_ok = False
    tasks_running_ok = False
    # scanner_ok = False
    imgcount_ok = True

    # Policies
    if not repub_state:
        repub_state_ok = True
    elif repub_state > 10:
        repub_state_ok = True

    threshold = config.get_numeric_or_none('defer_delete_by')
    if threshold and scandate:
        if not datetime.now() - timedelta(hours=threshold) \
                <= scandate <= datetime.now():
            scandate_ok = True
    else:
        # If the user doesn't specify a value, delete immediately
        scandate_ok = True

    if tasks_running == 0:
        tasks_running_ok = True

    if remote_imgcount:
        if local_imgcount == remote_imgcount:
            imgcount_ok = True
        else:
            imgcount_ok = True

    # Aggregate and return
    ret = scandate_ok and repub_state_ok and tasks_running_ok and imgcount_ok
    if book.force_delete:
        ret = True
    book.logger.info(
        'verify_uploaded: Do selectors allow for deletion?'
        ' scandate ok: {} | repub_state_ok {} '
        '| book_tasks ok: {} | imgcount_ok: {} | Force delete: {} -->>> '
        'VERDICT: {}'.format(scandate_ok, repub_state_ok, tasks_running_ok,
                             imgcount_ok, book.force_delete, ret))
    return ret
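# Illustrative sketch: the deletion policy in verify_uploaded() reduces to a few
# independent checks that are ANDed together (the imagecount check is effectively
# always true above, so it is omitted here). Condensed, hypothetical helper; the
# defer_delete_by threshold is in hours, as in the config lookup above.
from datetime import datetime, timedelta

def deletion_allowed(repub_state, scandate, pending_tasks, defer_hours=None, force=False):
    repub_state_ok = not repub_state or repub_state > 10
    if defer_hours and scandate:
        # Only delete once the scandate has fallen outside the deferral window.
        scandate_ok = not (datetime.now() - timedelta(hours=defer_hours)
                           <= scandate <= datetime.now())
    else:
        scandate_ok = True
    return force or (repub_state_ok and scandate_ok and pending_tasks == 0)

# deletion_allowed(32, datetime(2020, 1, 1), pending_tasks=0, defer_hours=48) -> True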