Ejemplo n.º 1
0
    def _create_scandata(self):
        """Generate ``scandata.json`` for the book from its ``scandata.xml``.

        The XML is validated, normalized and converted to JSON through
        ``book_helpers``; the Scribe3-style ``bookData``/``pageData``
        structure built from it is written to ``scandata.json`` in the book
        directory. A ``ScanData`` object is then created for that directory
        and saved, and the book is asked to reload its scandata.
        """
        self.dispatch_progress('Creating scandata')
        book = self._book
        log = book.logger

        xml_path = os.path.join(book.path, 'scandata.xml')
        validated_tree = book_helpers.validate_scandata_xml(xml_path, book)
        normalized_xml = book_helpers.create_normalized_scandata(
            validated_tree, book)
        source_json = book_helpers.convert_normalized_scandata_to_json(
            normalized_xml)

        log.info('Download book: Now converting to Scribe3 JSON')
        converted = {
            'bookData': book_helpers.build_bookdata(source_json, book),
            'pageData': book_helpers.build_pagedata(source_json, book),
        }

        json_path = os.path.join(book.path, 'scandata.json')
        with open(json_path, 'w') as outfile:
            json.dump(converted, outfile)
            log.info('Download book: Created {}'.format(outfile.name))

        self._scandata = ScanData(book.path)
        self._scandata.save()

        book.reload_scandata()

        log.info('Download book: Created scandata.')
Ejemplo n.º 2
0
 def load_scandata(self):
     """Fetch the book object from the library and load its scandata.

     The basename of the book's path is used as the uuid for the library
     lookup; the scandata is loaded with ``downloaded=True``.
     """
     path = self.book['path']
     self.book_obj = self._books_db.get_book(basename(path))
     self.scandata = ScanData(path, downloaded=True)
     message = 'ReScribeScreen: Loaded scandata from directory: {}'
     Logger.info(message.format(path))
Ejemplo n.º 3
0
    def _create_scandata(self):
        """Create and save a ``ScanData`` for the CD, then reload it."""
        self.dispatch_progress('Creating scandata')

        cd = self._cd
        self._scandata = ScanData(cd.path)
        self._scandata.save()
        cd.reload_scandata()

        cd.logger.info('Download CD: Created scandata.')
Ejemplo n.º 4
0
 def reload_scandata(self):
     """Rebuild ``self.scandata`` and refresh the values derived from it.

     Updates the page and note counts plus the created/modified dates, then
     calls ``self.notify('reloaded_scandata')`` if a notifier is set.
     """
     fresh = ScanData(self.path)
     self.scandata = fresh
     self.leafs = fresh.count_pages()
     self.notes_count = fresh.count_notes()

     self.date_last_updated = self.load_last_modified_from_disk(
         including_move_along=True)
     self.date_last_modified = self.load_last_modified_from_disk()
     self.date_created = self.load_date_created_from_disk()

     notify = self.notify
     if notify:
         notify('reloaded_scandata')
Ejemplo n.º 5
0
 def init(self):
     """Prepare the backend state before delegating to the parent ``init``.

     Loads scandata if none was supplied, collects the flagged leafs,
     positions the current leaf at ``reopen_at`` (or at the first flagged
     leaf when ``reopen_at`` is not among them), wires the keyboard detector
     and resets the capture-related flags.
     """
     if not self.scandata:
         self.scandata = ScanData(self.book['path'], downloaded=True)
     self._note_leafs[:] = self.scandata.iter_flagged_leafs()

     try:
         start_index = self._note_leafs.index(self.reopen_at)
     except ValueError:
         # reopen_at is not a flagged leaf: start from the first one.
         start_index = 0
     self.set_current_leaf_index(start_index)

     if not self.keyboard_detector:
         self.keyboard_detector = ReShootActionDetector(
             RESHOOT_ACTION_BINDINGS)
     self._keyboard_action_handler.detector = self.keyboard_detector

     cameras = self.camera_system.cameras
     self._cameras_count = cameras.get_num_cameras()
     self._capture_running = self._reverse_cams = False
     self.config = Scribe3Configuration()
     super(ReShootScreenBackend, self).init()
Ejemplo n.º 6
0
 def _make_scandata(self):
     """Create scandata with one entry per image in ``self.image_stack``.

     ``0000.jpg`` is leaf 0 and a 'Color Card'; ``0001.jpg`` is the 'Cover';
     the leaf whose number equals ``len(image_stack) - 1`` is also a
     'Color Card'; everything else is 'Normal'. Even leafs are 'left', odd
     leafs are 'right'. When ``do_not_rotate`` is set, each leaf's rotate
     degree is forced to 0.
     """
     self.dispatch_progress('Generating scandata')
     self.scandata = ScanData(self.book_obj.path)
     for image in self.image_stack:
         if image == '0000.jpg':
             leaf_number = 0
             page_type = 'Color Card'
         else:
             leaf_number = self.__extract_number_from_file(image)
             if image == '0001.jpg':
                 page_type = 'Cover'
             elif leaf_number == len(self.image_stack) - 1:
                 page_type = 'Color Card'
             else:
                 page_type = 'Normal'

         if leaf_number % 2 == 0:
             side = 'left'
         else:
             side = 'right'

         self.scandata.insert(leaf_number, side, page_type)
         if self.do_not_rotate:
             self.scandata.update_rotate_degree(leaf_number, 0)
Ejemplo n.º 7
0
    def __init__(self, book_dict, callback=None, delete_callback=None):
        """Build the book, load its scandata and pick up descriptive fields.

        ``creator``, ``volume``, ``shiptracking`` and ``boxid`` are taken
        from ``book_dict`` when present there, otherwise from
        ``self.metadata``, otherwise they are ``None``.
        """
        print("[Book::init()] Creating book object from ->", book_dict)

        super(Book, self).__init__(book_dict, callback, delete_callback)

        self.scandata = ScanData(self.path)
        self.leafs = self.scandata.count_pages()
        self.notes_count = self.scandata.count_notes()

        def lookup(key):
            # book_dict takes precedence over self.metadata.
            if key in book_dict:
                return book_dict[key]
            if key in self.metadata:
                return self.metadata[key]
            return None

        self.creator = lookup('creator')
        self.volume = lookup('volume')

        self.shiptracking_id = lookup('shiptracking')

        self.boxid = lookup('boxid')
Ejemplo n.º 8
0
class ReScribeScreen(TooltipScreen, Screen):
    """Screen listing the flagged ("note") leafs of a book for reshooting.

    On entry the screen loads the book's scandata, collects every flagged
    leaf together with the paths of its original and reshot thumbnails, and
    fills the menu bar, the book info panel and the leaf list. From here the
    user can open the reshoot screen at a leaf, edit public or internal
    notes, or trigger the corrections upload.
    """

    cover_image = StringProperty(MISSING_IMAGE)
    book = ObjectProperty(None)
    scandata = ObjectProperty(None)
    scribe_widget = ObjectProperty(None)
    screen_manager = ObjectProperty(None)

    def __init__(self, **kwargs):
        # These attributes are set before the parent constructor runs.
        self._note_leafs = []
        self.book_obj = None
        super(ReScribeScreen, self).__init__(**kwargs)
        Clock.schedule_once(self._postponed_init)

    def _postponed_init(self, *args):
        """Bind child widget events and create the library handle.

        Scheduled through ``Clock.schedule_once`` from ``__init__``.
        """
        menu = self.ids.menu_bar
        menu.fbind(menu.EVENT_OPTION_SELECT, self.on_menu_bar_option_select)
        view = self.ids.note_leafs_view
        view.fbind(view.EVENT_LEAF_SELECT, self.on_note_leaf_select)
        self._books_db = Library()

    def on_pre_enter(self):
        """Reload all data and refresh every widget of the screen."""
        self.load_scandata()
        self.load_note_leafs()
        self.setup_menu_bar()
        self.setup_book_info_panel()
        self.setup_note_leafs_view()

    def load_scandata(self):
        """Fetch the book object from the library and load its scandata.

        The basename of the book's path is used as the uuid for the lookup.
        """
        book_path = self.book['path']
        book_uuid = basename(book_path)
        self.book_obj = self._books_db.get_book(book_uuid)
        self.scandata = ScanData(book_path, downloaded=True)
        Logger.info(
            'ReScribeScreen: Loaded scandata from directory: {}'.format(
                book_path))

    def load_note_leafs(self):
        """Rebuild ``self._note_leafs`` from the flagged leafs in scandata.

        The list is refilled in place. Each entry is a dict with the
        original and reshot thumbnail paths, the leaf number, page number,
        page type, note text and a ``status`` that is 1 when the reshot
        thumbnail exists on disk and 0 otherwise.
        """
        leafs = self._note_leafs
        del leafs[:]
        scandata = self.scandata
        book_path = self.book['path']
        original_path = join(book_path, 'thumbnails')
        reshoot_path = join(book_path, 'reshooting', 'thumbnails')
        for note_leaf in scandata.iter_flagged_leafs():
            leaf_data = scandata.get_page_data(note_leaf)
            image_name = '{:04d}.jpg'.format(note_leaf)
            reshoot_image_path = join(reshoot_path, image_name)
            page_number = leaf_data.get('pageNumber', None)
            leafs.append({
                'original_image': join(original_path, image_name),
                'reshoot_image': reshoot_image_path,
                'leaf_number': note_leaf,
                'page_number': self._get_page_number(page_number),
                'page_type': leaf_data['pageType'],
                'note': leaf_data.get('note', None) or u'',
                'status': 1 if exists(reshoot_image_path) else 0
            })

    def _get_page_number(self, page_number_data):
        """Return the page number as an int, or ``None`` when unavailable.

        Accepts either a dict carrying the number under ``'num'`` or a plain
        string; any other (or empty) value yields ``None``.
        """
        # TODO: Remove this method when scandata structure becomes the same
        # for reshooting mode and otherwise
        if page_number_data:
            if isinstance(page_number_data, dict):
                page_number = page_number_data.get('num', None)
                return None if page_number is None else int(page_number)
            elif isinstance(page_number_data, str):
                return int(page_number_data)
        return None

    def setup_menu_bar(self):
        """Show the book identifier and enable reshooting if leafs exist."""
        menu = self.ids.menu_bar
        menu.identifier = self.book['identifier']
        #menu.upload_button_disabled = not self.is_rescribing_complete()
        menu.reshooting_button_disabled = not bool(self._note_leafs)

    def setup_book_info_panel(self):
        """Fill the info panel with cover image, claimer and metadata."""
        panel = self.ids.book_info_panel
        panel.scroll_y = 1.0
        cover_image = join(self.book['path'], 'thumbnails', '0001.jpg')
        if not exists(cover_image):
            cover_image = MISSING_IMAGE
        panel.cover_image = cover_image
        panel.claimer = self.get_claimer()
        panel.update_from_metadata(get_metadata(self.book['path']))

    def get_claimer(self):
        """Return the content of the book's ``claimer`` file.

        ``NONE_STR`` is returned when the file is missing or empty.
        """
        path = join(expanduser(self.book['path']), 'claimer')
        if exists(path):
            with open(path, 'r') as f:
                return f.read() or NONE_STR
        return NONE_STR

    def setup_note_leafs_view(self):
        """Copy the collected leafs into the list view and refresh it."""
        view = self.ids.note_leafs_view
        view.leafs[:] = self._note_leafs
        view.refresh_views()

    def popup_dismiss_to_home(self, popup, *args):
        """Dismiss *popup* without animation and go to the home screen."""
        popup.dismiss(animation=False)
        self.go_to_home()

    def go_to_home(self, *args, **kwargs):
        """Switch the screen manager to ``'upload_screen'``."""
        self.screen_manager.transition.direction = 'left'
        self.screen_manager.current = 'upload_screen'

    def on_menu_bar_option_select(self, menu, option):
        """Dispatch a menu bar selection to the matching action."""
        if option == menu.OPTION_UPLOAD:
            self.package_and_schedule_for_upload()
        elif option == menu.OPTION_FIRST_LEAF:
            leaf_number = self.find_first_non_reshoot_leaf_number()
            # Compare against None explicitly: leaf 0 is falsy, and a plain
            # truthiness test would skip it.
            if leaf_number is not None:
                self.open_book_at_leaf(leaf_number)
        elif option == menu.OPTION_PUBLIC_NOTES:
            metadata = get_metadata(self.book['path'])
            notes = metadata.get('notes', None) or ''
            popup = BookNotesPopup(title='Edit public book notes', notes=notes)
            popup.bind(on_submit=self.on_book_notes_submit)
            popup.open()
        elif option == menu.OPTION_INTERNAL_NOTES:
            internal_notes = self.scandata.get_internal_book_notes() or ''
            popup = BookNotesPopup(title='Edit internal book notes',
                                   notes=internal_notes)
            popup.bind(on_submit=self.on_internal_book_notes_submit)
            popup.open()

    def on_book_notes_submit(self, popup, notes):
        """Store (or remove) the public notes in the book metadata.

        Nothing is written when the stripped notes equal the stored ones.
        """
        metadata = get_metadata(self.book['path'])
        metadata_notes = metadata.get('notes', None) or ''
        notes = notes.strip()
        if metadata_notes != notes:
            if notes:
                metadata['notes'] = notes
                message = 'Saved public book notes: %s' \
                          % ('\n%s' % notes if '\n' in notes else notes)
            else:
                metadata.pop('notes', None)
                message = 'Removed public book notes'
            set_metadata(metadata, self.book['path'])
            Logger.info('ReScribeScreen: %s' % message)
            self.book_obj.reload_metadata()

    def on_internal_book_notes_submit(self, popup, notes):
        """Store (or clear) the internal notes in scandata and save it.

        Nothing is written when the stripped notes equal the stored ones.
        """
        scandata = self.scandata
        internal_notes = scandata.get_internal_book_notes() or ''
        notes = notes.strip()
        if internal_notes != notes:
            scandata.set_internal_book_notes(notes)
            scandata.save()
            if notes:
                message = 'Saved internal book notes: %s' \
                          % ('\n%s' % notes if '\n' in notes else notes)
            else:
                message = 'Removed internal book notes'
            Logger.info('ReScribeScreen: %s' % message)
            self.book_obj.reload_scandata()

    def on_note_leaf_select(self, note_leafs_view, note_leaf):
        """Open the reshoot screen at the leaf selected in the list."""
        self.open_book_at_leaf(note_leaf['leaf_number'])

    def find_first_non_reshoot_leaf_number(self):
        """Return the first leaf number that still needs a reshoot.

        Falls back to the first flagged leaf when all of them have a reshot
        image, and to ``None`` when there are no flagged leafs at all.
        """
        for note_leaf_data in self._note_leafs:
            if note_leaf_data['status'] == 0:
                return note_leaf_data['leaf_number']
        if self._note_leafs:
            return self._note_leafs[0]['leaf_number']
        return None

    def open_book_at_leaf(self, leaf_number):
        """Switch to the reshoot screen positioned at *leaf_number*.

        The reshoot screen is created and added to the screen manager the
        first time it is needed.
        """
        Logger.debug('ReScribeScreen: Trying to open book with id: {}'.format(
            self.book['identifier']))
        screen_name = 'reshoot_screen'
        try:
            capture_screen = self.screen_manager.get_screen(screen_name)
        except Exception:
            # The screen is not registered yet: create it.
            capture_screen = ReShootScreen(name=screen_name)
            self.screen_manager.add_widget(capture_screen)
            capture_screen.pos = self.screen_manager.pos
        capture_screen.book = self.book
        capture_screen.reopen_at = leaf_number
        capture_screen.scandata = self.scandata
        capture_screen.screen_manager = self.screen_manager
        capture_screen.scribe_widget = self.scribe_widget
        # NOTE: a camera-port check that redirected to 'calibration_screen'
        # when a configured camera was missing used to live here. It is
        # disabled, so the reshoot screen is always opened directly.
        self.screen_manager.transition.direction = 'left'
        self.screen_manager.current = screen_name

    def is_rescribing_complete(self):
        """Return True when every flagged leaf has a reshot image on disk.

        A book without any flagged leaf is reported as not complete.
        """
        if not self._note_leafs:
            return False
        return all(exists(leaf_data['reshoot_image'])
                   for leaf_data in self._note_leafs)

    def package_and_schedule_for_upload(self):
        """Start the corrections upload, or show an error if not ready."""
        if self.is_rescribing_complete():
            self.action = UploadCorrectionsBookActionMixin(
                book=self.book_obj,
                task_scheduler=self.scribe_widget.task_scheduler,
                done_action_callback=self.go_to_home)
            self.action.display()
        else:
            msg = 'ReScribeScreen: Book is not done rescribing'
            popup = InfoPopup(title='Error', message=msg, auto_dismiss=False)
            popup.bind(on_submit=popup.dismiss)
            popup.open()
            Logger.error(msg)
Ejemplo n.º 9
0
 def test__init_ppi_for_bookdata(self):
     """A ``ScanData`` built for the test path exposes a 'ppi' book datum."""
     ppi = ScanData(self.path).get_bookdata('ppi')
     self.assertIsNotNone(ppi)
Ejemplo n.º 10
0
class Book(Scribe3Item):
    """A book item stored in a directory, with scandata, metadata and slips.

    Besides the state carried by ``Scribe3Item`` the book keeps a
    ``ScanData`` object and the page/note counts derived from it, and offers
    helpers that inspect marker files in the book directory (``downloaded``,
    ``preloaded``, ``claimer``, ``send_to_station``, slip images and slip
    metadata). The ``*_wrapper`` methods take an extra event argument ``e``
    which they do not use; each logs and returns the result of the check it
    wraps.
    """
    state_machine = book_state_machine

    def __init__(self, book_dict, callback=None, delete_callback=None):
        """Build the book, load its scandata and pick up descriptive fields.

        ``creator``, ``volume``, ``shiptracking`` and ``boxid`` are taken
        from ``book_dict`` when present there, otherwise from
        ``self.metadata``, otherwise they are ``None``.
        """
        print("[Book::init()] Creating book object from ->", book_dict)

        super(Book, self).__init__(book_dict, callback, delete_callback)

        self.scandata = ScanData(self.path)
        self.leafs = self.scandata.count_pages()
        self.notes_count = self.scandata.count_notes()

        self.creator = self._from_dict_or_metadata(book_dict, 'creator')
        self.volume = self._from_dict_or_metadata(book_dict, 'volume')
        self.shiptracking_id = self._from_dict_or_metadata(
            book_dict, 'shiptracking')
        self.boxid = self._from_dict_or_metadata(book_dict, 'boxid')

    def _from_dict_or_metadata(self, book_dict, key):
        """Return ``book_dict[key]``, else ``self.metadata[key]``, else None."""
        if key in book_dict:
            return book_dict[key]
        if key in self.metadata:
            return self.metadata[key]
        return None

    def __repr__(self):
        ret = '<{} is {} ({}-{})'.format(
            self.uuid,
            status_human_readable.get(self.status),
            self.status,
            UploadStatus[self.status].value,
        )
        if self.has_identifier():
            ret = ret + '| {}'.format(self.identifier)

        ret = ret + '>'
        return ret

    def reload_metadata(self):
        """Re-read metadata and refresh the fields cached from it."""
        self.metadata = get_metadata(self.path)
        self.shiptracking_id = self.metadata.get('shiptracking')
        self.boxid = self.metadata.get('boxid')
        self.volume = self.metadata.get('volume')
        super(Book, self).reload_metadata()

    def reload_scandata(self):
        """Rebuild ``self.scandata`` and refresh the values derived from it.

        Updates the page and note counts plus the created/modified dates,
        then calls ``self.notify('reloaded_scandata')`` if a notifier is set.
        """
        self.scandata = ScanData(self.path)
        self.leafs = self.scandata.count_pages()
        self.notes_count = self.scandata.count_notes()
        self.date_last_updated = self.load_last_modified_from_disk(
            including_move_along=True)
        self.date_last_modified = self.load_last_modified_from_disk()
        self.date_created = self.load_date_created_from_disk()
        if self.notify:
            self.notify('reloaded_scandata')

    def get_claimer(self):
        """Return the content of the ``claimer`` file, or the string 'None'.

        'None' is returned both when the file is missing and when it is
        empty.
        """
        path = join(os.path.expanduser(self.path), 'claimer')
        if exists(path):
            with open(path, 'r') as f:
                return f.read() or 'None'
        return 'None'

    def get_scandata(self):
        """Return the raw dump of the book's scandata."""
        return self.scandata.dump_raw()

    def as_dict(self):
        """Return the parent's dict extended with book-specific fields.

        ``shiptracking`` and ``boxid`` are reported as ``''`` when unset.
        """
        ret = super(Book, self).as_dict()
        ret.update({
            'volume': self.volume,
            'notes_count': self.notes_count,
            'leafs': self.leafs,
            'shiptracking': self.shiptracking_id or '',
            'boxid': self.boxid or '',
        })
        return ret

    def has_minimal_metadata(self, exclude_catalog=False):
        """Return True if any combination in ``MINIMAL_METADATA`` is met.

        A combination is met when each of its fields is present in the
        metadata with a value other than ``''``. Returns False when no
        combination was evaluated at all.
        """
        catalog_combination = ('scribe3_search_catalog', 'scribe3_search_id')
        result = []
        for combination in MINIMAL_METADATA:
            if exclude_catalog and combination == catalog_combination:
                # NOTE(review): this stops the scan, so combinations listed
                # after the catalog one are skipped as well - confirm that
                # this is intended rather than skipping only this entry.
                break
            result.append(all(
                field in self.metadata and self.metadata[field] != ''
                for field in combination))
        # any() copes with an empty list, unlike reduce() without an
        # initial value, which raises TypeError.
        return any(result)

    def is_downloaded(self):
        """Return True when the ``downloaded`` marker file exists."""
        return os.path.exists(os.path.join(self.path, 'downloaded'))

    def is_modern_book(self):
        """Return True when the metadata carries ``scribe3_search_id``."""
        return 'scribe3_search_id' in self.metadata

    def is_preloaded(self):
        """Return True when the ``preloaded`` marker file exists."""
        return exists(join(self.path, 'preloaded'))

    def has_rcs(self):
        """Return True when every RCS-related field is in the metadata."""
        REQUIRED_FIELDS_FOR_NATIVE_BOOK = [
            'sponsor', 'contributor', 'collection', 'rcs_key'
        ]
        return all(field in self.metadata
                   for field in REQUIRED_FIELDS_FOR_NATIVE_BOOK)

    def has_rcs_if_required(self):
        """Return True unless RCS fields are required and missing.

        Preloaded and downloaded books are exempt from the requirement.
        """
        if self.is_preloaded():
            return True
        if self.is_downloaded():
            return True
        # RCS is required
        return self.has_rcs()

    def get_slip(self, full_path=False):
        """Return the most recently modified ``*slip.png`` file, or None.

        By default only the file name is returned; pass ``full_path=True``
        to get the complete path.
        """
        slip_paths = glob.glob(os.path.join(self.path, '*slip.png'))
        if not slip_paths:
            return None
        newest = max(slip_paths, key=os.path.getmtime)
        return newest if full_path else os.path.basename(newest)

    def has_slip(self):
        """Return True when a slip image exists in the book directory."""
        return self.get_slip() is not None

    def has_slip_if_required(self):
        """Return True unless a slip is required and missing.

        Only modern books (see ``is_modern_book``) require a slip.
        """
        if self.is_modern_book():
            return self.has_slip()
        return True

    def get_slip_type(self):
        """Return the slip type matching the slip file name, or None.

        The type is looked up by inverting ``SLIP_IMAGE_NAMES``.
        """
        from ia_scribe.tasks.print_slip import SLIP_IMAGE_NAMES
        filename_to_slip_type = {v: k for k, v in SLIP_IMAGE_NAMES.items()}
        slip_filename = self.get_slip()
        if not slip_filename:
            return None
        slip_file = slip_filename.replace('.png', '')
        return filename_to_slip_type.get(slip_file)

    def get_slip_metadata_file_path(self):
        """Return the path of the slip metadata file for this book."""
        return join(self.path, SLIP_METADATA_FILENAME)

    def has_slip_metadata_file(self):
        """Return True when the slip metadata file exists."""
        return exists(self.get_slip_metadata_file_path())

    def get_slip_metadata(self):
        """Return the parsed slip metadata, or None when there is no file.

        Only the presence of the metadata file is checked: a slip image
        without a metadata file yields None instead of failing on ``open``.
        """
        if not self.has_slip_metadata_file():
            return None
        with open(self.get_slip_metadata_file_path(), 'r') as f:
            return json.load(f)

    def set_slip_metadata(self, type, metadata):
        """Write *metadata* as JSON to the slip metadata file.

        The passed dict is modified in place: ``'type'`` is set to *type*
        and ``'datetime'`` is replaced with its ``%Y%m%d%H%M%S`` string, so
        ``metadata['datetime']`` must provide ``strftime``.
        """
        metadata['type'] = type
        dt = metadata.get('datetime')
        metadata['datetime'] = dt.strftime('%Y%m%d%H%M%S')
        slip_metadata = json.dumps(metadata, indent=4, sort_keys=True)
        with open(self.get_slip_metadata_file_path(), 'w+') as f:
            f.write(slip_metadata)

    def has_full_image_stack(self):
        """Return the result of ``has_full_imgstack`` for this book."""
        return has_full_imgstack(self)

    def has_foldout_target_selected(self):
        """Return True when the ``send_to_station`` file exists."""
        path = join(self.path, 'send_to_station')
        return os.path.exists(path)

    def get_foldout_target(self):
        """Return the content of ``send_to_station``, or None if absent."""
        if self.has_foldout_target_selected():
            with open(join(self.path, 'send_to_station'), 'r') as f:
                return f.read()
        else:
            return None

    def set_foldout_target(self, scanner):
        """Write *scanner* to the ``send_to_station`` file."""
        with open(join(self.path, 'send_to_station'), 'w+') as f:
            f.write(scanner)

    def has_full_image_stack_wrapper(self, e):
        """Check the image stack; report a failure via ``raise_exception``."""
        self.logger.info('checking that {} has a full imgstack...'.format(
            self.identifier))
        ret, msg = has_full_imgstack(self)
        self.logger.info('Result is {} {}'.format(ret, msg))
        if ret == False:
            self.raise_exception('has_full_imgstack_wrapper', msg)
        return ret

    def item_clear_for_upload_wrapper(self, e):
        self.logger.info('checking that {} is clear for upload'.format(
            self.identifier))
        ret = item_ready_for_upload(self)
        self.logger.info('Result is {}'.format(ret))
        return ret

    def was_image_stack_processed_wrapper(self, e):
        self.logger.info('checking that imagestack was formed properly')
        ret = was_image_stack_processed(self)
        self.logger.info('Result is {}'.format(ret))
        return ret

    def has_valid_preimage_zip_wrapper(self, e):
        self.logger.info(
            'checking that preimage.zip archive was built properly')
        ret = has_valid_preimage_zip(self)
        self.logger.info('Result is {}'.format(ret))
        return ret

    def has_slip_if_required_wrapper(self, e):
        self.logger.info('checking that a slip is present if required')
        ret = self.has_slip_if_required()
        self.logger.info('Result is {}'.format(ret))
        return ret

    def has_rcs_if_required_wrapper(self, e):
        self.logger.info(
            'checking that a collection string was set for the book')
        ret = self.has_rcs_if_required()
        self.logger.info('Result is {}'.format(ret))
        return ret

    def ok_to_delete(self, e):
        """Return True if the book may be deleted.

        Downloaded books can always go; other books are checked with
        ``verify_uploaded`` first.
        """
        self.logger.info('Checking whether it is safe to delete this book')
        if self.is_downloaded():
            self.logger.info('Book was downloaded, we can go ahead.')
            return True
        else:
            self.logger.info(
                'Book was scanned on this station. Verifying with cluster...')
            return verify_uploaded(self)

    def get_jpegs(self):
        """Return the sorted paths of the ``NNNN.jpg`` files of the book."""
        jpegs = sorted(
            glob.glob(os.path.join(self.path, '[0-9][0-9][0-9][0-9].jpg')))
        return jpegs

    def get_thumb_jpegs(self):
        """Return the sorted paths of the ``NNNN.jpg`` thumbnails."""
        jpegs = sorted(
            glob.glob(
                os.path.join(self.path, 'thumbnails',
                             '[0-9][0-9][0-9][0-9].jpg')))
        return jpegs

    def get_imagestack(self):
        """Return the sorted ``NNNN.jp2`` paths, or the JPEGs if none exist."""
        jp2s = sorted(
            glob.glob(os.path.join(self.path, '[0-9][0-9][0-9][0-9].jp2')))
        if not jp2s:
            jp2s = self.get_jpegs()
        return jp2s

    def get_path_to_upload(self, human_readable=False):
        """Return the remaining path to upload for the current status.

        A numeric status of 888 or more yields an empty list. Below 797
        ``path_to_success`` is used, otherwise
        ``path_to_success_corrections``. With ``human_readable=True`` the
        result is passed through ``self.humanify``.
        """
        return_value = []
        if self.get_numeric_status() >= 888:
            return return_value
        if self.get_numeric_status() < 797:
            return_value = path_to_success(self.status)
        else:
            return_value = path_to_success_corrections(self.status)
        if human_readable:
            return_value = self.humanify(return_value)
        return return_value

    def set_force_upload(self):
        """Set the ``force_upload`` flag on the book."""
        self.force_upload = True

    def build_callbacks(self):
        """Return the mapping of callback names to handlers."""
        ret = {
            'onchangestate': self._on_change_state,
            'onbeforedo_create_identifier': get_identifier_fysom,
            # Disabled hooks, kept for reference:
            # 'onbeforedo_queue_processing': partial(self.set_checkpoint, 'processing_queued'),
            # 'onbeforedo_queue_for_upload': partial(self.set_checkpoint, 'upload_queued'),
            # 'onbeforedo_move_to_trash': partial(self.set_checkpoint, 'delete_queued'),
            # 'create_metadata': partial(make_identifier, self, self)
            # 'ondo_begin_packaging': package_book,
            # 'ondo_create_image_stack': create_imagestack,
            # 'ondo_finish_image_stack': create_preimage_zip,
            #  'ondo_queue_for_upload': upload_book,
            # 'onafterdo_upload_book_done': verify_uploaded,
        }
        return ret

    def is_folio(self):
        """Return True when the metadata ``source`` is ``'folio'``."""
        ret = self.metadata.get('source') == 'folio'
        return ret

    def get_cover_image(self):
        """Return the cover thumbnail path.

        Folio books use ``0002.jpg``; all other books use ``0001.jpg``.
        """
        if self.is_folio():
            ret = os.path.join(self.path, 'thumbnails', '0002.jpg')
        else:
            ret = os.path.join(self.path, 'thumbnails', '0001.jpg')
        return ret

    GSM = FysomGlobal(
        cfg=book_state_machine,
        state_field='status',
    )
Ejemplo n.º 11
0
def _page_images_in(directory):
    """Return the names of the ``NNNN.jpg`` files directly in *directory*.

    Raises ``StopIteration`` when ``os.walk`` yields nothing for
    *directory* (for example because it does not exist).
    """
    return [name for name in next(os.walk(directory))[2]
            if re.match(r'\d{4}\.jpg$', name)]


def _leaf_key(item_path):
    """Return the ``pageData`` key for an item path.

    For example ``'corrections/0044.jpg'`` gives ``'44'``. Only the last
    path component is used, so the folder part may itself contain slashes.
    """
    file_name = item_path.rsplit('/', 1)[-1]
    return str(int(file_name.split('.jpg')[0]))


def _record_image_size(page, image_path):
    """Store the size of the image at *image_path* on the *page* entry.

    Width and height are swapped when ``rotateDegree`` is 90 or 270; for any
    value other than 0, 90, 180 or 270 the entry is left untouched.
    """
    # The context manager closes the file handle once the size is read.
    with Image.open(image_path) as im:
        width, height = im.size

    rotation = abs(int(page['rotateDegree']))
    if rotation in (0, 180):
        page['origWidth'] = str(width)
        page['origHeight'] = str(height)
    elif rotation in (90, 270):
        page['origWidth'] = str(height)
        page['origHeight'] = str(width)


def _create_scandata(book, book_folder, foldouts, Logger):
    """Build ``scandata_rerepublished.json`` for a corrected book.

    The function:

    - loads the book's current scandata;
    - collects the new pages found in the book directory (``cdic``) and the
      matching thumbnail paths (``tdic``), and tags each of those pages in
      the scandata (size, file names and, unless *foldouts* is set, the
      ``INSERT`` correction type);
    - collects the reshot pages under ``reshooting`` (``rdic``) and their
      thumbnails (``rtdic``) and tags them as ``REPLACE`` corrections;
    - rewrites ``pageData`` as a list of pages ordered by leaf number and
      ``bookData.pageNumData`` as a list of assertions;
    - writes the result to ``scandata_rerepublished.json`` in the book
      directory.

    Each returned dictionary maps an item path to a local path, for example
    ``{'corrections/0044.jpg': '~/scribe_books/1234/0044.jpg'}``.

    :param book: mapping that provides the book directory under ``'path'``.
    :param book_folder: folder prefix used for the item paths.
    :param foldouts: when truthy, new pages are not tagged as ``INSERT``.
    :param Logger: logger used for progress and error messages.
    :return: the tuple ``(cdic, tdic, rdic, rtdic)``.
    :raises ScribeException: when a new page has no entry in the scandata.
    """
    scandata = json.loads(ScanData(book['path']).dump())
    cdic = {}
    tdic = {}
    rdic = {}
    rtdic = {}

    # Here we add the new pages, and the thumbs from the new pages.
    # NOTE(review): the thumbnail entries are derived from the full-size
    # images in the book directory, not from a listing of the thumbnails
    # directory itself - presumably every image has a thumbnail; confirm.
    try:
        new_images = _page_images_in(book['path'])
        cdic = {book_folder + '/' + name: os.path.join(book['path'], name)
                for name in new_images}
        tdic = {book_folder + '/thumbnails/' + name:
                    os.path.join(book['path'], 'thumbnails', name)
                for name in new_images}
    except Exception:
        Logger.error('_create_scandata: No corrections found.')

    # Ensure the scandata has the appropriate tags for re-republishing

    # NEW PAGES DICT
    Logger.debug('_create_scandata: Processing new pages...')
    for k in cdic:
        page_num = _leaf_key(k)
        Logger.debug('_create_scandata: Processing page {}'.format(page_num))
        try:
            page = scandata['pageData'][page_num]
        except Exception as e:
            raise ScribeException(e)
        Logger.debug('_create_scandata: Page data for page {} exists in '
                     'scandata'.format(page_num))

        _record_image_size(page, cdic[k])

        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - PAGE INSERT- '
                     'please report this \n\n')
        Logger.debug(
            'rotatedegree={2}, origWidth={0}, height={1}'
                .format(page['origWidth'],
                        page['origHeight'],
                        page['rotateDegree'])
        )
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')

        page['origFileName'] = k.rsplit('/', 1)[-1]
        page['sourceFileName'] = k
        page['proxyFullFileName'] = k
        if not foldouts:
            page['correctionType'] = 'INSERT'
            page['TTSflag'] = 0

        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - please report '
                     'this \n\n')
        Logger.debug('\n' + str(page))
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')

    # THUMBS FOR NEW PAGES
    for k in tdic:
        scandata['pageData'][_leaf_key(k)]['proxyFileName'] = k

    Logger.debug('_create_scandata: Processed {} new images.'.format(len(cdic)))

    try:
        # here we add the reshot images
        reshoot_dir = join(book['path'], 'reshooting')
        rdic = {book_folder + '/' + name: join(reshoot_dir, name)
                for name in _page_images_in(reshoot_dir)}

        # RESHOT IMAGES DICT
        for k in rdic:
            page = scandata['pageData'][_leaf_key(k)]
            _record_image_size(page, rdic[k])

            Logger.debug('---->>> CORRECTIONS DEBUG - PAGE RESHOOT')
            Logger.debug(
                'rotatedegree is {2}, origWidth = {0}, height= {1}'
                    .format(page['origWidth'],
                            page['origHeight'],
                            page['rotateDegree'])
            )
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - - -')

            page['origFileName'] = k.rsplit('/', 1)[-1]
            page['sourceFileName'] = k
            page['correctionType'] = 'REPLACE'
            page['proxyFullFileName'] = k
            page['TTSflag'] = 0

            Logger.debug('---->>> CORRECTIONS DEBUG - please report this')
            Logger.debug('\n' + str(page))
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -')

        # here we add the thumbs from the reshooting
        reshoot_thumbs_dir = join(reshoot_dir, 'thumbnails')
        rtdic = {book_folder + '/thumbnails/' + name:
                     join(reshoot_thumbs_dir, name)
                 for name in _page_images_in(reshoot_thumbs_dir)}

        # THUMBS FOR RESHOT IMAGES
        for k in rtdic:
            scandata['pageData'][_leaf_key(k)]['proxyFileName'] = k

        Logger.debug('_create_scandata: Processed {} reshot images.'.format(len(rdic)))

    except Exception:
        # Best effort: a missing reshooting directory also ends up here.
        Logger.exception('_create_scandata: No reshot pages found')

    # Page info from disk, keyed by integer leaf number so that the pages
    # can be emitted in numeric order.
    sss = {int(k): v for k, v in scandata['pageData'].items()}
    # Now we want our own piece of memory for this one
    new_scandata = copy.deepcopy(scandata)
    new_scandata['pageData'] = {}
    new_scandata['pageData']['page'] = []

    # Rewrite pages section

    Logger.debug('_create_scandata: Adding all computed pages to new scandata...')
    for page in sorted(sss):
        Logger.debug('_create_scandata: {}'.format(page))
        sss[page]['leafNum'] = page
        try:
            # Flatten {'num': N} into N when the page number is a mapping.
            sss[page]['pageNumber'] = sss[page]['pageNumber']['num']
        except (KeyError, TypeError):
            # No page number, or one that is already flat: leave it alone.
            pass
        new_scandata['pageData']['page'].append(sss[page])

    # Rewrite assertions to be compatible with republisher

    try:
        Logger.debug('\nNow rewriting page assertions for repub compatibility '
                     'if present')
        page_num_data = scandata['bookData']['pageNumData']
        temp_pageNumData = copy.deepcopy(page_num_data)
        temp_pageNumData['assertion'] = []
        for entry in page_num_data:
            if entry.isdigit():
                del temp_pageNumData[entry]

        # NOTE(review): every entry becomes an assertion here, including
        # keys that are not leaf numbers - confirm that this is intended.
        for leaf_num, page_number in page_num_data.items():
            temp_pageNumData['assertion'].append(
                {'leafNum': str(leaf_num), 'pageNum': str(page_number)})

        Logger.debug('_create_scandata: OK done. New pageNumData block: {}'
                     .format(temp_pageNumData))

        new_scandata['bookData']['pageNumData'] = temp_pageNumData
    except Exception as e:
        Logger.exception('_create_scandata: No pageNumData block found or error processing '
                         'it.: {}'.format(e))

    # Write it all to file
    with open(join(book['path'], 'scandata_rerepublished.json'), 'w+') as outfile:
        json.dump(new_scandata, outfile)

    Logger.debug('_create_scandata: Done constructing scandata.')
    return cdic, tdic, rdic, rtdic
Ejemplo n.º 12
0
class DownloadBookTask(TaskBase):
    """Task that downloads an existing item into a new local book.

    The work is split into the steps listed by ``create_pipeline``
    (presumably run in that order by ``TaskBase``, which is not visible
    here). Between them they create a stub book in the library, fetch the
    scandata and metadata, convert the scandata to Scribe3 JSON, download
    the proxy images and bump the remote ``repub_state``.

    Required keyword arguments: ``library`` and ``identifier``.
    """

    def __init__(self, **kwargs):
        super(DownloadBookTask, self).__init__(**kwargs)
        self._book = None
        self._priority = 'medium'
        self._library = kwargs['library']
        self.identifier = kwargs['identifier']
        self.logger.info('Download books: Downloading {}'.format(
            self.identifier))
        self.notifications_manager = NotificationManager()
        self._download_type = None
        # Filled in by _begin / _release_lock so the final log line reports
        # the real duration of the download.
        self.start_time = None
        self.total_time = None

    def create_pipeline(self):
        """Return the list of step callables that make up the download."""
        return [
            self._begin,
            self._get_ia_session,
            self._load_item,
            self._validate_repub_state,
            self._create_stub_book,
            self._load_book_metadata,
            self._create_files,
            self._create_scandata,
            self._get_checkout_information,
            self._write_claimer_file,
            self._download_proxies,
            self._set_states,
            self._release_lock,
            self._send_stats,
        ]

    def _begin(self):
        """Record the moment the download started."""
        import time
        self.start_time = time.time()

    def handle_event(self, event_name, *args, **kwargs):
        """Trash and delete the stub book if the task is cancelled with error."""
        if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR:
            if self._book:
                self._book.do_move_to_trash()
                self._book.do_delete_anyway()

    def _get_ia_session(self):
        """Store the session object returned by ``get_ia_session()``."""
        self.dispatch_progress('Getting IA session')
        self._ia_session = get_ia_session()

    def _load_item(self):
        """Fetch the remote item and require that it has a ``repub_state``.

        Raises:
            AssertionError: ``repub_state`` is present but ``None``.
            Exception: whatever ``get_item`` or the metadata lookup raised;
                it is logged and re-raised unchanged.
        """
        self.dispatch_progress('Loading item')
        try:
            self.item = self._ia_session.get_item(self.identifier)
            # Explicit raise instead of ``assert`` so the check survives -O.
            if self.item.metadata['repub_state'] is None:
                raise AssertionError('repub_state is None')
        except Exception:
            self.logger.error('No repub_state or item darkened. Skipping...')
            raise
        self.logger.info(
            'Download book: target item: {} (repub_state = {})'.format(
                self.item.identifier, self.item.metadata['repub_state']))

    def _validate_repub_state(self):
        """Refuse to continue unless the repub_state allows a download.

        Raises:
            Exception: the state is not in
                ``scribe_globals.ALLOWED_DOWNLOAD_REPUB_STATES``.
        """
        self.dispatch_progress('Validating repub state')
        repub_state = int(self.item.metadata['repub_state'])
        self.logger.info('Validating repub_state {}'.format(repub_state))

        if repub_state not in scribe_globals.ALLOWED_DOWNLOAD_REPUB_STATES:
            msg = ('Download book: Repub state is not 31 or 34 or 41 '
                   '(is {1}), refusing to download item {0}'.format(
                       self.item.identifier,
                       self.item.metadata['repub_state']))
            self.logger.error(msg)
            raise Exception(msg)

    def _create_stub_book(self):
        """Create a locked placeholder book in the library."""
        self.dispatch_progress('Creating local book')
        message = "This books is being downloaded and no actions are available just yet."
        book_id = str(uuid4())
        self._book = self._library.new_book(book_id,
                                            status='download_incomplete',
                                            error=message)
        self._book.set_lock()
        self._book.logger.info('Download book: Created stub book {}'.format(
            self._book))

    def _load_book_metadata(self):
        """Request the item's scandata from the RePublisher endpoint.

        The response is kept in ``self._md``; only its status code is
        logged here, the body is written to disk by ``_create_files``.
        """
        self.dispatch_progress('Loading metadata')
        md_url = ('https://{}/RePublisher/RePublisher-viewScanData.php'
                  '?id={}'.format(self.item.d1, self.identifier))
        self._md = requests.get(md_url, timeout=5)
        self._book.logger.info(
            'Download book: Fetch scandata from cluster: {}'.format(
                self._md.status_code))

    def _write_book_file(self, filename, content):
        """Write ``content`` to ``filename`` in the book directory.

        Returns the path of the file that was written.
        """
        target = os.path.join(self._book.path, filename)
        with open(target, 'w+') as fp:
            fp.write(content)
        self._book.logger.info('Download book: Created {}'.format(target))
        return target

    def _create_files(self):
        """Create the book's bookkeeping files and download ``metadata.xml``.

        The paths of everything created are collected in ``self._files``
        (later sent along by ``_send_stats``).
        """
        self.dispatch_progress('Downloading files')
        created = [
            self._write_book_file('identifier.txt', self.item.identifier),
            self._write_book_file('downloaded', 'True'),
            self._write_book_file('uuid', self._book.uuid),
            self._write_book_file('scandata.xml', self._md.content.decode()),
        ]

        metadata_path = os.path.join(self._book.path, 'metadata.xml')
        self.item.get_file(self.item.identifier + '_meta.xml') \
            .download(file_path=metadata_path)
        created.append(metadata_path)
        self._book.logger.info('Download book: Created metadata.xml')
        self._book.reload_metadata()

        reshooting_dir = os.path.join(self._book.path, 'reshooting')
        if not os.path.exists(reshooting_dir):
            os.makedirs(reshooting_dir)
            created.append(reshooting_dir)
        self._book.logger.info('Download book: Created reshooting directory')

        self._files = created

        self._book.logger.info('Download book: Created files, now converting '
                               'scandata from RePublisher XML to Scribe3 JSON')

    def _create_scandata(self):
        """Convert the downloaded ``scandata.xml`` into ``scandata.json``.

        The XML is validated, normalized and converted with the
        ``book_helpers`` functions, written out as JSON, then loaded and
        saved again through ``ScanData`` before the book reloads it.
        """
        self.dispatch_progress('Creating scandata')
        sc_path = os.path.join(self._book.path, 'scandata.xml')

        tree = book_helpers.validate_scandata_xml(sc_path, self._book)
        scandata_xml = book_helpers.create_normalized_scandata(
            tree, self._book)
        json_data = book_helpers.convert_normalized_scandata_to_json(
            scandata_xml)

        self._book.logger.info('Download book: Now converting to Scribe3 JSON')
        json_new = {}
        json_new['bookData'] = book_helpers.build_bookdata(
            json_data, self._book)
        json_new['pageData'] = book_helpers.build_pagedata(
            json_data, self._book)

        with open(os.path.join(self._book.path, 'scandata.json'),
                  'w') as outfile:
            json.dump(json_new, outfile)
            self._book.logger.info('Download book: Created {}'.format(
                outfile.name))

        self._scandata = ScanData(self._book.path)
        self._scandata.save()

        self._book.reload_scandata()

        self._book.logger.info('Download book: Created scandata.')

    def _get_checkout_information(self):
        """Fetch (peek) the checkout information and store the parsed JSON."""
        self.dispatch_progress('Pulling checkout information')
        book_checkout_url = ('https://{}/RePublisher/RePublisher-'
                             'checkoutBook.php?peek=true&id={}'.format(
                                 self.item.d1, self._book.identifier))

        self._book.logger.info(
            'Getting checkout information from {}'.format(book_checkout_url))
        ret = self._ia_session.get(book_checkout_url)
        self._book.logger.info('Got {} ({})'.format(ret.text, ret.status_code))
        self._checkout_info = json.loads(ret.text)

    def _write_claimer_file(self):
        """Write the claimer to the ``claimer`` file.

        ``'-'`` is written when the checkout information has no usable
        ``claimed_by`` value (missing, ``False``, ``None`` or empty).
        """
        self.dispatch_progress('Writing claimer file')
        claimer = '-'
        if 'claimed_by' in self._checkout_info:
            claimed_by = self._checkout_info['claimed_by']
            # Any falsy value (False, None, '') means "nobody"; writing
            # None to the file would raise a TypeError.
            if claimed_by:
                claimer = claimed_by

        with open(os.path.join(self._book.path, 'claimer'), 'w+') as fp:
            fp.write(claimer)
        self._claimer = claimer
        self._book.logger.info('This book was claimed by {}'.format(claimer))

    def _download_proxies(self):
        """Download one proxy image per entry in the scandata page data.

        Returns:
            bool: ``True`` only if every page was fetched successfully and
            the page keys matched their position; ``False`` otherwise.
        """
        self.dispatch_progress('Downloading proxies')
        all_ok = True
        counter = 0
        page_data = self._scandata.dump_raw()['pageData']
        total = len(page_data)
        for i, page in enumerate(page_data):
            self.dispatch_progress('Downloading proxies [{}/{}]'.format(
                i, total))
            # The page key is expected to equal its position in iteration
            # order; stop if it does not.
            if int(page) != i:
                self._book.logger.error('Download book: Download Proxies: '
                                        'CRITICAL MISMATCH')
                # An aborted download must not be reported as a success.
                all_ok = False
                break
            short_msg = 'Download pics | {percent:.1f}% | {n}/{total}'.format(
                percent=i * 100 / total,
                n=i,
                total=total,
            )
            self._book.update_message(short_msg)
            url = book_helpers.get_cluster_proxy_url_by_leaf(
                self._scandata, self.item, page)
            res = self.download_proxy_image(page, self._book, url)

            all_ok = all_ok and res
            counter += 1
            if res:
                self._book.logger.debug(
                    'Download book: Got proxies for leaf #{0}'.format(page))
            else:
                self._book.logger.error(
                    'Download book: Error downloading leaf #{0}'.format(page))

        self._book.logger.info(
            'Download book: Downloaded {} proxy images.'.format(counter))

        return all_ok

    def _set_states(self):
        """Finish the local download and advance the remote ``repub_state``.

        State 31 becomes 32 (corrections) and 41 becomes 42 (foldouts).

        Raises:
            Exception: the item is in any other state, MDAPI returned no
                response, or MDAPI answered with a status other than 200.
        """
        self.dispatch_progress('Setting states')
        self._book.error = None
        repub_state = int(self.item.metadata['repub_state'])
        if repub_state == 31:
            book_final_repub_state = 32
            self._download_type = 'corrections'
            self._book.do_end_download_correction()
        elif repub_state == 41:
            book_final_repub_state = 42
            self._download_type = 'foldouts'
            self._book.do_end_download_foldout()
        else:
            self._book.logger.error(
                'Error while processing item in repub_state {}'.format(
                    self.item.metadata['repub_state']))
            raise Exception(
                'remote repub state in inconsistent with book download')

        self._book.logger.info(
            'Setting remote repub_state to {}'.format(book_final_repub_state))
        mdapi_response = self.item.modify_metadata(
            {'repub_state': book_final_repub_state})
        self._book.logger.info(
            'Response from MDAPI: {}'.format(mdapi_response))
        # Compare against None explicitly: a requests.Response is falsy for
        # 4xx/5xx, which would be misreported as "no response" here.
        if mdapi_response is None:
            raise Exception('No response from MDAPI. Aborting download.')

        self._mdapi_response_text = mdapi_response.text
        self._book.logger.info('Body of MDAPI: {}'.format(
            self._mdapi_response_text))
        if mdapi_response.status_code != 200:
            raise Exception(
                'MDAPI response was not OK! - Got this instead: {} - {}'.
                format(mdapi_response.status_code, mdapi_response.text))

        self._book.logger.info(
            'Download book: Set book repub_state to {}'.format(
                book_final_repub_state))
        self._book_final_repub_state = book_final_repub_state

    def _send_stats(self):
        """Push a ``tts-book-downloaded`` event and notify the user."""
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'repub_state': self._book_final_repub_state,
            'checkout_info': self._checkout_info,
            'claimer': self._claimer,
            'files': self._files,
        }
        push_event('tts-book-downloaded', payload, 'book', self.identifier,
                   os.path.join(self._book.path, "iabdash.log"))
        self.notifications_manager.add_notification(
            title='Downloaded',
            message="{} has been downloaded and is ready for {}.".format(
                self.identifier, self._download_type),
            show_system_tile=False,
            book=self._book)

    def _release_lock(self):
        """Log the elapsed time and release the lock on the book."""
        import time
        if self.start_time is not None:
            self.total_time = time.time() - self.start_time
        self._book.logger.info('Download book: ------ DONE. Downloaded {0} in '
                               '{1}s ----------'.format(
                                   self.identifier, self.total_time))
        self._book.release_lock()

    def download_proxy_image(
        self,
        page,
        book,
        url,
    ):
        """Store the proxy image for ``page`` in the book's thumbnails.

        Args:
            page: leaf number (anything ``int()`` accepts); it names the
                target file as ``NNNN.jpg``.
            book: object providing ``path`` and ``logger``.
            url: where to fetch the image from, or ``None`` to copy the
                ``scribe_globals.MISSING_IMAGE`` placeholder instead.

        Returns:
            bool: result of the validity check, which currently always
            passes.
        """
        def is_proxy_valid(proxy_path):
            # Placeholder: no validation is performed yet.
            return True

        file_target = '{n:04d}.jpg'.format(n=int(page))
        dest = os.path.join(book.path, "thumbnails", file_target)

        if url is not None:
            image = self._ia_session.get(url).content
            with open(dest, 'wb+') as proxy:
                book.logger.debug('Writing {}'.format(dest))
                proxy.write(image)
                book.logger.info('Download book: Written {}'.format(
                    proxy.name))
        else:
            import shutil
            book.logger.debug('Page {} has no proxy, adding missing '
                              'image at {}'.format(page, dest))
            shutil.copyfile(scribe_globals.MISSING_IMAGE, dest)
        return is_proxy_valid(dest)
Ejemplo n.º 13
0
class DownloadCDTask(TaskBase):
    """Task that downloads an ArchiveCD stub item into a new local CD.

    The work is split into the steps listed by ``create_pipeline``
    (presumably run in that order by ``TaskBase``, which is not visible
    here).

    Required keyword arguments: ``library`` and ``identifier``.
    """

    def __init__(self, **kwargs):
        super(DownloadCDTask, self).__init__(**kwargs)
        self._cd = None
        self._priority = 'medium'
        self._library = kwargs['library']
        self.identifier = kwargs['identifier']
        self.logger.info('Download CD: Downloading {}'.format(self.identifier))
        self.notifications_manager = NotificationManager()
        self._download_type = None
        self.start_time = None
        self.total_time = None

    def create_pipeline(self):
        """Return the list of step callables that make up the download."""
        return [
            self._begin,
            self._get_ia_session,
            self._load_item,
            self._verify_is_CD,
            self._verify_is_stub_item,
            self._create_stub_CD,
            self._create_files,
            self._create_scandata,
            self._set_states,
            self._release_lock,
            self._send_stats,
        ]

    def _begin(self):
        """Record the moment the download started."""
        self.start_time = time.time()

    def handle_event(self, event_name, *args, **kwargs):
        """Trash and delete the stub CD if the task is cancelled with error."""
        if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR:
            if self._cd:
                self._cd.do_move_to_trash()
                self._cd.do_delete_anyway()

    def _get_ia_session(self):
        """Store the session object returned by ``get_ia_session()``."""
        self.dispatch_progress('Getting IA session')
        self._ia_session = get_ia_session()

    def _load_item(self):
        """Fetch the remote item for ``self.identifier``."""
        self.dispatch_progress('Loading item')
        self.item = self._ia_session.get_item(self.identifier)
        self.logger.info('Download CD: target item: {}'.format(
            self.item.identifier))

    def _verify_is_CD(self):
        """Require an audio item whose software_version mentions ArchiveCD.

        Raises:
            AssertionError: the mediatype is not ``'audio'``, or
                ``software_version`` is missing or lacks ``'ArchiveCD'``.
        """
        self.dispatch_progress('Verifying this is an ArchiveCD item')
        mediatype = self.item.metadata.get('mediatype')
        software_version = self.item.metadata.get('software_version')
        # Raised explicitly (rather than via ``assert``) so the checks are
        # not stripped when Python runs with -O.
        if mediatype != 'audio':
            raise AssertionError(
                'This is not an audio item. It is {}.'.format(mediatype))
        if software_version is None or 'ArchiveCD' not in software_version:
            raise AssertionError('This item was not created with ArchiveCD')

    def _verify_is_stub_item(self):
        """Look up ``stub.txt`` on the item.

        NOTE(review): the existence check on the file is disabled in the
        original code, so this step currently never rejects an item.
        """
        self.dispatch_progress('Verifying this is a stub item')
        self.item.get_file('stub.txt')
        # TODO: re-enable once confirmed that every stub item has the file:
        #   raise Exception('No stub file found!') if it does not exist.

    def _create_stub_CD(self):
        """Create a locked placeholder CD in the library."""
        self.dispatch_progress('Creating local CD')
        message = "This CD is being downloaded and no actions are available just yet."
        cd_id = str(uuid4())
        self._cd = self._library.new_cd(cd_id,
                                        status='download_incomplete',
                                        error=message)
        self._cd.set_lock()
        self._cd.logger.info('Download CD: Created stub CD {}'.format(
            self._cd))

    def _write_cd_file(self, filename, content):
        """Write ``content`` to ``filename`` in the CD directory.

        Returns the path of the file that was written.
        """
        target = os.path.join(self._cd.path, filename)
        with open(target, 'w+') as fp:
            fp.write(content)
        self._cd.logger.info('Download CD: Created {}'.format(target))
        return target

    def _create_files(self):
        """Create the CD's bookkeeping files and download metadata and cover.

        The paths of everything created are collected in ``self._files``
        (later sent along by ``_send_stats``).
        """
        self.dispatch_progress('Downloading files')
        created = [
            self._write_cd_file('identifier.txt', self.item.identifier),
            self._write_cd_file('downloaded', 'True'),
            self._write_cd_file('uuid', self._cd.uuid),
        ]

        metadata_path = os.path.join(self._cd.path, 'metadata.xml')
        self.item.get_file(self.item.identifier + '_meta.xml') \
            .download(file_path=metadata_path)
        created.append(metadata_path)
        self._cd.logger.info('Download CD: Created metadata.xml')
        self._cd.reload_metadata()

        thumbs_dir = os.path.join(self._cd.path, 'thumbs')
        if not os.path.exists(thumbs_dir):
            os.makedirs(thumbs_dir)
            created.append(thumbs_dir)
        self._cd.logger.info('Download CD: Created thumbs directory')

        cover_path = os.path.join(self._cd.path, 'cover.png')
        self.item.get_file(self.item.identifier + '_itemimage.png') \
            .download(file_path=cover_path)
        created.append(cover_path)
        self._cd.logger.info('Download CD: Downloaded cover')

        self._files = created

        self._cd.logger.info('Download CD: Created files.')

    def _create_scandata(self):
        """Create and save a ``ScanData`` for the CD, then reload it."""
        self.dispatch_progress('Creating scandata')

        self._scandata = ScanData(self._cd.path)
        self._scandata.save()
        self._cd.reload_scandata()

        self._cd.logger.info('Download CD: Created scandata.')

    def _set_states(self):
        """Mark the download as finished on the local CD."""
        self.dispatch_progress('Setting states')
        self._cd.do_finish_download()

    def _send_stats(self):
        """Push a ``tts-cd-downloaded`` event and notify the user."""
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'files': self._files,
            'total_time': self.total_time,
        }
        push_event('tts-cd-downloaded', payload, 'cd', self.identifier,
                   os.path.join(self._cd.path, "iabdash.log"))

        self.notifications_manager.add_notification(
            title='Downloaded',
            message="CD {} has been downloaded.".format(self.identifier),
            show_system_tile=False,
            book=self._cd)

    def _release_lock(self):
        """Compute the elapsed time, log it and release the lock on the CD."""
        self.total_time = time.time() - self.start_time
        self._cd.logger.info('Download CD: ------ DONE. Downloaded {0} in '
                             '{1}s ----------'.format(self.identifier,
                                                      self.total_time))
        self._cd.release_lock()
Ejemplo n.º 14
0
class ReShootScreenBackend(WidgetBackend):
    """Backend for the reshoot screen.

    Keeps the list of flagged leafs of a downloaded book, tracks which one
    is current, and implements the actions available on it: capturing a
    new image, rotating it, changing its page type and editing its note.
    State changes are announced through the ``EVENT_*`` events below; the
    ``on_*`` methods at the end are their empty default handlers.

    ``book``, ``camera_system`` and (optionally) ``scandata``,
    ``reopen_at``, ``keyboard_detector`` and ``window`` are assigned by the
    owner before ``init()`` is called.
    """

    EVENT_CAPTURE_LEAF = 'on_capture_leaf'
    EVENT_CURRENT_LEAF = 'on_current_leaf'
    EVENT_ROTATE_LEAF = 'on_rotate_leaf'
    EVENT_PAGE_TYPE = 'on_page_type'
    EVENT_SHOW_ORIGINAL_FILE = 'on_show_original_file'
    EVENT_SHOW_RESHOOT_FILE = 'on_show_reshoot_file'
    EVENT_SHOW_PAGE_TYPE_FORM_POPUP = 'on_show_page_type_form_popup'
    EVENT_GO_BACK = 'on_go_back'

    __events__ = (EVENT_CAPTURE_LEAF, EVENT_CURRENT_LEAF, EVENT_ROTATE_LEAF,
                  EVENT_PAGE_TYPE, EVENT_GO_BACK, EVENT_SHOW_ORIGINAL_FILE,
                  EVENT_SHOW_RESHOOT_FILE, EVENT_SHOW_PAGE_TYPE_FORM_POPUP)

    def __init__(self, **kwargs):
        super(ReShootScreenBackend, self).__init__(**kwargs)
        self._note_leafs = []
        self._reverse_cams = False
        self._cameras_count = 0
        self._current_leaf_index = 0
        self._capture_running = False
        self._keyboard_action_handler = ReShootScreenKeyboardHandler(self)
        self.keyboard_detector = None
        self.book = None
        self.reopen_at = 0
        self.scandata = None
        self.camera_system = None
        self.window = None

    def init(self):
        """Load the flagged leafs and prepare cameras and keyboard handling.

        ``scandata`` is loaded from the book path unless one was supplied.
        The current leaf becomes ``reopen_at`` when that leaf is flagged,
        otherwise the first flagged leaf.
        """
        if not self.scandata:
            self.scandata = ScanData(self.book['path'], downloaded=True)
        self._note_leafs[:] = self.scandata.iter_flagged_leafs()
        try:
            leaf_index = self._note_leafs.index(self.reopen_at)
        except ValueError:
            leaf_index = 0
        self.set_current_leaf_index(leaf_index)
        if not self.keyboard_detector:
            detector = ReShootActionDetector(RESHOOT_ACTION_BINDINGS)
            self.keyboard_detector = detector
        self._keyboard_action_handler.detector = self.keyboard_detector
        self._cameras_count = self.camera_system.cameras.get_num_cameras()
        self._capture_running = False
        self._reverse_cams = False
        self.config = Scribe3Configuration()
        super(ReShootScreenBackend, self).init()

    def reset(self):
        """Drop all references and return the counters to their defaults."""
        self.book = None
        self.reopen_at = None
        self.scandata = None
        self.camera_system = None
        self.window = None
        del self._note_leafs[:]
        self._current_leaf_index = 0
        self._reverse_cams = False
        self._cameras_count = 0
        self._capture_running = False
        super(ReShootScreenBackend, self).reset()

    def is_capture_running(self):
        """Return True while a capture has been started and not finished."""
        return self._capture_running

    def is_reshoot_leaf_ready(self):
        """Return True if both the reshot image and its thumbnail exist."""
        path, thumb_path = self.get_current_reshoot_paths()
        return exists(path) and exists(thumb_path)

    def can_switch_cameras(self):
        """Return True when more than one camera is available."""
        return self._cameras_count > 1

    def can_capture_spread(self):
        """Return True when a new capture may be started right now."""
        # NOTE(review): ``cradle_closed`` is not defined in this class; it
        # is presumably a module-level name -- confirm it is in scope.
        return cradle_closed and not self._capture_running and self._cameras_count > 0

    def get_current_leaf_number(self):
        """Return the leaf number at the current index.

        Raises:
            IndexError: there are no flagged leafs.
        """
        return self._note_leafs[self._current_leaf_index]

    def get_current_leaf_index(self):
        """Return the index of the current leaf within the flagged leafs."""
        return self._current_leaf_index

    def set_current_leaf_index(self, index):
        """Move to ``index`` and dispatch ``EVENT_CURRENT_LEAF``.

        Nothing happens when ``index`` is out of range or already current.
        """
        max_index = max(0, len(self._note_leafs) - 1)
        if 0 <= index <= max_index and self._current_leaf_index != index:
            self._current_leaf_index = index
            self.dispatch(self.EVENT_CURRENT_LEAF)

    def get_leafs_count(self):
        """Return the number of flagged leafs."""
        return len(self._note_leafs)

    def get_book_metadata(self):
        """Return identifier, path, title, creator and language of the book.

        ``creator`` falls back to the ``author`` metadata field.
        """
        md = get_metadata(self.book['path'])
        return {
            'identifier': self.book.get('identifier', None),
            'path': self.book['path'],
            'title': md.get('title', None),
            'creator': md.get('creator', md.get('author', None)),
            'language': md.get('language', None)
        }

    def get_leaf_data(self):
        """Return hand side, page number, page type and note of the leaf.

        Raises:
            KeyError: the leaf's scandata has no ``pageType``.
        """
        leaf_number = self.get_current_leaf_number()
        leaf_data = self.scandata.get_page_data(leaf_number)
        page_number_data = leaf_data.get('pageNumber', None)
        page_number = self._get_page_number(page_number_data)
        return {
            'hand_side': leaf_data.get('handSide', None),
            'page_number': page_number,
            'page_type': leaf_data['pageType'],
            'note': leaf_data.get('note', None)
        }

    def get_current_reshoot_paths(self):
        """Return ``(image_path, thumb_path)`` for the reshot current leaf.

        Both containing directories are created if they do not exist.
        """
        leaf_number = self.get_current_leaf_number()
        image_name = '{:04d}.jpg'.format(leaf_number)
        book_path = self.book['path']
        path = join(book_path, 'reshooting', image_name)
        thumb_path = join(book_path, 'reshooting', 'thumbnails', image_name)
        ensure_dir_exists(join(book_path, 'reshooting'))
        ensure_dir_exists(join(book_path, 'reshooting', 'thumbnails'))
        return path, thumb_path

    def get_current_original_paths(self):
        """Return ``(image_path, thumb_path)`` of the original current leaf."""
        leaf_number = self.get_current_leaf_number()
        image_name = '{:04d}.jpg'.format(leaf_number)
        book_path = self.book['path']
        path = join(book_path, image_name)
        thumb_path = join(book_path, 'thumbnails', image_name)
        return path, thumb_path

    def _get_page_number(self, page_number_data):
        """Return the page number as an int, or None when unavailable.

        Accepts the dict form (``{'num': ...}``), a numeric string, or an
        int; any falsy value yields None.

        Raises:
            ValueError: the value is a string that is not an integer.
        """
        # TODO: Remove this method when scandata structure becomes the same
        # for reshooting mode and otherwise
        if page_number_data:
            if isinstance(page_number_data, dict):
                page_number = page_number_data.get('num', None)
                return None if page_number is None else int(page_number)
            elif isinstance(page_number_data, str):
                return int(page_number_data)
            elif isinstance(page_number_data, int):
                # A page number already stored as an integer used to be
                # dropped silently.
                return page_number_data
        return None

    def goto_previous_leaf(self, *args):
        """Move to the previous flagged leaf, if there is one."""
        self.set_current_leaf_index(self._current_leaf_index - 1)

    def goto_next_leaf(self, *args):
        """Move to the next flagged leaf, if there is one."""
        self.set_current_leaf_index(self._current_leaf_index + 1)

    def goto_first_leaf(self, *args):
        """Move to the first flagged leaf."""
        self.set_current_leaf_index(0)

    def goto_last_leaf(self, *args):
        """Move to the last flagged leaf."""
        max_index = max(0, len(self._note_leafs) - 1)
        self.set_current_leaf_index(max_index)

    def goto_rescribe_screen(self, *args):
        """Dispatch ``EVENT_GO_BACK``."""
        self.dispatch(self.EVENT_GO_BACK)

    def show_original_file(self, *args):
        """Dispatch ``EVENT_SHOW_ORIGINAL_FILE``."""
        self.dispatch(self.EVENT_SHOW_ORIGINAL_FILE)

    def show_reshoot_file(self, *args):
        """Dispatch ``EVENT_SHOW_RESHOOT_FILE`` if the reshot image exists."""
        if self.is_reshoot_leaf_ready():
            self.dispatch(self.EVENT_SHOW_RESHOOT_FILE)

    def show_page_type_form_popup(self, *args):
        """Dispatch the page-type popup event if the reshot image exists."""
        if self.is_reshoot_leaf_ready():
            self.dispatch(self.EVENT_SHOW_PAGE_TYPE_FORM_POPUP)

    def save_leaf_note(self, note):
        """Store ``note`` on the current leaf and save, if it changed."""
        scandata = self.scandata
        leaf_number = self.get_current_leaf_number()
        if scandata.get_note(leaf_number) != note:
            scandata.set_note(leaf_number, note)
            scandata.save()
            if note:
                self.logger.info(
                    'ReShootScreenBackend: Updated leaf %d with note: %s' %
                    (leaf_number, '\n%s' % note if '\n' in note else note))
            else:
                self.logger.info(
                    'ReShootScreenBackend: Removed note from leaf {}'.format(
                        leaf_number))

    def update_page_type(self, leaf_number, page_type):
        """Set and save the page type, then dispatch ``EVENT_PAGE_TYPE``."""
        scandata = self.scandata
        scandata.update_page_type(leaf_number, page_type)
        scandata.save()
        self.dispatch(self.EVENT_PAGE_TYPE, page_type)

    def update_leaf_rotation_if_necessary(self, leaf_number):
        """Apply the configured default rotation in single-camera mode.

        With exactly one camera the leaf's rotation is set to the
        ``default_single_camera_rotation`` setting (180 if unset) and the
        scandata is saved; otherwise nothing happens.
        """
        if self._cameras_count == 1:
            new_degree = self.config.get_integer(
                'default_single_camera_rotation', 180)
            self.logger.info(
                'ReShootScreenBackend: Reshooting in single-camera mode, will '
                'rotate by system default of {} degrees'.format(new_degree))
            self.scandata.update_rotate_degree(leaf_number, new_degree)
            self.scandata.save()
            self.logger.info(
                'ReShootScreenBackend: Set leaf {} rotation to {} degree(s)'.
                format(leaf_number, new_degree))

    def enable_keyboard_actions(self, *args):
        """Enable the keyboard action handler."""
        self._keyboard_action_handler.enable()

    def disable_keyboard_actions(self, *args):
        """Disable the keyboard action handler."""
        self._keyboard_action_handler.disable()

    def are_keyboard_actions_enabled(self):
        """Return True if the keyboard action handler is enabled."""
        return self._keyboard_action_handler.is_enabled()

    def switch_cameras(self, *args):
        """Toggle the reversed-cameras flag and capture again.

        Ignored while a capture is running or with fewer than two cameras.
        """
        if not self._capture_running and self.can_switch_cameras():
            self._reverse_cams = not self._reverse_cams
            self.logger.info('ReShootScreen: Switched cameras')
            self.capture_spread()

    def are_cameras_switched(self):
        """Return True if the cameras are currently reversed."""
        return self._reverse_cams

    def capture_spread(self, *args):
        """Start capturing a new image for the current leaf.

        Does nothing when capturing is not possible. When the disk is full
        an error report is dispatched instead. Otherwise the previous
        reshoot files are deleted, a capture-start report is dispatched and
        the request is queued on the camera system; completion arrives
        through ``_capture_spread_end``.
        """
        if not self.can_capture_spread():
            return
        if not has_free_disk_space(self.book['path']):
            self.logger.info('capture_spread: the disk is full!')
            report = {camera_system.KEY_ERROR: DiskFullError()}
            self.dispatch(self.EVENT_CAPTURE_LEAF, report)
            return
        leaf_number = self.get_current_leaf_number()
        side = self._get_capture_camera_side(leaf_number)
        camera_kwargs = self._create_camera_kwargs(side, leaf_number)
        self.logger.info(
            'ReShootScreen: Capturing new image for leaf {}, camera side {}, '
            '{}using reversed cameras'.format(
                leaf_number, side, '' if self._reverse_cams else 'not '))
        self._capture_running = True
        self.delete_current_spread()
        self.update_leaf_rotation_if_necessary(leaf_number)
        report = {camera_system.KEY_CAPTURE_START: True}
        self.dispatch(self.EVENT_CAPTURE_LEAF, report)
        # The request always goes through left_queue; the side to use is
        # carried inside camera_kwargs.
        self.camera_system.left_queue.put(camera_kwargs)

    def _get_capture_camera_side(self, leaf_number):
        """Return the camera side to use for ``leaf_number``.

        ``'foldout'`` with a single camera; otherwise ``'left'`` for even
        and ``'right'`` for odd leafs, swapped when cameras are reversed.
        """
        if self._cameras_count == 1:
            camera_side = 'foldout'
        else:
            camera_side = 'left' if leaf_number % 2 == 0 else 'right'
            if self._reverse_cams:
                camera_side = 'left' if camera_side == 'right' else 'right'
        return camera_side

    def _capture_spread_end(self, report, *args):
        """Capture callback: record capture time and forward the report."""
        self._capture_running = False
        report[camera_system.KEY_CAPTURE_END] = True
        if self.is_initialized():
            stats = report[camera_system.KEY_STATS]
            leaf_number = report[camera_system.KEY_EXTRA]['leaf_number']
            self.scandata.set_capture_time(leaf_number, stats['capture_time'])
        self.dispatch(self.EVENT_CAPTURE_LEAF, report)

    def delete_current_spread(self, *args):
        """Delete the reshot image and thumbnail of the current leaf."""
        path, thumb_path = self.get_current_reshoot_paths()
        self._delete_file(path)
        self._delete_file(thumb_path)

    def _delete_file(self, path):
        """Remove ``path`` if it exists and log the removal."""
        if exists(path):
            os.remove(path)
            self.logger.info('ReShootScreenBackend: Removed: {}'.format(path))

    def rotate_reshoot_leaf(self, *args):
        """Rotate the reshot leaf by 90 degrees and rebuild its thumbnail.

        The new angle is saved to scandata first; the thumbnail is then
        regenerated from the full-size reshot image and
        ``EVENT_ROTATE_LEAF`` is dispatched. Logs an error and returns if
        the reshot image or thumbnail is missing.
        """
        scandata_rotation_angle = 90
        path, thumb_path = self.get_current_reshoot_paths()
        if not self.is_reshoot_leaf_ready():
            self.logger.error(
                'ReShootScreen: Failed to rotate. Image not found: {}'.format(
                    thumb_path))
            return
        leaf_number = self.get_current_leaf_number()
        leaf_data = self.scandata.get_page_data(leaf_number)
        current_degree = int(leaf_data.get('rotateDegree', 0))

        new_degree = (current_degree + scandata_rotation_angle) % 360
        self.scandata.update_rotate_degree(leaf_number, new_degree)
        self.scandata.save()

        rotate_by = convert_scandata_angle_to_thumbs_rotation(
            new_degree, scandata_rotation_angle)

        size = (1500, 1000)  # (6000,4000)/4
        # Context manager so the source file handle is closed even if the
        # resize, rotation or save fails.
        with Image.open(path) as image:
            image.thumbnail(size)
            rotated = image.rotate(rotate_by, expand=True)
            rotated.save(thumb_path, 'JPEG', quality=90)

        self.logger.info(
            'ReShootScreenBackend: Set leaf {} rotation to {} degree(s) in scandata ( {} thumbs-equivalent) from {}'
            .format(leaf_number, new_degree, rotate_by, current_degree))
        self.dispatch(self.EVENT_ROTATE_LEAF)

    def _create_camera_kwargs(self, camera_side, leaf_number):
        """Build the capture request that is queued on the camera system."""
        path, thumb_path = self.get_current_reshoot_paths()
        return {
            camera_system.KEY_CALLBACK: self._capture_spread_end,
            camera_system.KEY_SIDE: camera_side,
            camera_system.KEY_PATH: path,
            camera_system.KEY_THUMB_PATH: thumb_path,
            camera_system.KEY_EXTRA: {
                'leaf_number': leaf_number
            }
        }

    # Default (empty) handlers for the events declared in __events__.

    def on_capture_leaf(self, report):
        pass

    def on_current_leaf(self, *args):
        pass

    def on_rotate_leaf(self, *args):
        pass

    def on_page_type(self, *args):
        pass

    def on_show_original_file(self, *args):
        pass

    def on_show_reshoot_file(self, *args):
        pass

    def on_show_page_type_form_popup(self, *args):
        pass

    def on_go_back(self, *args):
        pass
Ejemplo n.º 15
0
class ImportFolderTask(TaskBase):
    """Task that imports a folder of ``NNNN.jpg`` images as a new book.

    The steps returned by :meth:`create_pipeline` create a book in the given
    library, write its metadata, build scandata for every image in the
    stack, copy the images into the book directory and generate thumbnails.

    Required keyword arguments:
        path: directory containing the image stack (and optionally a
            ``metadata.xml``).
        library: object exposing ``new_book(uuid)``.
    """

    def __init__(self, **kwargs):
        super(ImportFolderTask, self).__init__(**kwargs)
        self.source_path = kwargs['path']
        self.library = kwargs['library']
        self.book_obj = None
        # Top-level (dirpath, dirnames, filenames) triple of source_path;
        # filled in by _load_directory.
        self.directory_list = None
        self.image_stack = None
        self.scandata = None
        self.metadata = None
        sc_metadata = get_sc_metadata()
        # Metadata fields filled in when the imported metadata lacks them.
        self.DEFAULT_FIELDS_AND_VALUES = [
            ('operator', sc_metadata['operator']),
            ('scanningcenter', sc_metadata['scanningcenter']),
            ('ppi', Scribe3Configuration().get_numeric_or_none('ppi')),
        ]
        self.do_not_rotate = True

    def create_pipeline(self):
        """Return the ordered list of steps that make up this task."""
        return [
            self._load_directory,
            self._verify_preconditions,
            self._make_book_object,
            self._load_metadata,
            self._augment_metadata,
            self._write_metadata,
            self._load_image_stack,
            self._make_scandata,
            self._check_for_missing_images,
            self._move_image_stack,
            self._generate_thumbs,
        ]

    def handle_event(self, event_name, *args, **kwargs):
        """Trash and delete the created book when the task errors out."""
        if event_name == 'on_state' and self.state == CANCELLED_WITH_ERROR:
            if self.book_obj:
                self.book_obj.do_move_to_trash()
                self.book_obj.do_delete_anyway()

    def _load_directory(self):
        """Read the top-level listing of the source folder.

        Raises:
            Exception: if the folder contains no non-hidden entries.
        """
        self.dispatch_progress('Loading directory')
        has_visible_entries = any(
            not entry.startswith('.')
            for entry in os.listdir(self.source_path))
        if not has_visible_entries:
            raise Exception('The folder you selected is empty')
        # Only the top-level triple is needed, so take the first item of the
        # generator instead of walking (and materialising) the whole tree.
        self.directory_list = next(os.walk(self.source_path))

    def _verify_preconditions(self):
        """Require that the folder contains at least ``0000.jpg``."""
        self.dispatch_progress('Verifying preconditions')
        if '0000.jpg' not in self.directory_list[2]:
            raise Exception('No image stack provided')

    def _make_book_object(self):
        """Create a new book in the library under a freshly generated UUID."""
        self.dispatch_progress('Making book object')
        generated_uuid = str(uuid4())
        self.book_obj = self.library.new_book(generated_uuid)

    def _load_metadata(self):
        """Load ``metadata.xml`` from the source folder, or start empty."""
        self.dispatch_progress('Loading metadata')
        if 'metadata.xml' in self.directory_list[2]:
            self.metadata = get_metadata(self.source_path)
        else:
            self.metadata = {}

    def _augment_metadata(self):
        """Fill in default fields that are absent from the metadata."""
        for field, default_value in self.DEFAULT_FIELDS_AND_VALUES:
            if field not in self.metadata and default_value is not None:
                self.metadata[field] = default_value

    def _load_image_stack(self):
        """Collect, sorted, the top-level files named ``NNNN.jpg``."""
        self.dispatch_progress('Loading image stack')
        # Raw string: '\d' and '\.' are invalid escapes in a normal literal.
        self.image_stack = sorted([
            k for k in self.directory_list[2]
            if re.match(r'\d{4}\.jpg$', os.path.basename(k))
        ])
        # consider .*[^\d]\d{4}.jpg

    def _make_scandata(self):
        """Create a scandata entry for every image in the stack.

        Leaf 0 and the last leaf are typed 'Color Card', leaf 1 'Cover' and
        everything else 'Normal'. Even leaves are 'left', odd ones 'right'.
        """
        self.dispatch_progress('Generating scandata')
        self.scandata = ScanData(self.book_obj.path)
        last_leaf_number = len(self.image_stack) - 1
        for image in self.image_stack:
            leaf_number = self.__extract_number_from_file(image)
            side = 'left' if leaf_number % 2 == 0 else 'right'
            if image == '0000.jpg':
                page_type = 'Color Card'
            elif image == '0001.jpg':
                page_type = 'Cover'
            elif leaf_number == last_leaf_number:
                page_type = 'Color Card'
            else:
                page_type = 'Normal'
            self.scandata.insert(leaf_number, side, page_type)
            if self.do_not_rotate:
                self.scandata.update_rotate_degree(leaf_number, 0)

    def _check_for_missing_images(self):
        """Verify the stack is contiguous from leaf 0, then save scandata.

        Raises:
            Exception: if leaf 0 or 1 is missing ('Cover image is missing!')
                or any later leaf up to the maximum is missing.
        """
        self.dispatch_progress('Checking for image stack integrity')
        if not (self.source_path and self.scandata):
            raise Exception('Cover image is missing!')
        max_leaf_number = self.scandata.get_max_leaf_number()
        if max_leaf_number is None or max_leaf_number < 1:
            raise Exception('Cover image is missing!')
        for leaf_number in range(max_leaf_number + 1):
            leaf_data = self.scandata.get_page_data(leaf_number)
            image_path = os.path.join(self.source_path,
                                      '{:04d}.jpg'.format(leaf_number))
            if leaf_data and os.path.exists(image_path):
                continue
            if leaf_number in (0, 1):
                raise Exception('Cover image is missing!')
            raise Exception('Image #{} is missing'.format(leaf_number))
        self.scandata.save()
        self.book_obj.reload_scandata()

    def _write_metadata(self):
        """Write the metadata into the book directory and reload it."""
        self.dispatch_progress('Writing metadata')
        set_metadata(self.metadata, self.book_obj.path)
        self.book_obj.reload_metadata()
        self.book_obj.do_create_metadata()

    def _move_image_stack(self):
        """Copy every image of the stack into the book directory."""
        self.dispatch_progress('Relocating image stack')
        for image in self.image_stack:
            source = os.path.join(self.source_path, image)
            destination = os.path.join(self.book_obj.path, image)
            shutil.copy(source, destination)

    def _generate_thumbs(self):
        """Write a rotated JPEG thumbnail for every image in the stack."""
        self.dispatch_progress('Generating thumbs')
        # The proxy size does not depend on the image; look it up once.
        thumbnail_size = (1500, 1000)
        if Scribe3Configuration().is_true('low_res_proxies'):
            thumbnail_size = (750, 500)
        total = len(self.image_stack)
        for n, image_name in enumerate(self.image_stack):
            self.dispatch_progress('Generating thumbs [{}/{}]'.format(
                n, total))
            source_image = os.path.join(self.book_obj.path, image_name)
            target_image = os.path.join(self.book_obj.path, 'thumbnails',
                                        image_name)
            # The stack was verified contiguous from 0, so the enumeration
            # index is used as the leaf number.
            current_degree = int(
                self.scandata.get_page_data(n).get('rotateDegree', 0))
            rotate_by = convert_scandata_angle_to_thumbs_rotation(
                current_degree, None)

            # Close the source image deterministically once the thumbnail
            # has been written.
            with Image.open(source_image) as source:
                source.thumbnail(thumbnail_size)
                thumb = source.rotate(rotate_by, expand=True)
                thumb.save(target_image, 'JPEG', quality=90)

    @staticmethod
    def __extract_number_from_file(filename):
        """Return the leaf number encoded in a ``NNNN.jpg`` file name.

        ``int`` accepts leading zeros, so '0000.jpg' yields 0 and
        '0012.jpg' yields 12.
        """
        return int(filename.split('.jpg')[0])
Ejemplo n.º 16
0
def upload_book(book):
    """Upload a book's files, metadata and logs to its remote item.

    The precondition check and the creation of the upload lock file happen
    before the ``try`` block, so an exception raised by either of them
    propagates unchanged and does not reach the ``finally`` clean-up.

    Inside the ``try`` block the function validates the preimage, fetches
    the item for ``book['identifier']``, uploads the book files, pushes
    metadata separately when the item already existed, uploads logs,
    verifies the collected responses, pushes metrics and optionally adds a
    notification.

    Args:
        book: book object read both by key (``'identifier'``, ``'path'``,
            ``'uuid'``, ``'status'``) and by attribute (``logger``,
            ``force_upload``).

    Raises:
        ScribeException: on any exception raised inside the ``try`` block.
            Before raising, ``book.do_upload_book_error()`` is called, the
            traceback is logged and a failure event is pushed.
    """
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])

    _check_preconditons(book)

    #book.do_book_upload_begin()

    _set_upload_lock_file(book, Logger)

    # Passed to _push_metrics and included in the generic failure payload.
    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])

        zip_path = _check_preimage_is_valid(book)

        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))

        # Remote precondition checks are skipped when force_upload is set.
        if not book.force_upload:
            _check_remote_preconditons(item, Logger)

        encoded_md = _prepare_metadata(book, item, Logger)

        metasource_file_location, metasource_file_upload_name = _generate_metasource(
            book, Logger)

        responses = []
        book_upload_phase_start = time.time()

        # Captured before the upload, so it reflects whether the item
        # already existed beforehand.
        needs_metadata_pushed = item.exists

        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))

        book_preimage_upload_start, \
        book_preimage_upload_end, \
        sizes_dict                  = _upload_book_files( zip_path, book,
                                                        encoded_md, item, responses,
                                                        metasource_file_location,
                                                        metasource_file_upload_name,
                                                        Logger)

        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)

        book_upload_phase_end = time.time()

        _upload_logs(book=book, item=item, responses=responses)

        _verify_responses(responses, Logger)

        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))

        book.do_upload_book_end()

        _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                      responses, responses_dict, book_upload_phase_start,
                      book_upload_phase_end, book_upload_total_start,
                      book_preimage_upload_start, book_preimage_upload_end)

        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message="{} has been successfully uploaded.".format(
                    book['identifier']),
                book=book)

        Logger.debug('Finished upload for ' + book['identifier'])

        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api
    except requests.ConnectionError as e:
        # Connection failures get their own event name and a message that
        # points the user at the network and S3 keys.

        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }

        push_event('tts-book-failed-upload', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:
        # Any other failure: same handling, but the payload also carries
        # responses_dict and the original message is kept in the exception.

        book.do_upload_book_error()
        Logger.error(traceback.format_exc())

        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e)
        }

        push_event('tts-book-upload-exception', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        # Runs on success and on failure: reset the force flag and remove
        # the lock file.
        book.force_upload = False
        Logger.info("Removing upload lock file at {}".format(
            join(book['path'], "upload_lock")))
        os.remove(join(book['path'], "upload_lock"))
Ejemplo n.º 17
0
def verify_uploaded(book):
    """Decide whether the selectors allow deleting an uploaded book.

    Reads the remote item's metadata (``repub_state``, ``scandate``,
    ``imagecount``) and its pending catalog tasks, evaluates one flag per
    policy and returns their conjunction. ``book.force_delete`` overrides
    the result.

    Args:
        book: book object exposing ``logger``, ``identifier``, ``path`` and
            ``force_delete``.

    Returns:
        True when the book has no identifier, when ``force_delete`` is set,
        or when every policy flag is True; False otherwise.
    """
    ia_session = get_ia_session()

    book.logger.info(
        'verify_uploaded: Verifying {} was uploaded to the cluster.'.format(
            book))

    # we do have identifier in the book dictionary, but we only trust
    # what's on the drive for this one
    identifier = book.identifier
    if not identifier:
        book.logger.info(
            'verify_uploaded: No identifier.txt. '
            'Assuming empty book and deleting.')
        return True

    # Log the identifier that was read, not the book object.
    book.logger.info(
        'verify_uploaded: Read {} from identifier.txt.'.format(identifier))

    # gather data
    item = ia_session.get_item(identifier)
    metadata = item.metadata

    repub_state = int(
        metadata['repub_state']) if 'repub_state' in metadata else None
    book.logger.info('verify_uploaded: repub_state {}'.format(repub_state))

    scandate = datetime.strptime(
        metadata['scandate'],
        '%Y%m%d%H%M%S') if 'scandate' in metadata else None
    book.logger.info('verify_uploaded: scandate {}'.format(scandate))

    # Only the count of pending tasks is used below.
    tasks_running, _ = get_pending_catalog_tasks(item)
    book.logger.info(
        'verify_uploaded: pending book_tasks {}'.format(tasks_running))

    local_imgcount = int(ScanData(book.path).count_pages())
    remote_imgcount = int(
        metadata['imagecount']) if 'imagecount' in metadata else None
    book.logger.info('verify_uploaded: local pages: {} '
                     '| remote pages: {}'.format(local_imgcount,
                                                 remote_imgcount))

    # These are here so you can bypass one easily by setting it to True
    scandate_ok = False
    repub_state_ok = False
    tasks_running_ok = False
    # NOTE(review): imgcount_ok starts as True and no branch below ever sets
    # it to False, so the image-count comparison currently never blocks
    # deletion. Kept unchanged because these flags are meant as manual
    # bypass switches -- confirm that this bypass is intentional.
    imgcount_ok = True

    # policies
    # A missing/zero repub_state, or one above 10, passes.
    if not repub_state:
        repub_state_ok = True
    elif repub_state > 10:
        repub_state_ok = True

    threshold = config.get_numeric_or_none('defer_delete_by')
    if threshold and scandate:
        # Passes only when scandate falls outside the last `threshold` hours.
        now = datetime.now()
        if not now - timedelta(hours=threshold) <= scandate <= now:
            scandate_ok = True
    else:
        # If the user doesn't specify a value, delete immediately
        scandate_ok = True

    if tasks_running == 0:
        tasks_running_ok = True

    if remote_imgcount:
        if local_imgcount == remote_imgcount:
            imgcount_ok = True
    else:
        imgcount_ok = True

    # aggregate and return
    ret = scandate_ok \
          and repub_state_ok and tasks_running_ok \
          and imgcount_ok

    if book.force_delete:
        ret = True

    book.logger.info(
        'verify_uploaded: Do selectors allow for deletion?'
        ' scandate ok: {} |  repub_state_ok {} '
        '|  book_tasks ok: {} | imgcount_ok: {} | Force delete: {}-->>> '
        'VERDICT: {}'.format(scandate_ok, repub_state_ok, tasks_running_ok,
                             imgcount_ok, book.force_delete, ret))

    return ret