Example #1
    def init_pid_file(self):
        Logger.info('PID Init: Begin')
        config_dir = scribe_globals.CONFIG_DIR
        if not os.path.exists(config_dir):
            os.makedirs(config_dir)
        if not os.access(config_dir, os.W_OK | os.X_OK):
            raise ScribeException('Config dir "{}" not writable'
                                  .format(config_dir))
        # Check to see if another copy of the app is running
        # and if needed, remove stale pidfiles
        path = os.path.join(config_dir, 'scribe_pid')
        Logger.info('PID Init: Looking for pidfile at {}'.format(path))
        if os.path.exists(path):
            with open(path) as f:
                old_pid = f.read().strip()
            pid_dir = os.path.join('/proc', old_pid)
            if os.path.exists(pid_dir):
                Logger.error('There seems to be a pid file at {}. Try '
                             'removing it and relaunching '
                             'the application.'.format(path))
                raise ScribeException('Another copy of the Scribe application '
                                      'is running!')
            else:
                os.unlink(path)

        pid = os.getpid()
        with open(path, 'w') as f:
            f.write(str(pid))
        Logger.info('PID Init: End')
Example #2
def get_cluster_status(self, book):
    Logger = self.logger
    status_msg = 'Getting cluster status for ' + book['identifier']
    Logger.info(status_msg)
    Clock.schedule_once(partial(self.set_status_callback, str(status_msg)))
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'.format(
            id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception as e:
        Logger.exception('Get cluster status: Error retrieving metadata')
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if (md is None) or ('result' not in md):
            raise ScribeException('Could not query metadata')

        repub_state = md['result'].get('repub_state')
        status_msg = ('Repub state for {} is {}'.format(
            str(book['identifier']), str(repub_state)))
        Logger.info('Check QA status: ' + status_msg)
        # Clock.schedule_once(partial(self.set_prop_callback,
        #                             self.status_label,
        #                             status_msg))
        Clock.schedule_once(partial(self.set_status_callback, str(status_msg)))
        return int(repub_state)
    except Exception:
        return None
Example #3
def _check_preimage_is_valid(book):
    zip_path = (join(book['path'],
                     '{id}_preimage.zip'.format(id=book['identifier'])))

    if not os.path.exists(zip_path):
        raise ScribeException('Could not find preimage.zip in book folder.')
    elif os.path.getsize(zip_path) <= 532:  # size of an empty zip
        raise ScribeException('preimage.zip is empty!')
    return zip_path
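
A minimal, hedged usage sketch of the check above. The book object is assumed to be dict-like with 'path' and 'identifier' keys, as in the other examples; the values below are made up for illustration.

# Hypothetical values, for illustration only
book = {'path': '/tmp/scribe_books/1234', 'identifier': 'examplebook0000test'}
try:
    zip_path = _check_preimage_is_valid(book)
    print('preimage.zip looks valid:', zip_path)
except ScribeException as e:
    print('preimage.zip check failed:', e)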
Example #4
def create_preimage_zip(book):
    logger = book.logger

    if book['status'] >= UploadStatus.uploaded.value:
        return

    logger.info('Package book: Creating preimage.zip')
    #Clock.schedule_once(partial(self.set_status_callback,
    #                            'Now creating book upload bundle for {}'.format(book.get('identifier', ''))))
    try:

        zip_path = join(book['path'],
                        '{id}_preimage.zip'.format(id=book['identifier']))

        compression = zipfile.ZIP_STORED
        allow_zip64 = True
        target = book.get_imagestack()

        if not target:
            raise ScribeException('Could not find jpegs to compress.')

        with zipfile.ZipFile(zip_path, 'w', compression,
                             allow_zip64) as preimage_zip:
            for jpeg in target:
                logger.debug('adding ' + jpeg + ' to ' + zip_path)
                arcname = ('{id}_preimage/{j}'.format(
                    id=book['identifier'], j=os.path.basename(jpeg)))
                preimage_zip.write(jpeg, arcname)

            scandata = join(book['path'], 'scandata.json')
            if os.path.exists(scandata):
                arcname = ('{id}_preimage/scandata.json'.format(
                    id=book['identifier']))
                preimage_zip.write(scandata, arcname)

        book.do_finish_preimage_zip()

    except Exception as e:
        book.error = e
        book.logger.error(traceback.format_exc())
        book.do_error_preimage_zip()

        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }

        push_event('tts-book-packaging-exception', payload, 'book',
                   book['identifier'])
        raise ScribeException('Could not create preimage.zip - {}'.format(
            str(e)))
Example #5
def _change_repub_state(book_item, target_repub_state):
    res_repub_state_change = book_item.modify_metadata({'repub_state': target_repub_state})

    if res_repub_state_change is None or res_repub_state_change.status_code != 200:
        raise ScribeException('Received erroneous response: {}'.format(res_repub_state_change))
    elif res_repub_state_change.status_code == 200:
        if res_repub_state_change.text:
            response = json.loads(res_repub_state_change.text)
            if 'error' in response:
                raise ScribeException('Metadata API courteously '
                                      'tucked an error in a valid response.'
                                      '\nWhat seems to have gone wrong is {}'
                                      .format(str(response['error'])))
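
A hedged usage sketch: Example #18 calls this helper with an internetarchive item and target state 43 after a successful foldouts upload. get_ia_session() and the identifier below are assumptions borrowed from the other examples.

# Hypothetical usage, mirroring Example #18
ia_session = get_ia_session()
book_item = ia_session.get_item('examplebook0000test')
_change_repub_state(book_item, 43)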
Example #6
def delete_unfinished_book(book):
    '''Delete a book without checking for remote state

    Called in worker thread
    '''
    try:
        book.logger.debug('Deleting unfinished book')

        payload = {
            'function': 'delete_unfinished_book',
            'local_id': book.uuid,
            'status': book.status
        }

        shutil.rmtree(book.path)
        book.delete_callback(book)

        push_event('tts-book-deleted', payload, 'book', book['path'])
    except ScribeException:
        raise
    except OSError:
        return
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book!')
Example #7
    def _save(self):
        try:
            with open(self.config_file_path, 'w+') as f:
                yaml.safe_dump(self.configuration, f, default_flow_style=False)
            Logger.debug('Scribe3Configuration: Saved:\n{}'
                         .format(pformat(self.configuration)))
        except Exception:
            raise ScribeException('Could not save config to {}'
                                  .format(self.config_file_path))
Example #8
    def _ensure_file_exists(self):
        if not os.path.exists(self.config_file_path):
            config_dir = scribe_globals.CONFIG_DIR
            if not os.path.exists(config_dir):
                os.makedirs(config_dir)
            if not os.access(config_dir, os.W_OK | os.X_OK):
                raise ScribeException('Config dir "{}" not writable'
                                      .format(config_dir))
            os.mknod(self.config_file_path)
        if os.stat(self.config_file_path).st_size == 0:
            self._initialize()
Example #9
def _check_preconditions(book, book_item, Logger):
    if not os.path.exists(os.path.join(book['path'], 'downloaded')):
        raise ScribeException('_check_preconditions: Cannot use this upload function on a book '
                              'that was not downloaded')

    if book_item.metadata['repub_state'] not in ['32', '42']:
        raise ScribeException('_check_preconditions: Book is not in correct remote repub_state: aborting upload.')

    outstanding_catalog_tasks, outstanding_catalog_tasks_list = get_pending_catalog_tasks(book_item.identifier)

    if outstanding_catalog_tasks > 0:
        msg = 'Refusing to upload: item {} has {} outstanding (running or pending) catalog book_tasks\n{}'.format(
            book_item.identifier,
            outstanding_catalog_tasks,
            ', '.join(
                ['{} -> {} ({})'.format(
                    x['task_id'], x['cmd'], x['status'])
                    for x in outstanding_catalog_tasks_list])
        )
        Logger.error(msg)
        raise ScribeException(msg)
Example #10
def _check_repub_state_is_correct(item):
    if not item.exists:
        return

    if item.metadata['mediatype'] == 'audio':
        return

    if item.metadata['repub_state'] not in [
            '-1',
            '-2',
    ]:
        raise ScribeException('Book is not in correct remote repub_state '
                              '(is {}): aborting upload.'.format(
                                  item.metadata['repub_state']))
Example #11
def id_available(identifier):
    '''
    Query the upload_api to see if an identifier is available.
    '''
    try:
        url = 'https://archive.org/upload/app/upload_api.php'
        params = {'name': 'identifierAvailable', 'identifier': identifier}
        r = requests.get(url, params=params)
        ret = r.json()
        success = ret.get('success', False)
    except Exception:
        print((traceback.format_exc()))
        raise ScribeException(
            'Could not query upload_api for identifierAvailable')

    return bool(success)
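
A short, hedged usage sketch; the identifier below is made up for illustration.

# Hypothetical check before assigning a new identifier to a book
if id_available('examplebook0000test'):
    print('Identifier is available')
else:
    print('Identifier is taken; choose another')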
Example #12
    def archive_logout(self):

        try:
            config.delete('s3')
            config.delete('email')
            config.delete('cookie')
            popup_success = Popup(
                title='Logged out',
                content=Label(text="You have logged out" +
                              ". \n\nRestarting the application..."),
                auto_dismiss=True,
                size_hint=(None, None),
                size=(400, 200))
            popup_success.open()
            restart_app()
        except Exception:
            # if the keys didn't exist in the first place, a field was missing or whatever, cry.
            raise ScribeException('There was an error logging out.')
Example #13
def new_get_identifier(book):
    book.logger.debug('Getting identifier from path')
    id_file = os.path.join(book.path, 'identifier.txt')
    if book.has_identifier() and not os.path.exists(id_file):
        book.logger.debug('Using previously generated identifier {}'
                          .format(book.identifier))
        return

    if os.path.exists(id_file):
        try:
            identifier = open(id_file).read().strip()
            book.logger.debug('Using preset identifier ' + identifier)
            if hasattr(book, 'identifier'):
                if identifier != book['identifier']:
                    book.logger.warn(
                        'RESETTING IDENTIFIER from {old} to {new}'
                        .format(old=book.identifier, new=identifier)
                        )
        except Exception:
            raise ScribeException('File identifier.txt exists but cannot '
                                  'be read')
    else:
        return None
    return identifier
Example #14
def upload_book(book):
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])

    _check_preconditons(book)

    #book.do_book_upload_begin()

    _set_upload_lock_file(book, Logger)

    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])

        zip_path = _check_preimage_is_valid(book)

        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))

        if not book.force_upload:
            _check_remote_preconditons(item, Logger)

        encoded_md = _prepare_metadata(book, item, Logger)

        metasource_file_location, metasource_file_upload_name = _generate_metasource(
            book, Logger)

        responses = []
        book_upload_phase_start = time.time()

        needs_metadata_pushed = item.exists

        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))

        (book_preimage_upload_start,
         book_preimage_upload_end,
         sizes_dict) = _upload_book_files(zip_path, book, encoded_md, item,
                                          responses, metasource_file_location,
                                          metasource_file_upload_name, Logger)

        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)

        book_upload_phase_end = time.time()

        _upload_logs(book=book, item=item, responses=responses)

        _verify_responses(responses, Logger)

        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))

        book.do_upload_book_end()

        _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                      responses, responses_dict, book_upload_phase_start,
                      book_upload_phase_end, book_upload_total_start,
                      book_preimage_upload_start, book_preimage_upload_end)

        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message="{} has been successfully uploaded.".format(
                    book['identifier']),
                book=book)

        Logger.debug('Finished upload for ' + book['identifier'])

        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api
    except requests.ConnectionError as e:

        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }

        push_event('tts-book-failed-upload', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:

        book.do_upload_book_error()
        Logger.error(traceback.format_exc())

        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e)
        }

        push_event('tts-book-upload-exception', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        book.force_upload = False
        Logger.info("Removing upload lock file at {}".format(
            join(book['path'], "upload_lock")))
        os.remove(join(book['path'], "upload_lock"))
Example #15
def item_ready_for_upload(book):
    '''Book items might have already been preloaded with metadata in the
    IA scan process. However, prevent uploading to ia items which already
    have images uploaded.

    Called in worker thread.
    '''

    try:
        session = get_ia_session()
        item = session.get_item(book.identifier)

        if not item.exists:
            if book:
                preloaded_path = os.path.join(book.path, 'preloaded')
                if os.path.exists(preloaded_path):
                    # This item was created in offline mode, but the
                    # identifier doesn't exist
                    book.logger.error(
                        'Item {0} is tagged as preloaded, but '
                        'the identifier does not exist. Aborting '
                        'upload and reverting to scribing '
                        'status.'.format(book.identifier))
                    return False
                else:
                    book.logger.info('Item does not exist and user wants to '
                                     'upload to item {0}. Ok\'ing that'.format(
                                         book.identifier))
                    # no existing item, so safe to use this identifier
                    return True
        allowed_formats = {
            'Metadata', 'MARC', 'MARC Source', 'MARC Binary', 'Dublin Core',
            'Archive BitTorrent', 'Web ARChive GZ', 'Web ARChive', 'Log',
            'OCLC xISBN JSON', 'Internet Archive ARC',
            'Internet Archive ARC GZ', 'CDX Index', 'Item CDX Index',
            'Item CDX Meta-Index', 'WARC CDX Index', 'Metadata Log'
        }

        ALLOWED_ITEM_FILE_NAMES = [
            '{}_{}'.format(book.identifier, x)
            for x in ALLOWED_VARIABLE_FILE_NAMES
        ]

        for item_file_metadata in item.files:
            if item_file_metadata['format'] not in allowed_formats:
                # Ignore new style in-item thumb files
                if item_file_metadata['name'] in ALLOWED_FILE_NAMES:
                    book.logger.info(
                        'File {} ({}) is present in '
                        'remote item and allowed: continuing...'.format(
                            item_file_metadata['name'],
                            item_file_metadata['format']))
                    continue
                elif item_file_metadata['name'] in ALLOWED_ITEM_FILE_NAMES:
                    continue
                # files have already been uploaded to this item
                book.logger.error(
                    'File {} in item {} is blocking upload.'.format(
                        item_file_metadata, item.identifier))
                return False

    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not check status of IA item {}'.format(
            book.identifier))

    return True
Example #16
def _upload_book_files(zip_path, book, encoded_md, item, responses,
                       metasource_file_location, metasource_file_upload_name,
                       Logger):
    sizes_dict = {
        'preimage': os.path.getsize(zip_path),
    }

    book_preimage_upload_start = time.time()

    m = 'Uploading preimage.zip to {}'.format(book['identifier'])
    Logger.debug(m)
    # Clock.schedule_once(partial(self.set_status_callback, m))
    book.update_message('Upload | Images file')
    upload_response = item.upload(zip_path,
                                  metadata=encoded_md,
                                  retries=10,
                                  retries_sleep=60,
                                  queue_derive=False,
                                  verbose=True,
                                  verify=True,
                                  headers={'x-archive-keep-old-version': '1'})
    if len(upload_response) == 0 or getattr(upload_response[0], 'url', None) is None:
        if not book.force_upload:
            raise ScribeException(
                'No response was returned by IA upon upload of preimage.zip. '
                'This could mean the file has already been uploaded, the item is not '
                'available, your cookie could have expired. '
                'Refer to the documentation for further guidance.')

    responses.append(upload_response)
    url_to_status_code = \
        {r.request.url: r.status_code for r in upload_response}
    book.logger.debug('Response from upload: {} | {}'.format(
        upload_response, url_to_status_code))

    book_preimage_upload_end = time.time()

    book.update_message('Upload | Metasource')
    if metasource_file_location is not None:
        book.logger.debug('Uploading metasource file {0} as {1}'.format(
            metasource_file_location, metasource_file_upload_name))
        response = item.upload(
            {metasource_file_upload_name: metasource_file_location},
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    book.update_message('Upload | MARCs')

    if os.path.exists(os.path.join(book['path'], 'marc.bin')):
        book.logger.debug('Uploading MARC Binary file {}'.format(
            join(book['path'], book['identifier'] + '_marc.bin')))
        upload_name_mapping = \
            {book['identifier'] + '_meta.mrc': join(book['path'],
                                                    'marc.bin')}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    if os.path.exists(os.path.join(book['path'], 'marc.xml')):
        book.logger.debug('Uploading MARCXML file {} to {}'.format(
            join(book['path'], 'marc.xml'),
            join(book['identifier'] + '_marc.xml')))
        upload_name_mapping = \
            {book['identifier'] + '_marc.xml': join(book['path'],
                                                    'marc.xml')}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    send_to_station_file = os.path.join(book['path'], 'send_to_station')
    if os.path.exists(send_to_station_file):
        with open(send_to_station_file, 'r') as f:
            target_scanner = f.read()
        assert target_scanner is not None
        book.update_message(
            'Upload | Sending foldouts to {}'.format(target_scanner))
        Logger.info(
            'Book uploader: found instructions to send {} to {}'.format(
                book['identifier'], target_scanner))

        book.logger.debug('Uploading send-to-scribe.txt file {} to {}'.format(
            send_to_station_file, book['identifier']))
        upload_name_mapping = \
            {book['identifier'] + '_sent_to.txt': send_to_station_file}
        response = item.upload(
            upload_name_mapping,
            retries=10,
            retries_sleep=60,
            queue_derive=False,
            verify=True,
            verbose=True,
        )
        responses.append(response)
        url_to_status_code = \
            {r.request.url: r.status_code for r in response}
        book.logger.debug('Response from upload: {} | {}'.format(
            response, url_to_status_code))

    return book_preimage_upload_start, book_preimage_upload_end, sizes_dict
Example #17
def delete_finished_book(book):
    '''Delete the local copy of a book after checking its remote repub_state.

    Called from worker thread.
    '''
    # if book['status'] < UploadStatus.done.value:
    #     return
    book.logger.debug('Checking repub_state for {}'.format(book))
    repub_state = None
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'.format(
            id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if md is None or 'result' not in md:
            book.logger.info('No repub_state or MDAPI unavailable. '
                             'Continuing with deletion.')

        else:
            repub_state = md['result'].get('repub_state')
            if repub_state is None:
                book.logger.warning('Repub state not found for {}'.format(
                    book['identifier']))
                return
        if repub_state:
            if int(repub_state) in (RepubState.done.value,
                                    RepubState.uploaded.value,
                                    RepubState.post_autocropped.value):
                if os.path.exists(book.path):
                    # User may have already deleted local copy of this book
                    book.logger.info('Deleting {}'.format(book))
                    payload = {
                        'function': 'delete_finished_book',
                        'local_id': book.uuid,
                        'status': book.status
                    }
                    push_event('tts-book-deleted', payload, 'book',
                               book['identifier'])
                    shutil.rmtree(book['path'])
                    book.delete_callback(book)
                else:
                    book.logger.error('UploadWidget: Book not found '
                                      '(could be deleted): {}'.format(
                                          book['path']))
            else:
                book.logger.info('Not deleting {} | repub_state={}'.format(
                    book['path'], repub_state))
        else:
            if os.path.exists(book.path):
                # User may have already deleted local copy of this book
                book.logger.info('Deleting {}'.format(book))
                payload = {
                    'function': 'delete_finished_book',
                    'local_id': book.uuid,
                    'status': book.status
                }
                push_event('tts-book-deleted', payload, 'book',
                           book['identifier'])
                shutil.rmtree(book['path'])
                book.delete_callback(book)
    except ScribeException:
        raise
    except Exception as e:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book! {}'.format(e))
Example #18
def upload_book_foldouts(book):
    try:
        Logger = book.logger
        Logger.info('upload_book_foldouts: Uploading foldouts for book '
                    '{}'.format(book))

        ia_session = get_ia_session()
        book_item = ia_session.get_item(book['identifier'])

        _check_preconditions(book, book_item, Logger)

        book_folder = 'foldouts'

        cdic, tdic, rdic, rtdic = _create_scandata(book, book_folder, True, Logger)

        responses = []
        # Upload the pictures
        Logger.debug('upload_book_foldouts: Uploading pics')
        book.update_message('Foldouts upload | Images')
        if cdic != {}:
            res = book_item.upload(cdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)

        if tdic != {}:
            res = book_item.upload(tdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)

        try:
            if rdic != {}:
                res = book_item.upload(rdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)

            if rtdic != {}:
                res = book_item.upload(rtdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)
        except requests.exceptions.ConnectionError as e:
            Logger.error(
                'upload_book_foldouts: Connection exception {} '
                'has occurred at rdic upload time; aborting!'.format(str(e)))
            raise e
        except Exception as e:
            Logger.error('upload_book_foldouts: Error {} has occurred at rdic upload time'.format(e))
            raise e

        Logger.debug('upload_book_foldouts: Done. Uploading scandata...')
        # Upload the scandata

        target_scandata = 'scandata.json'
        book.update_message('Foldouts upload | Scandata')
        scandata = join(book['path'], 'scandata_rerepublished.json')
        upload_res = book_item.upload({target_scandata: scandata},
                                      retries=10,
                                      retries_sleep=60,
                                      queue_derive=False,
                                      verify=True,)

        if os.path.exists(os.path.join(book['path'], 'scanning.log')):
            book.update_message('Foldouts upload | Log')
            book.logger.debug(
                'Uploading Scanning log file'
            )
            upload_name_mapping = \
                {'logs/' + book['identifier']
                 + '_scanning_{:%Y-%m-%d%H:%M:%S}.log'.format(datetime.now()):
                     join(book['path'], 'scanning.log')}
            response = book_item.upload(upload_name_mapping, retries=10,
                                        retries_sleep=60, queue_derive=False,
                                        verbose=True, verify=True)
            responses.append(response)
            url_to_status_code = \
                {r.request.url: r.status_code for r in response}
            book.logger.debug('Response from upload: {} | {}'
                              .format(response, url_to_status_code))

        responses.append(upload_res)
        # corrections_uploaded

        # flatten responses list:
        flat_responses = [y for x in responses for y in x]
        for response in flat_responses:
            Logger.info('{} returned {}'.format(response.url, response.status_code))
            if response.status_code != 200:
                raise Exception('upload_book_foldouts: Response code {} {} - {} from cluster. '
                                'URL was: {} | content: {}. '
                                'This is an error. Upload will be attempted again.'
                                .format(response.status_code,
                                        response.reason,
                                        getattr(response, 'text', ''),
                                        response.url,
                                        response.content))

        Logger.debug('Done. Changing repub state...')

        _change_repub_state(book_item, 43)

        _remove_book_from_btserver_item(book, Logger)

        book.do_upload_foldouts_done()

        payload = {
            'repub_state': 43,
            'responses': flat_responses,
        }
        push_event('tts-book-corrections-sent', payload,
                   'book', book['identifier'])
        Logger.debug('All done.')

        return

    except requests.ConnectionError as e:
        raise ScribeException('Upload Failed. Please check network and '
                              'S3 Keys (Error was: {})'.format(e))
    except Exception as e:
        book.do_upload_foldouts_fail()
        book.raise_exception(e)
Example #19
def _create_scandata(book, book_folder, foldouts, Logger):
    scandata = json.loads(ScanData(book['path']).dump())
    cdic = {}
    tdic = {}
    rdic = {}
    rtdic = {}
    '''
    This subprogram uploads a corrected book back to republisher.
    It:
    - Verifies that the book was downloaded.
    - Gets ahold of the tts identifier to later remove the book from the 
        item's "books" list.
    - Constructs and maintains four dictionaries: new pages (cdic), new 
        pages thumbs (tdic), reshot pages(rdic), reshot pages thumbs 
        (rtdic) that will later become the scandata.
    -- Looks for new pages (spreads insertions and/or appends) and their 
        thumbs
    -- Add republisher tags (that's what post-processing would do)
    -- Looks for replacements (in bookpath/reshooting) if present
    -- saves a scandata_rerepublished.json
    - Uploads the pictures and scandatas
    - Updates tts item, repub state and metrics
    '''

    # Here we add the new pages

    # cdic is the corrections dictionary, and it contains entries in the
    # form:
    #
    # {item path : local path } - for example:
    # {'corrections/0044.jpg' : '~/scribe_books/1234/0022.jpg'}
    try:
        cdic = {book_folder + '/' + k: os.path.join(book['path'], k)
                for k in next(os.walk(book['path']))[2]
                if re.match(r'\d{4}\.jpg$', os.path.basename(k))}
        # And the thumbs from the new pages
        # REMOVE THUMB FROM OS WALK PATH
        tdic = {book_folder + '/thumbnails/' + k:
                    os.path.join(book['path'], 'thumbnails', k)
                for k in next(os.walk(join(book['path'])))[2]
                if re.match(r'\d{4}\.jpg$', os.path.basename(k))}
    except Exception:
        Logger.error('_create_scandata: No corrections found.')

    # Ensure the scandata has the appropriate tags for re-republishing

    # NEW PAGES DICT
    Logger.debug('_create_scandata: Processing new pages...')
    for k in cdic:
        page_num = str(int(k.split('.jpg')[0].split('/')[1]))
        Logger.debug('_create_scandata: Processing page {}'.format(page_num))
        try:
            page_data_exists = scandata['pageData'][page_num] is not None
            Logger.debug('_create_scandata: Page data for page {} exists in '
                         'scandata'.format(page_num))
        except Exception as e:
            raise ScribeException(e)

        # Rotate images
        im = Image.open(cdic[k])
        # im = im.rotate(
        #     int(scandata['pageData'][page_num]['rotateDegree'])
        # )
        width, height = im.size

        # scandata['pageData'][page_num]['rotateDegree'] = 0
        if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]:
            scandata['pageData'][page_num]['origWidth'] = str(width)
            scandata['pageData'][page_num]['origHeight'] = str(height)
        elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]:
            scandata['pageData'][page_num]['origWidth'] = str(height)
            scandata['pageData'][page_num]['origHeight'] = str(width)

        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - PAGE INSERT- '
                     'please report this \n\n')
        Logger.debug(
            'rotatedegree={2}, origWidth={0}, height={1}'
                .format(scandata['pageData'][page_num]['origWidth'],
                        scandata['pageData'][page_num]['origHeight'],
                        scandata['pageData'][page_num]['rotateDegree'])
        )
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')

        scandata['pageData'][page_num]['origFileName'] = k.split('/')[1]
        scandata['pageData'][page_num]['sourceFileName'] = k
        scandata['pageData'][page_num]['proxyFullFileName'] = k
        if not foldouts:
            scandata['pageData'][page_num]['correctionType'] = 'INSERT'
            scandata['pageData'][page_num]['TTSflag'] = 0

        Logger.debug('\n\n\n ---->>> CORRECTIONS DEBUG - please report '
                     'this \n\n')
        Logger.debug('\n' + str(scandata['pageData'][page_num]))
        Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -\n\n\n')
    # THUMBS FOR NEW PAGES
    for k in tdic:
        page_num = str(int(k.split('.jpg')[0].split('/')[2]))
        scandata['pageData'][page_num]['proxyFileName'] = k

    Logger.debug('_create_scandata: Processed {} new images.'.format(len(cdic)))

    try:
        # here we add the reshot images
        rdic = {
            book_folder + '/' + k: join(book['path'], 'reshooting', k)
            for k in next(os.walk(join(book['path'], 'reshooting')))[2]
            if re.match(r'\d{4}\.jpg$', os.path.basename(k))
        }

        # RESHOT IMAGES DICT
        for k in rdic:
            page_num = str(int(k.split('.jpg')[0].split('/')[1]))
            # rotate images
            im = Image.open(rdic[k])
            # im = im.rotate(
            #     int(scandata['pageData'][page_num]['rotateDegree'])
            # )
            width, height = im.size
            # im.save(rdic[k])

            # scandata['pageData'][page_num]['rotateDegree'] = 0
            if abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [0, 180]:
                scandata['pageData'][page_num]['origWidth'] = str(width)
                scandata['pageData'][page_num]['origHeight'] = str(height)
            elif abs(int(scandata['pageData'][page_num]['rotateDegree'])) in [90, 270]:
                scandata['pageData'][page_num]['origWidth'] = str(height)
                scandata['pageData'][page_num]['origHeight'] = str(width)

            Logger.debug('---->>> CORRECTIONS DEBUG - PAGE RESHOOT')
            Logger.debug(
                'rotatedegree is {2}, origWidth = {0}, height= {1}'
                    .format(scandata['pageData'][page_num]['origWidth'],
                            scandata['pageData'][page_num]['origHeight'],
                            scandata['pageData'][page_num]['rotateDegree'])
            )
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - - -')

            scandata['pageData'][page_num]['origFileName'] = k.split('/')[1]
            scandata['pageData'][page_num]['sourceFileName'] = k
            scandata['pageData'][page_num]['correctionType'] = 'REPLACE'
            scandata['pageData'][page_num]['proxyFullFileName'] = k
            scandata['pageData'][page_num]['TTSflag'] = 0

            Logger.debug('---->>> CORRECTIONS DEBUG - please report this')
            Logger.debug('\n' + str(scandata['pageData'][page_num]))
            Logger.debug('<<<---- END CORRECTIONS DEBUG - - - - - - - -')

        # here we add the thumbs from the reshooting
        rtdic = {
            book_folder + '/thumbnails/' + k: join(book['path'], 'reshooting', 'thumbnails', k)
            for k in next(os.walk(join(book['path'], 'reshooting', 'thumbnails')))[2]
            if re.match(r'\d{4}\.jpg$', os.path.basename(k))
        }

        # THUMBS FOR RESHOT IMAGES
        for k in rtdic:
            page_num = str(int(k.split('.jpg')[0].split('/')[2]))
            scandata['pageData'][page_num]['proxyFileName'] = k

        Logger.debug('_create_scandata: Processed {} reshot images.'.format(len(rdic)))

    except Exception as e:
        Logger.exception('_create_scandata: No reshot pages found')

    # Super Solenoid Scandata from disk (page info)
    sss = {int(k): v for k, v in list(scandata['pageData'].items())}
    # Now we want our own piece of memory for this one
    new_scandata = copy.deepcopy(scandata)
    new_scandata['pageData'] = {}
    new_scandata['pageData']['page'] = []

    # Rewrite pages section

    Logger.debug('_create_scandata: Adding all computed pages to new scandata...')
    for page in sorted(sss):
        Logger.debug('_create_scandata: {}'.format(page))
        sss[page]['leafNum'] = page
        try:
            pnum = sss[page]['pageNumber']['num']
            sss[page]['pageNumber'] = pnum
        except Exception:
            pass
        new_scandata['pageData']['page'].append(sss[page])

    # Rewrite assertions to be compatible with republisher

    try:
        Logger.debug('\nNow rewriting page assertions for repub compatibility '
                     'if present')
        temp_pageNumData = copy.deepcopy(scandata['bookData']['pageNumData'])
        temp_pageNumData['assertion'] = []
        for entry in scandata['bookData']['pageNumData']:
            if entry.isdigit():
                del temp_pageNumData[entry]

        for assertion in scandata['bookData']['pageNumData'].items():
            temp_assertion = {'leafNum': str(assertion[0]),
                              'pageNum': str(assertion[1])}
            temp_pageNumData['assertion'].append(temp_assertion)

        Logger.debug('_create_scandata: OK done. New pageNumData block: {}'
                     .format(temp_pageNumData))

        new_scandata['bookData']['pageNumData'] = temp_pageNumData
    except Exception as e:
        Logger.exception('_create_scandata: No pageNumData block found or error '
                         'processing it: {}'.format(e))

    # Write it all to file
    with open(join(book['path'], 'scandata_rerepublished.json'), 'w+') as outfile:
        json.dump(new_scandata, outfile)

    Logger.debug('_create_scandata: Done constructing scandata.')
    return cdic, tdic, rdic, rtdic