Example #1
0
def rebuild(src_dir, dest_path):
    """Dispatch ``do_rebuild`` for the first ``*.opf`` file in ``src_dir``.

    The job is sent through ``fork_job`` (module ``calibre.ebooks.mobi.tweak``)
    with ``(opf_path, dest_path)`` as arguments and ``no_output=True``.

    Raises ``ValueError`` when ``src_dir`` contains no ``*.opf`` file.
    """
    matches = glob.glob(os.path.join(src_dir, '*.opf'))
    if len(matches) == 0:
        raise ValueError('No OPF file found in %s'%src_dir)
    opf_path = matches[0]
    # Debugging aid: shadow fork_job with an in-process stand-in, e.g.
    # def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
    fork_job(
        'calibre.ebooks.mobi.tweak', 'do_rebuild',
        args=(opf_path, dest_path), no_output=True)
Example #2
0
def rebuild(src_dir, dest_path):
    """Rebuild from the OPF found in ``src_dir``, writing to ``dest_path``.

    Only the first ``*.opf`` match returned by ``glob`` is used. The actual
    work is delegated to ``do_rebuild`` in ``calibre.ebooks.mobi.tweak`` via
    ``fork_job``.

    Raises ``ValueError`` if no ``*.opf`` file exists in ``src_dir``.
    """
    pattern = os.path.join(src_dir, '*.opf')
    found = glob.glob(pattern)
    if found:
        opf = found[0]
    else:
        raise ValueError('No OPF file found in %s'%src_dir)
    # To debug in-process, temporarily define this local replacement:
    # def fork_job(a, b, args=None, no_output=True):
    #     do_rebuild(*args)
    job_args = (opf, dest_path)
    fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=job_args,
             no_output=True)
Example #3
0
def do_convert(path, temp_path, key, instance):
    """Run ``render_for_viewer`` for ``path`` and record the output size.

    The output directory is ``temp_path`` joined with ``instance['path']``.
    Once the job returns, the sizes of all files yielded by ``walk`` for that
    directory are added up and stored in ``instance['cache_size']``.
    """
    tdir = os.path.join(temp_path, instance['path'])
    book_info = {
        'size': instance['file_size'],
        'mtime': instance['file_mtime'],
        'hash': key,
    }
    fork_job(
        'calibre.srv.render_book', 'render_for_viewer',
        args=(path, tdir, book_info), timeout=3000, no_output=True)
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(tdir))
Example #4
0
    def run(self):
        """Populate ``self.results`` with ranked metadata or set ``self.error``.

        When ``DEBUG_DIALOG`` is set the results come from
        ``self.sample_results()``. Otherwise ``single_identify`` is run through
        ``fork_job``; each raw result is parsed as OPF and converted to book
        metadata, cover flags are attached, and the returned caches and log
        dump are merged into ``self.caches`` and ``self.log``.

        Any failure is captured as text in ``self.error`` instead of raising.
        """
        try:
            if not DEBUG_DIALOG:
                job = fork_job(
                    "calibre.ebooks.metadata.sources.worker",
                    "single_identify",
                    (self.title, self.authors, self.identifiers),
                    no_output=True,
                    abort=self.abort,
                )
                self.results, covers, caches, log_dump = job["result"]
                parsed = []
                for raw_opf in self.results:
                    opf = OPF(BytesIO(raw_opf), basedir=os.getcwdu(), populate_spine=False)
                    parsed.append(opf.to_book_metadata())
                self.results = parsed
                for mi, has_cover in zip(self.results, covers):
                    mi.has_cached_cover_url = has_cover
                self.caches.update(caches)
                self.log.load(log_dump)
            else:
                self.results = self.sample_results()
            # The position in the result list doubles as the GUI rank.
            for rank, mi in enumerate(self.results):
                mi.gui_rank = rank
        except WorkerError as err:
            self.error = force_unicode(err.orig_tb)
        except:
            import traceback

            self.error = force_unicode(traceback.format_exc())
Example #5
0
def explode(path, dest, question=lambda x:True):
    """Validate the MOBI file at ``path`` and run ``do_explode`` into ``dest``.

    The header is inspected first: Topaz files, unparseable headers and files
    without KF8 data raise ``BadFormat``; a non-zero encryption type raises
    ``DRMError``. For joint KF8/Mobi6 files ``question`` is called with a
    confirmation prompt, and ``None`` is returned if it answers falsy.

    Returns the ``'result'`` entry of the ``fork_job`` return value.
    """
    with open(path, 'rb') as f:
        magic = f.read(3)
        f.seek(0)
        if magic == b'TPZ':
            raise BadFormat(_('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(f, default_log)
        except MobiError:
            raise BadFormat(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError(_('This file is locked with DRM. It cannot be tweaked.'))

        kf8_type = header.kf8_type
        if kf8_type is None:
            raise BadFormat(_('This MOBI file does not contain a KF8 format '
                    'book. KF8 is the new format from Amazon. calibre can '
                    'only tweak MOBI files that contain KF8 books. Older '
                    'MOBI files without KF8 are not tweakable.'))

        # Joint files lose their Mobi6 half, so the caller must confirm.
        if kf8_type == 'joint' and not question(_('This MOBI file contains both KF8 and '
                'older Mobi6 data. Tweaking it will remove the Mobi6 data, which '
                'means the file will not be usable on older Kindles. Are you '
                'sure?')):
            return None

    job = fork_job('calibre.ebooks.mobi.tweak', 'do_explode',
                   args=(path, dest), no_output=True)
    return job['result']
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``.

    ``stream`` is copied to ``input.pdf`` in a temporary directory and
    ``set_metadata_`` from ``calibre.utils.podofo`` is run through
    ``fork_job`` with the title, authors, producer, tags and an XMP packet
    built from ``mi``. If the job result is truthy and ``output.pdf`` is
    larger than 100 bytes, ``stream`` is truncated and overwritten with it.
    ``stream`` is always rewound to offset 0 before returning.

    Raises ``Exception`` (wrapping the worker traceback) on ``WorkerError``.
    """
    with TemporaryDirectory('_podofo_set_metadata') as tdir:
        input_path = os.path.join(tdir, 'input.pdf')
        with open(input_path, 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
        xmp_packet = metadata_to_xmp_packet(mi)

        try:
            job = fork_job(
                'calibre.utils.podofo', 'set_metadata_',
                (tdir, mi.title, mi.authors, mi.book_producer, mi.tags,
                 xmp_packet))
            touched = job['result']
        except WorkerError as err:
            raise Exception('Failed to set PDF metadata in (%s): %s' %
                            (mi.title, err.orig_tb))
        if touched:
            with open(os.path.join(tdir, 'output.pdf'), 'rb') as src:
                # Seek to the end to measure the file; outputs of 100 bytes
                # or fewer are not copied back.
                src.seek(0, os.SEEK_END)
                if src.tell() > 100:
                    src.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(src, stream)
                    stream.flush()
    stream.seek(0)
Example #7
0
 def open_book(self, pathtoebook):
     """Show the preprocessed HTML of ``pathtoebook`` in ``self.preview``.

     ``get_preprocess_html`` is run through ``fork_job`` and writes into a
     temporary file, which is then read, decoded as UTF-8 and set as the
     preview text. On any failure an error dialog is shown and its return
     value is returned instead.
     """
     with TemporaryFile('_prepprocess_gui') as tf:
         err_msg = _('Failed to generate markup for testing. Click '
                         '"Show Details" to learn more.')
         try:
             fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
                      (pathtoebook, tf))
         except WorkerError as err:
             # The worker's own traceback becomes the dialog details.
             return error_dialog(
                 self, _('Failed to generate preview'), err_msg,
                 det_msg=err.orig_tb, show=True)
         except:
             import traceback
             details = traceback.format_exc()
             return error_dialog(
                 self, _('Failed to generate preview'), err_msg,
                 det_msg=details, show=True)
         with open(tf, 'rb') as src:
             markup = src.read().decode('utf-8')
             self.preview.setPlainText(markup)
Example #8
0
 def open_book(self, pathtoebook):
     """Load the preprocessed markup of ``pathtoebook`` into the preview.

     The markup is produced by ``get_preprocess_html`` (run via
     ``fork_job``) into a temporary file. If that job fails, an error dialog
     carrying the traceback is shown and its return value is returned;
     otherwise the file is decoded as UTF-8 and put into ``self.preview``.
     """
     with TemporaryFile('_prepprocess_gui') as tf:
         err_msg = _('Failed to generate markup for testing. Click '
                         '"Show details" to learn more.')
         try:
             fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
                      (pathtoebook, tf))
         except WorkerError as worker_err:
             return error_dialog(
                 self, _('Failed to generate preview'), err_msg,
                 det_msg=worker_err.orig_tb, show=True)
         except:
             import traceback
             tb_text = traceback.format_exc()
             return error_dialog(
                 self, _('Failed to generate preview'), err_msg,
                 det_msg=tb_text, show=True)
         with open(tf, 'rb') as html_file:
             text = html_file.read().decode('utf-8')
             self.preview.setPlainText(text)
Example #9
0
def get_djvu_metadata(stream, cover=True):
    """Read title, authors and optionally the cover from a DjVu stream.

    ``stream`` is copied to ``src.djvu`` in a temporary directory and
    ``get_djvu_metadata_worker`` is run on that directory via ``fork_job``.
    Any output captured from the job is echoed with ``prints``.

    Returns a ``MetaInformation``; ``cover_data`` is set when ``cover`` is
    truthy and the worker left a ``cover.jpg`` behind.

    Raises ``RuntimeError`` on ``WorkerError`` and ``ValueError`` when the
    worker result is ``None``.
    """
    with TemporaryDirectory('_djvu_metadata_read') as djvupath:
        stream.seek(0)
        with open(os.path.join(djvupath, 'src.djvu'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job(
                'calibre_plugins.djvu_metadata.djvu',
                'get_djvu_metadata_worker',
                (djvupath, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run djvused')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as job_output:
            captured = job_output.read().strip()
            if captured:
                prints(captured)
        if info is None:
            raise ValueError('Could not read metadata from djvu')
        # The cover must be read before the temporary directory goes away.
        cdata = None
        covpath = os.path.join(djvupath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    if cdata:
        mi.cover_data = ('jpg', cdata)

    return mi
Example #10
0
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    """Produce cover data for ``path_to_html``, trying cheap extractors first.

    Strategies, in order, each used only while ``data`` is still ``None``:

    1. ``extract_cover_from_embedded_svg`` when ``SVG_NS`` occurs in the file.
    2. ``extract_calibre_cover``.
    3. ``render_html_data``, called directly if ``is_ok_to_use_qt()`` is
       truthy, otherwise run through ``fork_job``.

    Every strategy is best effort: failures are swallowed (or printed for
    the ``fork_job`` path) and the next strategy is tried.

    Returns whatever the successful strategy produced, or ``None``.
    """
    from calibre.ebooks.oeb.base import SVG_NS
    # Use a context manager so the file handle is closed deterministically.
    with open(path_to_html, 'rb') as f:
        raw = f.read()
    base_dir = os.path.dirname(path_to_html)
    data = None
    if SVG_NS in raw:
        try:
            data = extract_cover_from_embedded_svg(raw, base_dir, log)
        except Exception:
            # Best effort: fall through to the next strategy.
            pass
    if data is None:
        try:
            data = extract_calibre_cover(raw, base_dir, log)
        except Exception:
            # Best effort: fall through to full rendering.
            pass

    if data is None:
        from calibre.gui2 import is_ok_to_use_qt
        if is_ok_to_use_qt():
            data = render_html_data(path_to_html, width, height)
        else:
            from calibre.utils.ipc.simple_worker import fork_job, WorkerError
            try:
                result = fork_job('calibre.ebooks',
                                  'render_html_data',
                                  (path_to_html, width, height),
                                  no_output=True)
                data = result['result']
            except WorkerError as err:
                prints(err.orig_tb)
            except Exception:
                traceback.print_exc()
    return data
Example #11
0
def test_exclusive_file(path=None):
    """Exercise ``ExclusiveFile`` within one process and across two.

    Called without ``path`` it takes the lock on a file in the system temp
    directory, tries to take it again in-process, then runs itself through
    ``fork_job`` with the lock file path; a non-``None`` job result raises
    ``LockError``.

    Called with ``path`` it tries to lock that file with a one second
    timeout and returns an error string if the lock was obtained, else
    ``None``.
    """
    if path is not None:
        try:
            with ExclusiveFile(path, timeout=1):
                raise Exception(
                    'ExclusiveFile failed to prevent multiple uses in different processes!'
                )
        except LockError:
            pass
        except Exception as err:
            return str(err)
        return None

    import tempfile
    lock_path = os.path.join(tempfile.gettempdir(), 'test-exclusive-file')
    with ExclusiveFile(lock_path):
        # Same-process attempt.
        # NOTE(review): the LockError raised inside the inner ``with`` is
        # caught by the ``except LockError`` directly below, so this check
        # cannot currently fail -- confirm whether that is intended.
        try:
            with ExclusiveFile(lock_path, timeout=1):
                raise LockError(
                    "ExclusiveFile failed to prevent multiple uses in the same process!"
                )
        except LockError:
            pass
        # Different-process attempt.
        from calibre.utils.ipc.simple_worker import fork_job
        job = fork_job('calibre.utils.lock', 'test_exclusive_file',
                       (lock_path, ))
        err = job['result']
        if err is not None:
            raise LockError('ExclusiveFile failed with error: %s' % err)
Example #12
0
def explode(path, dest, question=lambda x:True):
    """Check that ``path`` is a tweakable KF8 MOBI, then explode it to ``dest``.

    Raises ``BadFormat`` for Topaz files, for headers ``MetadataHeader``
    cannot parse and for files without KF8 data, and ``DRMError`` when the
    header reports encryption. When the file holds both KF8 and Mobi6 data,
    ``question`` is asked for confirmation; a falsy answer returns ``None``.

    Otherwise returns the ``'result'`` of running ``do_explode`` through
    ``fork_job``.
    """
    with open(path, 'rb') as mobi:
        prefix = mobi.read(3)
        mobi.seek(0)
        if prefix == b'TPZ':
            raise BadFormat(_('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(mobi, default_log)
        except MobiError:
            raise BadFormat(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError(_('This file is locked with DRM. It cannot be tweaked.'))

        kf8_type = header.kf8_type

        if kf8_type is None:
            raise BadFormat(_('This MOBI file does not contain a KF8 format '
                    'book. KF8 is the new format from Amazon. calibre can '
                    'only tweak MOBI files that contain KF8 books. Older '
                    'MOBI files without KF8 are not tweakable.'))

        if kf8_type == 'joint':
            confirmed = question(_('This MOBI file contains both KF8 and '
                'older Mobi6 data. Tweaking it will remove the Mobi6 data, which '
                'means the file will not be usable on older Kindles. Are you '
                'sure?'))
            if not confirmed:
                return None

    outcome = fork_job('calibre.ebooks.mobi.tweak', 'do_explode',
                       args=(path, dest), no_output=True)
    return outcome['result']
Example #13
0
 def run(self):
     """Identify the book and store ranked results, or record the error.

     With ``DEBUG_DIALOG`` set, ``self.sample_results()`` supplies the
     results. Otherwise ``single_identify`` runs through ``fork_job`` and its
     result is unpacked into raw results, cover flags, caches and a log dump.
     Exceptions are not propagated; their traceback text is stored in
     ``self.error``.
     """
     try:
         if not DEBUG_DIALOG:
             worker_args = (self.title, self.authors, self.identifiers)
             res = fork_job('calibre.ebooks.metadata.sources.worker',
                            'single_identify',
                            worker_args,
                            no_output=True,
                            abort=self.abort)
             self.results, covers, caches, log_dump = res['result']
             # Convert each raw OPF payload into a book metadata object.
             converted = []
             for raw in self.results:
                 opf = OPF(BytesIO(raw), basedir=os.getcwdu(),
                           populate_spine=False)
                 converted.append(opf.to_book_metadata())
             self.results = converted
             for metadata, cov in zip(self.results, covers):
                 metadata.has_cached_cover_url = cov
             self.caches.update(caches)
             self.log.load(log_dump)
         else:
             self.results = self.sample_results()
         for index, metadata in enumerate(self.results):
             metadata.gui_rank = index
     except WorkerError as err:
         self.error = force_unicode(err.orig_tb)
     except:
         import traceback
         self.error = force_unicode(traceback.format_exc())
Example #14
0
def test_exclusive_file(path=None):
    """Check ``ExclusiveFile`` locking in-process and from a second process.

    Without ``path``: lock a file in the temp directory, attempt a second
    in-process lock, then run this function through ``fork_job`` with the
    lock file path and raise ``LockError`` if the job reports an error.

    With ``path``: try to lock it (one second timeout); return an error
    message string if the lock could be taken, otherwise ``None``.
    """
    if path is not None:
        try:
            with ExclusiveFile(path, timeout=1):
                raise Exception(
                    'ExclusiveFile failed to prevent multiple uses in different processes!'
                )
        except LockError:
            pass
        except Exception as failure:
            return str(failure)
        return None

    import tempfile
    target = os.path.join(tempfile.gettempdir(), 'test-exclusive-file')
    with ExclusiveFile(target):
        # Try same process lock
        # NOTE(review): the LockError raised in the inner block is swallowed
        # by the handler right below it, so this branch never reports a
        # failure -- confirm whether that is intended.
        try:
            with ExclusiveFile(target, timeout=1):
                raise LockError(
                    "ExclusiveFile failed to prevent multiple uses in the same process!"
                )
        except LockError:
            pass
        # Try different process lock
        from calibre.utils.ipc.simple_worker import fork_job
        outcome = fork_job('calibre.utils.lock', 'test_exclusive_file',
                           (target, ))
        err = outcome['result']
        if err is not None:
            raise LockError('ExclusiveFile failed with error: %s' % err)
Example #15
0
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    """Return cover data for the HTML file at ``path_to_html``, or ``None``.

    The file is read once and three strategies are tried until one yields
    data: extraction from an embedded SVG (only if ``SVG_NS`` occurs in the
    file), ``extract_calibre_cover``, and finally ``render_html_data`` --
    called in-process when ``is_ok_to_use_qt()`` is truthy, via ``fork_job``
    otherwise. Failures in any strategy are tolerated so later strategies
    still get a chance.
    """
    from calibre.ebooks.oeb.base import SVG_NS
    # Read through a context manager so the handle is not left open.
    with open(path_to_html, 'rb') as src:
        raw = src.read()
    html_dir = os.path.dirname(path_to_html)
    data = None
    if SVG_NS in raw:
        try:
            data = extract_cover_from_embedded_svg(raw, html_dir, log)
        except Exception:
            # Best effort: try the next extractor instead.
            pass
    if data is None:
        try:
            data = extract_calibre_cover(raw, html_dir, log)
        except Exception:
            # Best effort: fall back to rendering the page.
            pass

    if data is None:
        from calibre.gui2 import is_ok_to_use_qt
        if is_ok_to_use_qt():
            data = render_html_data(path_to_html, width, height)
        else:
            from calibre.utils.ipc.simple_worker import fork_job, WorkerError
            try:
                result = fork_job('calibre.ebooks',
                                  'render_html_data',
                                  (path_to_html, width, height),
                                  no_output=True)
                data = result['result']
            except WorkerError as err:
                prints(err.orig_tb)
            except Exception:
                traceback.print_exc()
    return data
Example #16
0
def render_html_data(path_to_html, width, height):
    """Render ``path_to_html`` to JPEG via ``fork_job`` and return the bytes.

    ``main`` from ``calibre.ebooks.render_html`` is run with a temporary
    output directory and the format ``'jpeg'``. When the job result is
    truthy the contents of ``rendered.jpeg`` are returned; otherwise the
    failure is reported on stderr and ``None`` is returned.

    ``width`` and ``height`` are accepted but not used by this body.
    """
    from calibre.ptempfile import TemporaryDirectory
    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
    result = {}

    def report_error(text=''):
        # Prints the failure header, then ``text``, then any captured job output.
        prints('Failed to render', path_to_html, 'with errors:',
               file=sys.stderr)
        if text:
            prints(text, file=sys.stderr)
        if not result or not result['stdout_stderr']:
            return
        with open(result['stdout_stderr'], 'rb') as captured:
            prints(captured.read(), file=sys.stderr)

    with TemporaryDirectory('-render-html') as tdir:
        try:
            result = fork_job('calibre.ebooks.render_html', 'main',
                              args=(path_to_html, tdir, 'jpeg'))
        except WorkerError as err:
            report_error(err.orig_tb)
            return None
        if not result['result']:
            report_error()
            return None
        with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as image:
            return image.read()
Example #17
0
    def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
        """Open the AZW3 file at ``pathtoazw3`` as an editable container.

        With ``clone_data`` the parent initialiser is called with it and
        ``pathtoazw3`` / ``obfuscated_fonts`` are copied from it; nothing
        else is done. Otherwise the header is validated, the book is
        exploded into ``tdir`` (a new persistent temporary directory when
        ``tdir`` is ``None``) through ``fork_job``, and the parent
        initialiser is called with the resulting OPF path.

        Raises ``InvalidMobi`` for Topaz files, unparseable headers, files
        without KF8 data, joint KF8/Mobi6 files and explode failures, and
        ``DRMError`` when the header reports encryption.
        """
        if clone_data is not None:
            super(AZW3Container, self).__init__(
                None, None, log, clone_data=clone_data)
            self.pathtoazw3 = clone_data['pathtoazw3']
            self.obfuscated_fonts = clone_data['obfuscated_fonts']
            return

        self.pathtoazw3 = pathtoazw3
        if tdir is None:
            tdir = PersistentTemporaryDirectory('_azw3_container')
        self.root = tdir = os.path.abspath(os.path.realpath(tdir))
        with open(pathtoazw3, 'rb') as f:
            if f.read(3) == b'TPZ':
                raise InvalidMobi(
                    _('This is not a MOBI file. It is a Topaz file.'))

            try:
                header = MetadataHeader(f, default_log)
            except MobiError:
                raise InvalidMobi(_('This is not a MOBI file.'))

            if header.encryption_type != 0:
                raise DRMError()

            kf8_type = header.kf8_type
            if kf8_type is None:
                raise InvalidMobi(
                    _('This MOBI file does not contain a KF8 format '
                      'book. KF8 is the new format from Amazon. calibre can '
                      'only edit MOBI files that contain KF8 books. Older '
                      'MOBI files without KF8 are not editable.'))
            if kf8_type == 'joint':
                raise InvalidMobi(
                    _('This MOBI file contains both KF8 and '
                      'older Mobi6 data. calibre can only edit MOBI files '
                      'that contain only KF8 data.'))

        try:
            job = fork_job(
                'calibre.ebooks.oeb.polish.container', 'do_explode',
                args=(pathtoazw3, tdir), no_output=True)
            opf_path, obfuscated_fonts = job['result']
        except WorkerError as err:
            log(err.orig_tb)
            raise InvalidMobi('Failed to explode MOBI')
        super(AZW3Container, self).__init__(tdir, opf_path, log)
        # Font names are stored with forward slashes whatever os.sep is.
        self.obfuscated_fonts = set(
            name.replace(os.sep, '/') for name in obfuscated_fonts)
Example #18
0
def run_extract_book(*args, **kwargs):
    """Run ``extract_book`` through ``fork_job`` and return its result.

    All positional and keyword arguments are forwarded unchanged. The job is
    started with ``timeout=3000`` and ``no_output=True``.
    """
    from calibre.utils.ipc.simple_worker import fork_job
    return fork_job(
        'calibre.ebooks.oeb.iterator.book', 'extract_book',
        args=args, kwargs=kwargs, timeout=3000, no_output=True)['result']
Example #19
0
    def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
        """Build a container for the AZW3 book at ``pathtoazw3``.

        When ``clone_data`` is given the container is set up from it alone.
        Otherwise the MOBI header is checked, ``do_explode`` is run through
        ``fork_job`` into ``tdir`` (created if ``None``) and the parent class
        is initialised from the exploded OPF.

        Raises ``InvalidMobi`` or ``DRMError`` when the file cannot be edited.
        """
        if clone_data is not None:
            super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
            for attr in ("pathtoazw3", "obfuscated_fonts"):
                setattr(self, attr, clone_data[attr])
            return

        self.pathtoazw3 = pathtoazw3
        if tdir is None:
            tdir = PersistentTemporaryDirectory("_azw3_container")
        resolved = os.path.realpath(tdir)
        tdir = os.path.abspath(resolved)
        self.root = tdir
        with open(pathtoazw3, "rb") as mobi:
            signature = mobi.read(3)
            if signature == b"TPZ":
                raise InvalidMobi(_("This is not a MOBI file. It is a Topaz file."))

            try:
                header = MetadataHeader(mobi, default_log)
            except MobiError:
                raise InvalidMobi(_("This is not a MOBI file."))

            if header.encryption_type != 0:
                raise DRMError()

            kf8_type = header.kf8_type

            if kf8_type is None:
                raise InvalidMobi(_(
                    "This MOBI file does not contain a KF8 format "
                    "book. KF8 is the new format from Amazon. calibre can "
                    "only edit MOBI files that contain KF8 books. Older "
                    "MOBI files without KF8 are not editable."
                ))

            if kf8_type == "joint":
                raise InvalidMobi(_(
                    "This MOBI file contains both KF8 and "
                    "older Mobi6 data. calibre can only edit MOBI files "
                    "that contain only KF8 data."
                ))

        try:
            exploded = fork_job(
                "calibre.ebooks.oeb.polish.container",
                "do_explode",
                args=(pathtoazw3, tdir),
                no_output=True,
            )
            opf_path, obfuscated_fonts = exploded["result"]
        except WorkerError as worker_err:
            log(worker_err.orig_tb)
            raise InvalidMobi("Failed to explode MOBI")
        super(AZW3Container, self).__init__(tdir, opf_path, log)
        # Replace the platform separator with "/" in every font name.
        normalized = set()
        for font in obfuscated_fonts:
            normalized.add(font.replace(os.sep, "/"))
        self.obfuscated_fonts = normalized
Example #20
0
File: pdf.py Project: kmshi/calibre
def get_metadata(stream, cover=True):
    """Read PDF metadata (and optionally the cover) from ``stream``.

    ``stream`` is copied to ``src.pdf`` in a temporary directory and
    ``read_info`` is run on that directory through ``fork_job``. Title,
    author, creator, keywords and subject from the returned info dict are
    mapped onto a ``MetaInformation``. Keywords are split on commas into
    tags; the first tag ``check_isbn`` accepts becomes ``mi.isbn`` and is
    dropped from the tags.

    Raises ``RuntimeError`` on ``WorkerError`` and ``ValueError`` when the
    info dict is empty.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as job_output:
            captured = job_output.read().strip()
            if captured:
                prints(captured)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        # Read the cover while the temporary directory still exists.
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        # Drop the tag(s) that normalise to the chosen ISBN.
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
Example #21
0
def get_metadata(stream, cover=True):
    """Extract metadata from the PDF in ``stream`` as a ``MetaInformation``.

    The PDF is written to a temporary directory and inspected by
    ``read_info`` (run via ``fork_job``). The info dict supplies title,
    author, creator, keywords (turned into tags, with an ISBN pulled out if
    ``check_isbn`` recognises one) and subject. If ``cover`` is truthy and a
    ``cover.jpg`` was produced, it is attached as JPEG cover data.

    Raises ``RuntimeError`` if the job fails and ``ValueError`` if it
    returns no info.
    """
    with TemporaryDirectory("_pdf_metadata_read") as pdfpath:
        stream.seek(0)
        pdf_copy = os.path.join(pdfpath, "src.pdf")
        with open(pdf_copy, "wb") as out:
            shutil.copyfileobj(stream, out)
        try:
            res = fork_job(
                "calibre.ebooks.metadata.pdf", "read_info",
                (pdfpath, bool(cover)))
        except WorkerError as worker_err:
            prints(worker_err.orig_tb)
            raise RuntimeError("Failed to run pdfinfo")
        info = res["result"]
        with open(res["stdout_stderr"], "rb") as log_file:
            log_text = log_file.read().strip()
            if log_text:
                prints(log_text)
        if not info:
            raise ValueError("Could not read info dict from PDF")
        covpath = os.path.join(pdfpath, "cover.jpg")
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, "rb") as img:
                cdata = img.read()

    title = info.get("Title", None)
    au = info.get("Author", None)
    if au is not None:
        au = string_to_authors(au)
    else:
        au = [_("Unknown")]
    mi = MetaInformation(title, au)

    creator = info.get("Creator", None)
    if creator:
        mi.book_producer = creator

    keywords = info.get("Keywords", None)
    mi.tags = []
    if keywords:
        mi.tags = [part.strip() for part in keywords.split(",")]
        isbn = [check_isbn(part) for part in mi.tags if check_isbn(part)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        # Keep only the tags that do not match the extracted ISBN.
        mi.tags = [part for part in mi.tags if check_isbn(part) != isbn]

    subject = info.get("Subject", None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ("jpeg", cdata)

    return mi
Example #22
0
    def get_image_urls(self, title, author, log, abort, timeout):
        """Return image URLs found by the ``google_images`` ``search`` job.

        The search is run through ``fork_job`` with ``self.prefs['size']``
        and ``timeout``. Any exception is logged via ``log`` and an empty
        list is returned instead.
        """
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        try:
            job = fork_job(
                'calibre.ebooks.metadata.sources.google_images', 'search',
                args=(title, author, self.prefs['size'], timeout),
                no_output=True, abort=abort, timeout=timeout)
            return job['result']
        except WorkerError as err:
            # Log the worker's own traceback first when it has one.
            if err.orig_tb:
                log.error(err.orig_tb)
            log.exception('Searching google failed:' + as_unicode(err))
        except Exception as err:
            log.exception('Searching google failed:' + as_unicode(err))

        return []
Example #23
0
    def get_image_urls(self, title, author, log, abort, timeout):
        """Search for cover image URLs for ``title`` / ``author``.

        Runs ``search`` from ``calibre.ebooks.metadata.sources.google_images``
        through ``fork_job`` and returns its result. Failures are logged and
        yield an empty list.
        """
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        search_args = (title, author, self.prefs['size'], timeout)
        try:
            outcome = fork_job(
                'calibre.ebooks.metadata.sources.google_images', 'search',
                args=search_args, no_output=True, abort=abort,
                timeout=timeout)
            return outcome['result']
        except WorkerError as worker_err:
            if worker_err.orig_tb:
                log.error(worker_err.orig_tb)
            log.exception('Searching google failed:' + as_unicode(worker_err))
        except Exception as exc:
            log.exception('Searching google failed:' + as_unicode(exc))

        return []
Example #24
0
File: pdf.py Project: Eksmo/calibre
def get_metadata(stream, cover=True):
    """Read PDF metadata from ``stream`` into a ``MetaInformation``.

    The stream is copied to ``src.pdf`` in a temporary directory, on which
    ``read_info`` is run through ``fork_job``. Title, author, creator,
    keywords (comma separated, becoming tags) and subject (inserted as the
    first tag) are taken from the returned info dict. A ``cover.jpg`` left
    in the directory is attached when ``cover`` is truthy.

    Raises ``RuntimeError`` on ``WorkerError`` and ``ValueError`` when the
    info dict is empty.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as job_output:
            captured = job_output.read().strip()
            if captured:
                prints(captured)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
Example #25
0
    def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
        """Create a container for the KF8-only AZW3 file at ``pathtoazw3``.

        If ``clone_data`` is supplied, initialisation is delegated to the
        parent class with it and the two AZW3-specific attributes are copied
        over. Otherwise the file header is validated, the book is exploded
        into ``tdir`` via ``fork_job`` and the parent class is initialised
        with the exploded OPF path.

        Raises ``InvalidMobi`` or ``DRMError`` for files that cannot be
        edited, and ``InvalidMobi`` if the explode job fails.
        """
        if clone_data is not None:
            super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
            self.pathtoazw3 = clone_data['pathtoazw3']
            self.obfuscated_fonts = clone_data['obfuscated_fonts']
            return

        self.pathtoazw3 = pathtoazw3
        if tdir is None:
            tdir = PersistentTemporaryDirectory('_azw3_container')
        tdir = os.path.abspath(os.path.realpath(tdir))
        self.root = tdir
        with open(pathtoazw3, 'rb') as src:
            leading = src.read(3)
            if leading == b'TPZ':
                raise InvalidMobi(_('This is not a MOBI file. It is a Topaz file.'))

            try:
                header = MetadataHeader(src, default_log)
            except MobiError:
                raise InvalidMobi(_('This is not a MOBI file.'))

            if header.encryption_type != 0:
                raise DRMError()

            kf8_type = header.kf8_type
            if kf8_type is None:
                raise InvalidMobi(_('This MOBI file does not contain a KF8 format '
                        'book. KF8 is the new format from Amazon. calibre can '
                        'only edit MOBI files that contain KF8 books. Older '
                        'MOBI files without KF8 are not editable.'))
            if kf8_type == 'joint':
                raise InvalidMobi(_('This MOBI file contains both KF8 and '
                    'older Mobi6 data. calibre can only edit MOBI files '
                    'that contain only KF8 data.'))

        try:
            explode_job = fork_job(
                'calibre.ebooks.oeb.polish.container', 'do_explode',
                args=(pathtoazw3, tdir), no_output=True)
            opf_path, obfuscated_fonts = explode_job['result']
        except WorkerError as err:
            log(err.orig_tb)
            raise InvalidMobi('Failed to explode MOBI')
        super(AZW3Container, self).__init__(tdir, opf_path, log)
        # Store font names with "/" in place of the platform separator.
        self.obfuscated_fonts = set(
            font.replace(os.sep, '/') for font in obfuscated_fonts)
Example #26
0
    def search(self, query, max_results=10, timeout=60):
        """Yield up to ``max_results`` ``SearchResult`` objects for ``query``.

        The woblink.com catalogue page is fetched by running ``get_results``
        from the ``js_browser`` source through ``fork_job``; the returned HTML
        is parsed and one result is yielded per catalogue entry that has a
        detail link. The ``limit`` URL parameter is raised to 20 or 30 when
        more than 10 or 20 results are requested.

        Raises ``Exception`` with the worker traceback on ``WorkerError``.
        """
        url = 'http://woblink.com/katalog-ebooki?query=' + urllib.quote_plus(query.encode('utf-8'))
        if max_results > 20:
            url += '&limit=30'
        elif max_results > 10:
            url += '&limit=20'

        remaining = max_results

        try:
            results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
        except WorkerError as err:
            raise Exception('Could not get results: %s'%err.orig_tb)
        doc = html.fromstring(strip_encoding_declarations(results['result']))
        cover_div = './/div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]'
        for entry in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
            if remaining <= 0:
                break

            # Entries without a detail link are skipped.
            detail_href = ''.join(entry.xpath(cover_div + '/a[1]/@href'))
            if not detail_href:
                continue

            cover_url = ''.join(entry.xpath(cover_div + '/a[1]/img/@src'))
            title = ''.join(entry.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
            author = ', '.join(entry.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
            price = ''.join(entry.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()'))
            formats = ', '.join(entry.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

            s = SearchResult()
            s.cover_url = 'http://woblink.com' + cover_url
            s.title = title.strip()
            s.author = author.strip()
            s.price = price + ' zł'
            s.detail_item = detail_href.strip()
            s.formats = formats

            if 'DRM' in formats:
                s.drm = SearchResult.DRM_LOCKED
            else:
                s.drm = SearchResult.DRM_UNLOCKED

            remaining -= 1
            yield s
Example #27
0
    def run_fork(self):
        """Run ``single_covers`` through ``fork_job`` while monitoring its output dir.

        A daemon thread running ``self.monitor_tdir`` is started on a
        temporary directory before the job runs. The job result is appended
        to ``self.log``. Whether or not the job succeeds,
        ``self.keep_going`` is cleared and the monitor thread is joined.
        """
        with TemporaryDirectory('_single_metadata_download') as tdir:
            self.keep_going = True
            monitor = Thread(target=self.monitor_tdir, args=(tdir,))
            monitor.daemon = True
            monitor.start()

            try:
                job_args = (self.title, self.authors, self.identifiers,
                            self.caches, tdir)
                res = fork_job(
                    'calibre.ebooks.metadata.sources.worker', 'single_covers',
                    job_args, no_output=True, abort=self.abort)
                self.log.append_dump(res['result'])
            finally:
                # keep_going is cleared before joining the monitor thread.
                self.keep_going = False
                monitor.join()
Example #28
0
    def run_fork(self):
        """Download covers via the ``single_covers`` job.

        While the job runs, ``self.monitor_tdir`` watches the temporary
        download directory from a daemon thread. The job's result is handed
        to ``self.log.append_dump``. The monitor thread is always stopped
        (``self.keep_going = False``) and joined afterwards.
        """
        with TemporaryDirectory('_single_metadata_download') as tdir:
            self.keep_going = True
            watcher = Thread(target=self.monitor_tdir, args=(tdir,))
            watcher.daemon = True
            watcher.start()

            try:
                outcome = fork_job(
                    'calibre.ebooks.metadata.sources.worker',
                    'single_covers',
                    (self.title, self.authors, self.identifiers, self.caches, tdir),
                    no_output=True,
                    abort=self.abort)
                dump = outcome['result']
                self.log.append_dump(dump)
            finally:
                self.keep_going = False
                watcher.join()
Example #29
0
def set_metadata(stream, mi):
    '''
    Write the metadata in ``mi`` into the PDF held in ``stream``, in place.

    The contents of ``stream`` (read from its current position) are copied to
    ``input.pdf`` in a temporary directory and
    ``calibre.utils.podofo.set_metadata_`` is run on that directory in a forked
    job. If the worker reports that it changed the file, and the ``output.pdf``
    found in the directory is larger than 100 bytes, ``stream`` is truncated
    and overwritten with that output.

    :param stream: Seekable, writable binary file object containing the PDF.
    :param mi: Object providing ``title``, ``authors``, ``book_producer`` and
        ``tags`` attributes.
    :raises Exception: If the worker fails; the message carries the worker's
        original traceback.

    On success ``stream`` is left positioned at offset 0.
    '''
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, u'input.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            job = fork_job('calibre.utils.podofo', 'set_metadata_', (tdir,
                mi.title, mi.authors, mi.book_producer, mi.tags))
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata: %s'%e.orig_tb)
        # fork_job() hands back a dict that wraps the worker's return value
        # under 'result'. The dict itself is always truthy, so it must be
        # unwrapped before being used as the "was anything changed" flag.
        touched = job['result']
        if touched:
            with open(os.path.join(tdir, u'output.pdf'), 'rb') as f:
                # Measure the output by seeking to its end
                f.seek(0, 2)
                # Presumably guards against replacing the book with an empty
                # or truncated output file
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
Example #30
0
def set_metadata(stream, mi):
    '''
    Update the PDF in ``stream`` with the metadata carried by ``mi``.

    ``stream`` is copied (from its current position) to ``input.pdf`` inside a
    temporary directory, then ``calibre.utils.podofo.set_metadata_`` is run on
    that directory in a forked job. When the worker reports a change and the
    resulting ``output.pdf`` is larger than 100 bytes, ``stream`` is truncated
    and rewritten with the new file.

    :param stream: Seekable, writable binary file object containing the PDF.
    :param mi: Object providing ``title``, ``authors``, ``book_producer`` and
        ``tags`` attributes.
    :raises Exception: If the worker fails; the message carries the worker's
        original traceback.

    On success ``stream`` is left positioned at offset 0.
    '''
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, u'input.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            job = fork_job(
                'calibre.utils.podofo', 'set_metadata_',
                (tdir, mi.title, mi.authors, mi.book_producer, mi.tags))
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata: %s' % e.orig_tb)
        # The worker's own return value lives under 'result'; the wrapping
        # dict returned by fork_job() is always truthy and so cannot serve as
        # the "file was modified" flag.
        touched = job['result']
        if touched:
            with open(os.path.join(tdir, u'output.pdf'), 'rb') as f:
                # Seek to the end to learn the size of the output
                f.seek(0, 2)
                # Presumably a sanity check against an empty/truncated output
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
Example #31
0
def set_metadata(stream, mi):
    '''
    Write the metadata in ``mi``, including an XMP packet built from it, into
    the PDF held in ``stream``.

    The stream is copied to ``input.pdf`` in a temporary directory and
    ``calibre.utils.podofo.set_metadata_`` is run there in a forked job. If
    the worker's result is truthy and ``output.pdf`` is larger than 100 bytes,
    the stream is truncated and replaced with that file.

    :param stream: Seekable, writable binary file object containing the PDF.
    :param mi: Metadata object; ``title``, ``authors``, ``book_producer`` and
        ``tags`` are read from it.
    :raises Exception: If the worker fails; the message names ``mi.title`` and
        includes the worker's original traceback.

    On success ``stream`` is left positioned at offset 0.
    '''
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        src_pdf = os.path.join(tdir, u'input.pdf')
        with open(src_pdf, 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
        xmp_packet = metadata_to_xmp_packet(mi)

        try:
            worker_args = (tdir, mi.title, mi.authors, mi.book_producer,
                           mi.tags, xmp_packet)
            job = fork_job('calibre.utils.podofo', 'set_metadata_',
                           worker_args)
            touched = job['result']
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata in (%s): %s'%(mi.title, e.orig_tb))

        if touched:
            out_pdf = os.path.join(tdir, u'output.pdf')
            with open(out_pdf, 'rb') as produced:
                # Jump to the end of the file to find out how big it is
                produced.seek(0, 2)
                out_size = produced.tell()
                if out_size > 100:
                    produced.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(produced, stream)
                    stream.flush()
    stream.seek(0)
Example #32
0
def get_isbn_from_pdf(log, pdf_path):
    '''
    Run the plugin's ``get_isbn`` scan over a private copy of a PDF.

    The file at ``pdf_path`` is copied to ``src.pdf`` inside a temporary
    directory. On calibre 0.8.55 or newer the scan runs in a forked job and
    anything found in the job's captured output file is passed to ``log``; on
    older versions the scan is run in-process instead.

    :param log: Callable logger that also provides an ``error`` method.
    :param pdf_path: Path of the PDF file to scan.
    :return: Whatever ``get_isbn`` produced.
    :raises RuntimeError: If the forked job fails.
    '''
    with TemporaryDirectory('_isbn_pdf') as output_dir:
        pdf_copy = os.path.join(output_dir, 'src.pdf')
        with open(pdf_path, 'rb') as src, open(pdf_copy, 'wb') as dest:
            shutil.copyfileobj(src, dest)

        try:
            # We want to run the scanning of the PDF on a fork_job, however
            # that will only be "fixed" in calibre 0.8.55 to allow calling
            # a calibre plugin from such a job. In the meantime, do it the
            # risky way of calling from in-process.
            if numeric_version < (0, 8, 55):
                log.error(
                    'Warning: PDF analysis may crash, upgrade to calibre 0.8.55 when possible'
                )
                return get_isbn(output_dir, 'src.pdf', log)

            res = fork_job('calibre_plugins.extract_isbn.pdf', 'get_isbn',
                           (output_dir, 'src.pdf'))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo/pdftohtml')
        finally:
            # Best effort early removal of the copy; only filesystem errors
            # are ignored so that interrupts and programming errors still
            # propagate.
            try:
                os.remove(pdf_copy)
            except OSError:
                pass
    info = res['result']
    # Relay whatever the worker wrote to its captured output
    with open(res['stdout_stderr'], 'rb') as f:
        raw = f.read().strip()
        if raw:
            log(raw)
    return info
Example #33
0
def get_pdf_page_count(book_path):
    '''
    Optimisation to read the actual page count for PDFs from the PDF itself.

    The book is copied to ``src.pdf`` in a temporary directory and
    ``calibre.ebooks.metadata.pdf.read_info`` is run on that directory in a
    forked job.

    :param book_path: Path of the PDF file.
    :return: The ``Pages`` entry of the info dict converted to ``int``, or
        ``None`` when the info dict has no ``Pages`` entry.
    :raises RuntimeError: If the forked job fails.
    :raises ValueError: If the job returns an empty info dict.
    '''
    from calibre.ptempfile import TemporaryDirectory
    with TemporaryDirectory('_pages_pdf') as pdfpath:
        pdf_copy = os.path.join(pdfpath, 'src.pdf')
        shutil.copyfile(book_path, pdf_copy)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, False))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        # Let's try to delete this extra copy straight away. Only filesystem
        # errors are ignored; anything else is a real problem.
        try:
            os.remove(pdf_copy)
        except OSError:
            pass
        info = res['result']
        if not info:
            raise ValueError('Could not read info dict from PDF')
        if 'Pages' in info:
            return int(info['Pages'])
        return None
Example #34
0
def download(all_ids, tf, db, do_identify, covers, ensure_fields,
        log=None, abort=None, notifications=None):
    '''
    Run the bulk metadata download worker over ``all_ids`` in batches of 10.

    For every batch the current metadata of each book is read from ``db``,
    serialized to OPF and handed to
    ``calibre.ebooks.metadata.sources.worker.main`` in a forked job. Per-book
    log files left by the worker in the working directory are appended to
    ``tf``.

    :param all_ids: The book ids to process.
    :param tf: Path of the combined log file, opened in binary append mode.
    :param db: Object providing ``get_metadata(id, index_is_id=True,
        get_user_categories=False)``.
    :param do_identify: Forwarded unchanged to the worker.
    :param covers: Forwarded unchanged to the worker.
    :param ensure_fields: Forwarded unchanged to the worker.
    :param log: Callable logger with an ``error`` method. Required in
        practice despite the default: it is called unconditionally.
    :param abort: Object with an ``is_set()`` method, checked before each
        batch. Required in practice despite the default.
    :param notifications: Passed to the ``Notifier``.
    :return: ``(aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
        lm_map, all_failed)`` where ``ans`` holds the ids not reported as
        failed, ``title_map``/``lm_map`` map ids to title/last modified, and
        ``all_failed`` stays True unless some batch reports otherwise.
    :raises Exception: If the worker fails and supplies a traceback. A
        ``WorkerError`` without traceback is re-raised as is.
    '''
    batch_size = 10
    batches = split_jobs(all_ids, batch_size=batch_size)
    tdir = PersistentTemporaryDirectory('_metadata_bulk')
    heartbeat = HeartBeat(tdir)

    failed_ids = set()
    failed_covers = set()
    title_map = {}
    lm_map = {}
    ans = set()
    all_failed = True
    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
    notifier.start()

    try:
        for ids in batches:
            if abort.is_set():
                log.error('Aborting...')
                break
            metadata = {i:db.get_metadata(i, index_is_id=True,
                get_user_categories=False) for i in ids}
            for i in ids:
                title_map[i] = metadata[i].title
                lm_map[i] = metadata[i].last_modified
            metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
                    iteritems(metadata)}
            try:
                ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
                        (do_identify, covers, metadata, ensure_fields, tdir),
                        abort=abort, heartbeat=heartbeat, no_output=True)
            except WorkerError as e:
                if e.orig_tb:
                    raise Exception('Failed to download metadata. Original '
                            'traceback: \n\n'+e.orig_tb)
                raise

            fids, fcovs, allf = ret['result']
            if not allf:
                all_failed = False
            failed_ids.update(fids)
            failed_covers.update(fcovs)
            ans.update(set(ids) - fids)
            # Collect the per-book logs written by the worker, each under a
            # header naming the book
            for book_id in ids:
                lp = os.path.join(tdir, '%d.log'%book_id)
                if os.path.exists(lp):
                    with open(tf, 'ab') as dest, open(lp, 'rb') as src:
                        dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
                            '#'*20+'\n').encode('utf-8'))
                        shutil.copyfileobj(src, dest)

        aborted = bool(abort.is_set())
        log('Download complete, with %d failures'%len(failed_ids))
        return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
                lm_map, all_failed)
    finally:
        # Presumably tells the notifier thread to stop
        notifier.keep_going = False
Example #35
0
def download(all_ids,
             tf,
             db,
             do_identify,
             covers,
             ensure_fields,
             log=None,
             abort=None,
             notifications=None):
    '''
    Run the bulk metadata download worker over ``all_ids`` in batches of 10.

    For every batch the current metadata of each book is read from ``db``,
    serialized to OPF and handed to
    ``calibre.ebooks.metadata.sources.worker.main`` in a forked job. Per-book
    log files left by the worker in the working directory are appended to
    ``tf``.

    :param all_ids: The book ids to process.
    :param tf: Path of the combined log file, opened in binary append mode.
    :param db: Object providing ``get_metadata(id, index_is_id=True,
        get_user_categories=False)``.
    :param do_identify: Forwarded unchanged to the worker.
    :param covers: Forwarded unchanged to the worker.
    :param ensure_fields: Forwarded unchanged to the worker.
    :param log: Callable logger with an ``error`` method. Required in
        practice despite the default: it is called unconditionally.
    :param abort: Object with an ``is_set()`` method, checked before each
        batch. Required in practice despite the default.
    :param notifications: Passed to the ``Notifier``.
    :return: ``(aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
        lm_map, all_failed)`` where ``ans`` holds the ids not reported as
        failed, ``title_map``/``lm_map`` map ids to title/last modified, and
        ``all_failed`` stays True unless some batch reports otherwise.
    :raises Exception: If the worker fails and supplies a traceback. A
        ``WorkerError`` without traceback is re-raised as is.
    '''
    batch_size = 10
    batches = split_jobs(all_ids, batch_size=batch_size)
    tdir = PersistentTemporaryDirectory('_metadata_bulk')
    heartbeat = HeartBeat(tdir)

    failed_ids = set()
    failed_covers = set()
    title_map = {}
    lm_map = {}
    ans = set()
    all_failed = True
    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
    notifier.start()

    try:
        for ids in batches:
            if abort.is_set():
                log.error('Aborting...')
                break
            metadata = {
                i: db.get_metadata(i,
                                   index_is_id=True,
                                   get_user_categories=False)
                for i in ids
            }
            for i in ids:
                title_map[i] = metadata[i].title
                lm_map[i] = metadata[i].last_modified
            metadata = {
                i: metadata_to_opf(mi, default_lang='und')
                for i, mi in iteritems(metadata)
            }
            try:
                ret = fork_job(
                    'calibre.ebooks.metadata.sources.worker',
                    'main',
                    (do_identify, covers, metadata, ensure_fields, tdir),
                    abort=abort,
                    heartbeat=heartbeat,
                    no_output=True)
            except WorkerError as e:
                if e.orig_tb:
                    raise Exception('Failed to download metadata. Original '
                                    'traceback: \n\n' + e.orig_tb)
                raise

            fids, fcovs, allf = ret['result']
            if not allf:
                all_failed = False
            failed_ids.update(fids)
            failed_covers.update(fcovs)
            ans.update(set(ids) - fids)
            # Collect the per-book logs written by the worker, each under a
            # header naming the book
            for book_id in ids:
                lp = os.path.join(tdir, '%d.log' % book_id)
                if os.path.exists(lp):
                    with open(tf, 'ab') as dest, open(lp, 'rb') as src:
                        dest.write(('\n' + '#' * 20 +
                                    ' Log for %s ' % title_map[book_id] +
                                    '#' * 20 + '\n').encode('utf-8'))
                        shutil.copyfileobj(src, dest)

        aborted = bool(abort.is_set())
        log('Download complete, with %d failures' % len(failed_ids))
        return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
                lm_map, all_failed)
    finally:
        # Presumably tells the notifier thread to stop
        notifier.keep_going = False
Example #36
0
    def auto_add(self):
        '''
        Scan ``self.path`` for new book files and hand them to
        ``self.callback``.

        Every eligible file gets its own temporary directory under
        ``self.tdir`` into which ``forked_read_metadata`` is run in a forked
        job. Afterwards the directory is guaranteed to contain a ``size.txt``
        holding an integer and a ``metadata.opf`` of at least 30 bytes; when
        the job did not produce them, ``0`` and metadata derived from the file
        name are written instead. The file name is then added to
        ``self.staging``. If anything was staged, ``self.callback`` is called
        once with a dict mapping file name to its temporary directory.
        '''
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        from calibre.ebooks.metadata.opf2 import metadata_to_opf
        from calibre.ebooks.metadata.meta import metadata_from_filename

        def is_candidate(name):
            # The checks are evaluated in this order and stop at the first
            # one that fails
            full = os.path.join(self.path, name)
            return (
                # Must not be in the process of being added to the db
                name not in self.staging and
                # Firefox creates 0 byte placeholder files when downloading
                os.stat(full).st_size > 0 and
                # Must be a file
                os.path.isfile(full) and
                # Must have read and write permissions
                os.access(full, os.R_OK|os.W_OK) and
                # Must be a known ebook file type
                self.is_filename_allowed(name)
            )

        files = [x for x in os.listdir(self.path) if is_candidate(x)]
        data = {}
        # Give any in progress copies time to complete
        time.sleep(2)

        for fname in files:
            book_path = os.path.join(self.path, fname)

            # Try opening the file for reading, if the OS prevents us, then at
            # least on windows, it means the file is open in another
            # application for writing. We will get notified by
            # QFileSystemWatcher when writing is completed, so ignore for now.
            try:
                open(book_path, 'rb').close()
            except Exception:
                continue
            tdir = tempfile.mkdtemp(dir=self.tdir)
            try:
                fork_job('calibre.ebooks.metadata.meta',
                        'forked_read_metadata', (book_path, tdir), no_output=True)
            except WorkerError as e:
                prints('Failed to read metadata from:', fname)
                prints(e.orig_tb)
            except Exception:
                # A failed read is not fatal, the fallbacks below take over
                import traceback
                traceback.print_exc()

            # Ensure that the pre-metadata file size is present. If it isn't,
            # write 0 so that the file is rescanned
            szpath = os.path.join(tdir, 'size.txt')
            try:
                with open(szpath, 'rb') as szfile:
                    int(szfile.read())
            except Exception:
                with open(szpath, 'wb') as szfile:
                    szfile.write(b'0')

            # A missing or implausibly small OPF means metadata reading
            # failed; fall back to what the file name tells us
            opfpath = os.path.join(tdir, 'metadata.opf')
            try:
                have_opf = os.stat(opfpath).st_size >= 30
            except OSError:
                have_opf = False
            if not have_opf:
                mi = metadata_from_filename(fname)
                with open(opfpath, 'wb') as opffile:
                    opffile.write(metadata_to_opf(mi))
            self.staging.add(fname)
            data[fname] = tdir
        if data:
            self.callback(data)
Example #37
0
    def search(self, query, max_results=10, timeout=60):
        '''
        Search the woblink.com catalogue and yield one ``SearchResult`` per
        usable listing, stopping after ``max_results`` results.

        The result page is fetched by running ``get_results`` from the
        ``js_browser`` source in a forked job and is then parsed as HTML.
        Listings without a detail link are skipped.

        :param query: Search text; it is UTF-8 encoded and URL quoted.
        :param max_results: Maximum number of results to yield. Values above
            10 and above 20 request a page limit of 20 and 30 respectively.
        :param timeout: Forwarded to the worker.
        :raises Exception: If the forked job fails.
        '''
        url = 'http://woblink.com/ebooki-kategorie?query=' + urllib.quote_plus(
            query.encode('utf-8'))
        if max_results > 20:
            url += '&limit=30'
        elif max_results > 10:
            url += '&limit=20'

        remaining = max_results

        try:
            results = fork_job(js_browser, 'get_results', (url, timeout),
                               module_is_source_code=True)
        except WorkerError as e:
            raise Exception('Could not get results: %s' % e.orig_tb)
        doc = html.fromstring(strip_encoding_declarations(results['result']))

        def joined(node, expr, sep=''):
            # Concatenate every string matched by ``expr`` below ``node``
            return sep.join(node.xpath(expr))

        cover_div = './/div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]'
        for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
            if remaining <= 0:
                break

            detail = joined(data, cover_div + '/a[1]/@href')
            if not detail:
                continue

            cover_url = joined(data, cover_div + '/a[1]/img/@src')
            title = joined(
                data,
                './/h3[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()')
            author = joined(
                data,
                './/p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()',
                ', ')
            price = joined(data, './/div[@class="nw_opcjezakupu_cena"]/text()')
            formats = joined(
                data,
                './/p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()',
                ', ')

            s = SearchResult()
            s.cover_url = 'http://woblink.com' + cover_url
            s.title = title.strip()
            s.author = author.strip()
            s.price = price + ' zł'
            s.detail_item = detail.strip()
            s.formats = formats
            # The formats text doubles as the DRM indicator
            s.drm = (SearchResult.DRM_LOCKED if 'DRM' in formats
                     else SearchResult.DRM_UNLOCKED)

            remaining -= 1
            yield s
Example #38
0
def get_metadata(stream, cover=True):
    '''
    Read metadata, and optionally the cover, from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory on which
    ``calibre.ebooks.metadata.pdf.read_info`` is run in a forked job. Anything
    found in the job's captured output file is printed.

    :param stream: Seekable binary file object containing the PDF.
    :param cover: When true, ``cover.jpg`` is read from the temporary
        directory if it exists there after the job.
    :return: A ``MetaInformation`` object built from the info dict.
    :raises RuntimeError: If the forked job fails.
    :raises ValueError: If the job returns ``None`` instead of an info dict.
    '''
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as captured:
            raw = captured.read().strip()
        if raw:
            prints(raw)
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None) or _('Unknown')
    raw_author = info.get('Author', None)
    if raw_author is None:
        authors = [_('Unknown')]
    else:
        authors = string_to_authors(raw_author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]
        # The first keyword that validates as an ISBN becomes the book's
        # ISBN and is dropped from the tags
        isbn = [check_isbn(kw) for kw in mi.tags if check_isbn(kw)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        mi.tags = [kw for kw in mi.tags if check_isbn(kw) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    checkers = {'doi': check_doi, 'isbn': check_isbn}
    for scheme, check_func in iteritems(checkers):
        if scheme in mi.get_identifiers():
            continue
        for k, v in iteritems(info):
            if k == 'xmp_metadata':
                continue
            val = check_func(v)
            if val:
                # The first hit wins
                mi.set_identifier(scheme, val)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
Example #39
0
def get_metadata(stream, cover=True):
    '''
    Read metadata, and optionally the cover, from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory on which
    ``calibre.ebooks.metadata.pdf.read_info`` is run in a forked job. Anything
    found in the job's captured output file is printed.

    :param stream: Seekable binary file object containing the PDF.
    :param cover: When true, ``cover.jpg`` is read from the temporary
        directory if it exists there after the job.
    :return: A ``MetaInformation`` object built from the info dict.
    :raises RuntimeError: If the forked job fails.
    :raises ValueError: If the job returns an empty info dict.
    '''
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]
        # The first keyword that validates as an ISBN becomes the book's ISBN
        # and is dropped from the tags
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata. items() is used instead of iteritems() as
    # the latter does not exist on Python 3 dicts.
    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items():
        if scheme not in mi.get_identifiers():
            for k, v in info.items():
                if k != 'xmp_metadata':
                    val = check_func(v)
                    if val:
                        mi.set_identifier(scheme, val)
                        break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
Example #40
0
def run_extract_book(*args, **kwargs):
    '''
    Call ``calibre.ebooks.oeb.iterator.book.extract_book`` in a forked job,
    forwarding all positional and keyword arguments, and return the value it
    produced.
    '''
    from calibre.utils.ipc.simple_worker import fork_job
    job = fork_job(
        'calibre.ebooks.oeb.iterator.book', 'extract_book',
        args=args, kwargs=kwargs, timeout=3000, no_output=True)
    return job['result']
Example #41
0
    def auto_add(self):
        '''
        Scan ``self.path`` for new book files and hand them to
        ``self.callback``.

        Every eligible file (see the checks below; the extension must be in
        ``self.allowed``) gets its own temporary directory under ``self.tdir``
        into which ``forked_read_metadata`` is run in a forked job. Afterwards
        the directory is guaranteed to contain a ``size.txt`` holding an
        integer and a ``metadata.opf`` of at least 30 bytes; when the job did
        not produce them, ``0`` and metadata derived from the file name are
        written instead. The file name is then added to ``self.staging``. If
        anything was staged, ``self.callback`` is called once with a dict
        mapping file name to its temporary directory.
        '''
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        from calibre.ebooks.metadata.opf2 import metadata_to_opf
        from calibre.ebooks.metadata.meta import metadata_from_filename

        def is_candidate(name):
            # The checks are evaluated in this order and stop at the first
            # one that fails
            full = os.path.join(self.path, name)
            return (
                # Must not be in the process of being added to the db
                name not in self.staging
                # Firefox creates 0 byte placeholder files when downloading
                and os.stat(full).st_size > 0
                # Must be a file
                and os.path.isfile(full)
                # Must have read and write permissions
                and os.access(full, os.R_OK|os.W_OK)
                # Must be a known ebook file type
                and os.path.splitext(name)[1][1:].lower() in self.allowed
            )

        files = [x for x in os.listdir(self.path) if is_candidate(x)]
        data = {}
        # Give any in progress copies time to complete
        time.sleep(2)

        for fname in files:
            book_path = os.path.join(self.path, fname)

            # Try opening the file for reading, if the OS prevents us, then at
            # least on windows, it means the file is open in another
            # application for writing. We will get notified by
            # QFileSystemWatcher when writing is completed, so ignore for now.
            try:
                open(book_path, 'rb').close()
            except Exception:
                continue
            tdir = tempfile.mkdtemp(dir=self.tdir)
            try:
                fork_job('calibre.ebooks.metadata.meta',
                        'forked_read_metadata', (book_path, tdir), no_output=True)
            except WorkerError as e:
                prints('Failed to read metadata from:', fname)
                prints(e.orig_tb)
            except Exception:
                # A failed read is not fatal, the fallbacks below take over
                import traceback
                traceback.print_exc()

            # Ensure that the pre-metadata file size is present. If it isn't,
            # write 0 so that the file is rescanned
            szpath = os.path.join(tdir, 'size.txt')
            try:
                with open(szpath, 'rb') as szfile:
                    int(szfile.read())
            except Exception:
                with open(szpath, 'wb') as szfile:
                    szfile.write(b'0')

            # A missing or implausibly small OPF means metadata reading
            # failed; fall back to what the file name tells us
            opfpath = os.path.join(tdir, 'metadata.opf')
            try:
                have_opf = os.stat(opfpath).st_size >= 30
            except OSError:
                have_opf = False
            if not have_opf:
                mi = metadata_from_filename(fname)
                with open(opfpath, 'wb') as opffile:
                    opffile.write(metadata_to_opf(mi))
            self.staging.add(fname)
            data[fname] = tdir
        if data:
            self.callback(data)