Esempio n. 1
0
 def complete(self, wc):
     if wc == 2:
         self.complete_input()
     elif wc == 3:
         self.complete_output()
     else:
         q = list(self.words[1:3])
         q = [os.path.splitext(x)[0 if x.startswith('.') else 1].partition('.')[-1].lower() for x in q]
         if not q[1]:
             q[1] = 'oeb'
         q = tuple(q)
         if q in self.cache:
             ans = [x for x in self.cache[q] if x.startswith(self.prefix)]
         else:
             from calibre.ebooks.conversion.cli import create_option_parser
             from calibre.utils.logging import Log
             log = Log()
             log.outputs = []
             ans = []
             if not self.prefix or self.prefix.startswith('-'):
                 try:
                     parser, _ = create_option_parser(self.words[:3], log)
                     ans += list(get_opts_from_parser(parser, self.prefix))
                 except:
                     pass
         if self.previous.startswith('-'):
             ans += list(files_and_dirs(self.prefix, None))
         send(ans)
Esempio n. 2
0
    def setup_pipeline(self, *args):
        oidx = self.groups.currentIndex().row()
        input_format = self.input_format
        output_format = self.output_format
        output_path = 'dummy.'+output_format
        log = Log()
        log.outputs = []
        input_file = 'dummy.'+input_format
        if input_format in ARCHIVE_FMTS:
            input_file = 'dummy.html'
        self.plumber = Plumber(input_file, output_path, log)

        def widget_factory(cls):
            return cls(self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)

        self.mw = widget_factory(MetadataWidget)
        self.setWindowTitle(_('Convert')+ ' ' + unicode(self.mw.title.text()))
        lf = widget_factory(LookAndFeelWidget)
        hw = widget_factory(HeuristicsWidget)
        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)
        from calibre.gui2.actions.toc_edit import SUPPORTED
        toc.manually_fine_tune_toc.setVisible(output_format.upper() in SUPPORTED)
        debug = widget_factory(DebugWidget)

        output_widget = self.plumber.output_plugin.gui_configuration_widget(
                self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)
        input_widget = self.plumber.input_plugin.gui_configuration_widget(
                self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)
        while True:
            c = self.stack.currentWidget()
            if not c:
                break
            self.stack.removeWidget(c)

        widgets = [self.mw, lf, hw, ps, sd, toc, sr]
        if input_widget is not None:
            widgets.append(input_widget)
        if output_widget is not None:
            widgets.append(output_widget)
        widgets.append(debug)
        for w in widgets:
            self.stack.addWidget(w)
            w.set_help_signal.connect(self.help.setPlainText)

        self._groups_model = GroupModel(widgets)
        self.groups.setModel(self._groups_model)

        idx = oidx if -1 < oidx < self._groups_model.rowCount() else 0
        self.groups.setCurrentIndex(self._groups_model.index(idx))
        self.stack.setCurrentIndex(idx)
        try:
            shutil.rmtree(self.plumber.archive_input_tdir, ignore_errors=True)
        except:
            pass
Esempio n. 3
0
def main(args=sys.argv):
    log = Log()
    parser, plumber = create_option_parser(args, log)
    opts, leftover_args = parser.parse_args(args)
    if len(leftover_args) > 3:
        log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
        return 1
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))
    if opts.search_replace:
        opts.search_replace = read_sr_patterns(opts.search_replace, log)

    recommendations = [(n.dest, getattr(opts, n.dest),
                        OptionRecommendation.HIGH)
                                        for n in parser.options_iter()
                                        if n.dest]
    plumber.merge_ui_recommendations(recommendations)

    try:
        plumber.run()
    except ConversionUserFeedBack as e:
        ll = {'info': log.info, 'warn': log.warn,
                'error':log.error}.get(e.level, log.info)
        ll(e.title)
        if e.det_msg:
            log.debug(e.detmsg)
        ll(e.msg)
        raise SystemExit(1)

    log(_('Output saved to'), ' ', plumber.output)

    return 0
Esempio n. 4
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data  = mh.section_data(int(cover_index))
    else:
        try:
            data  = mh.section_data(mh.first_image_index)
        except:
            data = ''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Esempio n. 5
0
 def __init__(self, files):
     s = Stream()
     self.log_stream = s.stream
     log = Log()
     log.outputs = [s]
     ContainerBase.__init__(self, log=log)
     self.mime_map = {k:self.guess_type(k) for k in files}
     self.files = files
Esempio n. 6
0
    def setup_pipeline(self, *args):
        oidx = self.groups.currentIndex().row()
        output_format = self.output_format

        input_path = 'dummy.epub'
        output_path = 'dummy.'+output_format
        log = Log()
        log.outputs = []
        self.plumber = Plumber(input_path, output_path, log,
                merge_plugin_recs=False)

        def widget_factory(cls):
            return cls(self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db)

        self.setWindowTitle(_('Bulk Convert'))
        lf = widget_factory(LookAndFeelWidget)
        hw = widget_factory(HeuristicsWidget)
        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)

        output_widget = None
        name = self.plumber.output_plugin.name.lower().replace(' ', '_')
        try:
            output_widget = importlib.import_module(
                    'calibre.gui2.convert.'+name)
            pw = output_widget.PluginWidget
            pw.ICON = I('back.png')
            pw.HELP = _('Options specific to the output format.')
            output_widget = widget_factory(pw)
        except ImportError:
            pass

        while True:
            c = self.stack.currentWidget()
            if not c: break
            self.stack.removeWidget(c)

        widgets = [lf, hw, ps, sd, toc, sr]
        if output_widget is not None:
            widgets.append(output_widget)
        for w in widgets:
            self.stack.addWidget(w)
            self.connect(w, SIGNAL('set_help(PyQt_PyObject)'),
                    self.help.setPlainText)

        self._groups_model = GroupModel(widgets)
        self.groups.setModel(self._groups_model)

        idx = oidx if -1 < oidx < self._groups_model.rowCount() else 0
        self.groups.setCurrentIndex(self._groups_model.index(idx))
        self.stack.setCurrentIndex(idx)
        try:
            shutil.rmtree(self.plumber.archive_input_tdir, ignore_errors=True)
        except:
            pass
Esempio n. 7
0
    def setup_pipeline(self, *args):
        oidx = self.groups.currentIndex().row()
        output_format = self.output_format

        input_path = 'dummy.epub'
        output_path = 'dummy.'+output_format
        log = Log()
        log.outputs = []
        self.plumber = Plumber(input_path, output_path, log,
                merge_plugin_recs=False)

        def widget_factory(cls):
            return cls(self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db)

        self.setWindowTitle(_('Bulk Convert'))
        lf = widget_factory(LookAndFeelWidget)
        hw = widget_factory(HeuristicsWidget)
        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)
        toc.manually_fine_tune_toc.hide()

        output_widget = self.plumber.output_plugin.gui_configuration_widget(
                self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db)

        while True:
            c = self.stack.currentWidget()
            if not c:
                break
            self.stack.removeWidget(c)

        widgets = [lf, hw, ps, sd, toc, sr]
        if output_widget is not None:
            widgets.append(output_widget)
        for w in widgets:
            self.stack.addWidget(w)
            self.connect(w, SIGNAL('set_help(PyQt_PyObject)'),
                    self.help.setPlainText)

        self._groups_model = GroupModel(widgets)
        self.groups.setModel(self._groups_model)

        idx = oidx if -1 < oidx < self._groups_model.rowCount() else 0
        self.groups.setCurrentIndex(self._groups_model.index(idx))
        self.stack.setCurrentIndex(idx)
        try:
            shutil.rmtree(self.plumber.archive_input_tdir, ignore_errors=True)
        except:
            pass
Esempio n. 8
0
    def genesis(self, gui):
        log = Log()
        log.outputs = []

        self.plumber = Plumber('dummy.epub', 'dummy.epub', log, dummy=True,
                merge_plugin_recs=False)

        def widget_factory(cls):
            return cls(self, self.plumber.get_option_by_name,
                self.plumber.get_option_help, None, None)

        self.load_conversion_widgets()
        widgets = list(map(widget_factory, self.conversion_widgets))
        self.model = Model(widgets)
        self.list.setModel(self.model)

        for w in widgets:
            w.changed_signal.connect(self.changed_signal)
            self.stack.addWidget(w)

        self.list.currentChanged = self.category_current_changed
        self.list.setCurrentIndex(self.model.index(0))
Esempio n. 9
0
	def __init__(self, path):
		tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
		zf = zipfile.ZipFile(path)
		zf.extractall(tmpdir)

		self.root = os.path.abspath(tmpdir)
		self.log = Log()
		self.dirtied = set([])
		self.cache = {}
		self.mime_map = {}

		print("Container:__init__:Got container path {0}".format(self.root))

		if os.path.exists(os.path.join(self.root, 'mimetype')):
			os.remove(os.path.join(self.root, 'mimetype'))

		container_path = os.path.join(self.root, 'META-INF', 'container.xml')
		if not os.path.exists(container_path):
			raise InvalidEpub('No META-INF/container.xml in epub')
		self.container = etree.fromstring(open(container_path, 'rb').read())
		opf_files = self.container.xpath((r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'.format(guess_type('a.opf')[0])), namespaces = self.namespaces)
		if not opf_files:
			raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
		opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
		if not os.path.exists(opf_path):
			raise InvalidEpub('OPF file does not exist at location pointed to by META-INF/container.xml')

		# Map of relative paths with / separators to absolute
		# paths on filesystem with os separators
		self.name_map = {}
		for dirpath, dirnames, filenames in os.walk(self.root):
			for f in filenames:
				path = os.path.join(dirpath, f)
				name = os.path.relpath(path, self.root).replace(os.sep, '/')
				self.name_map[name] = path
				self.mime_map[name] = guess_type(f)[0]
				if path == opf_path:
					self.opf_name = name
					self.mime_map[name] = guess_type('a.opf')[0]

		opf = self.opf
		for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]', namespaces = self.namespaces):
			href = unquote(item.get('href'))
			item.set("href", href)
			self.mime_map[self.href_to_name(href, os.path.dirname(self.opf_name).replace(os.sep, '/'))] = item.get('media-type')
		self.set(self.opf_name, opf)
Esempio n. 10
0
def main(args=None):
    parser = option_parser()
    opts, args = parser.parse_args(args or sys.argv[1:])
    log = Log(level=Log.DEBUG if opts.verbose else Log.INFO)
    if not args:
        parser.print_help()
        log.error(_('You must provide the input file to polish'))
        raise SystemExit(1)
    if len(args) > 2:
        parser.print_help()
        log.error(_('Unknown extra arguments'))
        raise SystemExit(1)
    if len(args) == 1:
        inbook = args[0]
        base, ext = inbook.rpartition('.')[0::2]
        outbook = base + '_polished.' + ext
    else:
        inbook, outbook = args

    popts = ALL_OPTS.copy()
    for k, v in popts.iteritems():
        popts[k] = getattr(opts, k, None)

    O = namedtuple('Options', ' '.join(popts.iterkeys()))
    popts = O(**popts)
    report = []
    if not tuple(filter(None, (getattr(popts, name) for name in ALL_OPTS))):
        parser.print_help()
        log.error(_('You must specify at least one action to perform'))
        raise SystemExit(1)

    polish({inbook:outbook}, popts, log, report.append)
    log('')
    log(REPORT)
    for msg in report:
        log(msg)

    log('Output written to:', outbook)
from calibre.library import db
from calibre.utils.logging import Log
import json
from itertools import chain

log = Log()
mydb = db('~/Archive/CalTest/').new_api

from calibre_plugins.crossref_doi_download import DoiMeta
from calibre.ebooks.metadata.book.base import Metadata
from calibre_plugins.crossref_doi_download.doi_reader import DoiReader

from crossref_fields import USED_FIELDS, IGNORED_FIELDS, COND_FIELDS, check_uninterpreted_fields, check_uninterpreted_list
# from calibre_plugins.crossref_doi_download.crossref_fields import USED_FIELDS, IGNORED_FIELDS, COND_FIELDS, check_uninterpreted_fields
dm = DoiMeta('./plugin/')
url1 = 'https://api.crossref.org/works/10.1002/bmc.835?mailto=vikoya5988%40oniaj.com'


def get_prop_list(results, prop):
    for ind, res in enumerate(results):
        if prop in res:
            print(ind, res[prop])


reader = DoiReader(log)

br = dm.browser
br.set_debug_http(True)
# user_agent =
# [('User-agent',
# 'Mozilla/5.0 (X11;U;Linux 2.4.2.-2 i586; en-us;m18) Gecko/200010131 Netscape6/6.01'
Esempio n. 12
0
def do_download_for_worker(book, options, merge, notification=lambda x, y: x):
    '''
    Child job, to download story when run as a worker job
    '''

    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase:  # so the sys.path was modified while loading the
        # plug impl.
        from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
        from calibre_plugins.fanficfare_plugin.prefs import (
            SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
            UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
        from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
        from fanficfare import adapters, writers
        from fanficfare.epubutils import get_update_data
        from fanficfare.six import text_type as unicode

        from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config

        try:
            logger.info("\n\n" + ("-" * 80) + " " + book['url'])
            ## No need to download at all.  Can happen now due to
            ## collision moving into book for CALIBREONLY changing to
            ## ADDNEW when story URL not in library.
            if book['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL):
                logger.info("Skipping CALIBREONLY 'update' down inside worker")
                return book

            book['comment'] = _('Download started...')

            configuration = get_fff_config(book['url'], options['fileform'],
                                           options['personal.ini'])

            if not options[
                    'updateepubcover'] and 'epub_for_update' in book and book[
                        'collision'] in (UPDATE, UPDATEALWAYS):
                configuration.set("overrides", "never_make_cover", "true")

            # images only for epub, html, even if the user mistakenly
            # turned it on else where.
            if options['fileform'] not in ("epub", "html"):
                configuration.set("overrides", "include_images", "false")

            adapter = adapters.getAdapter(configuration, book['url'])
            adapter.is_adult = book['is_adult']
            adapter.username = book['username']
            adapter.password = book['password']
            adapter.setChaptersRange(book['begin'], book['end'])

            ## each site download job starts with a new copy of the
            ## cookiejar and basic_cache from the FG process.  They
            ## are not shared between different sites' BG downloads
            if configuration.getConfig('use_browser_cache'):
                if 'browser_cache' in options:
                    configuration.set_browser_cache(options['browser_cache'])
                else:
                    options['browser_cache'] = configuration.get_browser_cache(
                    )
                    if 'browser_cachefile' in options:
                        options['browser_cache'].load_cache(
                            options['browser_cachefile'])
            if 'basic_cache' in options:
                configuration.set_basic_cache(options['basic_cache'])
            else:
                options['basic_cache'] = configuration.get_basic_cache()
                options['basic_cache'].load_cache(options['basic_cachefile'])
            if 'cookiejar' in options:
                configuration.set_cookiejar(options['cookiejar'])
            else:
                options['cookiejar'] = configuration.get_cookiejar()
                options['cookiejar'].load_cookiejar(options['cookiejarfile'])

            story = adapter.getStoryMetadataOnly()
            if not story.getMetadata("series") and 'calibre_series' in book:
                adapter.setSeries(book['calibre_series'][0],
                                  book['calibre_series'][1])

            # set PI version instead of default.
            if 'version' in options:
                story.setMetadata('version', options['version'])

            book['title'] = story.getMetadata("title", removeallentities=True)
            book['author_sort'] = book['author'] = story.getList(
                "author", removeallentities=True)
            book['publisher'] = story.getMetadata("publisher")
            book['url'] = story.getMetadata("storyUrl", removeallentities=True)
            book['tags'] = story.getSubjectTags(removeallentities=True)
            book['comments'] = story.get_sanitized_description()
            book['series'] = story.getMetadata("series",
                                               removeallentities=True)

            if story.getMetadataRaw('datePublished'):
                book['pubdate'] = story.getMetadataRaw(
                    'datePublished').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateUpdated'):
                book['updatedate'] = story.getMetadataRaw(
                    'dateUpdated').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateCreated'):
                book['timestamp'] = story.getMetadataRaw(
                    'dateCreated').replace(tzinfo=local_tz)
            else:
                book['timestamp'] = datetime.now().replace(
                    tzinfo=local_tz)  # need *something* there for calibre.

            writer = writers.getWriter(options['fileform'], configuration,
                                       adapter)
            outfile = book['outfile']

            ## checks were done earlier, it's new or not dup or newer--just write it.
            if book['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
                    ('epub_for_update' not in book and book['collision'] in (UPDATE, UPDATEALWAYS)):

                # preserve logfile even on overwrite.
                if 'epub_for_update' in book:
                    adapter.logfile = get_update_data(
                        book['epub_for_update'])[6]
                    # change the existing entries id to notid so
                    # write_epub writes a whole new set to indicate overwrite.
                    if adapter.logfile:
                        adapter.logfile = adapter.logfile.replace(
                            "span id", "span notid")

                if book['collision'] == OVERWRITE and 'fileupdated' in book:
                    lastupdated = story.getMetadataRaw('dateUpdated')
                    fileupdated = book['fileupdated']

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(
                            _("Not Overwriting, web site is not newer."),
                            'edit-undo.png',
                            showerror=False)

                logger.info("write to %s" % outfile)
                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Download %(fileform)s completed, %(failed)s failed chapters, %(total)s total chapters.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'total':story.getMetadata("numChapters")}
                    book[
                        'chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Download %(fileform)s completed, %(total)s chapters.')%\
                        {'fileform':options['fileform'],
                         'total':story.getMetadata("numChapters")}
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, just update it.
            elif 'epub_for_update' in book and book['collision'] in (
                    UPDATE, UPDATEALWAYS):

                # update now handled by pre-populating the old images and
                # chapters in the adapter rather than merging epubs.
                #urlchaptercount = int(story.getMetadata('numChapters').replace(',',''))
                # returns int adjusted for start-end range.
                urlchaptercount = story.getChapterCount()
                (url, chaptercount, adapter.oldchapters, adapter.oldimgs,
                 adapter.oldcover, adapter.calibrebookmark, adapter.logfile,
                 adapter.oldchaptersmap,
                 adapter.oldchaptersdata) = get_update_data(
                     book['epub_for_update'])[0:9]

                # dup handling from fff_plugin needed for anthology updates.
                if book['collision'] == UPDATE:
                    if chaptercount == urlchaptercount:
                        if merge:
                            book['comment'] = _(
                                "Already contains %d chapters.  Reuse as is."
                            ) % chaptercount
                            book['all_metadata'] = story.getAllMetadata(
                                removeallentities=True)
                            if options['savemetacol'] != '':
                                book['savemetacol'] = story.dump_html_metadata(
                                )
                            book['outfile'] = book[
                                'epub_for_update']  # for anthology merge ops.
                            return book
                        else:  # not merge,
                            raise NotGoingToDownload(
                                _("Already contains %d chapters.") %
                                chaptercount,
                                'edit-undo.png',
                                showerror=False)
                    elif chaptercount > urlchaptercount:
                        raise NotGoingToDownload(
                            _("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update."
                              ) % (chaptercount, urlchaptercount),
                            'dialog_error.png')
                    elif chaptercount == 0:
                        raise NotGoingToDownload(
                            _("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."
                              ), 'dialog_error.png')

                if not (book['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
                    chaptercount = adapter.hookForUpdates(chaptercount)

                logger.info("Do update - epub(%d) vs url(%d)" %
                            (chaptercount, urlchaptercount))
                logger.info("write to %s" % outfile)

                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters, %(failed)s failed chapters, for %(total)s total.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'added':(urlchaptercount-chaptercount),
                         'total':urlchaptercount}
                    book[
                        'chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters for %(total)s total.')%\
                        {'fileform':options['fileform'],'added':(urlchaptercount-chaptercount),'total':urlchaptercount}
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()
            else:
                ## Shouldn't ever get here, but hey, it happened once
                ## before with prefs['collision']
                raise Exception(
                    "Impossible state reached -- Book: %s:\nOptions:%s:" %
                    (book, options))

            if options['do_wordcount'] == SAVE_YES or (
                    options['do_wordcount'] == SAVE_YES_UNLESS_SITE
                    and not story.getMetadataRaw('numWords')):
                try:
                    wordcount = get_word_count(outfile)
                    # logger.info("get_word_count:%s"%wordcount)
                    story.setMetadata('numWords', wordcount)
                    writer.writeStory(outfilename=outfile, forceOverwrite=True)
                    book['all_metadata'] = story.getAllMetadata(
                        removeallentities=True)
                    if options['savemetacol'] != '':
                        book['savemetacol'] = story.dump_html_metadata()
                except:
                    logger.error("WordCount failed")

            if options['smarten_punctuation'] and options['fileform'] == "epub" \
                    and calibre_version >= (0, 9, 39):
                # for smarten punc
                from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
                from calibre.utils.logging import Log
                from collections import namedtuple

                # do smarten_punctuation from calibre's polish feature
                data = {'smarten_punctuation': True}
                opts = ALL_OPTS.copy()
                opts.update(data)
                O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS)))
                opts = O(**opts)

                log = Log(level=Log.DEBUG)
                polish({outfile: outfile}, opts, log, logger.info)

        except NotGoingToDownload as d:
            book['good'] = False
            book['status'] = _('Bad')
            book['showerror'] = d.showerror
            book['comment'] = unicode(d)
            book['icon'] = d.icon

        except Exception as e:
            book['good'] = False
            book['status'] = _('Error')
            book['comment'] = unicode(e)
            book['icon'] = 'dialog_error.png'
            book['status'] = _('Error')
            logger.info("Exception: %s:%s" % (book, book['comment']),
                        exc_info=True)
    return book
Esempio n. 13
0
    def run(self, opts):
        from calibre.utils.serialize import msgpack_dumps
        scripts = {}
        for x in ('console', 'gui'):
            for name in basenames[x]:
                if name in ('calibre-complete', 'calibre_postinstall'):
                    continue
                scripts[name] = x

        dest = self.j(self.RESOURCES, 'scripts.calibre_msgpack')
        if self.newer(dest, self.j(self.SRC, 'calibre', 'linux.py')):
            self.info('\tCreating ' + self.b(dest))
            with open(dest, 'wb') as f:
                f.write(msgpack_dumps(scripts))

        from calibre.web.feeds.recipes.collection import \
                serialize_builtin_recipes, iterate_over_builtin_recipe_files

        files = [x[1] for x in iterate_over_builtin_recipe_files()]

        dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
        if self.newer(dest, files):
            self.info('\tCreating builtin_recipes.xml')
            xml = serialize_builtin_recipes()
            with open(dest, 'wb') as f:
                f.write(xml)

        recipe_icon_dir = self.a(self.j(self.RESOURCES, '..', 'recipes',
            'icons'))
        dest = os.path.splitext(dest)[0] + '.zip'
        files += glob.glob(self.j(recipe_icon_dir, '*.png'))
        if self.newer(dest, files):
            self.info('\tCreating builtin_recipes.zip')
            with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
                for n in sorted(files, key=self.b):
                    with open(n, 'rb') as f:
                        zf.writestr(self.b(n), f.read())

        dest = self.j(self.RESOURCES, 'ebook-convert-complete.calibre_msgpack')
        files = []
        for x in os.walk(self.j(self.SRC, 'calibre')):
            for f in x[-1]:
                if f.endswith('.py'):
                    files.append(self.j(x[0], f))
        if self.newer(dest, files):
            self.info('\tCreating ' + self.b(dest))
            complete = {}
            from calibre.ebooks.conversion.plumber import supported_input_formats
            complete['input_fmts'] = set(supported_input_formats())
            from calibre.web.feeds.recipes.collection import get_builtin_recipe_titles
            complete['input_recipes'] = [t+'.recipe ' for t in
                    get_builtin_recipe_titles()]
            from calibre.customize.ui import available_output_formats
            complete['output'] = set(available_output_formats())
            from calibre.ebooks.conversion.cli import create_option_parser
            from calibre.utils.logging import Log
            log = Log()
            # log.outputs = []
            for inf in supported_input_formats():
                if inf in ('zip', 'rar', 'oebzip'):
                    continue
                for ouf in available_output_formats():
                    of = ouf if ouf == 'oeb' else 'dummy.'+ouf
                    p = create_option_parser(('ec', 'dummy1.'+inf, of, '-h'),
                            log)[0]
                    complete[(inf, ouf)] = [x+' 'for x in
                            get_opts_from_parser(p)]

            with open(dest, 'wb') as f:
                f.write(msgpack_dumps(only_unicode_recursive(complete)))

        self.info('\tCreating template-functions.json')
        dest = self.j(self.RESOURCES, 'template-functions.json')
        function_dict = {}
        import inspect
        from calibre.utils.formatter_functions import formatter_functions
        for obj in formatter_functions().get_builtins().values():
            eval_func = inspect.getmembers(obj,
                    lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
            try:
                lines = [l[4:] for l in inspect.getsourcelines(eval_func[0][1])[0]]
            except:
                continue
            lines = ''.join(lines)
            function_dict[obj.name] = lines
        dump_json(function_dict, dest)

        self.info('\tCreating editor-functions.json')
        dest = self.j(self.RESOURCES, 'editor-functions.json')
        function_dict = {}
        from calibre.gui2.tweak_book.function_replace import builtin_functions
        for func in builtin_functions():
            try:
                src = ''.join(inspect.getsourcelines(func)[0][1:])
            except Exception:
                continue
            src = src.replace('def ' + func.__name__, 'def replace')
            imports = ['from %s import %s' % (x.__module__, x.__name__) for x in func.imports]
            if imports:
                src = '\n'.join(imports) + '\n\n' + src
            function_dict[func.name] = src
        dump_json(function_dict, dest)
        self.info('\tCreating user-manual-translation-stats.json')
        d = {}
        for lc, stats in iteritems(json.load(open(self.j(self.d(self.SRC), 'manual', 'locale', 'completed.json')))):
            total = sum(itervalues(stats))
            d[lc] = stats['translated'] / float(total)
        dump_json(d, self.j(self.RESOURCES, 'user-manual-translation-stats.json'))

        src = self.j(self.SRC, '..', 'Changelog.txt')
        dest = self.j(self.RESOURCES, 'changelog.json')
        if self.newer(dest, [src]):
            self.info('\tCreating changelog.json')
            from setup.changelog import parse
            with open(src) as f:
                dump_json(parse(f.read(), parse_dates=False), dest)
Esempio n. 14
0
class Container(object):
	META_INF = {
			'container.xml' : True,
			'manifest.xml' : False,
			'encryption.xml' : False,
			'metadata.xml' : False,
			'signatures.xml' : False,
			'rights.xml' : False,
	}

	OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
	OPF_NS = 'http://www.idpf.org/2007/opf'
	NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
	DC_NS = "http://purl.org/dc/elements/1.1/"
	XHTML_NS = "http://www.w3.org/1999/xhtml"
	OPF_MIMETYPE = 'application/oebps-package+xml'
	NCX_MIMETYPE = "application/x-dtbncx+xml"

	def __init__(self, path):
		tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
		zf = zipfile.ZipFile(path)
		zf.extractall(tmpdir)

		self.root = os.path.abspath(tmpdir)
		self.log = Log()
		self.dirtied = set([])
		self.cache = {}
		self.mime_map = {}

		print("Got container path {0}".format(self.root))

		if os.path.exists(os.path.join(self.root, 'mimetype')):
			os.remove(os.path.join(self.root, 'mimetype'))

		container_path = os.path.join(self.root, 'META-INF', 'container.xml')
		if not os.path.exists(container_path):
			raise InvalidEpub('No META-INF/container.xml in epub')
		self.container = etree.fromstring(open(container_path, 'rb').read())
		opf_files = self.container.xpath((r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'.format(guess_type('a.opf')[0])), namespaces = {'ocf': self.OCF_NS})
		if not opf_files:
			raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
		opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
		if not os.path.exists(opf_path):
			raise InvalidEpub('OPF file does not exist at location pointed to by META-INF/container.xml')

		# Map of relative paths with / separators to absolute
		# paths on filesystem with os separators
		self.name_map = {}
		for dirpath, dirnames, filenames in os.walk(self.root):
			for f in filenames:
				path = os.path.join(dirpath, f)
				name = os.path.relpath(path, self.root).replace(os.sep, '/')
				self.name_map[name] = path
				if path == opf_path:
					self.opf_name = name
					self.mime_map[name] = guess_type('a.opf')[0]

		for item in self.opf.xpath('//opf:manifest/opf:item[@href and @media-type]', namespaces = {'opf': self.OPF_NS}):
			href = item.get('href')
			self.mime_map[self.href_to_name(href, posixpath.dirname(self.opf_name))] = item.get('media-type')

	def get_html_names(self):
		"""A generator function that yields only HTML file names from
		the ePub.
		"""
		for name in self.name_map.keys():
			ext = name[name.lower().rfind('.'):].lower()
			if ext in HTML_EXTENSIONS:
				yield name

	def is_drm_encrypted(self):
		"""Determine if the ePub container is encumbered with Digital
		Restrictions Management.

		This method looks for the 'encryption.xml' file which denotes an
		ePub encumbered by Digital Restrictions Management. DRM-encumbered
		files cannot be edited.
		"""
		if 'META-INF/encryption.xml' in self.name_map:
			try:
				xml = self.get('META-INF/encryption.xml')
				if not xml:
					return True # Even if encryption.xml can't be parsed, assume its presence means an encumbered file
				for elem in xml.xpath('.//*[contains(name(), "EncryptionMethod")]'):
					alg = elem.get('Algorithm')
					return alg != 'http://ns.adobe.com/pdf/enc#RC'
			except:
				self.log.error("Could not parse encryption.xml")
				return True # If encryption.xml is present, assume the file is encumbered
		return False

	def manifest_worthy_names(self):
		for name in self.name_map:
			if name.endswith('.opf'): continue
			if name.startswith('META-INF') and posixpath.basename(name) in self.META_INF:
				continue
			yield name

	def delete_name(self, name):
		self.mime_map.pop(name, None)
		path = self.name_map[name]
		os.remove(path)
		self.name_map.pop(name)

	def manifest_item_for_name(self, name):
		href = self.name_to_href(name, posixpath.dirname(self.opf_name))
		q = prepare_string_for_xml(href, attribute = True)
		existing = self.opf.xpath('//opf:manifest/opf:item[@href="{0}"]'.format(q), namespaces = {'opf': self.OPF_NS})
		if not existing:
			return None
		return existing[0]

	def add_name_to_manifest(self, name, mt = None):
		item = self.manifest_item_for_name(name)
		if item is not None:
			return
		manifest = self.opf.xpath('//opf:manifest', namespaces = {'opf': self.OPF_NS})[0]
		item = manifest.makeelement('{%s}item' % self.OPF_NS, nsmap = {'opf': self.OPF_NS}, href = self.name_to_href(name, posixpath.dirname(self.opf_name)), id = self.generate_manifest_id())
		if not mt:
			mt = guess_type(posixpath.basename(name))[0]
		if not mt:
			mt = 'application/octest-stream'
		item.set('media-type', mt)
		manifest.append(item)
		self.fix_tail(item)

	def fix_tail(self, item):
		'''
		Designed only to work with self closing elements after item has
		just been inserted/appended
		'''
		parent = item.getparent()
		idx = parent.index(item)
		if idx == 0:
			item.tail = parent.text
		else:
			item.tail = parent[idx - 1].tail
			if idx == len(parent) - 1:
				parent[idx - 1].tail = parent.text

	def generate_manifest_id(self):
		items = self.opf.xpath('//opf:manifest/opf:item[@id]', namespaces = {'opf': self.OPF_NS})
		ids = set([x.get('id') for x in items])
		for x in xrange(sys.maxint):
			c = 'id{0}'.format(x)
			if c not in ids:
				return c

	@property
	def opf(self):
		return self.get(self.opf_name)

	def href_to_name(self, href, base = ''):
		"""Changed to fix a bug which incorrectly splits the href on
		'#' when '#' is part of the file name. Also normalizes the
		path.

		Taken from the calibre Modify Epub plugin's Container implementation.
		"""
		hash_index = href.find('#')
		period_index = href.find('.')
		if hash_index > 0 and hash_index > period_index:
			href = href.partition('#')[0]
		href = urllib.unquote(href)
		name = href
		if base:
			name = posixpath.join(base, href)
		name = os.path.normpath(name).replace('\\', '/')
		return name

	def name_to_href(self, name, base):
		"""Changed to ensure that blank href names are referenced as the
		empty string instead of '.'.

		Taken from the calibre Modify Epub plugin's Container implementation.
		"""
		if not base:
			return name
		href = posixpath.relpath(name, base)
		if href == '.':
			href = ''
		return href

	def decode(self, data):
		"""Automatically decode :param:`data` into a `unicode` object."""
		def fix_data(d):
			return d.replace('\r\n', '\n').replace('\r', '\n')
		if isinstance(data, unicode):
			return fix_data(data)
		bom_enc = None
		if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
			bom_enc = {'\0\0\xfe\xff':'utf-32-be',
					'\xff\xfe\0\0':'utf-32-le'}[data[:4]]
			data = data[4:]
		elif data[:2] in ('\xff\xfe', '\xfe\xff'):
			bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
			data = data[2:]
		elif data[:3] == '\xef\xbb\xbf':
			bom_enc = 'utf-8'
			data = data[3:]
		if bom_enc is not None:
			try:
				return fix_data(data.decode(bom_enc))
			except UnicodeDecodeError:
				pass
		try:
			return fix_data(data.decode('utf-8'))
		except UnicodeDecodeError:
			pass
		data, _ = xml_to_unicode(data)
		return fix_data(data)

	def get_raw(self, name):
		path = self.name_map[name]
		return open(path, 'rb').read()

	def get(self, name):
		if name in self.cache:
			return self.cache[name]
		raw = self.get_raw(name)
		raw = self.decode(raw)
		if name in self.mime_map:
			try:
				raw = self._parse(raw, self.mime_map[name])
			except XMLSyntaxError as err:
				raise ParseError(name, unicode(err))
		self.cache[name] = raw
		return raw

	def set(self, name, val):
		self.cache[name] = val
		self.dirtied.add(name)

	def _parse(self, raw, mimetype):
		mt = mimetype.lower()
		if mt.endswith('+xml'):
			parser = etree.XMLParser(no_network = True, huge_tree = not iswindows)
			raw = xml_to_unicode(raw,
				strip_encoding_pats = True, assume_utf8 = True,
				resolve_entities = True)[0].strip()
			idx = raw.find('<html')
			if idx == -1:
				idx = raw.find('<HTML')
			if idx > -1:
				pre = raw[:idx]
				raw = raw[idx:]
				if '<!DOCTYPE' in pre:
					user_entities = {}
					for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
						val = match.group(2)
						if val.startswith('"') and val.endswith('"'):
							val = val[1:-1]
						user_entities[match.group(1)] = val
					if user_entities:
						pat = re.compile(r'&(%s);' % ('|'.join(user_entities.keys())))
						raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
			return etree.fromstring(raw, parser = parser)
		return raw

	def write(self, path):
		for name in self.dirtied:
			data = self.cache[name]
			if hasattr(data, 'xpath'):
				data = etree.tostring(data, encoding = 'UTF-8', xml_declaration = True, pretty_print = True)
			f = open(self.name_map[name], "wb")
			f.write(data)
			f.close()
		self.dirtied.clear()
		if os.path.exists(path):
			os.unlink(path)
		epub = zipfile.ZipFile(path, 'w', compression = zipfile.ZIP_DEFLATED)
		epub.writestr('mimetype', bytes(guess_type('a.epub')[0]), compress_type = zipfile.ZIP_STORED)

		cwd = os.getcwdu()
		os.chdir(self.root)
		zip_prefix = self.root
		if not zip_prefix.endswith(os.sep):
			zip_prefix += os.sep
		for t in os.walk(self.root, topdown = True):
			for f in t[2]:
				if f not in EXCLUDE_FROM_ZIP:
					filepath = os.path.join(t[0], f).replace(zip_prefix, '')
					st = os.stat(filepath)
					mtime = time.localtime(st.st_mtime)
					if mtime[0] < 1980:
						os.utime(filepath, None)
					epub.write(filepath)
		epub.close()
		os.chdir(cwd)
Esempio n. 15
0
def main(args=sys.argv):
    log = Log()
    parser, plumber = create_option_parser(args, log)
    opts, leftover_args = parser.parse_args(args)
    if len(leftover_args) > 3:
        log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
        return 1
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))
    if opts.search_replace:
        opts.search_replace = read_sr_patterns(opts.search_replace, log)
    if opts.transform_css_rules:
        from calibre.ebooks.css_transform_rules import import_rules, validate_rule
        with open(opts.transform_css_rules, 'rb') as tcr:
            opts.transform_css_rules = rules = list(import_rules(tcr.read()))
            for rule in rules:
                title, msg = validate_rule(rule)
                if title and msg:
                    log.error('Failed to parse CSS transform rules')
                    log.error(title)
                    log.error(msg)
                    return 1

    recommendations = [(n.dest, getattr(opts, n.dest),
                        OptionRecommendation.HIGH)
                                        for n in parser.options_iter()
                                        if n.dest]
    plumber.merge_ui_recommendations(recommendations)

    try:
        plumber.run()
    except ConversionUserFeedBack as e:
        ll = {'info': log.info, 'warn': log.warn,
                'error':log.error}.get(e.level, log.info)
        ll(e.title)
        if e.det_msg:
            log.debug(e.detmsg)
        ll(e.msg)
        raise SystemExit(1)

    log(_('Output saved to'), ' ', plumber.output)

    return 0
Esempio n. 16
0
def main(args=sys.argv):
    log = Log()
    parser, plumber = create_option_parser(args, log)
    opts, leftover_args = parser.parse_args(args)
    if len(leftover_args) > 3:
        log.error('Extra arguments not understood:',
                  ', '.join(leftover_args[3:]))
        return 1
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))
    if opts.search_replace:
        opts.search_replace = read_sr_patterns(opts.search_replace, log)
    if opts.transform_css_rules:
        from calibre.ebooks.css_transform_rules import import_rules, validate_rule
        with open(opts.transform_css_rules, 'rb') as tcr:
            opts.transform_css_rules = rules = list(import_rules(tcr.read()))
            for rule in rules:
                title, msg = validate_rule(rule)
                if title and msg:
                    log.error('Failed to parse CSS transform rules')
                    log.error(title)
                    log.error(msg)
                    return 1
    if opts.transform_html_rules:
        from calibre.ebooks.html_transform_rules import import_rules, validate_rule
        with open(opts.transform_html_rules, 'rb') as tcr:
            opts.transform_html_rules = rules = list(import_rules(tcr.read()))
            for rule in rules:
                title, msg = validate_rule(rule)
                if title and msg:
                    log.error('Failed to parse HTML transform rules')
                    log.error(title)
                    log.error(msg)
                    return 1

    recommendations = [(n.dest, getattr(opts,
                                        n.dest), OptionRecommendation.HIGH)
                       for n in parser.options_iter() if n.dest]
    plumber.merge_ui_recommendations(recommendations)

    try:
        plumber.run()
    except ConversionUserFeedBack as e:
        ll = {
            'info': log.info,
            'warn': log.warn,
            'error': log.error
        }.get(e.level, log.info)
        ll(e.title)
        if e.det_msg:
            log.debug(e.detmsg)
        ll(e.msg)
        raise SystemExit(1)

    log(_('Output saved to'), ' ', plumber.output)

    return 0
Esempio n. 17
0
    def run(self, opts):
        scripts = {}
        for x in ('console', 'gui'):
            for name in basenames[x]:
                if name in ('calibre-complete', 'calibre_postinstall'):
                    continue
                scripts[name] = x

        dest = self.j(self.RESOURCES, 'scripts.pickle')
        if self.newer(dest, self.j(self.SRC, 'calibre', 'linux.py')):
            self.info('\tCreating scripts.pickle')
            f = open(dest, 'wb')
            cPickle.dump(scripts, f, -1)

        from calibre.web.feeds.recipes.collection import \
                serialize_builtin_recipes, iterate_over_builtin_recipe_files

        files = [x[1] for x in iterate_over_builtin_recipe_files()]

        dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
        if self.newer(dest, files):
            self.info('\tCreating builtin_recipes.xml')
            xml = serialize_builtin_recipes()
            with open(dest, 'wb') as f:
                f.write(xml)

        recipe_icon_dir = self.a(
            self.j(self.RESOURCES, '..', 'recipes', 'icons'))
        dest = os.path.splitext(dest)[0] + '.zip'
        files += glob.glob(self.j(recipe_icon_dir, '*.png'))
        if self.newer(dest, files):
            self.info('\tCreating builtin_recipes.zip')
            with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
                for n in sorted(files, key=self.b):
                    with open(n, 'rb') as f:
                        zf.writestr(os.path.basename(n), f.read())

        dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
        files = []
        for x in os.walk(self.j(self.SRC, 'calibre')):
            for f in x[-1]:
                if f.endswith('.py'):
                    files.append(self.j(x[0], f))
        if self.newer(dest, files):
            self.info('\tCreating ebook-convert-complete.pickle')
            complete = {}
            from calibre.ebooks.conversion.plumber import supported_input_formats
            complete['input_fmts'] = set(supported_input_formats())
            from calibre.web.feeds.recipes.collection import get_builtin_recipe_titles
            complete['input_recipes'] = [
                t + '.recipe ' for t in get_builtin_recipe_titles()
            ]
            from calibre.customize.ui import available_output_formats
            complete['output'] = set(available_output_formats())
            from calibre.ebooks.conversion.cli import create_option_parser
            from calibre.utils.logging import Log
            log = Log()
            # log.outputs = []
            for inf in supported_input_formats():
                if inf in ('zip', 'rar', 'oebzip'):
                    continue
                for ouf in available_output_formats():
                    of = ouf if ouf == 'oeb' else 'dummy.' + ouf
                    p = create_option_parser(('ec', 'dummy1.' + inf, of, '-h'),
                                             log)[0]
                    complete[(inf, ouf)] = [
                        x + ' ' for x in get_opts_from_parser(p)
                    ]

            cPickle.dump(complete, open(dest, 'wb'), -1)

        self.info('\tCreating template-functions.json')
        dest = self.j(self.RESOURCES, 'template-functions.json')
        function_dict = {}
        import inspect
        from calibre.utils.formatter_functions import formatter_functions
        for obj in formatter_functions().get_builtins().values():
            eval_func = inspect.getmembers(
                obj,
                lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
            try:
                lines = [
                    l[4:] for l in inspect.getsourcelines(eval_func[0][1])[0]
                ]
            except:
                continue
            lines = ''.join(lines)
            function_dict[obj.name] = lines
        import json
        json.dump(function_dict, open(dest, 'wb'), indent=4)
Esempio n. 18
0
def do_download_for_worker(book, options, merge, notification=lambda x, y: x):
    '''
    Child job, to download story when run as a worker job
    '''

    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase:

        from calibre_plugins.fanficfare_plugin.dialogs import (
            NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE,
            UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
        from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions
        from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_update_data

        from calibre_plugins.fanficfare_plugin.fff_util import (
            get_fff_adapter, get_fff_config)

        try:
            book['comment'] = _('Download started...')

            configuration = get_fff_config(book['url'], options['fileform'],
                                           options['personal.ini'])

            if configuration.getConfig('use_ssl_unverified_context'):
                ## monkey patch to avoid SSL bug.  dupliated from
                ## fff_plugin.py because bg jobs run in own process
                ## space.
                import ssl
                if hasattr(ssl, '_create_unverified_context'):
                    ssl._create_default_https_context = ssl._create_unverified_context

            if not options[
                    'updateepubcover'] and 'epub_for_update' in book and options[
                        'collision'] in (UPDATE, UPDATEALWAYS):
                configuration.set("overrides", "never_make_cover", "true")

            # images only for epub, html, even if the user mistakenly
            # turned it on else where.
            if options['fileform'] not in ("epub", "html"):
                configuration.set("overrides", "include_images", "false")

            adapter = adapters.getAdapter(configuration, book['url'])
            adapter.is_adult = book['is_adult']
            adapter.username = book['username']
            adapter.password = book['password']
            adapter.setChaptersRange(book['begin'], book['end'])

            configuration.load_cookiejar(options['cookiejarfile'])
            #logger.debug("cookiejar:%s"%configuration.cookiejar)
            configuration.set_pagecache(options['pagecache'])

            story = adapter.getStoryMetadataOnly()
            if not story.getMetadata("series") and 'calibre_series' in book:
                adapter.setSeries(book['calibre_series'][0],
                                  book['calibre_series'][1])

            # set PI version instead of default.
            if 'version' in options:
                story.setMetadata('version', options['version'])

            book['title'] = story.getMetadata("title", removeallentities=True)
            book['author_sort'] = book['author'] = story.getList(
                "author", removeallentities=True)
            book['publisher'] = story.getMetadata("site")
            book['url'] = story.getMetadata("storyUrl")
            book['tags'] = story.getSubjectTags(removeallentities=True)
            book['comments'] = story.get_sanitized_description()
            book['series'] = story.getMetadata("series",
                                               removeallentities=True)

            if story.getMetadataRaw('datePublished'):
                book['pubdate'] = story.getMetadataRaw(
                    'datePublished').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateUpdated'):
                book['updatedate'] = story.getMetadataRaw(
                    'dateUpdated').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateCreated'):
                book['timestamp'] = story.getMetadataRaw(
                    'dateCreated').replace(tzinfo=local_tz)
            else:
                book['timestamp'] = datetime.now(
                )  # need *something* there for calibre.

            writer = writers.getWriter(options['fileform'], configuration,
                                       adapter)

            outfile = book['outfile']

            ## No need to download at all.  Shouldn't ever get down here.
            if options['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL):
                logger.info(
                    "Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening..."
                )
                book['comment'] = _('Metadata collected.')
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, it's new or not dup or newer--just write it.
            elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
                    ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)):

                # preserve logfile even on overwrite.
                if 'epub_for_update' in book:
                    adapter.logfile = get_update_data(
                        book['epub_for_update'])[6]
                    # change the existing entries id to notid so
                    # write_epub writes a whole new set to indicate overwrite.
                    if adapter.logfile:
                        adapter.logfile = adapter.logfile.replace(
                            "span id", "span notid")

                if options['collision'] == OVERWRITE and 'fileupdated' in book:
                    lastupdated = story.getMetadataRaw('dateUpdated')
                    fileupdated = book['fileupdated']

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(
                            _("Not Overwriting, web site is not newer."),
                            'edit-undo.png',
                            showerror=False)

                logger.info("write to %s" % outfile)
                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)

                book['comment'] = _('Download %s completed, %s chapters.') % (
                    options['fileform'], story.getMetadata("numChapters"))
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, just update it.
            elif 'epub_for_update' in book and options['collision'] in (
                    UPDATE, UPDATEALWAYS):

                # update now handled by pre-populating the old images and
                # chapters in the adapter rather than merging epubs.
                #urlchaptercount = int(story.getMetadata('numChapters').replace(',',''))
                # returns int adjusted for start-end range.
                urlchaptercount = story.getChapterCount()
                (url, chaptercount, adapter.oldchapters, adapter.oldimgs,
                 adapter.oldcover, adapter.calibrebookmark, adapter.logfile,
                 adapter.oldchaptersmap,
                 adapter.oldchaptersdata) = get_update_data(
                     book['epub_for_update'])[0:9]

                # dup handling from fff_plugin needed for anthology updates.
                if options['collision'] == UPDATE:
                    if chaptercount == urlchaptercount:
                        if merge:
                            book['comment'] = _(
                                "Already contains %d chapters.  Reuse as is."
                            ) % chaptercount
                            book['all_metadata'] = story.getAllMetadata(
                                removeallentities=True)
                            if options['savemetacol'] != '':
                                book['savemetacol'] = story.dump_html_metadata(
                                )
                            book['outfile'] = book[
                                'epub_for_update']  # for anthology merge ops.
                            return book
                        else:  # not merge,
                            raise NotGoingToDownload(
                                _("Already contains %d chapters.") %
                                chaptercount,
                                'edit-undo.png',
                                showerror=False)
                    elif chaptercount > urlchaptercount:
                        raise NotGoingToDownload(
                            _("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update."
                              ) % (chaptercount, urlchaptercount),
                            'dialog_error.png')
                    elif chaptercount == 0:
                        raise NotGoingToDownload(
                            _("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."
                              ), 'dialog_error.png')

                if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
                    chaptercount = adapter.hookForUpdates(chaptercount)

                logger.info("Do update - epub(%d) vs url(%d)" %
                            (chaptercount, urlchaptercount))
                logger.info("write to %s" % outfile)

                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)

                book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
                    (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            if options['do_wordcount'] == SAVE_YES or (
                    options['do_wordcount'] == SAVE_YES_UNLESS_SITE
                    and not story.getMetadataRaw('numWords')):
                wordcount = get_word_count(outfile)
                logger.info("get_word_count:%s" % wordcount)
                story.setMetadata('numWords', wordcount)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            if options['smarten_punctuation'] and options['fileform'] == "epub" \
                    and calibre_version >= (0, 9, 39):
                # for smarten punc
                from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
                from calibre.utils.logging import Log
                from collections import namedtuple

                # do smarten_punctuation from calibre's polish feature
                data = {'smarten_punctuation': True}
                opts = ALL_OPTS.copy()
                opts.update(data)
                O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
                opts = O(**opts)

                log = Log(level=Log.DEBUG)
                polish({outfile: outfile}, opts, log, logger.info)

        except NotGoingToDownload as d:
            book['good'] = False
            book['showerror'] = d.showerror
            book['comment'] = unicode(d)
            book['icon'] = d.icon

        except Exception as e:
            book['good'] = False
            book['comment'] = unicode(e)
            book['icon'] = 'dialog_error.png'
            book['status'] = _('Error')
            logger.info("Exception: %s:%s" % (book, unicode(e)), exc_info=True)

        #time.sleep(10)
    return book
Esempio n. 19
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        import Image as PILImage


    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data  = mh.section_data(int(cover_index))
    else:
        try:
            data  = mh.section_data(mh.first_image_index)
        except:
            data = ''
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
    except:
        log.exception('Failed to read MOBI cover')
    else:
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi
Esempio n. 20
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        import Image as PILImage

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except:
            data = ''
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
    except:
        log.exception('Failed to read MOBI cover')
    else:
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi
Esempio n. 21
0
def create_fetcher(options, image_map={}, log=None):
    if log is None:
        log = Log(level=Log.DEBUG) if options.verbose else Log()
    return RecursiveFetcher(options, log, image_map={})
Esempio n. 22
0
    def setup_pipeline(self, *args):
        oidx = self.groups.currentIndex().row()
        input_format = self.input_format
        output_format = self.output_format
        output_path = 'dummy.'+output_format
        log = Log()
        log.outputs = []
        input_file = 'dummy.'+input_format
        if input_format in ARCHIVE_FMTS:
            input_file = 'dummy.html'
        self.plumber = Plumber(input_file, output_path, log)

        def widget_factory(cls):
            return cls(self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)

        self.mw = widget_factory(MetadataWidget)
        self.setWindowTitle(_('Convert')+ ' ' + unicode(self.mw.title.text()))
        lf = widget_factory(LookAndFeelWidget)
        hw = widget_factory(HeuristicsWidget)
        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)
        from calibre.gui2.actions.toc_edit import SUPPORTED
        toc.manually_fine_tune_toc.setVisible(output_format.upper() in SUPPORTED)
        debug = widget_factory(DebugWidget)

        output_widget = self.plumber.output_plugin.gui_configuration_widget(
                self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)
        input_widget = self.plumber.input_plugin.gui_configuration_widget(
                self.stack, self.plumber.get_option_by_name,
                self.plumber.get_option_help, self.db, self.book_id)
        while True:
            c = self.stack.currentWidget()
            if not c:
                break
            self.stack.removeWidget(c)

        widgets = [self.mw, lf, hw, ps, sd, toc, sr]
        if input_widget is not None:
            widgets.append(input_widget)
        if output_widget is not None:
            widgets.append(output_widget)
        widgets.append(debug)
        for w in widgets:
            self.stack.addWidget(w)
            self.connect(w, SIGNAL('set_help(PyQt_PyObject)'),
                    self.help.setPlainText)

        self._groups_model = GroupModel(widgets)
        self.groups.setModel(self._groups_model)

        idx = oidx if -1 < oidx < self._groups_model.rowCount() else 0
        self.groups.setCurrentIndex(self._groups_model.index(idx))
        self.stack.setCurrentIndex(idx)
        try:
            shutil.rmtree(self.plumber.archive_input_tdir, ignore_errors=True)
        except:
            pass
Esempio n. 23
0
class Container(object):
	META_INF = {
			'container.xml' : True,
			'manifest.xml' : False,
			'encryption.xml' : False,
			'metadata.xml' : False,
			'signatures.xml' : False,
			'rights.xml' : False,
	}

	acceptable_encryption_algorithms = (
		'http://ns.adobe.com/pdf/enc#RC'
	)

	namespaces = {
		'opf': 'http://www.idpf.org/2007/opf',
		'ocf': 'urn:oasis:names:tc:opendocument:xmlns:container',
		'ncx': 'http://www.daisy.org/z3986/2005/ncx/',
		'dc': 'http://purl.org/dc/elements/1.1/',
		'xhtml': 'http://www.w3.org/1999/xhtml',
		'enc': 'http://www.w3.org/2001/04/xmlenc#',
		'deenc': 'http://ns.adobe.com/digitaleditions/enc',
		'xml': 'http://www.w3.org/XML/1998/namespace'
	}

	OPF_MIMETYPE = 'application/oebps-package+xml'
	NCX_MIMETYPE = "application/x-dtbncx+xml"

	def __init__(self, path):
		tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
		zf = zipfile.ZipFile(path)
		zf.extractall(tmpdir)

		self.root = os.path.abspath(tmpdir)
		self.log = Log()
		self.dirtied = set([])
		self.cache = {}
		self.mime_map = {}

		print("Container:__init__:Got container path {0}".format(self.root))

		if os.path.exists(os.path.join(self.root, 'mimetype')):
			os.remove(os.path.join(self.root, 'mimetype'))

		container_path = os.path.join(self.root, 'META-INF', 'container.xml')
		if not os.path.exists(container_path):
			raise InvalidEpub('No META-INF/container.xml in epub')
		self.container = etree.fromstring(open(container_path, 'rb').read())
		opf_files = self.container.xpath((r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'.format(guess_type('a.opf')[0])), namespaces = self.namespaces)
		if not opf_files:
			raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
		opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
		if not os.path.exists(opf_path):
			raise InvalidEpub('OPF file does not exist at location pointed to by META-INF/container.xml')

		# Map of relative paths with / separators to absolute
		# paths on filesystem with os separators
		self.name_map = {}
		for dirpath, dirnames, filenames in os.walk(self.root):
			for f in filenames:
				path = os.path.join(dirpath, f)
				name = os.path.relpath(path, self.root).replace(os.sep, '/')
				self.name_map[name] = path
				self.mime_map[name] = guess_type(f)[0]
				if path == opf_path:
					self.opf_name = name
					self.mime_map[name] = guess_type('a.opf')[0]

		opf = self.opf
		for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]', namespaces = self.namespaces):
			href = unquote(item.get('href'))
			item.set("href", href)
			self.mime_map[self.href_to_name(href, os.path.dirname(self.opf_name).replace(os.sep, '/'))] = item.get('media-type')
		self.set(self.opf_name, opf)

	def get_html_names(self):
		"""A generator function that yields only HTML file names from
		the ePub.
		"""
		for node in self.opf.xpath('//opf:manifest/opf:item[@href and @media-type]', namespaces = self.namespaces):
			if node.get("media-type") in HTML_MIMETYPES:
				href = os.path.join(os.path.dirname(self.opf_name), node.get("href"))
				href = os.path.normpath(href).replace(os.sep, '/')
				yield href

	@property
	def is_drm_encumbered(self):
		"""Determine if the ePub container is encumbered with Digital
		Restrictions Management.

		This method looks for the 'encryption.xml' file which denotes an
		ePub encumbered by Digital Restrictions Management. DRM-encumbered
		files cannot be edited.
		"""
		is_encumbered = False
		if 'META-INF/encryption.xml' in self.name_map:
			try:
				xml = self.get('META-INF/encryption.xml')
				if xml is None:
					return True # If encryption.xml can't be parsed, assume its presence means an encumbered file
				for elem in xml.xpath('./enc:EncryptedData/enc:EncryptionMethod[@Algorithm]', namespaces = self.namespaces):
					alg = elem.get('Algorithm')

					# Anything not in acceptable_encryption_algorithms is a sign of an
					# encumbered file.
					if alg not in self.acceptable_encryption_algorithms:
						is_encumbered = True
			except Exception as e:
				self.log.error("Could not parse encryption.xml: " + e.message)
				raise

		return is_encumbered

	def manifest_worthy_names(self):
		for name in self.name_map:
			if name.endswith('.opf'): continue
			if name.startswith('META-INF') and os.path.basename(name) in self.META_INF:
				continue
			yield name

	def delete_name(self, name):
		self.mime_map.pop(name, None)
		path = self.name_map[name]
		os.remove(path)
		self.name_map.pop(name)

	def manifest_item_for_name(self, name):
		href = self.name_to_href(name, os.path.dirname(self.opf_name))
		q = prepare_string_for_xml(href, attribute = True)
		existing = self.opf.xpath('//opf:manifest/opf:item[@href="{0}"]'.format(q), namespaces = self.namespaces)
		if not existing:
			return None
		return existing[0]

	def add_name_to_manifest(self, name, mt = None):
		item = self.manifest_item_for_name(name)
		if item is not None:
			return
		self.log("Adding '{0}' to the manifest".format(name))
		manifest = self.opf.xpath('//opf:manifest', namespaces = self.namespaces)[0]
		item = manifest.makeelement('{%s}item' % self.namespaces['opf'], href = self.name_to_href(name, os.path.dirname(self.opf_name)), id = self.generate_manifest_id())
		if not mt:
			mt = guess_type(os.path.basename(name))[0]
		if not mt:
			mt = 'application/octest-stream'
		item.set('media-type', mt)
		manifest.append(item)
		self.fix_tail(item)
		self.set(self.opf_name, self.opf)
		self.name_map[name] = os.path.join(self.root, name)
		self.mime_map[name] = mt

	def fix_tail(self, item):
		'''
		Designed only to work with self closing elements after item has
		just been inserted/appended
		'''
		parent = item.getparent()
		idx = parent.index(item)
		if idx == 0:
			item.tail = parent.text
		else:
			item.tail = parent[idx - 1].tail
			if idx == len(parent) - 1:
				parent[idx - 1].tail = parent.text

	def copy_file_to_container(self, path, name = None, mt = None):
		'''Copy a file into this Container instance.

		@param path: The path to the file to copy into this Container.
		@param name: The name to give to the copied file, relative to the Container root. Set to None to use the basename of path.
		@param mt: The MIME type of the file to set in the manifest. Set to None to auto-detect.

		@return: The name of the file relative to the Container root
		'''
		if path is None or re.match(r'^\s*$', path, re.MULTILINE):
			raise ValueError("A source path must be given")
		if name is None:
			name = os.path.basename(path)
		self.log("Copying file '{0}' to '{1}'".format(path, os.path.join(self.root, name)))
		shutil.copy(path, os.path.join(self.root, name))
		self.add_name_to_manifest(name, mt)

		return name

	def add_content_file_reference(self, name):
		'''Add a reference to the named file (from self.name_map) to all content files (self.get_html_names()). Currently
		only CSS files with a MIME type of text/css and JavaScript files with a MIME type of application/x-javascript are
		supported.
		'''
		if name not in self.name_map or name not in self.mime_map:
			raise ValueError("A valid file name must be given (got: {0})".format(name))
		for file in self.get_html_names():
			root = self.get(file)
			if not root:
				self.log("Could not retrieve content file {0}".format(file))
				continue
			head = root.xpath('./xhtml:head', namespaces = self.namespaces)
			if not head:
				self.log("Could not find a <head> element in content file {0}".format(file))
				continue
			head = head[0]
			if not head:
				self.log("A <head> section was found but was undefined in content file {0}".format(file))
				continue

			if self.mime_map[name] == guess_type('a.css')[0]:
				elem = head.makeelement("{%s}link" % self.namespaces['xhtml'], rel = 'stylesheet', href = os.path.relpath(name, os.path.dirname(file)).replace(os.sep, '/'))
			elif self.mime_map[name] == guess_type('a.js')[0]:
				elem = head.makeelement("{%s}script" % self.namespaces['xhtml'], type = 'text/javascript', src = os.path.relpath(name, os.path.dirname(file)).replace(os.sep, '/'))
			else:
				elem = None

			if elem is not None:
				head.append(elem)
				if self.mime_map[name] == guess_type('a.css')[0]:
					self.fix_tail(elem)
				self.set(file, root)

	def generate_manifest_id(self):
		items = self.opf.xpath('//opf:manifest/opf:item[@id]', namespaces = self.namespaces)
		ids = set([x.get('id') for x in items])
		for x in xrange(sys.maxint):
			c = 'id{0}'.format(x)
			if c not in ids:
				return c

	@property
	def opf(self):
		return self.get(self.opf_name)

	def href_to_name(self, href, base = ''):
		"""Changed to fix a bug which incorrectly splits the href on
		'#' when '#' is part of the file name. Also normalizes the
		path.

		Taken from the calibre Modify Epub plugin's Container implementation.
		"""
		hash_index = href.find('#')
		period_index = href.find('.')
		if hash_index > 0 and hash_index > period_index:
			href = href.partition('#')[0]
		href = unquote(href)
		name = href
		if base:
			name = os.path.join(base, href)
		name = os.path.normpath(name).replace(os.sep, '/')
		return name

	def name_to_href(self, name, base):
		"""Changed to ensure that blank href names are referenced as the
		empty string instead of '.'.

		Taken from the calibre Modify Epub plugin's Container implementation.
		"""
		if not base:
			return name
		href = os.path.relpath(name, base).replace(os.sep, '/')
		if href == '.':
			href = ''
		return href

	def decode(self, data):
		"""Automatically decode :param:`data` into a `unicode` object."""
		def fix_data(d):
			return d.replace('\r\n', '\n').replace('\r', '\n')
		if isinstance(data, unicode):
			return fix_data(data)
		bom_enc = None
		if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
			bom_enc = {'\0\0\xfe\xff':'utf-32-be',
					'\xff\xfe\0\0':'utf-32-le'}[data[:4]]
			data = data[4:]
		elif data[:2] in ('\xff\xfe', '\xfe\xff'):
			bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
			data = data[2:]
		elif data[:3] == '\xef\xbb\xbf':
			bom_enc = 'utf-8'
			data = data[3:]
		if bom_enc is not None:
			try:
				return fix_data(data.decode(bom_enc))
			except UnicodeDecodeError:
				pass
		try:
			return fix_data(data.decode('utf-8'))
		except UnicodeDecodeError:
			pass
		data, _ = xml_to_unicode(data)
		return fix_data(data)

	def get_raw(self, name):
		path = self.name_map[name]
		return open(path, 'rb').read()

	def get(self, name):
		if name in self.cache:
			val = self.cache[name]
			if not hasattr(val, 'xpath'):
				val = self._parse(val, self.mime_map[name])
			return val
		raw = self.get_raw(name)
		raw = self.decode(raw)
		if name in self.mime_map:
			try:
				raw = self._parse(raw, self.mime_map[name])
			except XMLSyntaxError as err:
				raise ParseError(name, unicode(err))
		self.cache[name] = raw
		return raw

	def set(self, name, val):
		self.cache[name] = val
		self.dirtied.add(name)

	def _parse(self, raw, mimetype):
		mt = mimetype.lower()
		if mt.endswith('xml'):
			parser = etree.XMLParser(no_network = True, huge_tree = not iswindows)
			raw = xml_to_unicode(raw,
				strip_encoding_pats = True, assume_utf8 = True,
				resolve_entities = True)[0].strip()
			idx = raw.find('<html')
			if idx == -1:
				idx = raw.find('<HTML')
			if idx > -1:
				pre = raw[:idx]
				raw = raw[idx:]
				if '<!DOCTYPE' in pre:
					user_entities = {}
					for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
						val = match.group(2)
						if val.startswith('"') and val.endswith('"'):
							val = val[1:-1]
						user_entities[match.group(1)] = val
					if user_entities:
						pat = re.compile(r'&(%s);' % ('|'.join(user_entities.keys())))
						raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
			return etree.fromstring(raw, parser = parser)
		return raw

	def write(self, path):
		for name in self.dirtied:
			data = self.cache[name]
			if hasattr(data, 'xpath'):
				data = etree.tostring(data, encoding = 'UTF-8', xml_declaration = True, pretty_print = True)
			data = string.replace(data, u"\uFFFD", "")
			f = open(self.name_map[name], "wb")
			f.write(data)
			f.close()
		self.dirtied.clear()
		if os.path.exists(path):
			os.unlink(path)
		epub = zipfile.ZipFile(path, 'w', compression = zipfile.ZIP_DEFLATED)
		epub.writestr('mimetype', bytes(guess_type('a.epub')[0]), compress_type = zipfile.ZIP_STORED)

		cwd = os.getcwdu()
		os.chdir(self.root)
		zip_prefix = self.root
		if not zip_prefix.endswith(os.sep):
			zip_prefix += os.sep
		for t in os.walk(self.root, topdown = True):
			for f in t[2]:
				if f not in EXCLUDE_FROM_ZIP:
					filepath = os.path.join(t[0], f).replace(zip_prefix, '')
					st = os.stat(filepath)
					mtime = time.localtime(st.st_mtime)
					if mtime[0] < 1980:
						os.utime(filepath, None)
					epub.write(filepath)
		epub.close()
		os.chdir(cwd)

	def __hyphenate_node(self, elem, hyphenator, hyphen = u'\u00AD'):
		if elem is None:
			return None

		if isinstance(elem, basestring):
			newstr = []
			for w in elem.split():
				if len(w) > 3 and '-' not in w and hyphen not in w:
					w = hyphenator.inserted(w, hyphen = hyphen)
				newstr.append(w)
			elem = " ".join(newstr)
		else:
			if elem.text is None and elem.tail is None:
				# If we get here, there's only child nodes
				for node in elem.xpath('./node()'):
					node = self.__hyphenate_node(node, hyphenator, hyphen)
			else:
				elem.text = self.__hyphenate_node(elem.text, hyphenator, hyphen)
				if elem.text is not None:
					elem.text += u" "
				elem.tail = self.__hyphenate_node(elem.tail, hyphenator, hyphen)
		return elem

	def hyphenate(self, hyphenator, hyphen = u'\u00AD'):
		if hyphenator is None or hyphen is None or hyphen == '':
			return False
		for name in self.get_html_names():
			self.log("Hyphenating file {0}".format(name))
			root = self.get(name)
			for node in root.xpath("./xhtml:body//xhtml:span[starts-with(@id, 'kobo.')]", namespaces = self.namespaces):
				node = self.__hyphenate_node(node, hyphenator, hyphen)
			self.set(name, root)
		return True

	def smarten_punctuation(self):
		preprocessor = HeuristicProcessor(log = self.log)

		for name in self.get_html_names():
			html = self.get_raw(name)
			html = html.encode("UTF-8")

			# Fix non-breaking space indents
			html = preprocessor.fix_nbsp_indents(html)
			# Smarten punctuation
			html = smartyPants(html)
			# Ellipsis to HTML entity
			html = re.sub(r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html)
			# Double-dash and unicode char code to em-dash
			html = string.replace(html, '---', ' &#x2013; ')
			html = string.replace(html, u"\x97", ' &#x2013; ')
			html = string.replace(html, '--', ' &#x2014; ')
			html = string.replace(html, u"\u2014", ' &#x2014; ')
			html = string.replace(html, u"\u2013", ' &#x2013; ')
			html = string.replace(html, u"...", "&#x2026;")

			# Remove Unicode replacement characters
			html = string.replace(html, u"\uFFFD", "")

			self.set(name, html)

	def clean_markup(self):
		preprocessor = HeuristicProcessor(log = self.log)
		for name in self.get_html_names():
			html = self.get_raw(name)
			html = html.encode("UTF-8")
			html = string.replace(html, u"\u2014", ' -- ')
			html = string.replace(html, u"\u2013", ' --- ')
			html = string.replace(html, u"\x97", ' --- ')
			html = preprocessor.cleanup_markup(html)

			# Remove Unicode replacement characters
			html = string.replace(html, u"\uFFFD", "")

			self.set(name, html)