Ejemplo n.º 1
0
    def run(self):
        """Run the identify job and record its outcome on this object.

        On success ``self.results`` holds the result objects, each tagged with
        a ``gui_rank`` equal to its position in the list. On failure the
        traceback text is stored in ``self.error``; nothing is raised.
        """
        try:
            if DEBUG_DIALOG:
                # Debug mode: use canned results instead of forking a worker.
                self.results = self.sample_results()
            else:
                job = fork_job(
                    "calibre.ebooks.metadata.sources.worker",
                    "single_identify",
                    (self.title, self.authors, self.identifiers),
                    no_output=True,
                    abort=self.abort,
                )
                self.results, covers, caches, log_dump = job["result"]
                # Every entry returned by the worker is wrapped in a BytesIO
                # and read through OPF to obtain a metadata object.
                parsed = []
                for raw_opf in self.results:
                    opf = OPF(BytesIO(raw_opf), basedir=os.getcwdu(), populate_spine=False)
                    parsed.append(opf.to_book_metadata())
                self.results = parsed
                for metadata, cover_flag in zip(self.results, covers):
                    metadata.has_cached_cover_url = cover_flag
                self.caches.update(caches)
                self.log.load(log_dump)
            for rank, metadata in enumerate(self.results):
                metadata.gui_rank = rank
        except WorkerError as err:
            # The worker carries its own traceback text.
            self.error = force_unicode(err.orig_tb)
        except:
            import traceback

            self.error = force_unicode(traceback.format_exc())
Ejemplo n.º 2
0
    def report(self):
        """Return a plain-text summary of everything that was not restored.

        The text has up to three sections: folders whose books failed to
        restore (with the traceback for each), custom columns with conflicting
        definitions, and folders that were ignored. An empty string is
        returned when there is nothing to report.
        """
        chunks = []

        failures = list(self.failed_dirs)
        failures.extend((book['dirpath'], tb) for book, tb in self.failed_restores)
        if failures:
            chunks.append('Failed to restore the books in the following folders:\n')
            for dirpath, tb in failures:
                chunks.append('\t' + force_unicode(dirpath, filesystem_encoding) + ' with error:\n')
                chunks.append('\n'.join(
                    '\t\t'+force_unicode(line, filesystem_encoding) for line in tb.splitlines()))
                chunks.append('\n\n')

        if self.conflicting_custom_cols:
            chunks.append('\n\n')
            chunks.append('The following custom columns have conflicting definitions '
                          'and were not fully restored:\n')
            for label in self.conflicting_custom_cols:
                chunks.append('\t#'+label+'\n')
                used = self.custom_columns[label]
                chunks.append('\tused:\t%s, %s, %s, %s\n'%(used[1], used[2], used[3], used[5]))
                for coldef in self.conflicting_custom_cols[label]:
                    chunks.append('\tother:\t%s, %s, %s, %s\n'%(
                        coldef[1], coldef[2], coldef[3], coldef[5]))

        if self.mismatched_dirs:
            chunks.append('\n\n')
            chunks.append('The following folders were ignored:\n')
            for folder in self.mismatched_dirs:
                chunks.append('\t' + force_unicode(folder, filesystem_encoding) + '\n')

        return ''.join(chunks)
Ejemplo n.º 3
0
 def do_one(self):
     """Process one step of the pending embed-metadata job.

     ``self.job_data`` is a tuple ``(index, book_ids, progress dialog,
     only_fmts, errors)``. Each call embeds metadata into the book at
     ``index`` and stores the advanced tuple back. Once every book is done,
     or the dialog was canceled, the job is torn down and any collected
     errors are shown.
     """
     try:
         index, book_ids, pd, only_fmts, errors = self.job_data
     except (TypeError, AttributeError):
         # job_data is missing or not unpackable: nothing to do.
         return

     finished = index >= len(book_ids) or pd.wasCanceled()
     if not finished:
         pd.setValue(index)
         db = self.gui.current_db.new_api

         def report_error(mi, fmt, tb):
             errors.append((mi, fmt, tb))

         db.embed_metadata((book_ids[index],), only_fmts=only_fmts, report_error=report_error)
         self.job_data = (index + 1, book_ids, pd, only_fmts, errors)
         return

     # Tear down the progress UI and the job state.
     pd.setValue(pd.maximum())
     pd.hide()
     self.pd_timer.stop()
     self.job_data = None
     self.gui.library_view.model().refresh_ids(book_ids)
     if index > 0:
         self.gui.status_bar.show_message(_('Embedded metadata in %d books') % index, 5000)
     if errors:
         details = []
         for mi, fmt, tb in errors:
             details.append(_('The {0} format of {1}:\n\n{2}\n').format(
                 (fmt or '').upper(), force_unicode(mi.title), force_unicode(tb)))
         warning_dialog(
             self.gui, _('Failed for some files'), _(
             'Failed to embed metadata into some book files. Click "Show details" for details.'),
             det_msg='\n\n'.join(details), show=True)
Ejemplo n.º 4
0
 def create_device(self, connected_device):
     """Build a ``self.libmtp.Device`` from a detected device description.

     When ``ispy3`` is set, a manufacturer or product name that is ``bytes``
     is converted to text (decoded as UTF-8) before being passed on.
     """
     dev = connected_device
     manufacturer, product = dev.manufacturer, dev.product
     if ispy3:
         if isinstance(manufacturer, bytes):
             manufacturer = force_unicode(manufacturer, 'utf-8')
         if isinstance(product, bytes):
             product = force_unicode(product, 'utf-8')
     return self.libmtp.Device(
         dev.busnum, dev.devnum, dev.vendor_id, dev.product_id,
         manufacturer, product, dev.serial)
Ejemplo n.º 5
0
def author_to_author_sort(author, method=None):
    """Convert an author name into its sort form.

    Bracketed text is removed, leading prefixes and trailing suffixes listed
    in the tweaks are stripped, and the last remaining token is moved to the
    front. The original ``author`` is returned unchanged when it has fewer
    than two tokens, when the effective method is ``copy``, when nothing is
    left after stripping, or when the method is ``comma`` and the name already
    contains a comma.

    :param author: The author name. A false value yields an empty string.
    :param method: ``copy``, ``comma``, ``nocomma`` or another value; defaults
        to ``tweaks['author_sort_copy_method']``.
    """
    if not author:
        return u''
    tokens = remove_bracketed_text(author).strip().split()
    if len(tokens) < 2:
        return author
    if method is None:
        method = tweaks['author_sort_copy_method']

    # Any "copy word" in the name forces the name to be used as-is.
    lowered = frozenset(tok.lower() for tok in tokens)
    copy_words = frozenset(word.lower() for word in tweaks['author_name_copywords'])
    if not lowered.isdisjoint(copy_words):
        method = u'copy'

    if method == u'copy':
        return author

    # Prefixes match with or without a trailing period.
    prefixes = {force_unicode(p).lower() for p in tweaks['author_name_prefixes']}
    prefixes |= {p+u'.' for p in prefixes}
    while tokens and tokens[0].lower() in prefixes:
        tokens = tokens[1:]
    if not tokens:
        return author

    suffixes = {force_unicode(s).lower() for s in tweaks['author_name_suffixes']}
    suffixes |= {s+u'.' for s in suffixes}

    # Peel suffixes off the end, keeping them in their original order.
    suffix = u''
    while tokens and tokens[-1].lower() in suffixes:
        suffix = tokens[-1] + ' ' + suffix
        tokens = tokens[:-1]
    if not tokens:
        return author
    suffix = suffix.strip()

    if method == u'comma' and u',' in u''.join(tokens):
        return author

    # Rotate the last token to the front.
    reordered = tokens[-1:] + tokens[:-1]
    name_token_count = len(reordered)
    if suffix:
        reordered.append(suffix)

    if method != u'nocomma' and name_token_count > 1:
        reordered[0] += u','

    return u' '.join(reordered)
Ejemplo n.º 6
0
 def run(self):
     """Run the job, storing any failure as text in ``self.error``.

     ``fake_run`` is used when ``DEBUG_DIALOG`` is set, ``run_fork``
     otherwise. No exception escapes this method.
     """
     try:
         worker = self.fake_run if DEBUG_DIALOG else self.run_fork
         worker()
     except WorkerError as err:
         # The worker carries its own traceback text.
         self.error = force_unicode(err.orig_tb)
     except:
         import traceback
         self.error = force_unicode(traceback.format_exc())
Ejemplo n.º 7
0
def filter_css(container, properties, names=()):
    """
    Remove the specified CSS properties from all CSS rules in the book.

    Stylesheets, ``<style>`` tags and ``style`` attributes are all processed.
    Every file that was modified is marked dirty on the container.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    :return: True if at least one file was changed, False otherwise.
    """
    if not names:
        wanted_types = OEB_STYLES | OEB_DOCS
        names = [fname for fname, mime in container.mime_map.iteritems() if mime in wanted_types]
    properties = normalize_filter_css(properties)
    anything_changed = False

    for fname in names:
        mime = container.mime_map[fname]
        file_changed = False
        if mime in OEB_STYLES:
            sheet = container.parsed(fname)
            if filter_sheet(sheet, properties):
                file_changed = True
        elif mime in OEB_DOCS:
            root = container.parsed(fname)
            # <style> tags that hold CSS.
            for style_tag in root.xpath('//*[local-name()="style"]'):
                if not style_tag.text:
                    continue
                if style_tag.get("type", "text/css") not in {None, "", "text/css"}:
                    continue
                sheet = container.parse_css(style_tag.text)
                if filter_sheet(sheet, properties):
                    file_changed = True
                    style_tag.text = force_unicode(sheet.cssText, "utf-8")
                    pretty_script_or_style(container, style_tag)
            # Inline style="" attributes.
            for elem in root.xpath("//*[@style]"):
                text = elem.get("style", None)
                if not text:
                    continue
                decl = container.parse_css(text, is_declaration=True)
                if not filter_declaration(decl, properties):
                    continue
                file_changed = True
                if decl.length == 0:
                    # Nothing left in the declaration: drop the attribute.
                    del elem.attrib["style"]
                else:
                    elem.set("style", force_unicode(decl.getCssText(separator=" "), "utf-8"))
        if file_changed:
            container.dirty(fname)
            anything_changed = True

    return anything_changed
Ejemplo n.º 8
0
    def __init__(self, oeb_metadata=None):
        """Collect the title, author string and tags from OEB metadata.

        :param oeb_metadata: Object exposing ``title``, ``creator`` and
            ``subject`` sequences, or None. With None (or empty sequences)
            the title and author stay at the translated ``Unknown`` and the
            tags stay empty.
        """
        self.title = _(u'Unknown')
        self.author = _(u'Unknown')
        self.tags = u''

        # Identity test: ``!= None`` would go through any custom comparison
        # defined by the metadata object.
        if oeb_metadata is not None:
            if len(oeb_metadata.title) >= 1:
                self.title = oeb_metadata.title[0].value
            if len(oeb_metadata.creator) >= 1:
                self.author = authors_to_string([x.value for x in oeb_metadata.creator])
            if oeb_metadata.subject:
                self.tags = u', '.join(map(unicode, oeb_metadata.subject))

        self.title = force_unicode(self.title)
        self.author = force_unicode(self.author)
Ejemplo n.º 9
0
 def link_replacer(base, url):
     """Rewrite a link found in the file ``base``.

     Fragment-only links and relative links that resolve to a non-empty
     name are rewritten through ``resource_template``; relative links to
     a name that fails that check become ``missing:`` URLs. Every rewrite
     records ``base`` in ``changed``. Links with a network location, a
     query, a non-file scheme, an empty path or an absolute path are
     returned untouched.
     """
     if url.startswith('#'):
         frag = urlunquote(url[1:])
         if not frag:
             return url
         changed.add(base)
         return resource_template.format(encode_url(base, frag))

     purl = urlparse(url)
     is_external = purl.netloc or purl.query or (purl.scheme and purl.scheme != 'file')
     if is_external or not purl.path or purl.path.startswith('/'):
         return url

     path, frag = purl.path, purl.fragment
     name = self.href_to_name(path, base)
     if not name:
         # Unresolvable: only the path part of the link is returned.
         return path

     if self.has_name_and_is_not_empty(name):
         rewritten = resource_template.format(encode_url(name, urlunquote(frag)))
     else:
         if isinstance(name, unicode_type):
             name = name.encode('utf-8')
         rewritten = 'missing:' + force_unicode(quote(name), 'utf-8')
     changed.add(base)
     return rewritten
Ejemplo n.º 10
0
    def initialize_db(self):
        """Open the library database, offering a repair if it is corrupted.

        On ``apsw.Error`` the user is asked whether to rebuild the library.
        On Windows the rebuild runs in a separate process and the
        application quits. Any other failure is reported as a bad database
        location. ``initialize_db_stage2`` then receives the opened database,
        or None when none could be opened.
        """
        from calibre.db.legacy import LibraryDatabase
        db = None
        self.timed_print('Initializing db...')
        try:
            db = LibraryDatabase(self.library_path)
        except apsw.Error:
            with self.app:
                self.hide_splash_screen()
                title = _('Corrupted database')
                msg = _('The library database at %s appears to be corrupted. Do '
                        'you want calibre to try and rebuild it automatically? '
                        'The rebuild may not be completely successful. '
                        'If you say No, a new empty calibre library will be created.'
                        ) % force_unicode(self.library_path, filesystem_encoding)
                repair = question_dialog(self.splash_screen, title, msg,
                                         det_msg=traceback.format_exc())
            if repair and iswindows:
                # On some windows systems the existing db file gets locked
                # by something when running restore from the main process.
                # So run the restore in a separate process.
                windows_repair(self.library_path)
                self.app.quit()
                return
            if repair and repair_library(self.library_path):
                db = LibraryDatabase(self.library_path)
        except:
            title = _('Bad database location')
            msg = _('Bad database location %r. Will start with '
                    ' a new, empty calibre library')%self.library_path
            self.show_error(title, msg, det_msg=traceback.format_exc())

        self.initialize_db_stage2(db, None)
Ejemplo n.º 11
0
 def item(i):
     """Render one category entry as an HTML fragment.

     The entry links to ``/browse/matches/<category>/<id>``. When the entry
     has no id, a hex encoding of its unescaped name is used as the id.
     """
     rating, rstring = render_rating(i.avg_rating, prefix)
     orig_name = i.sort if i.use_sort_as_name else i.name
     name = xml(orig_name)
     if datatype == "rating":
         name = xml(_("%d stars") % int(i.avg_rating))

     id_ = i.id
     if id_ is None:
         id_ = hexlify(force_unicode(orig_name).encode("utf-8"))
     id_ = xml(str(id_))

     desc = ("[" + _("%d books") % i.count + "]") if i.count > 0 else ""
     q = i.category or category
     href = "/browse/matches/%s/%s" % (quote(q), quote(id_))

     templ = (
         u'<div title="{4}" class="category-item">'
         '<div class="category-name">'
         '<a href="{5}{3}" title="{4}">{0}</a></div>'
         "<div>{1}</div>"
         "<div>{2}</div></div>"
     )
     return templ.format(xml(name), rating, xml(desc), xml(href, True), rstring, prefix)
Ejemplo n.º 12
0
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    """Build a Zeroconf ``ServiceInfo`` describing a local service.

    :param desc: Human readable service description.
    :param type: Service type; ``.local.`` is appended to it.
    :param port: Port number (converted with ``int``).
    :param properties: Passed through to ``ServiceInfo``.
    :param add_hostname: When true, the host name and port are appended to
        ``desc`` on a best-effort basis.
    :param use_ip_address: Address to advertise; when false, the result of
        ``get_external_ip()`` is used.
    """
    port = int(port)
    try:
        full_name = ascii_text(force_unicode(socket.gethostname()))
        hostname = full_name.partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Best effort: fall back to a shorter label, then to no label at all.
        try:
            desc += ' (on %s port %d)'%(hostname, port)
        except:
            try:
                desc += ' (on %s)'%hostname
            except:
                pass

    local_ip = use_ip_address or get_external_ip()
    service_type = type+'.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(
        service_type,
        desc+'.'+service_type,
        address=socket.inet_aton(local_ip),
        port=port,
        properties=properties,
        server=hostname+'.local.',
    )
Ejemplo n.º 13
0
 def walk(self, root):
     """Collect the books found in every directory below ``root``.

     Results accumulate in ``self.books``. A progress message is emitted
     for each directory, and the walk stops early once ``self.canceled``
     is set.
     """
     self.books = []
     for dirpath, dirnames, filenames in os.walk(root):
         if self.canceled:
             return
         self.update.emit(_("Searching in") + " " + force_unicode(dirpath, filesystem_encoding))
         found = self.db.find_books_in_directory(dirpath, self.single_book_per_directory)
         self.books += list(found)
Ejemplo n.º 14
0
    def magnify_fonts(self, factor):
        """Magnify all font sizes defined in the book by ``factor``.

        Stylesheets, ``<style>`` tags and ``style=""`` attributes are all
        processed, and every file that is touched is marked dirty on the
        container.

        :param factor: The magnification factor, passed on to
            ``magnify_stylesheet`` and ``magnify_declaration``.
        """
        # First we create a restore point so that the user can undo all changes
        # we make.
        self.boss.add_savepoint('Before: Magnify fonts')

        container = self.current_container  # The book being edited as a container object

        # Iterate over all style declarations in the book, this means css
        # stylesheets, <style> tags and style="" attributes
        for name, media_type in container.mime_map.items():
            if media_type in OEB_STYLES:
                # A stylesheet. Parsed stylesheets are css_parser CSSStylesheet
                # objects.
                self.magnify_stylesheet(container.parsed(name), factor)
                container.dirty(name)  # Tell the container that we have changed the stylesheet
            elif media_type in OEB_DOCS:
                # A HTML file. Parsed HTML files are lxml elements
                root = container.parsed(name)

                # local-name() is an XPath function and needs the parentheses.
                # Without them the predicate compares a child element called
                # "local-name" and never matches a <style> tag.
                for style_tag in root.xpath('//*[local-name()="style"]'):
                    if style_tag.text and style_tag.get('type', None) in {None, 'text/css'}:
                        # We have an inline CSS <style> tag, parse it into a
                        # stylesheet object
                        sheet = container.parse_css(style_tag.text)
                        self.magnify_stylesheet(sheet, factor)
                        style_tag.text = serialize(sheet, 'text/css', pretty_print=True)
                        container.dirty(name)  # Tell the container that we have changed the stylesheet
                for elem in root.xpath('//*[@style]'):
                    # Process inline style attributes
                    block = container.parse_css(elem.get('style'), is_declaration=True)
                    self.magnify_declaration(block, factor)
                    elem.set('style', force_unicode(block.getCssText(separator=' '), 'utf-8'))
                    # The file changed here too, so it must be marked dirty
                    # even when it contains no <style> tag.
                    container.dirty(name)
Ejemplo n.º 15
0
def sort_key_for_action(ac):
    """Return the sort key for an action.

    The key is built from the first element of ``ac.action_spec`` when that
    attribute exists and is not None, otherwise from ``ac.name``. If anything
    goes wrong, the key for an empty string is returned.
    """
    spec = getattr(ac, 'action_spec', None)
    try:
        if spec is None:
            label = ac.name
        else:
            label = spec[0]
        return primary_sort_key(force_unicode(label))
    except Exception:
        return primary_sort_key(u'')
Ejemplo n.º 16
0
    def initialize_db(self):
        """Open the library database, offering a repair if it is corrupted.

        The database class and the exception types that signal corruption
        come from ``get_db_loader()``. Any other failure is reported as a
        bad database location. ``initialize_db_stage2`` then receives the
        opened database, or None when none could be opened.
        """
        from calibre.db import get_db_loader
        db = None
        self.db_class, errs = get_db_loader()
        try:
            db = self.db_class(self.library_path)
        except errs:
            title = _('Corrupted database')
            msg = _('The library database at %s appears to be corrupted. Do '
                    'you want calibre to try and rebuild it automatically? '
                    'The rebuild may not be completely successful. '
                    'If you say No, a new empty calibre library will be created.'
                    ) % force_unicode(self.library_path, filesystem_encoding)
            repair = question_dialog(self.splash_screen, title, msg,
                                     det_msg=traceback.format_exc())
            if repair and repair_library(self.library_path):
                db = self.db_class(self.library_path)
        except:
            title = _('Bad database location')
            msg = _('Bad database location %r. Will start with '
                    ' a new, empty calibre library')%self.library_path
            error_dialog(self.splash_screen, title, msg,
                         det_msg=traceback.format_exc(), show=True)

        self.initialize_db_stage2(db, None)
Ejemplo n.º 17
0
 def item(i):
     """Render one category entry as an HTML fragment.

     The entry links to ``/browse/matches/<category>/<id>``. When the entry
     has no id of its own, a hex encoding of its name is used instead.
     """
     templ = (u'<div title="{4}" class="category-item">'
             '<div class="category-name">'
             '<a href="{5}{3}" title="{4}">{0}</a></div>'
             '<div>{1}</div>'
             '<div>{2}</div></div>')
     rating, rstring = render_rating(i.avg_rating, prefix)
     orig_name = i.sort if i.use_sort_as_name else i.name
     name = xml(orig_name)
     if datatype == 'rating':
         name = xml(_('%d stars')%int(i.avg_rating))
     id_ = i.id
     if id_ is None:
         # Encode the raw name, not the XML-escaped display text: escaping
         # turns characters such as "&" into entities, so the id would no
         # longer correspond to the real name.
         id_ = hexlify(force_unicode(orig_name).encode('utf-8'))
     id_ = xml(str(id_))
     desc = ''
     if i.count > 0:
         desc += '[' + _('%d books')%i.count + ']'
     q = i.category
     if not q:
         q = category
     href = '/browse/matches/%s/%s'%(quote(q), quote(id_))
     return templ.format(xml(name), rating,
             xml(desc), xml(href, True), rstring, prefix)
Ejemplo n.º 18
0
    def initialize_db(self):
        """Open the library database, offering a repair if it is corrupted.

        On ``apsw.Error`` the splash screen is hidden and the user is asked
        whether to rebuild the library. Any other failure is reported as a
        bad database location. ``initialize_db_stage2`` then receives the
        opened database, or None when none could be opened.
        """
        from calibre.db.legacy import LibraryDatabase
        db = None
        try:
            db = LibraryDatabase(self.library_path)
        except apsw.Error:
            with self.app:
                self.hide_splash_screen()
                title = _('Corrupted database')
                msg = _('The library database at %s appears to be corrupted. Do '
                        'you want calibre to try and rebuild it automatically? '
                        'The rebuild may not be completely successful. '
                        'If you say No, a new empty calibre library will be created.'
                        ) % force_unicode(self.library_path, filesystem_encoding)
                repair = question_dialog(self.splash_screen, title, msg,
                                         det_msg=traceback.format_exc())
            if repair and repair_library(self.library_path):
                db = LibraryDatabase(self.library_path)
        except:
            title = _('Bad database location')
            msg = _('Bad database location %r. Will start with '
                    ' a new, empty calibre library')%self.library_path
            self.show_error(title, msg, det_msg=traceback.format_exc())

        self.initialize_db_stage2(db, None)
Ejemplo n.º 19
0
    def initialize_db(self):
        """Open the library database, offering a repair if it is corrupted.

        On ``apsw.Error`` the user is asked whether to rebuild the library.
        Any other failure is reported as a bad database location.
        ``initialize_db_stage2`` then receives the opened database, or None
        when none could be opened.
        """
        from calibre.db.legacy import LibraryDatabase

        db = None
        try:
            db = LibraryDatabase(self.library_path)
        except apsw.Error:
            title = _("Corrupted database")
            msg = _(
                "The library database at %s appears to be corrupted. Do "
                "you want calibre to try and rebuild it automatically? "
                "The rebuild may not be completely successful. "
                "If you say No, a new empty calibre library will be created."
            ) % force_unicode(self.library_path, filesystem_encoding)
            repair = question_dialog(self.splash_screen, title, msg, det_msg=traceback.format_exc())
            if repair and repair_library(self.library_path):
                db = LibraryDatabase(self.library_path)
        except:
            title = _("Bad database location")
            msg = _("Bad database location %r. Will start with " " a new, empty calibre library") % self.library_path
            error_dialog(self.splash_screen, title, msg, det_msg=traceback.format_exc(), show=True)

        self.initialize_db_stage2(db, None)
Ejemplo n.º 20
0
def get_metadata_from_reader(rdr):
    """Build a ``MetaInformation`` object from an opened reader.

    The reader's home page is parsed to find authors, publisher, ISBN,
    comments and cover. The title comes from ``rdr.title``, decoded with the
    encoding the reader reports, or ``cp1252`` when that encoding cannot be
    obtained or is not a known codec.
    """
    raw = rdr.GetFile(rdr.home)
    decoded = xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0]
    home = BeautifulSoup(decoded)

    title = rdr.title
    try:
        enc = rdr.GetEncoding()
        codecs.lookup(enc)  # raises when the codec is unknown
    except:
        enc = 'cp1252'
    mi = MetaInformation(force_unicode(title, enc), _get_authors(home))

    # Optional fields: set only when the extractor found something.
    optional_fields = (
        ('publisher', _get_publisher),
        ('isbn', _get_isbn),
        ('comments', _get_comments),
    )
    for field, extract in optional_fields:
        value = extract(home)
        if value:
            setattr(mi, field, value)

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
Ejemplo n.º 21
0
def pretty_script_or_style(container, child):
    """Re-indent the text content of a ``<script>`` or ``<style>`` element.

    Elements without text are left untouched. For tags whose name ends in
    ``style`` the text is first run through ``pretty_css``. The text is then
    dedented and every non-empty line is prefixed with the indent computed
    for the tag.
    """
    if not child.text:
        return
    indent = indent_for_tag(child)
    if child.tag.endswith('style'):
        child.text = force_unicode(pretty_css(container, '', child.text), 'utf-8')
    child.text = textwrap.dedent(child.text)
    # Blank lines stay blank instead of receiving trailing whitespace.
    indented = ((indent + line) if line else '' for line in child.text.splitlines())
    child.text = '\n' + '\n'.join(indented)
    set_indent(child, 'text', indent)
Ejemplo n.º 22
0
 def __init__(self, output, outheaders):
     """Record the length and an ETag for an already opened output file.

     The content length is the number of bytes between the current file
     position and the end of the file. The position is restored afterwards.
     The ETag is the SHA-1 of the file name followed by its modification
     time.

     :param output: An open file object with ``name`` and ``fileno()``.
     :param outheaders: Not used by this method.
     """
     self.output_file = output
     pos = output.tell()
     output.seek(0, os.SEEK_END)
     self.content_length = output.tell() - pos
     etag_source = force_unicode(output.name or '') + str(os.fstat(output.fileno()).st_mtime)
     # Hash functions operate on bytes. Encoding explicitly avoids an
     # implicit ASCII conversion that fails for non-ASCII file names.
     if not isinstance(etag_source, bytes):
         etag_source = etag_source.encode('utf-8')
     self.etag = hashlib.sha1(etag_source).hexdigest()
     output.seek(pos)
     self.accept_ranges = True
Ejemplo n.º 23
0
 def none_cmp(xx, yy):
     """Compare two rows on item 1, sorting None values last.

     Ties (including two None values) are broken by comparing item 2.
     String values are compared through ``sort_key``.
     """
     left, right = xx[1], yy[1]
     if left is not None and right is not None:
         if isinstance(left, basestring) and isinstance(right, basestring):
             left, right = sort_key(force_unicode(left)), sort_key(force_unicode(right))
         order = cmp(left, right)
         if order != 0:
             return order
         # No sort_key needed here, because defaults are ascii
         return cmp(xx[2], yy[2])
     if left is None and right is None:
         # same as above -- no sort_key needed here
         return cmp(xx[2], yy[2])
     # Exactly one side is None: it sorts after the other.
     return 1 if left is None else -1
Ejemplo n.º 24
0
def categories(ctx, rd, library_id):
    '''
    Return the list of top-level categories as a list of dictionaries. Each
    dictionary is of the form::
        {
        'name': Display Name,
        'url':URL that gives the JSON object corresponding to all entries in this category,
        'icon': URL to icon of this category,
        'is_category': False for the All Books and Newest categories, True for everything else
        }

    The returned list starts with the Newest and All books entries, followed
    by the real categories sorted by display name.
    '''
    db = get_db(ctx, rd, library_id)
    with db.safe_read_lock:
        ans = {}
        # NOTE: this local shadows the function's own name inside the body.
        categories = ctx.get_categories(rd, db)
        category_meta = db.field_metadata
        library_id = db.server_library_id

        def getter(x):
            return category_meta[x]['name']

        displayed_custom_fields = custom_fields_to_display(db)

        for category in sorted(categories, key=lambda x: sort_key(getter(x))):
            # Skip empty categories
            if len(categories[category]) == 0:
                continue
            if category in ('formats', 'identifiers'):
                continue
            meta = category_meta.get(category, None)
            if meta is None:
                continue
            if category_meta.is_ignorable_field(category) and \
                        category not in displayed_custom_fields:
                continue
            display_name = meta['name']
            if category.startswith('@'):
                # Keep only the part before the first '.', so names sharing
                # that part collapse into a single entry keyed by it.
                category = category.partition('.')[0]
                display_name = category[1:]
            url = force_unicode(category)
            icon = category_icon(category, meta)
            ans[url] = (display_name, icon)

        ans = [{'url':k, 'name':v[0], 'icon':v[1], 'is_category':True}
                for k, v in ans.iteritems()]
        ans.sort(key=lambda x: sort_key(x['name']))
        # Each entry is inserted at the front, so the last one listed here
        # ends up first in the result.
        for name, url, icon in [
                (_('All books'), 'allbooks', 'book.png'),
                (_('Newest'), 'newest', 'forward.png'),
                ]:
            ans.insert(0, {'name':name, 'url':url, 'icon':icon,
                'is_category':False})

        # Replace the bare names and icon names with full URLs.
        for c in ans:
            # 'category' is fetched from the module globals because the local
            # name 'category' is the loop variable above.
            c['url'] = ctx.url_for(globals()['category'], encoded_name=encode_name(c['url']), library_id=library_id)
            c['icon'] = ctx.url_for(get_icon, which=c['icon'])

        return ans
Ejemplo n.º 25
0
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

    # fallback for book_title: the stream's file name without its extension
    if book_title:
        book_title = unicode(book_title)
    else:
        file_name = os.path.basename(getattr(stream, 'name', _('Unknown')))
        book_title = force_unicode(os.path.splitext(file_name)[0])
    mi = MetaInformation(book_title, authors)

    # Every remaining field is optional: a parser that fails is skipped so
    # that the other fields are still read. Timestamp and uuid are not
    # parsed here.
    optional_parsers = (
        _parse_cover,
        _parse_comments,
        _parse_tags,
        _parse_series,
        _parse_isbn,
        _parse_publisher,
        _parse_pubdate,
        _parse_language,
    )
    for parse in optional_parsers:
        try:
            parse(root, mi)
        except:
            pass

    return mi
Ejemplo n.º 26
0
    def debug_managed_device_detection(self, devices_on_system, output):
        """Try to detect and open an MTP device, writing a report to ``output``.

        :param devices_on_system: Not used by this method.
        :param output: File-like object that receives the diagnostic text.
        :return: True if a device is already connected or one was opened
            successfully, False otherwise.
        """
        import pprint
        # All diagnostics go to ``output`` rather than stdout.
        p = partial(prints, file=output)
        if self.currently_connected_pnp_id is not None:
            return True
        if self.wpd_error:
            p('Cannot detect MTP devices')
            p(force_unicode(self.wpd_error))
            return False
        try:
            pnp_ids = frozenset(self.wpd.enumerate_devices())
        except:
            p("Failed to get list of PNP ids on system")
            p(traceback.format_exc())
            return False

        if not pnp_ids:
            p('The Windows WPD service says there are no portable devices connected')
            return False

        p('List of WPD PNP ids:')
        p(pprint.pformat(list(pnp_ids)))

        # Try each device in turn and stop at the first one that opens.
        for pnp_id in pnp_ids:
            try:
                data = self.wpd.device_info(pnp_id)
            except:
                p('Failed to get data for device:', pnp_id)
                p(traceback.format_exc())
                continue
            protocol = data.get('protocol', '').lower()
            # Only devices whose protocol string starts with "mtp:" are considered.
            if not protocol.startswith('mtp:'):
                continue
            p('MTP device:', pnp_id)
            p(pprint.pformat(data))
            if not self.is_suitable_wpd_device(data):
                p('Not a suitable MTP device, ignoring\n')
                continue
            p('\nTrying to open:', pnp_id)
            try:
                self.open(pnp_id, 'debug-detection')
            except BlacklistedDevice:
                p('This device has been blacklisted by the user')
                continue
            except:
                p('Open failed:')
                p(traceback.format_exc())
                continue
            break
        # A truthy currently_connected_pnp_id at this point is treated as a
        # successful open.
        if self.currently_connected_pnp_id:
            p('Opened', self.current_friendly_name, 'successfully')
            p('Device info:')
            p(pprint.pformat(self.dev.data))
            self.post_yank_cleanup()
            return True
        p('No suitable MTP devices found')
        return False
Ejemplo n.º 27
0
    def __init__(self, oeb_metadata=None):
        """Collect the title, author string and tags from OEB metadata.

        :param oeb_metadata: Object exposing ``title``, ``creator`` and
            ``subject`` sequences, or None. With None (or empty sequences)
            the title and author stay at the translated ``Unknown`` and the
            tags stay empty.
        """
        from calibre import force_unicode
        from calibre.ebooks.metadata import authors_to_string

        title = _(u"Unknown")
        author = _(u"Unknown")
        tags = u""

        if oeb_metadata is not None:
            if len(oeb_metadata.title) > 0:
                title = oeb_metadata.title[0].value
            if len(oeb_metadata.creator) > 0:
                author = authors_to_string([creator.value for creator in oeb_metadata.creator])
            if oeb_metadata.subject:
                tags = u", ".join(unicode(subject) for subject in oeb_metadata.subject)

        self.tags = tags
        self.title = force_unicode(title)
        self.author = force_unicode(author)
Ejemplo n.º 28
0
    def __init__(self, mi=None):
        """Collect the title, author string and tags from a metadata object.

        :param mi: Object exposing ``title``, ``authors`` and ``tags``, or
            None. It is kept as ``self.mi``. Missing or empty values leave
            the title and author at the translated ``Unknown`` and the tags
            empty.
        """
        from calibre import force_unicode
        from calibre.ebooks.metadata import authors_to_string
        title = _(u'Unknown')
        author = _(u'Unknown')
        tags = u''

        if mi is not None:
            title = mi.title or title
            if mi.authors:
                author = authors_to_string(mi.authors)
            if mi.tags:
                tags = u', '.join(mi.tags)

        self.mi = mi
        self.tags = tags
        self.title = force_unicode(title)
        self.author = force_unicode(author)
Ejemplo n.º 29
0
 def choose_loc(self, *args):
     """Ask the user for a library folder and show it in the location field.

     When ``get_portable_base()`` returns a folder, the chooser starts there
     and is called with ``no_save_dir=True``. Nothing changes if the user
     cancels.
     """
     base = get_portable_base()
     title = _("Choose location for calibre library")
     if base is not None:
         name = force_unicode("choose library loc at" + base, filesystem_encoding)
         loc = choose_dir(self, name, title, default_dir=base, no_save_dir=True)
     else:
         loc = choose_dir(self, "choose library location", title)
     if loc is not None:
         self.location.setText(loc)
Ejemplo n.º 30
0
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    """Run an external optimizer on ``file_path`` and replace the file on success.

    The optimizer writes to a temporary file created next to ``file_path``.
    That file replaces the original only when the process exits with status 0
    and produced at least one byte of output.

    :param file_path: The file to optimize.
    :param cmd: The command line as a list. When ``as_filter`` is false, the
        entry equal to ``True`` is replaced by the input file name and the
        entry equal to ``False`` by the output file name. The caller's list
        is not modified.
    :param as_filter: When true, the input is piped to the process on stdin
        and its stdout is captured into the temporary file.
    :param input_data: Optional file-like object to use as the input in
        filter mode instead of opening ``file_path``.
    :return: The process's diagnostic output as text when the optimization
        failed or produced no output, otherwise None.
    """
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    # Work on a copy so that substituting the placeholders below does not
    # alter the list owned by the caller.
    cmd = list(cmd)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            cmd[cmd.index(q)] = r
        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        creationflags = 0x08 if iswindows else 0
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin, creationflags=creationflags)
        # In filter mode stdout carries the result, so diagnostics are read
        # from stderr; otherwise stderr was merged into stdout.
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        if p.wait() != 0:
            return raw
        else:
            if as_filter:
                # The process has exited, but the copier threads may still be
                # flushing data into the output file. Wait for them so the
                # size check and rename below see the complete output.
                outw.join(60.0)
                inw.join(60.0)
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                return raw
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        # After a successful rename the temporary file no longer exists.
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
Ejemplo n.º 31
0
def compile_fast(
    data,
    filename=None,
    beautify=True,
    private_scope=True,
    libdir=None,
    omit_baselib=False,
    js_version=None,
):
    '''
    Compile ``data``, using the external compiler when one is available.

    The module-level ``has_external_compiler`` value is filled in lazily by
    ``detect_external_compiler()``. When it is falsy the call is delegated to
    ``compile_pyj()`` (with ``filename`` defaulting to ``'<stdin>'`` and
    ``js_version`` defaulting to 6). Otherwise that value is executed as a
    program with a ``compile`` sub-command, ``data`` is piped to its stdin
    (encoded as UTF-8 if it is not already bytes) and its stdout, decoded as
    UTF-8, is returned.

    Raises ``CompileFailure`` carrying the program's stderr when the external
    compiler exits with a non-zero status.
    '''
    global has_external_compiler
    if has_external_compiler is None:
        has_external_compiler = detect_external_compiler()
    if not has_external_compiler:
        return compile_pyj(
            data, filename or '<stdin>', beautify, private_scope, libdir,
            omit_baselib, js_version or 6)

    cmdline = [has_external_compiler, 'compile', '--cache-dir', module_cache_dir()]
    if libdir:
        cmdline.extend(('--import-path', libdir))
    # Each boolean option maps onto a single command line switch
    for enabled, switch in (
        (not beautify, '--uglify'),
        (not private_scope, '--bare'),
        (omit_baselib, '--omit-baselib'),
    ):
        if enabled:
            cmdline.append(switch)
    if js_version:
        cmdline.append('--js-version={}'.format(js_version))
    if filename:
        cmdline.extend(('--filename-for-stdin', filename))

    payload = data if isinstance(data, bytes) else data.encode('utf-8')
    proc = subprocess.Popen(
        cmdline, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
        stderr=subprocess.PIPE)
    js, stderr = proc.communicate(payload)
    if proc.wait() != 0:
        raise CompileFailure(force_unicode(stderr, 'utf-8'))
    return js.decode('utf-8')
Ejemplo n.º 32
0
 def __init__(self, id, title, url, author, summary, published, content):
     '''
     Store the cleaned up fields of a single article.

     ``title`` falls back to the translated string 'Unknown' when it is
     empty or not a string; it is converted to unicode, stripped of invalid
     XML characters, has its entities expanded (best effort) and finally
     has ASCII control characters removed. ``author`` and ``summary`` are
     decoded as UTF-8 (with replacement) when they are not already unicode.
     ``summary`` is kept as-is in ``self.summary`` and, when it contains
     markup, a text-only rendering is stored in ``self.text_summary``.
     ``published`` is stored as ``self.date`` and converted into
     ``self.utctime`` and ``self.localtime``.
     '''
     from lxml import html
     self.downloaded = False
     self.id = id
     if not title or not isinstance(title, string_or_bytes):
         title = _('Unknown')
     title = force_unicode(title, 'utf-8')
     self._title = clean_xml_chars(title).strip()
     try:
         self._title = re.sub(r'&(\S+?);',
             entity_to_unicode, self._title)
     except Exception:
         # Best effort: keep the title with unexpanded entities, but do
         # not swallow KeyboardInterrupt/SystemExit like a bare except
         pass
     self._title = clean_ascii_chars(self._title)
     self.url = url
     self.toc_thumbnail = None
     if author and not isinstance(author, unicode_type):
         author = author.decode('utf-8', 'replace')
     if summary and not isinstance(summary, unicode_type):
         summary = summary.decode('utf-8', 'replace')
     summary = clean_xml_chars(summary) if summary else summary
     self.summary = summary
     if summary and '<' in summary:
         # The summary looks like markup, reduce it to plain text
         try:
             s = html.fragment_fromstring(summary, create_parent=True)
             summary = html.tostring(s, method='text', encoding='unicode')
         except Exception:
             print('Failed to process article summary, deleting:')
             print(summary.encode('utf-8'))
             traceback.print_exc()
             summary = ''
     self.text_summary = clean_ascii_chars(summary)
     # Assigned once, after decoding (the original also stored the raw
     # value earlier, only to overwrite it here)
     self.author = author
     self.content = content
     self.date = published
     self.utctime = dt_factory(self.date, assume_utc=True, as_utc=True)
     self.localtime = self.utctime.astimezone(local_tz)
     self._formatted_date = None
Ejemplo n.º 33
0
    def __init__(self, parent=None):
        '''
        Create the child widgets: one check box per day of the week, laid
        out column by column on a two row grid, plus a time editor with
        its label.
        '''
        Base.__init__(self, parent)
        self.days = [
            QCheckBox(force_unicode(calendar.day_abbr[day]), self)
            for day in range(7)]
        grid = self.l
        for index, checkbox in enumerate(self.days):
            column, line = divmod(index, 2)
            grid.addWidget(checkbox, line, column, 1, 1)

        self.time = time_edit = QTimeEdit(self)
        time_edit.setDisplayFormat('hh:mm AP')
        if canonicalize_lang(get_lang()) in {'deu', 'nds'}:
            # These languages get a 24 hour display instead of AM/PM
            time_edit.setDisplayFormat('HH:mm')
        self.hl = time_row = QHBoxLayout()
        self.l1 = label = QLabel(_('&Download after:'))
        label.setBuddy(time_edit)
        time_row.addWidget(label)
        time_row.addWidget(time_edit)
        grid.addLayout(time_row, 1, 3, 1, 1)
        self.initialize()
Ejemplo n.º 34
0
def find_programs(extensions):
    '''
    Find the applications that declare support for the given file
    extensions in their .desktop files.

    The directories searched are ``$XDG_DATA_HOME`` (default
    ``~/.local/share``) followed by the entries of ``$XDG_DATA_DIRS``
    (default ``/usr/local/share/:/usr/share/``); in each one the
    ``applications`` sub-directory is walked for ``*.desktop`` files. When
    the same file name occurs in several directories, the one from the
    directory listed first is used.

    :param extensions: Iterable of file extensions (without the leading dot)
    :return: List of parsed desktop entries whose ``MimeType`` intersects the
        MIME types guessed for ``extensions``, sorted by ``Name``. The
        ``Icon`` entry is resolved via ``find_icons()`` when it is not an
        absolute path and dropped when it cannot be resolved; ``Name``,
        ``GenericName`` and ``Comment`` are localized.
    '''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # The map is keyed by file name, so the membership test must
                # use the file name too. Testing the full path (as was done
                # previously) never matched, which let directories later in
                # the search path override earlier ones.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for path in desktop_files.values():
        try:
            data = parse_desktop_file(path)
        except Exception:
            # A single unparseable file must not abort the whole scan
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                # Either there was no icon or it was never resolved to a path
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d:sort_key(d.get('Name')))
    return ans
Ejemplo n.º 35
0
 def save_history(self):
     '''
     Rebuild the list of entries with the current text first and without
     duplicates, show that list in the widget and persist it in
     ``history`` under ``self.store_name``.

     If storing fails with ValueError, it is retried once with the
     entries passed through ``clean_ascii_chars``; a second failure is
     silently ignored.
     '''
     current = str(self.currentText())
     entries = [current] if current else []
     for row in range(self.count()):
         text = str(self.itemText(row))
         if text not in entries:
             entries.append(text)

     # Repopulate the widget without emitting change notifications
     self.blockSignals(True)
     self.clear()
     self.addItems(entries)
     self.setEditText(current)
     self.blockSignals(False)

     try:
         history.set(self.store_name, entries)
     except ValueError:
         from calibre.utils.cleantext import clean_ascii_chars
         entries = [clean_ascii_chars(force_unicode(entry)) for entry in entries]
         try:
             history.set(self.store_name, entries)
         except ValueError:
             pass
Ejemplo n.º 36
0
 def __init__(self, libraries):
     '''
     Build ``self.lmap``, a mapping of library id to absolute library
     path, from the paths in ``libraries``.

     Duplicate paths and paths at which no library exists are skipped.
     The id is the base name of the path with spaces replaced by
     underscores; a number is appended when that id is already taken.
     ``self.default_library`` is set only when the very first path in
     ``libraries`` is an existing library. One empty ``OrderedDict`` per
     library is created in each of the three cache maps.
     '''
     self.lock = Lock()
     self.lmap = {}
     visited = set()
     for position, path in enumerate(os.path.abspath(loc) for loc in libraries):
         if path in visited:
             continue
         visited.add(path)
         if not LibraryDatabase.exists_at(path):
             continue
         base_id = force_unicode(
             os.path.basename(path), filesystem_encoding).replace(' ', '_')
         library_id, suffix = base_id, 0
         while library_id in self.lmap:
             # Disambiguate libraries that share a folder name
             suffix += 1
             library_id = base_id + '%d' % suffix
         if position == 0:
             self.default_library = library_id
         self.lmap[library_id] = path
     self.category_caches = {lid: OrderedDict() for lid in self.lmap}
     self.search_caches = {lid: OrderedDict() for lid in self.lmap}
     self.tag_browser_caches = {lid: OrderedDict() for lid in self.lmap}
Ejemplo n.º 37
0
def serialize_collection(mapping_of_recipe_classes):
    '''
    Serialize a mapping of urn to recipe class into a UTF-8 encoded,
    pretty printed XML document with an XML declaration.

    Recipes are emitted in order of their ``title`` attribute (classes
    without one sort as 'zzz'). A recipe whose serialization raises is
    reported via a printed traceback and left out. The root element gets a
    ``count`` attribute holding the number of serialized recipes.
    '''
    def title_of(urn):
        recipe_class = mapping_of_recipe_classes[urn]
        return force_unicode(getattr(recipe_class, 'title', 'zzz'), 'utf-8')

    collection = E.recipe_collection()
    for urn in sorted(mapping_of_recipe_classes.keys(), key=title_of):
        try:
            recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
        except:
            import traceback
            traceback.print_exc()
        else:
            collection.append(recipe)
    collection.set('count', str(len(collection)))
    return etree.tostring(
        collection, encoding='utf-8', xml_declaration=True, pretty_print=True)
Ejemplo n.º 38
0
def create_service(desc,
                   type,
                   port,
                   properties,
                   add_hostname,
                   use_ip_address=None):
    '''
    Create the Zeroconf ``ServiceInfo`` object for a service.

    ``port`` is converted to an int. The first label of the local host
    name ('Unknown' if it cannot be determined) is used as the server name
    and, when ``add_hostname`` is true, appended to ``desc`` on a best
    effort basis. The advertised address is ``use_ip_address`` if given,
    otherwise the result of ``get_external_ip()``.

    Raises ValueError when no IP address can be determined.
    '''
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(socket.gethostname())).partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Try the most informative suffix first, then a shorter one, and
        # leave the description untouched if neither can be applied
        for template, values in (
            (' (on %s port %d)', (hostname, port)),
            (' (on %s)', hostname),
        ):
            try:
                desc += template % values
            except:
                continue
            break

    local_ip = use_ip_address or get_external_ip()
    if not local_ip:
        raise ValueError(
            'Failed to determine local IP address to advertise via BonJour')

    service_type = type + '.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(
        service_type,
        desc + '.' + service_type,
        address=socket.inet_aton(local_ip),
        port=port,
        properties=properties,
        server=hostname + '.local.')
Ejemplo n.º 39
0
    def initialize_db(self):
        '''
        Open the library database at ``self.library_path`` and hand the
        result (or None if it could not be opened) to
        ``initialize_db_stage2()``.

        On an ``apsw.Error`` the user is asked whether the library should
        be rebuilt. On Windows the rebuild is started via
        ``windows_repair()``, the application is asked to quit and stage 2
        is not run. Any other failure to open the database is reported via
        ``show_error()``.
        '''
        from calibre.db.legacy import LibraryDatabase
        self.timed_print('Initializing db...')
        db = None
        try:
            db = LibraryDatabase(self.library_path)
        except apsw.Error:
            with self.app:
                self.hide_splash_screen()
                repair = question_dialog(
                    self.splash_screen,
                    _('Corrupted database'),
                    _('The library database at %s appears to be corrupted. Do '
                      'you want calibre to try and rebuild it automatically? '
                      'The rebuild may not be completely successful. '
                      'If you say No, a new empty calibre library will be created.'
                      ) % force_unicode(self.library_path, filesystem_encoding),
                    det_msg=traceback.format_exc())
            if repair and iswindows:
                # On some windows systems the existing db file gets locked
                # by something when running restore from the main process.
                # So run the restore in a separate process.
                windows_repair(self.library_path)
                self.app.quit()
                return
            if repair and repair_library(self.library_path):
                db = LibraryDatabase(self.library_path)
        except:
            self.show_error(
                _('Bad database location'),
                _('Bad database location %r. Will start with '
                  ' a new, empty calibre library') % self.library_path,
                det_msg=traceback.format_exc())

        self.initialize_db_stage2(db, None)
Ejemplo n.º 40
0
 def item(i):
     '''
     Render the category item ``i`` as an HTML snippet.

     ``prefix``, ``datatype`` and ``category`` are taken from the
     enclosing scope. The link points to ``/browse/matches/<category>/<id>``
     where the id is ``i.id`` or, when that is None, the hex encoded UTF-8
     name of the item.
     '''
     rating_html, rating_title = render_rating(i.avg_rating, prefix)
     raw_name = i.sort if i.use_sort_as_name else i.name
     label = xml(raw_name)
     if datatype == 'rating':
         label = xml(_('%d stars') % int(i.avg_rating))

     item_id = i.id
     if item_id is None:
         item_id = hexlify(force_unicode(raw_name).encode('utf-8'))
     item_id = xml(str(item_id))

     count_text = ('[' + _('%d books') % i.count + ']') if i.count > 0 else ''
     href = '/browse/matches/%s/%s' % (quote(i.category or category), quote(item_id))

     return (
         u'<div title="{4}" class="category-item">'
         '<div class="category-name">'
         '<a href="{5}{3}" title="{4}">{0}</a></div>'
         '<div>{1}</div>'
         '<div>{2}</div></div>'
     ).format(xml(label), rating_html, xml(count_text), xml(href, True),
              rating_title, prefix)
Ejemplo n.º 41
0
def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=True, rating_font='Liberation Serif', rtl=False):
    '''
    Render the metadata in ``mi`` as an HTML table with one row per field.

    :param mi: The metadata object to render. It is treated as a book on a
        device when it has no ``id`` attribute.
    :param field_list: Iterable of ``(field, display)`` pairs; only fields
        whose ``display`` is true are rendered. Defaults to
        ``get_field_list(mi)``.
    :param default_author_link: Used for authors that have no link of their
        own. The value ``'search-calibre'`` produces a library search link,
        any other value is used as a template that is formatted with the
        ``author`` and ``author_sort`` values (spaces replaced by ``+``).
    :param use_roman_numbers: Passed to ``fmt_sidx()`` as ``use_roman`` when
        rendering series indices.
    :param rating_font: Font family used for the cell showing rating stars.
    :param rtl: Selects the text direction of the table and which margin is
        set to ``auto``.
    :return: A tuple ``(table_html, comment_fields)`` where
        ``comment_fields`` is the list of comments-type values converted via
        ``comments_to_html()``; those values are not part of the table.
    '''
    if field_list is None:
        field_list = get_field_list(mi)
    ans = []
    comment_fields = []
    isdevice = not hasattr(mi, 'id')
    row = u'<td class="title">%s</td><td class="value">%s</td>'
    # p escapes text content, a escapes attribute values
    p = prepare_string_for_xml
    a = partial(prepare_string_for_xml, attribute=True)
    book_id = getattr(mi, 'id', 0)

    for field in (field for field, display in field_list if display):
        try:
            metadata = mi.metadata_for_field(field)
        except:
            continue
        if not metadata:
            continue
        if field == 'sort':
            field = 'title_sort'
        # For custom bool/int/float columns only None counts as "no value"
        if metadata['is_custom'] and metadata['datatype'] in {'bool', 'int', 'float'}:
            isnull = mi.get(field) is None
        else:
            isnull = mi.is_null(field)
        if isnull:
            continue
        name = metadata['name']
        if not name:
            name = field
        name += ':'
        if metadata['datatype'] == 'comments' or field == 'comments':
            # Comments are returned separately instead of as a table row
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                comment_fields.append(comments_to_html(val))
        elif metadata['datatype'] == 'rating':
            val = getattr(mi, field)
            if val:
                # One star is shown for every two rating points
                val = val/2.0
                ans.append((field,
                    u'<td class="title">%s</td><td class="rating value" '
                    'style=\'font-family:"%s"\'>%s</td>'%(
                        name, rating_font, u'\u2605'*int(val))))
        elif metadata['datatype'] == 'composite':
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                if metadata['display'].get('contains_html', False):
                    ans.append((field, row % (name, comments_to_html(val))))
                else:
                    if not metadata['is_multiple']:
                        val = '<a href="%s" title="%s">%s</a>' % (
                              search_href(field, val),
                              _('Click to see books with {0}: {1}').format(metadata['name'], a(val)), p(val))
                    else:
                        # One search link per item of the multi-valued field
                        all_vals = [v.strip()
                            for v in val.split(metadata['is_multiple']['list_to_ui']) if v.strip()]
                        links = ['<a href="%s" title="%s">%s</a>' % (
                            search_href(field, x), _('Click to see books with {0}: {1}').format(
                                     metadata['name'], a(x)), p(x)) for x in all_vals]
                        val = metadata['is_multiple']['list_to_ui'].join(links)
                    ans.append((field, row % (name, val)))
        elif field == 'path':
            if mi.path:
                path = force_unicode(mi.path, filesystem_encoding)
                # Device books link to their path, library books to their id
                scheme = u'devpath' if isdevice else u'path'
                url = prepare_string_for_xml(path if isdevice else
                        unicode(book_id), True)
                pathstr = _('Click to open')
                extra = ''
                if isdevice:
                    durl = url
                    if durl.startswith('mtp:::'):
                        # Show only what follows the first two ':::'
                        # separated components
                        durl = ':::'.join((durl.split(':::'))[2:])
                    extra = '<br><span style="font-size:smaller">%s</span>'%(
                            prepare_string_for_xml(durl))
                link = u'<a href="%s:%s" title="%s">%s</a>%s' % (scheme, url,
                        prepare_string_for_xml(path, True), pathstr, extra)
                ans.append((field, row % (name, link)))
        elif field == 'formats':
            if isdevice:
                continue
            path = mi.path or ''
            bpath = ''
            if path:
                # bpath is made of the last two components of the book path
                h, t = os.path.split(path)
                bpath = os.sep.join((os.path.basename(h), t))
            data = ({
                'fmt':x, 'path':a(path or ''), 'fname':a(mi.format_files.get(x, '')),
                'ext':x.lower(), 'id':book_id, 'bpath':bpath, 'sep':os.sep
            } for x in mi.formats)
            fmts = [u'<a data-full-path="{path}{sep}{fname}.{ext}" title="{bpath}{sep}{fname}.{ext}" href="format:{id}:{fmt}">{fmt}</a>'.format(**x)
                    for x in data]
            ans.append((field, row % (name, u', '.join(fmts))))
        elif field == 'identifiers':
            urls = urls_from_identifiers(mi.identifiers)
            links = [u'<a href="%s" title="%s:%s" data-item="%s">%s</a>' % (a(url), a(id_typ), a(id_val), a(item_data(field, id_typ, book_id)), p(namel))
                    for namel, id_typ, id_val, url in urls]
            links = u', '.join(links)
            if links:
                ans.append((field, row % (_('Ids')+':', links)))
        elif field == 'authors' and not isdevice:
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                # An author specific link takes precedence over the default
                if mi.author_link_map[aut]:
                    link = lt = mi.author_link_map[aut]
                elif default_author_link:
                    if default_author_link == 'search-calibre':
                        link = search_href('authors', aut)
                        lt = a(_('Search the calibre library for books by %s') % aut)
                    else:
                        vals = {'author': aut.replace(' ', '+')}
                        try:
                            vals['author_sort'] =  mi.author_sort_map[aut].replace(' ', '+')
                        except:
                            vals['author_sort'] = aut.replace(' ', '+')
                        link = lt = a(formatter.safe_format(default_author_link, vals, '', vals))
                aut = p(aut)
                if link:
                    authors.append(u'<a calibre-data="authors" title="%s" href="%s">%s</a>'%(lt, link, aut))
                else:
                    authors.append(aut)
            ans.append((field, row % (name, u' & '.join(authors))))
        elif field == 'languages':
            if not mi.languages:
                continue
            names = filter(None, map(calibre_langcode_to_name, mi.languages))
            ans.append((field, row % (name, u', '.join(names))))
        elif field == 'publisher':
            if not mi.publisher:
                continue
            val = '<a href="%s" title="%s" data-item="%s">%s</a>' % (
                search_href('publisher', mi.publisher), _('Click to see books with {0}: {1}').format(metadata['name'], a(mi.publisher)),
                a(item_data('publisher', mi.publisher, book_id)), p(mi.publisher))
            ans.append((field, row % (name, val)))
        elif field == 'title':
            # otherwise title gets metadata['datatype'] == 'text'
            # treatment below with a click to search link (which isn't
            # too bad), and a right-click 'Delete' option to delete
            # the title (which is bad).
            val = mi.format_field(field)[-1]
            ans.append((field, row % (name, val)))
        else:
            val = mi.format_field(field)[-1]
            if val is None:
                continue
            val = p(val)
            if metadata['datatype'] == 'series':
                sidx = mi.get(field+'_index')
                if sidx is None:
                    sidx = 1.0
                # Search using the first search term, falling back to the
                # field name when there are none
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                series = getattr(mi, field)
                val = _(
                    '%(sidx)s of <a href="%(href)s" title="%(tt)s" data-item="%(data)s">'
                    '<span class="%(cls)s">%(series)s</span></a>') % dict(
                        sidx=fmt_sidx(sidx, use_roman=use_roman_numbers), cls="series_name",
                        series=p(series), href=search_href(st, series),
                        data=a(item_data(field, series, book_id)),
                        tt=p(_('Click to see books in this series')))
            elif metadata['datatype'] == 'datetime':
                aval = getattr(mi, field)
                if is_date_undefined(aval):
                    continue
            elif metadata['datatype'] == 'text' and metadata['is_multiple']:
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                all_vals = mi.get(field)
                if field == 'tags':
                    all_vals = sorted(all_vals, key=sort_key)
                links = ['<a href="%s" title="%s" data-item="%s">%s</a>' % (
                    search_href(st, x), _('Click to see books with {0}: {1}').format(
                        metadata['name'], a(x)), a(item_data(field, x, book_id)), p(x))
                         for x in all_vals]
                val = metadata['is_multiple']['list_to_ui'].join(links)
            elif metadata['datatype'] == 'text' or metadata['datatype'] == 'enumeration':
                # text/is_multiple handled above so no need to add the test to the if
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                val = '<a href="%s" title="%s" data-item="%s">%s</a>' % (
                    search_href(st, val), a(_('Click to see books with {0}: {1}').format(metadata['name'], val)),
                    a(item_data(field, val, book_id)), p(val))

            ans.append((field, row % (name, val)))

    # Collections are appended as a final row when mi provides them
    dc = getattr(mi, 'device_collections', [])
    if dc:
        dc = u', '.join(sorted(dc, key=sort_key))
        ans.append(('device_collections',
            row % (_('Collections')+':', dc)))

    def classname(field):
        # CSS class derived from the field's datatype, 'text' if unknown
        try:
            dt = mi.metadata_for_field(field)['datatype']
        except:
            dt = 'text'
        return 'datatype_%s'%dt

    # '#' in field names is replaced by '_' in the row id
    ans = [u'<tr id="%s" class="%s">%s</tr>'%(fieldl.replace('#', '_'),
        classname(fieldl), html) for fieldl, html in ans]
    # print '\n'.join(ans)
    direction = 'rtl' if rtl else 'ltr'
    margin = 'left' if rtl else 'right'
    return u'<table class="fields" style="direction: %s; margin-%s:auto">%s</table>'%(direction, margin, u'\n'.join(ans)), comment_fields
Ejemplo n.º 42
0
def icu_collator(s1, s2):
    '''
    Three-way comparison of ``s1`` and ``s2`` by their sort keys, after
    converting both to unicode (byte strings are read as UTF-8).
    '''
    first_key = sort_key(force_unicode(s1, 'utf-8'))
    second_key = sort_key(force_unicode(s2, 'utf-8'))
    return cmp(first_key, second_key)
Ejemplo n.º 43
0
 def library_moved(self,
                   newloc,
                   copy_structure=False,
                   call_close=True,
                   allow_rebuild=False):
     '''
     Switch the GUI over to the library at ``newloc``.

     :param newloc: Path of the library to open; nothing is done when it
         is None.
     :param copy_structure: When true, the preferences of the currently
         open database are passed as ``default_prefs`` to the new one.
     :param call_close: When true, the previously open database is closed
         after the switch.
     :param allow_rebuild: When true and opening the database fails with
         ``apsw.Error``, the user is offered a rebuild instead of the
         error being propagated. If the user declines or the rebuild
         fails, the method returns without switching libraries.
     '''
     if newloc is None:
         return
     default_prefs = None
     try:
         olddb = self.library_view.model().db
         if copy_structure:
             default_prefs = olddb.prefs
     except:
         olddb = None
     try:
         db = LibraryDatabase(newloc, default_prefs=default_prefs)
     except apsw.Error:
         if not allow_rebuild:
             raise
         import traceback
         repair = question_dialog(
             self,
             _('Corrupted database'),
             _('The library database at %s appears to be corrupted. Do '
               'you want calibre to try and rebuild it automatically? '
               'The rebuild may not be completely successful.') %
             force_unicode(newloc, filesystem_encoding),
             det_msg=traceback.format_exc())
         if repair:
             from calibre.gui2.dialogs.restore_library import repair_library_at
             if repair_library_at(newloc, parent=self):
                 db = LibraryDatabase(newloc, default_prefs=default_prefs)
             else:
                 return
         else:
             return
     # Point every component that holds a reference to the database at
     # the new one
     if self.content_server is not None:
         self.content_server.set_database(db)
     self.library_path = newloc
     prefs['library_path'] = self.library_path
     self.book_on_device(None, reset=True)
     db.set_book_on_device_func(self.book_on_device)
     self.library_view.set_database(db)
     self.tags_view.set_database(db, self.alter_tb)
     self.library_view.model().set_book_on_device_func(self.book_on_device)
     self.status_bar.clear_message()
     self.search.clear()
     self.saved_search.clear()
     self.book_details.reset_info()
     # self.library_view.model().count_changed()
     # From here on use the database object held by the view's model
     db = self.library_view.model().db
     self.iactions['Choose Library'].count_changed(db.count())
     self.set_window_title()
     self.apply_named_search_restriction('')  # reset restriction to null
     self.saved_searches_changed(
         recount=False)  # reload the search restrictions combo box
     if db.prefs['virtual_lib_on_startup']:
         self.apply_virtual_library(db.prefs['virtual_lib_on_startup'])
     self.rebuild_vl_tabs()
     for action in self.iactions.values():
         action.library_changed(db)
     if olddb is not None:
         # A failure to close the old database is only reported, and
         # break_cycles() is called on it regardless of call_close
         try:
             if call_close:
                 olddb.close()
         except:
             import traceback
             traceback.print_exc()
         olddb.break_cycles()
     if self.device_connected:
         # Recompute which device books are in the new library and reset
         # the device views
         self.set_books_in_library(self.booklists(), reset=True)
         self.refresh_ondevice()
         self.memory_view.reset()
         self.card_a_view.reset()
         self.card_b_view.reset()
     self.set_current_library_information(current_library_name(),
                                          db.library_id, db.field_metadata)
     self.library_view.set_current_row(0)
     # Run a garbage collection now so that it does not freeze the
     # interface later
     gc.collect()
Ejemplo n.º 44
0
    def ajax_categories(self):
        '''
        Return the top-level categories as a list of dictionaries, each of
        the form::

            {
            'name': Display Name,
            'url':URL that gives the JSON object corresponding to all entries in this category,
            'icon': URL to icon of this category,
            'is_category': False for the All Books and Newest categories, True for everything else
            }

        The Newest and All books entries come first, followed by the real
        categories sorted by display name. Empty categories, the formats
        and identifiers categories and ignorable fields that are not among
        the displayed custom fields are left out.
        '''
        categories = self.categories_cache()
        category_meta = self.db.field_metadata
        displayed_custom_fields = custom_fields_to_display(self.db)

        def name_key(cat):
            return sort_key(category_meta[cat]['name'])

        collected = {}
        for category in sorted(categories, key=name_key):
            if len(categories[category]) == 0 or category in ('formats', 'identifiers'):
                continue
            meta = category_meta.get(category, None)
            if meta is None:
                continue
            if category_meta.is_ignorable_field(category) and \
                    category not in displayed_custom_fields:
                continue
            if category.startswith('@'):
                # Names starting with @ are cut at the first dot and shown
                # without the @
                category = category.partition('.')[0]
                display_name = category[1:]
            else:
                display_name = meta['name']
            url = force_unicode(category)
            icon = category_icon(category, meta)
            collected[url] = (display_name, icon)

        ans = [
            {'url': url, 'name': name, 'icon': icon, 'is_category': True}
            for url, (name, icon) in collected.iteritems()
        ]
        ans.sort(key=lambda entry: sort_key(entry['name']))

        # Each pseudo category is inserted at the front, so the last one
        # listed here ends up first
        pseudo_categories = (
            (_('All books'), 'allbooks', 'book.png'),
            (_('Newest'), 'newest', 'forward.png'),
        )
        for name, url, icon in pseudo_categories:
            ans.insert(0, {'name': name, 'url': url, 'icon': icon, 'is_category': False})

        for entry in ans:
            entry['url'] = category_url(self.opts.url_prefix, entry['url'])
            entry['icon'] = icon_url(self.opts.url_prefix, entry['icon'])

        return ans
Ejemplo n.º 45
0
    def extract_css_into_flows(self):
        '''
        Move all CSS into ``self.flows`` and rewrite the references to it
        as ``kindle:flow:`` URLs.

        Stylesheets from the manifest are appended to ``self.flows`` first.
        In every spine item, ``<link>`` elements pointing to a known
        stylesheet are redirected to the corresponding flow and every
        non-empty ``<style>`` element is replaced by a ``<link>`` element
        to a new flow holding its text. ``@import`` rules that point to a
        known stylesheet are rewritten as well. At the end, every flow
        that has a ``cssText`` attribute is replaced by that text as
        unicode.
        '''
        inlines = defaultdict(list)  # Ensure identical <style>s not repeated
        sheets = {}
        passthrough = getattr(self.opts, 'mobi_passthrough', False)

        # Map the href of each stylesheet to its index in self.flows
        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if not passthrough and not self.opts.expand_css and hasattr(
                        item.data, 'cssText'):
                    condense_sheet(sheet)
                sheets[item.href] = len(self.flows)
                self.flows.append(sheet)

        def fix_import_rules(sheet):
            # Rewrites, in place, the @import rules of sheet that point to
            # a known stylesheet. NOTE: item is not a parameter, it is
            # looked up in the enclosing scope at call time, i.e. it is
            # whatever item the calling loop is currently processing.
            # Returns True if any rule was changed.
            changed = False
            for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
                if rule.href:
                    href = item.abshref(rule.href)
                    idx = sheets.get(href, None)
                    if idx is not None:
                        idx = to_ref(idx)
                        rule.href = 'kindle:flow:%s?mime=text/css' % idx
                        changed = True
            return changed

        for item in self.oeb.spine:
            root = self.data(item)

            for link in XPath('//h:link[@href]')(root):
                href = item.abshref(link.get('href'))
                idx = sheets.get(href, None)
                if idx is not None:
                    idx = to_ref(idx)
                    link.set('href', 'kindle:flow:%s?mime=text/css' % idx)

            for tag in XPath('//h:style')(root):
                p = tag.getparent()
                idx = p.index(tag)
                raw = tag.text
                if not raw or not raw.strip():
                    # Empty <style> elements are simply removed
                    extract(tag)
                    continue
                sheet = cssutils.parseString(raw, validate=False)
                if fix_import_rules(sheet):
                    raw = force_unicode(sheet.cssText, 'utf-8')

                # Put a <link> where the <style> element was; its href is
                # filled in below once the flow index is known
                repl = etree.Element(XHTML('link'),
                                     type='text/css',
                                     rel='stylesheet')
                repl.tail = '\n'
                p.insert(idx, repl)
                extract(tag)
                inlines[raw].append(repl)

        # One flow per distinct <style> text, shared by all of its links
        for raw, elems in inlines.iteritems():
            idx = to_ref(len(self.flows))
            self.flows.append(raw)
            for link in elems:
                link.set('href', 'kindle:flow:%s?mime=text/css' % idx)

        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if hasattr(sheet, 'cssRules'):
                    fix_import_rules(sheet)

        # Serialize the parsed stylesheets that are still in the flows
        for i, sheet in enumerate(tuple(self.flows)):
            if hasattr(sheet, 'cssText'):
                self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
Ejemplo n.º 46
0
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    '''
    Run the external command ``cmd`` on the file at ``file_path`` and, on
    success, replace that file with the command's output.

    The output is first written to a temporary file created in the same
    directory as ``file_path``, which is then renamed over ``file_path``.

    :param file_path: Path to the file to process. It is made absolute and
        its directory is used as the working directory of the command.
    :param cmd: The command as a list of arguments. When ``as_filter`` is
        False the list must contain the placeholder ``True`` where the input
        file name goes and ``False`` where the output file name goes; both
        are replaced in place (the caller's list is modified) with base
        names relative to the working directory.
    :param as_filter: If True, the command is fed its input on stdin and its
        stdout is copied into the temporary file, using two daemon threads.
    :param input_data: Optional file-like object used as the input in filter
        mode. If falsy, ``file_path`` itself is opened and used.
    :return: The text output by the command if it exits with a non-zero
        code (stderr in filter mode, stdout merged with stderr otherwise), an
        error message if the output file is empty or missing, and ``None``
        on success.
    '''
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    ext = os.path.splitext(file_path)[1]
    # Fall back to .jpg when the file has no usable extension
    if not ext or len(ext) > 10 or not ext.startswith('.'):
        ext = '.jpg'
    fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
    try:
        if as_filter:
            # In filter mode we write the output ourselves, so keep the
            # descriptor open as a file object
            outf = os.fdopen(fd, 'wb')
        else:
            # The command writes to the file by name, only the name is needed
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            # Replace the first occurrence of the placeholder q in cmd with r
            cmd[cmd.index(q)] = r

        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [
                x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd
            ]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        # In filter mode stdout carries the data, so diagnostics have to be
        # read from a separate stderr pipe; otherwise they are merged into
        # stdout
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        # NOTE(review): 0x08 is presumably the Windows DETACHED_PROCESS
        # creation flag -- confirm
        creationflags = 0x08 if iswindows else 0
        p = subprocess.Popen(cmd,
                             cwd=cwd,
                             stdout=subprocess.PIPE,
                             stderr=stderr,
                             stdin=stdin,
                             creationflags=creationflags)
        # From here on stderr is the pipe the diagnostics are read from
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                # Copy everything from src to dest, closing both afterwards
                # even if the copy fails
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()

            # Feed the input and drain the output in background threads
            # while this thread reads the diagnostics below
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput',
                          target=copy,
                          args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        if p.wait() != 0:
            return raw
        else:
            if as_filter:
                # Wait (at most 60 seconds each) for the copier threads
                outw.join(60.0), inw.join(60.0)
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                return '%s returned a zero size image' % cmd[0]
            # Carry over the stat information of the original file, then
            # replace it with the new output
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        # Clean up the temporary file (already gone after a successful
        # rename) and any backup file; a missing file is not an error
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
        try:
            os.remove(outfile + '.bak')  # optipng creates these files
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
Ejemplo n.º 47
0
def sanitize(s):
    '''
    Return ``s`` as NFC normalized unicode text after passing it through
    clean_ascii_chars() and clean_xml_chars(). A falsy ``s`` is treated as
    the empty string.
    '''
    text = force_unicode(s or '')
    text = clean_ascii_chars(text)
    text = clean_xml_chars(text)
    return unicodedata.normalize('NFC', text)
Ejemplo n.º 48
0
def css_data(container, book_locale, result_data, *args):
    '''
    Gather usage information about the CSS rules and CSS classes of a book.

    All stylesheets in the container and all ``<style>`` elements of the
    spine documents are parsed with tinycss. The selector of every rule that
    applies to a spine document (via ``<link>`` or inline ``<style>``) is
    then run against that document to record which elements it matches.

    :param container: The book container. Its ``mime_map``, ``spine_names``,
        ``raw_data()``, ``parsed()`` and ``exists()`` members are used.
    :param book_locale: Not used by this function.
    :param result_data: A dictionary. Its ``'classes'`` key is set to a list
        of ``ClassEntry`` objects describing the usage of every class.
    :param args: Ignored.
    :return: A list of ``CSSEntry`` objects, one per CSS rule that was
        looked at.
    '''
    import tinycss
    from tinycss.css21 import RuleSet, ImportRule

    def css_rules(file_name, rules, sourceline=0):
        # Flatten parsed rules into a list of CSSRule objects and, for
        # @import rules that resolve to an existing file, the imported
        # file's name. Rules nested inside other rules are flattened
        # recursively.
        ans = []
        for rule in rules:
            if isinstance(rule, RuleSet):
                selector = rule.selector.as_css()
                ans.append(
                    CSSRule(
                        selector,
                        RuleLocation(file_name, sourceline + rule.line,
                                     rule.column)))
            elif isinstance(rule, ImportRule):
                import_name = safe_href_to_name(container, rule.uri, file_name)
                if import_name and container.exists(import_name):
                    ans.append(import_name)
            elif getattr(rule, 'rules', False):
                ans.extend(css_rules(file_name, rule.rules, sourceline))
        return ans

    parser = tinycss.make_full_parser()
    importable_sheets = {}
    html_sheets = {}
    spine_names = {name for name, is_linear in container.spine_names}
    style_path, link_path = XPath('//h:style'), XPath('//h:link/@href')

    for name, mt in iteritems(container.mime_map):
        if mt in OEB_STYLES:
            importable_sheets[name] = css_rules(
                name,
                parser.parse_stylesheet(container.raw_data(name)).rules)
        elif mt in OEB_DOCS and name in spine_names:
            html_sheets[name] = []
            for style in style_path(container.parsed(name)):
                if style.get('type', 'text/css') == 'text/css' and style.text:
                    # Offset rule line numbers by the position of the
                    # <style> tag so they refer to lines in the HTML file
                    html_sheets[name].append(
                        css_rules(
                            name,
                            parser.parse_stylesheet(
                                force_unicode(style.text, 'utf-8')).rules,
                            style.sourceline - 1))

    rule_map = defaultdict(lambda: defaultdict(list))

    def rules_in_sheet(sheet):
        # Yield all CSSRule objects of a sheet, following @import entries
        # into the sheets they name
        for rule in sheet:
            if isinstance(rule, CSSRule):
                yield rule
            else:  # @import rule
                isheet = importable_sheets.get(rule)
                if isheet is not None:
                    for irule in rules_in_sheet(isheet):
                        yield irule

    def sheets_for_html(name, root):
        # Yield the known stylesheets linked to from the document
        for href in link_path(root):
            tname = safe_href_to_name(container, href, name)
            sheet = importable_sheets.get(tname)
            if sheet is not None:
                yield sheet

    tt_cache = {}

    def tag_text(elem):
        # Return the opening tag of elem as text. Only the text for
        # elements without attributes is cached.
        ans = tt_cache.get(elem)
        if ans is None:
            tag = elem.tag.rpartition('}')[-1]
            if elem.attrib:
                attribs = ' '.join(
                    '%s="%s"' %
                    (k, prepare_string_for_xml(elem.get(k, ''), True))
                    for k in elem.keys())
                return '<%s %s>' % (tag, attribs)
            ans = tt_cache[elem] = '<%s>' % tag
        # Previously the function fell off the end here, so elements without
        # attributes (and all cache hits) produced None instead of the text
        return ans

    def matches_for_selector(selector, select, class_map, rule):
        # Return the locations matched by selector and record, in class_map,
        # the rule against every class of a matched element that is named in
        # the selector
        lsel = selector.lower()
        try:
            matches = tuple(select(selector))
        except SelectorError:
            return ()
        for elem in matches:
            for cls in elem.get('class', '').split():
                if '.' + cls.lower() in lsel:
                    class_map[cls][elem].append(rule)

        return (MatchLocation(tag_text(elem), elem.sourceline)
                for elem in matches)

    class_map = defaultdict(lambda: defaultdict(list))

    for name, inline_sheets in iteritems(html_sheets):
        root = container.parsed(name)
        # Per document map of class name -> element -> rules using the class
        cmap = defaultdict(lambda: defaultdict(list))
        for elem in root.xpath('//*[@class]'):
            for cls in elem.get('class', '').split():
                cmap[cls][elem] = []
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
                rule_map[rule][name].extend(
                    matches_for_selector(rule.selector, select, cmap, rule))
        for cls, elem_map in iteritems(cmap):
            class_elements = class_map[cls][name]
            for elem, usage in iteritems(elem_map):
                class_elements.append(
                    ClassElement(name, elem.sourceline, elem.get('class'),
                                 tag_text(elem), tuple(usage)))

    result_data['classes'] = ans = []
    for cls, name_map in iteritems(class_map):
        la = tuple(
            ClassFileMatch(name, tuple(class_elements), numeric_sort_key(name))
            for name, class_elements in iteritems(name_map) if class_elements)
        num_of_matches = sum(
            sum(len(ce.matched_rules) for ce in cfm.class_elements)
            for cfm in la)
        ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls)))

    ans = []
    for rule, loc_map in iteritems(rule_map):
        la = tuple(
            CSSFileMatch(name, tuple(locations), numeric_sort_key(name))
            for name, locations in iteritems(loc_map) if locations)
        count = sum(len(fm.locations) for fm in la)
        ans.append(CSSEntry(rule, count, la, numeric_sort_key(rule.selector)))

    return ans
Ejemplo n.º 49
0
def format_report(title, report):
    '''
    Render a report as HTML 4 using markdown. ``title`` becomes a top level
    heading and every entry of ``report`` becomes a separate block below it.
    '''
    from calibre.ebooks.markdown import markdown
    blocks = [force_unicode(entry) for entry in report]
    heading = '# %s\n\n' % force_unicode(title)
    source = heading + '\n\n'.join(blocks)
    return markdown(source, output_format='html4')
Ejemplo n.º 50
0
def parse_html(data,
               log=None,
               decoder=None,
               preprocessor=None,
               filename='<string>',
               non_html_file_tags=frozenset()):
    '''
    Parse (possibly broken) HTML/XHTML markup into an lxml tree whose root is
    an ``<html>`` element in the XHTML namespace.

    Parsing is first attempted as XML and then falls back to progressively
    more forgiving parsers. The resulting tree is cleaned up so that it
    always has a ``<head>`` with a non-empty ``<title>`` and a single
    Content-Type ``<meta>``, and a ``<body>`` that is a child of the root.

    :param data: The markup, as unicode or as a bytestring. Bytestrings are
        decoded with ``decoder`` if given, otherwise with xml_to_unicode().
    :param log: Logger for diagnostics, defaults to calibre's default_log.
    :param decoder: Optional callable that converts a bytestring to unicode.
    :param preprocessor: Optional callable applied to the decoded markup
        before parsing; it must return the markup to parse.
    :param filename: Name used to refer to the document in log messages.
    :param non_html_file_tags: Set of bare root tag names; if the parsed
        root is not ``html`` and its name is in this set, NotHTML is raised.
    :return: The root element of the normalized tree.
    :raises NotHTML: See ``non_html_file_tags``.
    '''
    if log is None:
        from calibre.utils.logging import default_log
        log = default_log

    filename = force_unicode(filename, enc=filesystem_encoding)

    if not isinstance(data, unicode):
        if decoder is not None:
            data = decoder(data)
        else:
            data = xml_to_unicode(data)[0]

    data = strip_encoding_declarations(data)
    if preprocessor is not None:
        data = preprocessor(data)

    # There could be null bytes in data if it had &#0; entities in it
    data = data.replace('\0', '')

    # Remove DOCTYPE declaration as it messes up parsing
    # In particular, it causes tostring to insert xmlns
    # declarations, which messes up the coercing logic
    pre = ''
    idx = data.find('<html')
    if idx == -1:
        idx = data.find('<HTML')
    has_html4_doctype = False
    if idx > -1:
        # Everything before the opening <html> tag is split off into pre
        pre = data[:idx]
        data = data[idx:]
        if '<!DOCTYPE' in pre:  # Handle user defined entities
            has_html4_doctype = re.search(r'<!DOCTYPE\s+[^>]+HTML\s+4.0[^.]+>',
                                          pre) is not None
            # kindlegen produces invalid xhtml with uppercase attribute names
            # if fed HTML 4 with uppercase attribute names, so try to detect
            # and compensate for that.
            user_entities = {}
            for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                val = match.group(2)
                if val.startswith('"') and val.endswith('"'):
                    val = val[1:-1]
                user_entities[match.group(1)] = val
            if user_entities:
                # Expand the declared entities by plain text substitution,
                # since the DOCTYPE that declares them has been removed
                pat = re.compile(r'&(%s);' % ('|'.join(user_entities.keys())))
                data = pat.sub(lambda m: user_entities[m.group(1)], data)

    # raw keeps the text form of the markup for the fallback parsers, as
    # data is rebound to the parsed tree below
    data = raw = clean_word_doc(data, log)

    # Setting huge_tree=True causes crashes in windows with large files
    parser = etree.XMLParser(no_network=True)

    # Try with more & more drastic measures to parse
    try:
        data = etree.fromstring(data, parser=parser)
        check_for_html5(pre, data)
    except (HTML5Doc, etree.XMLSyntaxError):
        log.debug('Initial parse failed, using more' ' forgiving parsers')
        raw = data = xml_replace_entities(raw)
        try:
            data = etree.fromstring(data, parser=parser)
            check_for_html5(pre, data)
        except (HTML5Doc, etree.XMLSyntaxError):
            log.debug('Parsing %s as HTML' % filename)
            data = raw
            try:
                data = html5_parse(data)
            except Exception:
                log.exception(
                    'HTML 5 parsing failed, falling back to older parsers')
                data = _html4_parse(data)

    if has_html4_doctype or data.tag == 'HTML' or (
            len(data) and (data[-1].get('LANG') or data[-1].get('DIR'))):
        # Lower case all tag and attribute names
        data.tag = data.tag.lower()
        for x in data.iterdescendants():
            try:
                x.tag = x.tag.lower()
                for key, val in list(x.attrib.iteritems()):
                    del x.attrib[key]
                    key = key.lower()
                    x.attrib[key] = val
            except:
                # Best effort: nodes that cannot be lower cased are left
                # unchanged
                pass

    if barename(data.tag) != 'html':
        if barename(data.tag) in non_html_file_tags:
            raise NotHTML(data.tag)
        log.warn('File %r does not appear to be (X)HTML' % filename)
        # Wrap the parsed content in a new <html> root
        nroot = etree.fromstring('<html></html>')
        has_body = False
        for child in list(data):
            if isinstance(child.tag,
                          (unicode, str)) and barename(child.tag) == 'body':
                has_body = True
                break
        parent = nroot
        if not has_body:
            log.warn('File %r appears to be a HTML fragment' % filename)
            nroot = etree.fromstring('<html><body/></html>')
            parent = nroot[0]
        # Move every node of the old tree, the old root included, to be a
        # direct child of parent
        for child in list(data.iter()):
            oparent = child.getparent()
            if oparent is not None:
                oparent.remove(child)
            parent.append(child)
        data = nroot

    # Force into the XHTML namespace
    if not namespace(data.tag):
        log.warn('Forcing', filename, 'into XHTML namespace')
        # Add the xmlns attribute, then serialize and reparse so that the
        # namespace is applied to the elements
        data.attrib['xmlns'] = XHTML_NS
        data = etree.tostring(data, encoding=unicode)

        try:
            data = etree.fromstring(data, parser=parser)
        except:
            # Strip known patterns that break the reparse, then retry with
            # increasingly lossy cleanups
            data = data.replace(':=', '=').replace(':>', '>')
            data = data.replace('<http:/>', '')
            try:
                data = etree.fromstring(data, parser=parser)
            except etree.XMLSyntaxError:
                log.warn('Stripping comments from %s' % filename)
                data = re.compile(r'<!--.*?-->', re.DOTALL).sub('', data)
                data = data.replace(
                    "<?xml version='1.0' encoding='utf-8'?><o:p></o:p>", '')
                data = data.replace("<?xml version='1.0' encoding='utf-8'??>",
                                    '')
                try:
                    data = etree.fromstring(data, parser=RECOVER_PARSER)
                except etree.XMLSyntaxError:
                    log.warn('Stripping meta tags from %s' % filename)
                    data = re.sub(r'<meta\s+[^>]+?>', '', data)
                    data = etree.fromstring(data, parser=RECOVER_PARSER)
    elif namespace(data.tag) != XHTML_NS:
        # OEB_DOC_NS, but possibly others
        ns = namespace(data.tag)
        attrib = dict(data.attrib)
        nroot = etree.Element(XHTML('html'),
                              nsmap={None: XHTML_NS},
                              attrib=attrib)
        # Move all elements in the root's namespace into the XHTML namespace
        for elem in data.iterdescendants():
            if isinstance(elem.tag, basestring) and \
                namespace(elem.tag) == ns:
                elem.tag = XHTML(barename(elem.tag))
        for elem in data:
            nroot.append(elem)
        data = nroot

    # The namespace map the root should have: XHTML as the default
    # namespace and no other prefix bound to it
    fnsmap = {k: v for k, v in data.nsmap.iteritems() if v != XHTML_NS}
    fnsmap[None] = XHTML_NS
    if fnsmap != dict(data.nsmap):
        # Remove non default prefixes referring to the XHTML namespace
        data = clone_element(data, nsmap=fnsmap, in_context=False)

    data = merge_multiple_html_heads_and_bodies(data, log)
    # Ensure has a <head/>
    head = xpath(data, '/h:html/h:head')
    head = head[0] if head else None
    if head is None:
        log.warn('File %s missing <head/> element' % filename)
        head = etree.Element(XHTML('head'))
        data.insert(0, head)
        title = etree.SubElement(head, XHTML('title'))
        title.text = _('Unknown')
    elif not xpath(data, '/h:html/h:head/h:title'):
        title = etree.SubElement(head, XHTML('title'))
        title.text = _('Unknown')
    # Ensure <title> is not empty
    title = xpath(data, '/h:html/h:head/h:title')[0]
    if not title.text or not title.text.strip():
        title.text = _('Unknown')
    # Remove any encoding-specifying <meta/> elements
    for meta in META_XP(data):
        meta.getparent().remove(meta)
    meta = etree.SubElement(head,
                            XHTML('meta'),
                            attrib={'http-equiv': 'Content-Type'})
    meta.set('content',
             'text/html; charset=utf-8')  # Ensure content is second attribute

    # Ensure has a <body/>
    if not xpath(data, '/h:html/h:body'):
        body = xpath(data, '//h:body')
        if body:
            # A body exists deeper in the tree, move it directly under the
            # root
            body = body[0]
            body.getparent().remove(body)
            data.append(body)
        else:
            log.warn('File %s missing <body/> element' % filename)
            etree.SubElement(data, XHTML('body'))

    # Remove microsoft office markup
    r = [
        x for x in data.iterdescendants(etree.Element)
        if 'microsoft-com' in x.tag
    ]
    for x in r:
        x.tag = XHTML('span')

    def remove_elem(a):
        # Remove a from the tree while preserving its tail text, which is
        # appended to the tail of the previous sibling or, when a is the
        # first child, to the text of the parent
        p = a.getparent()
        idx = p.index(a) - 1
        p.remove(a)
        if a.tail:
            if idx < 0:
                if p.text is None:
                    p.text = ''
                p.text += a.tail
            else:
                if p[idx].tail is None:
                    p[idx].tail = ''
                p[idx].tail += a.tail

    # Remove hyperlinks with no content as they cause rendering
    # artifacts in browser based renderers
    # Also remove empty <b>, <u> and <i> tags
    for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
        if a.get('id', None) is None and a.get('name', None) is None \
                and len(a) == 0 and not a.text:
            remove_elem(a)

    # Convert <br>s with content into paragraphs as ADE can't handle
    # them
    for br in xpath(data, '//h:br'):
        if len(br) > 0 or br.text:
            br.tag = XHTML('div')

    # Remove any stray text in the <head> section and format it nicely
    data.text = '\n  '
    head = xpath(data, '//h:head')
    if head:
        head = head[0]
        head.text = '\n    '
        head.tail = '\n  '
        for child in head:
            child.tail = '\n    '
        # The last child is followed by the closing </head> tag, so it gets
        # the shallower indent
        child.tail = '\n  '

    return data
Ejemplo n.º 51
0
def decode_output(raw):
    '''
    Decode the bytestring ``raw`` using the preferred encoding, falling back
    to force_unicode() with UTF-8 if that fails. A falsy ``raw`` is treated
    as empty.
    '''
    data = raw if raw else b''
    try:
        text = data.decode(preferred_encoding)
    except UnicodeDecodeError:
        text = force_unicode(data, 'utf-8')
    return text
Ejemplo n.º 52
0
 def get_metadata(self):
     ''' Return a MetaInformation object built from the Title and the
     semicolon separated Authors entries of the original metadata '''
     self.get_original_metadata()
     book_title = force_unicode(self.metadata['Title'], 'utf-8')
     author_field = force_unicode(self.metadata['Authors'], 'utf-8')
     return MetaInformation(book_title, author_field.split(';'))
Ejemplo n.º 53
0
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and returns the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.

    Raises ValueError if path points to the filesystem root.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r'%path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # Dont have permission to do the listdir, assume the case is
            # correct as we have no way to check it. Only filesystem errors
            # are ignored, so that things like KeyboardInterrupt are not
            # silently swallowed.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        ans.flush()
        os.fsync(ans.fileno())

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
Ejemplo n.º 54
0
    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        '''
        Compute the styles of the elements of an XHTML document.

        Collects the stylesheets that apply to the document (a built-in HTML
        stylesheet, ``<style>`` and ``<link>`` elements found in the
        ``<head>``, then ``extra_css`` and ``user_css``), flattens and sorts
        their rules and applies them to the matching elements. Inline
        ``style`` attributes and the ``width``/``height`` attributes of
        images are applied afterwards.

        :param tree: The parsed XHTML document.
        :param path: The href of the document, it must be a key of
            ``oeb.manifest.hrefs``.
        :param oeb: The book object; its manifest, logger and CSS
            preprocessor are used.
        :param opts: Conversion options; ``opts.output_profile`` is used.
        :param profile: Profile to store as ``self.profile``, defaults to
            ``opts.output_profile``.
        :param extra_css: Additional CSS text, parsed and added after the
            stylesheets of the document.
        :param user_css: Additional CSS text, handled like ``extra_css``.
        '''
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            # No <head>: the loop over its children below does nothing
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                # Gather the CSS text, including the text and tail of any
                # child nodes of the <style> element
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces['h'] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn(
                                    'Ignoring missing stylesheet in @import rule:',
                                    rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn(
                                    'CSS @import of non-CSS file %r' %
                                    rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        # Parse the additional CSS; a failure to parse is logged and the
        # offending CSS ignored
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        # Running counter passed to flatten_rule() for every rule
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {
                        rule.media.item(i)
                        for i in xrange(rule.media.length)
                    }
                    # Skip @media blocks that are not for one of these media
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index))
                    index = index + 1
        # NOTE(review): the sort presumably puts the flattened rules into
        # cascade order -- confirm against flatten_rule()
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(
            ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)',
            re.I)
        for _, _, cssdict, text, _ in rules:
            # Strip the first pseudo selector from the selector text, it is
            # handled separately below
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), '')
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                                                    'plumber_output_format',
                                                    '').lower() == u'mobi':
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                # Wrap the first letter, together with any
                                # punctuation or separators before it, in a
                                # <span> that receives the style
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        # Apply inline style attributes
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        # Purely numeric values are treated as pixels
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Ejemplo n.º 55
0
 def __str__(self):
     ''' Return the text of the CSS declaration as unicode '''
     css_text = self.css_declaration.cssText
     return force_unicode(css_text, 'utf-8')
Ejemplo n.º 56
0
def encode(unistr):
    """Return ``unistr`` with every non-ASCII character escaped.

    Input that is not already a ``unicode_type`` instance is first converted
    with ``force_unicode``. Characters whose code point is below 128 are kept
    unchanged; any other character is replaced by a backslash, the letter
    ``u``, its decimal code point and a trailing ``?``.
    """
    if not isinstance(unistr, unicode_type):
        unistr = force_unicode(unistr)
    pieces = []
    for ch in unistr:
        codepoint = ord(ch)
        if codepoint < 128:
            pieces.append(ch)
        else:
            pieces.append('\\u{}?'.format(codepoint))
    return ''.join(pieces)
Ejemplo n.º 57
0
 def field_trimmer(self, field):
     ''' Strip punctuation and common joiner words from ``field`` to improve
     matching. Punctuation is translated away first, so that a.and.b
     becomes a b '''
     as_text = force_unicode(field)
     # Punctuation must go before the joiner pattern runs, otherwise joiner
     # words glued together by punctuation would not be recognised.
     without_punctuation = as_text.translate(self.punctuation_table)
     return self.joiner_pat.sub(' ', without_punctuation)
Ejemplo n.º 58
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        '''
        Collect every stylesheet that applies to ``tree``, flatten their rules
        and apply the resulting declarations to the matching elements.

        :param tree: The parsed document tree; queried with ``xpath`` and ``Select``.
        :param path: Key of the document in ``oeb.manifest.hrefs``; also used to derive the name given to inline CSS.
        :param oeb: The book object; its ``manifest``, ``logger`` and ``css_preprocessor`` are used here.
        :param opts: Conversion options; ``opts.output_profile`` is read for the fallback profile and the extra CSS parsing profiles.
        :param profile: Output profile to use. If None, the profile whose ``short_name`` is ``'default'`` is used, falling back to ``opts.output_profile``.
        :param extra_css: CSS text appended after the document's own stylesheets (skipped when empty).
        :param user_css: CSS text appended after the document's own stylesheets (skipped when empty).
        :param base_css: CSS text inserted directly after the built-in HTML stylesheet (skipped when empty).
        '''
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        # Manifest entry of this document; item.abshref() is used below to
        # resolve hrefs relative to it
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # href assigned to CSS parsed from inline <style> elements and from
        # extra_css/user_css
        cssname = os.path.splitext(basename)[0] + '.css'
        # The sheet at index 0 is flagged as the user agent sheet when the
        # rules are flattened further down
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        # NOTE: the loop variable rebinds the ``profile`` parameter;
        # self.profile has already been set above
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                # Inline <style>: gather the element's own text plus the text
                # and tail of any child nodes
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    # Restore the real fetcher for subsequent parses
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            # Imports restricted to the amzn-mobi medium are skipped
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            # The imported sheet is added before the sheet
                            # that imports it
                            stylesheets.append(sitem.data)
                    # Remove @page rules from the inline sheet; iterate over a
                    # tuple copy because the rule list is mutated
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                # Linked stylesheet: use the already parsed data from the manifest
                href = urlnormalize(elem.attrib['href'])
                # NOTE: rebinds the ``path`` parameter to the resolved
                # stylesheet href
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                # Manifest data without a cssRules attribute is not treated
                # as a stylesheet
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        # extra_css and user_css are appended last; a parse failure is logged
        # and the offending CSS is ignored
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        # Flatten all collected sheets into a single list of rules. ``index``
        # increases by one for every rule processed, across all sheets
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    # Only descend into @media blocks that list all, screen
                    # or amzn-kf8
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        # NOTE(review): the resulting order depends on the tuples produced by
        # flatten_rule, which is not visible here - presumably cascade order;
        # confirm
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        # Each flattened rule is a 5-tuple; only the declarations dict and the
        # selector text are needed here
        for _, _, cssdict, text, _ in rules:
            # Name of the first inappropriate pseudo class/element in the
            # selector, if any
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                # Leading characters in the Unicode P and Z
                                # categories are kept together with the first
                                # other character
                                punctuation_chars = []
                                # NOTE: rebinds ``text`` (the selector text);
                                # it is not used as a selector again in this
                                # iteration
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                # Move the leading text into a new <span> that
                                # becomes the first child; the rest of the
                                # text becomes the span's tail
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                # Only the first element with text is wrapped
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        # Inline style="" attributes are applied after all stylesheet rules
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # Matches values made up only of digits and dots, i.e. without a unit
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # The attribute is removed even when its value is empty
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        # Unitless values are given the px unit
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Ejemplo n.º 59
0
 def state_description(self):
     """Return a one-line description made of the ``__name__`` of
     ``self.handle_event`` (None if it has none), the remote address and
     port, and the request line (``'WebSocketConnection'`` when the
     ``request_line`` attribute is missing)."""
     handler_name = getattr(self.handle_event, '__name__', None)
     client = (self.remote_addr, self.remote_port)
     request_line = getattr(self, 'request_line', 'WebSocketConnection')
     request = force_unicode(request_line, 'utf-8')
     return 'State: %s Client: %s:%s Request: %s' % (
         (handler_name,) + client + (request,))
Ejemplo n.º 60
0
def remove_unused_css(container,
                      report=None,
                      remove_unused_classes=False,
                      merge_rules=False,
                      merge_rules_with_identical_properties=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    :param merge_rules_with_identical_properties: If True, rules with identical properties are merged.
    :return: True if at least one rule, selector or class was removed or at least one rule was merged, False otherwise.
    '''
    report = report or (lambda x: x)

    def safe_parse(name):
        # A TypeError while parsing means the sheet is skipped (None is returned)
        try:
            return container.parsed(name)
        except TypeError:
            pass

    sheets = {
        name: safe_parse(name)
        for name, mt in iteritems(container.mime_map) if mt in OEB_STYLES
    }
    sheets = {k: v for k, v in iteritems(sheets) if v is not None}
    num_merged = num_rules_merged = 0
    if merge_rules:
        for name, sheet in iteritems(sheets):
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    if merge_rules_with_identical_properties:
        for name, sheet in iteritems(sheets):
            num = merge_identical_properties(sheet)
            if num:
                container.dirty(name)
                num_rules_merged += num
    import_map = {
        name: get_imported_sheets(name, container, sheets)
        for name in sheets
    }
    if remove_unused_classes:
        # Only built (and only read) when unused classes are to be removed
        class_map = {
            name: {icu_lower(x)
                   for x in classes_in_rule_list(sheet.cssRules)}
            for name, sheet in iteritems(sheets)
        }
    style_rules = {
        name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
        for name, sheet in iteritems(sheets)
    }

    removal_stats = {'rules': 0, 'selectors': 0}
    num_of_removed_classes = 0

    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                # This sheet is a fresh object parsed from the element's
                # text, so any change made to it only reaches the document
                # when it is serialized back into style.text below.
                sheet = container.parse_css(style.text)
                sheet_modified = False
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        sheet_modified = True
                if merge_rules_with_identical_properties:
                    num = merge_identical_properties(sheet)
                    if num:
                        num_rules_merged += num
                        sheet_modified = True
                if remove_unused_classes:
                    used_classes |= {
                        icu_lower(x)
                        for x in classes_in_rule_list(sheet.cssRules)
                    }
                imports = get_imported_sheets(name,
                                              container,
                                              sheets,
                                              sheet=sheet)
                for imported_sheet in imports:
                    mark_used_selectors(style_rules[imported_sheet],
                                        container.log, select)
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                if mark_used_selectors(rules, container.log, select):
                    remove_unused_selectors_and_rules(sheet.cssRules, rules,
                                                      removal_stats)
                    sheet_modified = True
                if sheet_modified:
                    # Write the sheet back for merges as well as removals;
                    # otherwise a merge would be counted and reported while
                    # the <style> element kept its old text.
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            mark_used_selectors(style_rules[sname], container.log, select)
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                mark_used_selectors(style_rules[iname], container.log, select)
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            for elem in root.xpath('//*[@class]'):
                original_classes = elem.get('class', '').split()
                # Class names are compared case-insensitively via icu_lower
                classes = [
                    x for x in original_classes
                    if icu_lower(x) in used_classes
                ]
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(
                        classes)
                    container.dirty(name)

    # External sheets are pruned only after every document has marked the
    # selectors it uses
    for name, sheet in iteritems(sheets):
        any_found = remove_unused_selectors_and_rules(sheet.cssRules,
                                                      style_rules[name],
                                                      removal_stats)
        if any_found:
            container.dirty(name)

    num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats[
        'rules'] + removal_stats['selectors']
    if num_changes > 0:
        if removal_stats['rules']:
            report(
                ngettext('Removed one unused CSS style rule',
                         'Removed {} unused CSS style rules',
                         removal_stats['rules']).format(
                             removal_stats['rules']))
        if removal_stats['selectors']:
            report(
                ngettext('Removed one unused CSS selector',
                         'Removed {} unused CSS selectors',
                         removal_stats['selectors']).format(
                             removal_stats['selectors']))
        if num_of_removed_classes > 0:
            report(
                ngettext(
                    'Removed one unused class from the HTML',
                    'Removed {} unused classes from the HTML',
                    num_of_removed_classes).format(num_of_removed_classes))
        if num_merged > 0:
            report(
                ngettext('Merged one CSS style rule with identical selectors',
                         'Merged {} CSS style rules with identical selectors',
                         num_merged).format(num_merged))
        if num_rules_merged > 0:
            report(
                ngettext(
                    'Merged one CSS style rule with identical properties',
                    'Merged {} CSS style rules with identical properties',
                    num_rules_merged).format(num_rules_merged))
    if not removal_stats['rules']:
        report(_('No unused CSS style rules found'))
    if not removal_stats['selectors']:
        report(_('No unused CSS selectors found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0