Example #1
0
def get_metadata(stream, extract_cover=True):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata','') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #2
0
 def setUp(self):
     self.tdir    = PersistentTemporaryDirectory('_calibre_dbtest')
     self.db      = LibraryDatabase2(self.tdir)
     f = open(os.path.join(self.tdir, 'test.txt'), 'w+b')
     f.write('test')
     paths = list(repeat(f, 3))
     formats = list(repeat('txt', 3))
     m1 = MetaInformation('Test Ebook 1', ['Test Author 1'])
     m1.tags = ['tag1', 'tag2']
     m1.publisher = 'Test Publisher 1'
     m1.rating = 2
     m1.series = 'Test Series 1'
     m1.series_index = 3
     m1.author_sort = 'as1'
     m1.isbn = 'isbn1'
     m1.cover_data = ('jpg', self.img)
     m2 = MetaInformation('Test Ebook 2', ['Test Author 2'])
     m2.tags = ['tag3', 'tag4']
     m2.publisher = 'Test Publisher 2'
     m2.rating = 3
     m2.series = 'Test Series 2'
     m2.series_index = 1
     m2.author_sort = 'as1'
     m2.isbn = 'isbn1'
     self.db.add_books(paths, formats, [m1, m2, m2], add_duplicates=True)
     self.m1, self.m2 = m1, m2
Example #3
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                              author, series)
     if dlg.exec_() == dlg.Accepted:
         num = dlg.qty_to_add
         series = dlg.selected_series
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(_('Unknown'), dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             ids.append(db.import_book(mi, []))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
Example #4
0
    def add_empty(self, *args):
        '''
        Add an empty book item to the library. This does not import any formats
        from a book file.
        '''
        author = series = title = None
        index = self.gui.library_view.currentIndex()
        if index.isValid():
            raw = index.model().db.authors(index.row())
            if raw:
                authors = [a.strip().replace('|', ',') for a in raw.split(',')]
                if authors:
                    author = authors[0]
            series = index.model().db.series(index.row())
            title = index.model().db.title(index.row())
        dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                                 author, series, dup_title=title)
        if dlg.exec_() == dlg.Accepted:
            temp_files = []
            num = dlg.qty_to_add
            series = dlg.selected_series
            title = dlg.selected_title or _('Unknown')
            db = self.gui.library_view.model().db
            ids, orig_fmts = [], []
            if dlg.duplicate_current_book:
                origmi = db.get_metadata(index.row(), get_cover=True, cover_as_data=True)
                if dlg.copy_formats.isChecked():
                    book_id = db.id(index.row())
                    orig_fmts = tuple(db.new_api.format(book_id, fmt, as_path=True) for fmt in db.new_api.formats(book_id))

            for x in xrange(num):
                if dlg.duplicate_current_book:
                    mi = origmi
                else:
                    mi = MetaInformation(title, dlg.selected_authors)
                    if series:
                        mi.series = series
                        mi.series_index = db.get_next_series_num_for(series)
                fmts = []
                empty_format = gprefs.get('create_empty_format_file', '')
                if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                    fmts = orig_fmts
                elif empty_format:
                    from calibre.ebooks.oeb.polish.create import create_book
                    pt = PersistentTemporaryFile(suffix='.' + empty_format)
                    pt.close()
                    temp_files.append(pt.name)
                    create_book(mi, pt.name, fmt=empty_format)
                    fmts = [pt.name]
                ids.append(db.import_book(mi, fmts))
            tuple(map(os.remove, orig_fmts))
            self.gui.library_view.model().books_added(num)
            self.gui.refresh_cover_browser()
            self.gui.tags_view.recount()
            if ids:
                ids.reverse()
                self.gui.library_view.select_rows(ids)
            for path in temp_files:
                os.remove(path)
Example #5
0
def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/'
                        +isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode) for
                    x in h2]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
               series = html.tostring(h, method='text', encoding=unicode)
               match = re.search(r'(.+) \((.+)\)', series)
               if match is not None:
                   break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
Example #6
0
def do_set_metadata(opts, mi, stream, stream_type):
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata()
        mi.smart_update(opf_mi)

    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)
    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        val = {k.strip():v.strip() for k, v in (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            val = {k:v for k, v in iteritems(orig) if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        mi.cover_data = (ext, open(opts.cover, 'rb').read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
Example #7
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                              author, series)
     if dlg.exec_() == dlg.Accepted:
         temp_files = []
         num = dlg.qty_to_add
         series = dlg.selected_series
         title = dlg.selected_title or _('Unknown')
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(title, dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             fmts = []
             empty_format = gprefs.get('create_empty_format_file', '')
             if empty_format:
                 from calibre.ebooks.oeb.polish.create import create_book
                 pt = PersistentTemporaryFile(suffix='.' + empty_format)
                 pt.close()
                 temp_files.append(pt.name)
                 create_book(mi, pt.name, fmt=empty_format)
                 fmts = [pt.name]
             ids.append(db.import_book(mi, fmts))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
         for path in temp_files:
             os.remove(path)
Example #8
0
    def add_document(self,document):
        from calibre.ebooks.metadata import MetaInformation

        mi = MetaInformation('', [_('Unknown')])
        mi.title = document['title']
        mi.authors = document['authors']
        mi.tags = ["Mendeley"]

        mendeley_id = {}
        mendeley_id['mendeley'] = document['mendeley_id']

        mi.identifiers = mendeley_id
        mi.series_index = 1 # needed?

        self.db.add_books([document['path']], ['pdf'], [mi], False, True)

        os.remove(document['path'])
Example #9
0
    def get_metadata(self):
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
        publisher = unicode(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
        comments = self.comment.html
        if comments:
            mi.comments = comments
        mi.series_index = float(self.series_index.value())
        series = unicode(self.series.currentText()).strip()
        if series:
            mi.series = series
        tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
        if tags:
            mi.tags = tags

        return mi
Example #10
0
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            if isinstance(co.issue, unicode):
                mi.series_index = unicodedata.numeric(co.issue)
            else:
                mi.series_index = float(co.issue)
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        update_column = partial(
            update_custom_column,
            calibre_metadata=mi,
            custom_cols=self.db.field_metadata.custom_field_metadata())
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['volume_column'], co.volume)
        update_column(prefs['genre_column'], co.genre)
        update_column(prefs['count_column'], co.issueCount)
        if prefs['auto_count_pages']:
            update_column(prefs['pages_column'], self.pages)
        else:
            update_column(prefs['pages_column'], co.pageCount)
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))

        self.comic_md_in_calibre_format = mi
Example #11
0
def get_metadata_(src, encoding=None):
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    # Title
    title = None
    pat = re.compile(r'<!--.*?TITLE=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
    src = src[:150000]  # Searching shouldn't take too long
    match = pat.search(src)
    if match:
        title = match.group(2)
    else:
        for x in ("DC.title", "DCTERMS.title", "Title"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                title = match.group(1)
                break
    if not title:
        pat = re.compile("<title>([^<>]+?)</title>", re.IGNORECASE)
        match = pat.search(src)
        if match:
            title = match.group(1)

    # Author
    author = None
    pat = re.compile(r'<!--.*?AUTHOR=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        author = match.group(2).replace(",", ";")
    else:
        for x in ("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                author = match.group(1)
                break

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r"&(\S+)?;")
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = None
    pat = re.compile(r'<!--.*?PUBLISHER=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        publisher = match.group(2)
    else:
        for x in ("Publisher", "DC.publisher", "DCTERMS.publisher"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                publisher = match.group(1)
                break
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = None
    pat = re.compile(r'<!--.*?ISBN=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        isbn = match.group(1)
    else:
        for x in ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                isbn = match.group(1)
                break
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = None
    pat = re.compile(r'<!--.*?LANGUAGE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        language = match.group(1)
    else:
        for x in ("DC.language", "DCTERMS.language"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                language = match.group(1)
                break
    if language:
        mi.language = language

    # PUBDATE
    pubdate = None
    pat = re.compile(r'<!--.*?PUBDATE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        pubdate = match.group(1)
    else:
        for x in (
            "Pubdate",
            "Date of publication",
            "DC.date.published",
            "DC.date.publication",
            "DC.date.issued",
            "DCTERMS.issued",
        ):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                pubdate = match.group(1)
                break
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except:
            pass

    # TIMESTAMP
    timestamp = None
    pat = re.compile(r'<!--.*?TIMESTAMP=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        timestamp = match.group(1)
    else:
        for x in ("Timestamp", "Date of creation", "DC.date.created", "DC.date.creation", "DCTERMS.created"):
            pat = get_meta_regexp_(x)
            match = pat.search(src)
            if match:
                timestamp = match.group(1)
                break
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except:
            pass

    # SERIES
    series = None
    pat = re.compile(r'<!--.*?SERIES=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        series = match.group(1)
    else:
        pat = get_meta_regexp_("Series")
        match = pat.search(src)
        if match:
            series = match.group(1)
    if series:
        pat = re.compile(r"\[([.0-9]+)\]")
        match = pat.search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except:
                pass
            series = series.replace(match.group(), "").strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            pat = get_meta_regexp_("Seriesnumber")
            match = pat.search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except:
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = None
    pat = re.compile(r'<!--.*?RATING=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        rating = match.group(1)
    else:
        pat = get_meta_regexp_("Rating")
        match = pat.search(src)
        if match:
            rating = match.group(1)
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            if mi.rating > 5:
                mi.rating /= 2.0
            if mi.rating > 5:
                mi.rating = 0
        except:
            pass

    # COMMENTS
    comments = None
    pat = re.compile(r'<!--.*?COMMENTS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        comments = match.group(1)
    else:
        pat = get_meta_regexp_("Comments")
        match = pat.search(src)
        if match:
            comments = match.group(1)
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = None
    pat = re.compile(r'<!--.*?TAGS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
    match = pat.search(src)
    if match:
        tags = match.group(1)
    else:
        pat = get_meta_regexp_("Tags")
        match = pat.search(src)
        if match:
            tags = match.group(1)
    if tags:
        mi.tags = [x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")]

    # Ready to return MetaInformation
    return mi
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # synonyms for artists
        WRITER = ['writer', 'plotter', 'scripter']
        PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
        INKER = ['inker', 'artist', 'finishes']
        COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
        LETTERER = ['letterer']
        COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
        EDITOR = ['editor']

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            if isinstance(co.issue, unicode):
                mi.series_index = unicodedata.numeric(co.issue)
            else:
                mi.series_index = float(co.issue)
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        custom_cols = self.db.field_metadata.custom_field_metadata()
        update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['volume_column'], co.volume)
        update_column(prefs['genre_column'], co.genre)

        self.comic_md_in_calibre_format = mi
Example #13
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    name = name.rpartition(".")[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            au = match.group("author")
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs["swap_author_names"] and mi.authors:

                    def swap(a):
                        if "," in a:
                            parts = a.split(",", 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return " ".join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            si = match.group("series_index")
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group("isbn")
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group("publisher")
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date

                mi.pubdate = parse_only_date(pubdate)
        except:
            pass

    if mi.is_null("title"):
        mi.title = name
    return mi
Example #14
0
    def _start_merge(self,book_list):
        db=self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = filter(lambda x : not x['good'], book_list)
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.')%len(bad_list),
                             det_msg='\n'.join(map(lambda x : x['error'] , bad_list)))
            d.exec_()
        else:
            d = OrderEPUBsDialog(self.gui,
                                 _('Order EPUBs to Merge'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()

            logger.debug("2:%s"%(time.time()-self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
            mi = MetaInformation(deftitle,["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list))
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle;
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = map(lambda x : x['authors'], book_list)
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set(map(lambda x : x['publisher'], book_list))
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = map(lambda x : x['tags'], book_list)
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = map(lambda x : x['languages'], book_list)
            mi.languages = [item for sublist in languageslists for item in sublist]

            mi.series = ''
            if prefs['firstseries'] and book_list[0]['series']:
                mi.series = book_list[0]['series']
                mi.series_index = book_list[0]['series_index']

            # ======================= make book comments =========================

            if len(mi.authors) > 1:
                booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
            else:
                booktitle = lambda x : x['title']

            mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword']

            if prefs['includecomments']:
                def bookcomments(x):
                    if x['comments']:
                        return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments'])
                    else:
                        return '<b>%s</b><br/>'%booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                                '</div>')
            else:
                mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] )

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s"%(time.time()-self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = map(lambda x : x['calibre_id'], book_list)

            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in prefs['custom_cols'].iteritems():
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping."%col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug("%s not a valid column type for %s, skipping."%(col,action))
                    continue

                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first','last']:
                    value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add','average','averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    if found and action in ('average','averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5 # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if found and value != None:
                    db.set_custom(book_id,value,label=label,commit=False)

            db.commit()

            logger.debug("4:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()

            confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                    'epubmerge_created_now_edit_again',
                    self.gui)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum(map(lambda x : x['epub_size'], book_list))

            logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
            if len(book_list) > 100 or totalsize > 5*1024*1024:
                confirm('\n'+_('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
                        'epubmerge_edited_now_merge_again',
                        self.gui)

            self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)

            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = map(lambda x : x['epub'] , book_list)

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

            self.do_merge( mergedepub,
                           epubstomerge,
                           authoropts=mi.authors,
                           titleopt=mi.title,
                           descopt=mi.comments,
                           tags=mi.tags,
                           languages=mi.languages,
                           titlenavpoints=prefs['titlenavpoints'],
                           originalnavpoints=prefs['originalnavpoints'],
                           flattentoc=prefs['flattentoc'],
                           printtimes=True,
                           coverjpgpath=coverjpgpath,
                           keepmetadatafiles=prefs['keepmeta'] )

            logger.debug("6:%s"%(time.time()-self.t))
            logger.debug(_("Merge finished, output in:\n%s")%mergedepub.name)
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub, index_is_id=True)

            logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
Example #15
0
def get_metadata(stream, extract_cover=True):
    whitespace = re.compile(r'\s+')

    def normalize(s):
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            ns, tag = fields[field]
            ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if ans:
                return normalize(tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [x.strip() for x in kw.split(',') if x.strip()]
        data = {}
        for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val
        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
                if data.get('opf.seriesindex', ''):
                    try:
                        mi.series_index = float(data['opf.seriesindex'])
                    except Exception:
                        mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #16
0
    def _start_merge(self, book_list):
        db = self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = [x for x in book_list if not x['good']]
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.') % len(bad_list),
                             det_msg='\n'.join([x['error'] for x in bad_list]))
            d.exec_()
        else:
            d = OrderEPUBsDialog(
                self.gui,
                _('Order EPUBs to Merge'),
                prefs,
                self.qaction.icon(),
                book_list,
            )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()

            logger.debug("2:%s" % (time.time() - self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword'])
            mi = MetaInformation(deftitle, ["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = [
                x['series'] for x in book_list if x['series'] != None
            ]
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = [x['authors'] for x in book_list]
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ])

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set([x['publisher'] for x in book_list])
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = [x['tags'] for x in book_list]
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = [x['languages'] for x in book_list]
            mi.languages = [
                item for sublist in languageslists for item in sublist
            ]

            mi.series = ''
            if prefs['firstseries'] and book_list[0]['series']:
                mi.series = book_list[0]['series']
                mi.series_index = book_list[0]['series_index']

            # ======================= make book comments =========================

            if len(mi.authors) > 1:
                booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join(
                    x['authors']))
            else:
                booktitle = lambda x: x['title']

            mi.comments = ("<p>" + _("%s containing:") +
                           "</p>") % prefs['mergeword']

            if prefs['includecomments']:

                def bookcomments(x):
                    if x['comments']:
                        return '<p><b>%s</b></p>%s' % (booktitle(x),
                                                       x['comments'])
                    else:
                        return '<b>%s</b><br/>' % booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join(
                                    [bookcomments(x)
                                     for x in book_list]) + '</div>')
            else:
                mi.comments += '<br/>'.join([booktitle(x) for x in book_list])

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi, add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s" % (time.time() - self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = [x['calibre_id'] for x in book_list]

            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in six.iteritems(prefs['custom_cols']):
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping." % col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug(
                        "%s not a valid column type for %s, skipping." %
                        (col, action))
                    continue

                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first', 'last']:
                    value = db.get_custom(idslist[idx],
                                          label=label,
                                          index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]" % (
                            value,
                            db.get_custom_extra(
                                idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add', 'average', 'averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    if found and action in ('average', 'averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5  # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(
                                    db.get_custom(bid,
                                                  label=label,
                                                  index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if action == 'now':
                    value = datetime.now()
                    found = True
                    logger.debug("now: %s" % value)

                if found and value != None:
                    logger.debug("value: %s" % value)
                    db.set_custom(book_id, value, label=label, commit=False)

            db.commit()

            logger.debug("4:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()

            confirm(
                '\n' +
                _('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''
                  ), 'epubmerge_created_now_edit_again', self.gui)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum([x['epub_size'] for x in book_list])

            logger.debug("merging %s EPUBs totaling %s" %
                         (len(book_list), gethumanreadable(totalsize)))
            if len(book_list) > 100 or totalsize > 5 * 1024 * 1024:
                confirm(
                    '\n' +
                    _('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.'''
                      ) % (len(book_list), gethumanreadable(totalsize)),
                    'epubmerge_edited_now_merge_again', self.gui)

            self.gui.status_bar.show_message(
                _('Merging %s EPUBs...') % len(book_list), 60000)

            mi = db.get_metadata(book_id, index_is_id=True)

            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = [x['epub'] for x in book_list]

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            self.do_merge(mergedepub,
                          epubstomerge,
                          authoropts=mi.authors,
                          titleopt=mi.title,
                          descopt=mi.comments,
                          tags=mi.tags,
                          languages=mi.languages,
                          titlenavpoints=prefs['titlenavpoints'],
                          originalnavpoints=prefs['originalnavpoints'],
                          flattentoc=prefs['flattentoc'],
                          printtimes=True,
                          coverjpgpath=coverjpgpath,
                          keepmetadatafiles=prefs['keepmeta'])

            logger.debug("6:%s" % (time.time() - self.t))
            logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name)
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub,
                                     index_is_id=True)

            logger.debug("7:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(
                _('Finished merging %s EPUBs.') % len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
            #self.gui.iactions['View'].view_book(False)
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
Example #17
0
def get_metadata(stream, extract_cover=True):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, True)
    parser.setFeature(xml.sax.handler.feature_external_ges, False)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(io.BytesIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #18
0
File: meta.py Project: sss/calibre
def metadata_from_filename(name, pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_date
                mi.pubdate = parse_date(pubdate)
        except:
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #19
0
    def add_empty(self, *args):
        '''
        Add an empty book item to the library. This does not import any formats
        from a book file.
        '''
        author = series = title = None
        index = self.gui.library_view.currentIndex()
        if index.isValid():
            raw = index.model().db.authors(index.row())
            if raw:
                authors = [a.strip().replace('|', ',') for a in raw.split(',')]
                if authors:
                    author = authors[0]
            series = index.model().db.series(index.row())
            title = index.model().db.title(index.row())
        dlg = AddEmptyBookDialog(self.gui,
                                 self.gui.library_view.model().db,
                                 author,
                                 series,
                                 dup_title=title)
        if dlg.exec_() == dlg.Accepted:
            temp_files = []
            num = dlg.qty_to_add
            series = dlg.selected_series
            title = dlg.selected_title or _('Unknown')
            db = self.gui.library_view.model().db
            ids, orig_fmts = [], []
            if dlg.duplicate_current_book:
                origmi = db.get_metadata(index.row(),
                                         get_cover=True,
                                         cover_as_data=True)
                if dlg.copy_formats.isChecked():
                    book_id = db.id(index.row())
                    orig_fmts = tuple(
                        db.new_api.format(book_id, fmt, as_path=True)
                        for fmt in db.new_api.formats(book_id))

            for x in range(num):
                if dlg.duplicate_current_book:
                    mi = origmi
                else:
                    mi = MetaInformation(title, dlg.selected_authors)
                    if series:
                        mi.series = series
                        mi.series_index = db.get_next_series_num_for(series)
                fmts = []
                empty_format = gprefs.get('create_empty_format_file', '')
                if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                    fmts = orig_fmts
                elif empty_format:
                    from calibre.ebooks.oeb.polish.create import create_book
                    pt = PersistentTemporaryFile(suffix='.' + empty_format)
                    pt.close()
                    temp_files.append(pt.name)
                    create_book(mi, pt.name, fmt=empty_format)
                    fmts = [pt.name]
                ids.append(db.import_book(mi, fmts))
            for path in orig_fmts:
                os.remove(path)
            self.refresh_gui(num)
            if ids:
                ids.reverse()
                self.gui.library_view.select_rows(ids)
            for path in temp_files:
                os.remove(path)
Example #20
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #21
0
def get_social_metadata(title,
                        authors,
                        publisher,
                        isbn,
                        username=None,
                        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/' +
                                  isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [
                html.tostring(x, method='text', encoding=unicode) for x in h2
            ]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
                series = html.tostring(h, method='text', encoding=unicode)
                match = re.search(r'(.+) \((.+)\)', series)
                if match is not None:
                    break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        # tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        # if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
            '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi