Example #1
0
def get_metadata(stream, extract_cover=True):
    """
    Read metadata from the ODF zip container in ``stream``.

    ``meta.xml`` is read from the archive and fed through a namespace-aware
    SAX parser into an ``odfmetaparser``; the fields it collected
    (``seenfields``) are copied onto a new ``MetaInformation``.  When the
    ``opf.metadata`` field equals ``'true'`` the ``opf.*`` fields are applied
    as well (``opf.authors`` overrides the creator-based authors).

    :param stream: object accepted by ``zipfile.ZipFile`` for reading.
    :param extract_cover: passed through unchanged to ``read_cover``.
    :return: the populated ``MetaInformation``.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # Prefer a non-blank initial-creator; otherwise fall back to creator.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            # The index is only looked at when a series name is present.
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data.  Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass

    return mi
Example #2
0
def get_metadata_from_reader(rdr):
    """
    Build a ``MetaInformation`` from the home page of the reader ``rdr``.

    The home file (``rdr.GetFile(rdr.home)``) is converted to unicode and
    parsed with BeautifulSoup; authors, publisher, ISBN, comments and cover
    are extracted from it by the module's ``_get_*`` helpers.  The title
    comes from ``rdr.title``, decoded with the reader's encoding.

    :param rdr: reader object providing ``GetFile``, ``home``, ``title`` and
        ``GetEncoding``.
    :return: the populated ``MetaInformation``.
    """
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0])

    title = rdr.title
    try:
        x = rdr.GetEncoding()
        # codecs.lookup raises if the name is not a known codec
        codecs.lookup(x)
        enc = x
    except Exception:
        # Reported encoding is unusable: fall back to cp1252.  Only
        # Exception is caught so KeyboardInterrupt/SystemExit propagate.
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        # The cover data is always labelled as 'jpg' here.
        mi.cover_data = ('jpg', cdata)

    return mi
Example #3
0
def get_metadata_from_reader(rdr):
    """
    Return a ``MetaInformation`` describing the book opened by ``rdr``.

    The reader's home file is fetched, converted to unicode (encoding
    declarations stripped, entities resolved) and parsed with BeautifulSoup.
    Authors, publisher, ISBN, comments and cover are pulled from that page by
    the ``_get_*`` helpers; the title is ``rdr.title`` decoded with the
    encoding reported by ``rdr.GetEncoding()``.

    :param rdr: reader object providing ``GetFile``, ``home``, ``title`` and
        ``GetEncoding``.
    :return: the populated ``MetaInformation``.
    """
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(
        xml_to_unicode(raw, strip_encoding_pats=True,
                       resolve_entities=True)[0])

    # Use the reader's encoding only if Python knows a codec by that name.
    enc = 'cp1252'
    try:
        reported = rdr.GetEncoding()
        codecs.lookup(reported)
    except Exception:
        # Keep the cp1252 fallback.  Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        pass
    else:
        enc = reported
    title = force_unicode(rdr.title, enc)

    mi = MetaInformation(title, _get_authors(home))
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        # The cover data is always labelled as 'jpg' here.
        mi.cover_data = ('jpg', cdata)

    return mi
Example #4
0
File: odt.py Project: sss/calibre
def get_metadata(stream, extract_cover=True):
    """
    Read metadata from the ODF zip container in ``stream``.

    ``meta.xml`` is parsed with a namespace-aware SAX parser feeding an
    ``odfmetaparser``; the collected ``seenfields`` are copied onto a new
    ``MetaInformation``.  If the ``opf.metadata`` field equals ``'true'`` the
    custom ``opf.*`` fields are applied too, and ``opf.nocover`` can suppress
    the cover lookup.

    :param stream: object accepted by ``zipfile.ZipFile`` for reading.
    :param extract_cover: passed through unchanged to ``read_cover``.
    :return: the populated ``MetaInformation``.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    # Unparseable index: default to the first entry
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data; KeyboardInterrupt/SystemExit are deliberately not caught.
            pass

    return mi
Example #5
0
    def add_annotation_to_library(self, db, db_id, annotation):
        """
        Store a Kindle annotation in the library database ``db``.

        ``kindle_bookmark``: the HTML produced by
        ``generate_annotation_html`` is appended to the comments of book
        ``db_id`` (after cutting any earlier annotation section) and the
        bookmark file is attached to that book.
        ``kindle_clippings``: the clippings file is attached to the book found
        by the ``title:"My Clippings"`` search, or a new book is added when
        the search returns nothing.
        Any other annotation type is ignored.
        """
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        skip_tags = set(['Catalog', 'Clippings'])
        kind = annotation.type

        if kind == 'kindle_bookmark':
            book = db.get_metadata(db_id, index_is_id=True)
            notes_soup = self.generate_annotation_html(annotation.value)
            if not book.comments:
                book.comments = unicode(notes_soup.prettify())
            else:
                # Both offsets are measured on the untouched comments; cutting
                # at each one in turn leaves the text before the earliest hit.
                div_at = book.comments.find('<div class="user_annotations">')
                hr_at = book.comments.find(
                    '<hr class="annotations_divider" />')
                for cut in (div_at, hr_at):
                    if cut >= 0:
                        book.comments = book.comments[:cut]

                if set(book.tags) & skip_tags:
                    return

                # Separate remaining comments from the notes with a divider
                if book.comments:
                    divider = Tag(notes_soup, 'hr')
                    divider['class'] = 'annotations_divider'
                    notes_soup.insert(0, divider)

                book.comments += unicode(notes_soup.prettify())

            # Update library comments
            db.set_comment(db_id, book.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(
                db_id, annotation.value.bookmark_extension,
                annotation.value.path, index_is_id=True)
            return

        if kind != 'kindle_clippings':
            return

        # Find 'My Clippings' author=Kindle in database, or add
        clippings = annotation.value
        stamp = strftime(u'%x %X', clippings['timestamp'].timetuple())
        last_update = 'Last modified %s' % stamp
        found = list(
            db.data.search_getting_ids(
                'title:"My Clippings"', '', sort_results=False))
        if not found:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([clippings['path']], ['txt'], [mi])
            return

        target_id = found[0]
        db.add_format_with_hooks(
            target_id, 'TXT', clippings['path'], index_is_id=True)
        mi = db.get_metadata(target_id, index_is_id=True)
        mi.comments = last_update
        db.set_metadata(target_id, mi)
    def _start_splitmerge(self,book_list, tdir=None, db=None):
        """
        Merge the split files of the usable entries of ``book_list`` into one
        new EPUB and add it to ``db`` as a new book.

        Only entries whose ``'good'`` value is truthy take part.  The merged
        file is written to a persistent temp file created in ``tdir``; after
        the book is added the GUI views are refreshed, word count and auto
        convert are triggered, the FanFicFare lists are updated and ``tdir``
        is removed.
        """
        # logger.debug(book_list)

        merger = self.get_epubmerge_plugin()
        # Called as the original code did; the result is not used here.
        self.get_epubsplit_plugin()

        usable = [entry for entry in book_list if entry['good']]

        outfile = PersistentTemporaryFile(prefix='merge-',
                                          suffix='.epub',
                                          dir=tdir)

        # A single source book keeps its identity; several become an anthology
        if len(usable) != 1:
            title = "New Chapters Anthology"
            authors = ["Various Authors"]
        else:
            only = usable[0]
            title = "New "+only['title']
            authors = only['authors']

        mi = MetaInformation(title, authors)

        per_book_tags = [entry['tags'] for entry in usable]
        combined_tags = []
        for tag_list in per_book_tags:
            combined_tags.extend(tag_list)
        mi.tags = combined_tags

        credits = ["%s by %s"%(entry['title'], ", ".join(entry['authors']))
                   for entry in usable]
        mi.comments = "<p>New Chapters from:</p>" + '<br/>'.join(credits)

        merger.do_merge(outfile,
                        [entry['splittmp'] for entry in usable],
                        authoropts=mi.authors,
                        titleopt=mi.title,
                        descopt=mi.comments,
                        tags=mi.tags,
                        keepmetadatafiles=False,
                        )

        new_id = db.create_book_entry(mi, add_duplicates=True)
        db.add_format_with_hooks(new_id, 'EPUB', outfile, index_is_id=True)

        gui = self.gui
        gui.library_view.model().books_added(1)
        gui.library_view.model().refresh_ids([new_id])
        # gui.iactions['Edit Metadata'].edit_metadata(False)
        gui.tags_view.recount()

        ## run word counts
        gui.iactions['Count Pages'].count_statistics([new_id],['WordCount'])

        ## run auto convert
        gui.iactions['Convert Books'].auto_convert_auto_add([new_id])

        ## add to FFF update lists
        gui.library_view.select_rows([new_id])
        gui.iactions['FanFicFare'].update_lists()

        remove_dir(tdir)
Example #7
0
    def add_annotation_to_library(self, db, db_id, annotation):
        """
        Record ``annotation`` in the library database ``db``.

        A ``kindle_bookmark`` has its rendered HTML appended to the comments
        of book ``db_id`` (replacing any earlier annotation section) and its
        bookmark file attached to the book.  A ``kindle_clippings`` annotation
        has its file attached to the "My Clippings" book, which is added when
        the title search returns nothing.  Other types do nothing.
        """
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        ignore_tags = set(['Catalog', 'Clippings'])
        bm_type = annotation.type
        payload = annotation.value

        if bm_type == 'kindle_clippings':
            # Find 'My Clippings' author=Kindle in database, or add
            when = payload['timestamp'].timetuple()
            last_update = 'Last modified %s' % strftime(u'%x %X', when)
            matches = list(db.data.search_getting_ids(
                'title:"My Clippings"', '', sort_results=False))
            if matches:
                clip_id = matches[0]
                db.add_format_with_hooks(clip_id, 'TXT', payload['path'],
                        index_is_id=True)
                mi = db.get_metadata(clip_id, index_is_id=True)
                mi.comments = last_update
                db.set_metadata(clip_id, mi)
            else:
                mi = MetaInformation('My Clippings', authors=['Kindle'])
                mi.tags = ['Clippings']
                mi.comments = last_update
                db.add_books([payload['path']], ['txt'], [mi])
            return

        if bm_type != 'kindle_bookmark':
            return

        mi = db.get_metadata(db_id, index_is_id=True)
        soup = self.generate_annotation_html(payload)
        if mi.comments:
            # Offsets of a previous annotation block and of its divider, both
            # taken before anything is cut.
            block_start = mi.comments.find('<div class="user_annotations">')
            divider_start = mi.comments.find(
                '<hr class="annotations_divider" />')

            if block_start >= 0:
                mi.comments = mi.comments[:block_start]
            if divider_start >= 0:
                mi.comments = mi.comments[:divider_start]
            if set(mi.tags).intersection(ignore_tags):
                return
            if mi.comments:
                # Put a divider between the kept comments and the notes
                rule = Tag(soup, 'hr')
                rule['class'] = 'annotations_divider'
                soup.insert(0, rule)

            mi.comments += unicode(soup.prettify())
        else:
            mi.comments = unicode(soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, payload.bookmark_extension,
                                 payload.path, index_is_id=True)
Example #8
0
    def add_annotation_to_library(self, db, db_id, annotation):
        """
        Add a Kindle bookmark or clippings annotation to the library ``db``.

        Bookmarks: the annotation HTML is appended to the comments of book
        ``db_id`` (an earlier annotation section is cut off first) and the
        bookmark file is attached to the book.  Clippings: the file is
        attached to the book matching ``title:"My Clippings"``, or a new book
        is added if there is no match.  Other annotation types are ignored.
        """
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        excluded = set(["Catalog", "Clippings"])
        kind = annotation.type

        if kind == "kindle_bookmark":
            entry = db.get_metadata(db_id, index_is_id=True)
            html = self.generate_annotation_html(annotation.value)
            if not entry.comments:
                entry.comments = unicode(html.prettify())
            else:
                # Locate both markers in the untouched comments, then cut
                markers = (
                    '<div class="user_annotations">',
                    '<hr class="annotations_divider" />',
                )
                offsets = [entry.comments.find(marker) for marker in markers]
                for offset in offsets:
                    if offset >= 0:
                        entry.comments = entry.comments[:offset]

                if set(entry.tags).intersection(excluded):
                    return

                if entry.comments:
                    # Divider between the kept comments and the new notes
                    hr = Tag(html, "hr")
                    hr["class"] = "annotations_divider"
                    html.insert(0, hr)

                entry.comments += unicode(html.prettify())

            # Update library comments
            db.set_comment(db_id, entry.comments)

            # Add bookmark file to db_id
            bookmark = annotation.value
            db.add_format_with_hooks(
                db_id, bookmark.bookmark_extension, bookmark.path,
                index_is_id=True)
        elif kind == "kindle_clippings":
            # Find 'My Clippings' author=Kindle in database, or add
            clip = annotation.value
            formatted = strftime(u"%x %X", clip["timestamp"].timetuple())
            last_update = "Last modified %s" % formatted
            hits = list(db.data.search_getting_ids('title:"My Clippings"', ""))
            if not hits:
                entry = MetaInformation("My Clippings", authors=["Kindle"])
                entry.tags = ["Clippings"]
                entry.comments = last_update
                db.add_books([clip["path"]], ["txt"], [entry])
            else:
                first = hits[0]
                db.add_format_with_hooks(
                    first, "TXT", clip["path"], index_is_id=True)
                entry = db.get_metadata(first, index_is_id=True)
                entry.comments = last_update
                db.set_metadata(first, entry)
Example #9
0
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Fields that are empty or contain the word "unknown" (and, for the
    publisher, "some publisher") are reset to C{None}.

    @param stream: A file like object or an instance of L{LRFMetaFile}
    @return: the populated L{MetaInformation}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    # Category and classification are combined; both also feed the tags.
    mi.category = lrf.category.strip() + ', ' + lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        # Both parts were empty, only the separator is left
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()

    # The reading (sort) fields are best effort: any failure reading them
    # is ignored.  Exception rather than a bare except, so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    for target, source in (('title_sort', 'title_reading'),
                           ('author_sort', 'author_reading')):
        try:
            setattr(mi, target, getattr(lrf, source).strip())
            if not getattr(mi, target):
                setattr(mi, target, None)
        except Exception:
            pass

    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
Example #10
0
File: meta.py Project: Farb/calibre
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Title, author, category and publisher are reset to C{None} when they are
    empty or contain "unknown" (the publisher also for "some publisher").

    @param stream: A file like object or an instance of L{LRFMetaFile}
    @return: the populated L{MetaInformation}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
    # Every non-empty comma separated part of the category becomes a tag
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        # category and classification were both empty
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        # Best effort only; KeyboardInterrupt/SystemExit still propagate
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        # Best effort only; KeyboardInterrupt/SystemExit still propagate
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
Example #11
0
def get_metadata(stream):
    """
    Return metadata as a L{MetaInformation} object

    The stream must start with the RTF header C{{\\rtf}} and contain a
    document info block; otherwise a L{MetaInformation} with the title
    "Unknown" is returned.  Title, author, comment, tags and publisher are
    searched in the info block and decoded with the detected codepage.
    """
    stream.seek(0)
    # Bytes literal: the values read here are later passed to decode(), so
    # the stream is expected to yield bytes.  On Python 3 a str literal would
    # never compare equal to them.  On Python 2 the two literals are the same.
    if stream.read(5) != br'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    title_match = title_pat.search(block)
    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
    else:
        title = _('Unknown')
    author_match = author_pat.search(block)
    if author_match is not None:
        author = decode(author_match.group(1).strip(), cpg)
    else:
        author = None
    mi = MetaInformation(title)
    if author:
        mi.authors = string_to_authors(author)

    comment_match = comment_pat.search(block)
    if comment_match is not None:
        comment = decode(comment_match.group(1).strip(), cpg)
        mi.comments = comment
    tags_match = tags_pat.search(block)
    if tags_match is not None:
        tags = decode(tags_match.group(1).strip(), cpg)
        # Comma separated; empty entries are dropped
        mi.tags = list(filter(None, (x.strip() for x in tags.split(','))))
    publisher_match = publisher_pat.search(block)
    if publisher_match is not None:
        publisher = decode(publisher_match.group(1).strip(), cpg)
        mi.publisher = publisher

    return mi
Example #12
0
def get_metadata(stream):
    """
    Return metadata as a L{MetaInformation} object

    The stream must start with the RTF header C{{\\rtf}} and contain a
    document info block; otherwise a L{MetaInformation} with the title
    "Unknown" is returned.  Title, author, comment, tags and publisher are
    searched in the info block and decoded with the detected codepage.
    """
    stream.seek(0)
    if stream.read(5) != br'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    title_match = title_pat.search(block)
    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
    else:
        title = _('Unknown')
    author_match = author_pat.search(block)
    if author_match is not None:
        author = decode(author_match.group(1).strip(), cpg)
    else:
        author = None
    mi = MetaInformation(title)
    if author:
        # Comma separated list; drop empty names (e.g. from a trailing or
        # doubled comma), the same way the tags below are filtered.
        authors = [x.strip() for x in author.split(',') if x.strip()]
        if authors:
            mi.authors = authors

    comment_match = comment_pat.search(block)
    if comment_match is not None:
        comment = decode(comment_match.group(1).strip(), cpg)
        mi.comments = comment
    tags_match = tags_pat.search(block)
    if tags_match is not None:
        tags = decode(tags_match.group(1).strip(), cpg)
        mi.tags = list(filter(None, (x.strip() for x in tags.split(','))))
    publisher_match = publisher_pat.search(block)
    if publisher_match is not None:
        publisher = decode(publisher_match.group(1).strip(), cpg)
        mi.publisher = publisher

    return mi
Example #13
0
    def get_metadata(self):
        """
        Build a ``MetaInformation`` from the values currently held by this
        object's input fields.

        Title and authors come from ``get_title_and_authors()``.  Publisher,
        author sort, comments, series and tags are only set when non-empty;
        ``series_index`` is always set.

        :return: the populated ``MetaInformation``.
        """
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
        publisher = unicode(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
        comments = self.comment.html
        if comments:
            mi.comments = comments
        mi.series_index = float(self.series_index.value())
        series = unicode(self.series.currentText()).strip()
        if series:
            mi.series = series
        # Splitting an empty string yields [''], which is truthy, so empty
        # entries must be filtered out or a blank field produces the tag ''.
        tags = [t.strip() for t in unicode(self.tags.text()).split(',')
                if t.strip()]
        if tags:
            mi.tags = tags

        return mi
Example #14
0
    def get_metadata(self):
        """
        Build a ``MetaInformation`` from the values currently held by this
        object's input fields.

        Title and authors come from ``get_title_and_authors()``.  Publisher,
        author sort, comments, series and tags are only set when non-empty;
        ``series_index`` is always set.

        :return: the populated ``MetaInformation``.
        """
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
        publisher = str(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
        author_sort = str(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
        comments = self.comment.html
        if comments:
            mi.comments = comments
        mi.series_index = float(self.series_index.value())
        series = str(self.series.currentText()).strip()
        if series:
            mi.series = series
        # ''.split(',') is [''], which is truthy: drop empty entries so that
        # a blank field (or a stray comma) does not create an empty tag.
        raw_tags = str(self.tags.text()).split(',')
        tags = [t.strip() for t in raw_tags if t.strip()]
        if tags:
            mi.tags = tags

        return mi
Example #15
0
def get_metadata(stream):
    """
    Read metadata from the ODF zip container in ``stream``.

    ``meta.xml`` is parsed with a namespace-aware SAX parser feeding an
    ``odfmetaparser``; title, authors, description, language and keywords
    from its ``seenfields`` are copied onto a new ``MetaInformation``.

    :param stream: object accepted by ``zipfile.ZipFile`` for reading.
    :return: the populated ``MetaInformation``.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if 'title' in data:
        mi.title = data['title']
    # Prefer a non-blank initial-creator; otherwise fall back to creator.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        # Strip whitespace around each keyword and drop empty entries
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]

    return mi
Example #16
0
 def populate(self, entries, browser, verbose=False, api_key=''):
     """
     Append one ``MetaInformation`` to ``self`` for every usable entry.

     For each entry the id URL and title are read first; the detailed
     record is then fetched from the id URL through ``browser`` and, when
     that succeeds, used for the remaining fields.  If the fetch fails the
     fields are read from the original entry instead.

     :param entries: iterable of feed entries.
     :param browser: object whose ``open(url).read()`` returns the feed.
     :param verbose: passed to ``report`` and the ``get_*`` helpers; also
         enables the failure messages printed here.
     :param api_key: when non-empty, appended to the id URL as ``?apikey=``.
     """
     for x in entries:
         try:
             id_url = entry_id(x)[0].text
             title = self.get_title(x)
         except Exception:
             # Without an id URL and a title nothing below can work (the
             # names would be undefined, or left over from the previous
             # entry), so report the problem and skip this entry.
             report(verbose)
             continue
         mi = MetaInformation(title, self.get_authors(x))
         try:
             if api_key != '':
                 id_url = id_url + "?apikey=" + api_key
             raw = browser.open(id_url).read()
             feed = etree.fromstring(raw)
             x = entry(feed)[0]
         except Exception as e:
             # Keep going with the summary entry already held in ``x``
             if verbose:
                 print('Failed to get all details for an entry')
                 print(e)
         mi.comments = self.get_description(x, verbose)
         mi.tags = self.get_tags(x, verbose)
         mi.isbn = self.get_isbn(x, verbose)
         mi.publisher = self.get_publisher(x, verbose)
         mi.pubdate = self.get_date(x, verbose)
         self.append(mi)
Example #17
0
File: odt.py Project: Eksmo/calibre
def get_metadata(stream):
    """
    Read metadata from the ODF zip container in ``stream``.

    The archive member ``meta.xml`` is run through a namespace-aware SAX
    parser whose content handler is an ``odfmetaparser``.  The fields that
    handler collected (``seenfields``) supply title, authors, description,
    language and keywords for a new ``MetaInformation``.

    :param stream: object accepted by ``zipfile.ZipFile`` for reading.
    :return: the populated ``MetaInformation``.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # Membership tests use ``in``: dict.has_key() is gone on Python 3.
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    keywords = data.get('keywords', '')
    if keywords:
        # Keywords are comma separated: trim each one and skip blanks so
        # "a, b," gives ['a', 'b'] instead of ['a', ' b', ''].
        mi.tags = [x.strip() for x in keywords.split(',') if x.strip()]

    return mi
Example #18
0
def _find_field_(src, key, meta_names, backref=False):
    """
    Return ``(value, from_comment)`` for one metadata field in ``src``.

    An HTML comment of the form ``<!-- ... KEY="value" ... -->`` is searched
    first.  With ``backref`` the closing quote must be the same character as
    the opening one; otherwise the value ends at the first quote of either
    kind.  If no comment matches, the patterns built by ``get_meta_regexp_``
    for ``meta_names`` are tried in order.  ``value`` is ``None`` when
    nothing matched.
    """
    if backref:
        pat = re.compile(
            r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key, re.DOTALL)
        group = 2
    else:
        pat = re.compile(
            r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key, re.DOTALL)
        group = 1
    match = pat.search(src)
    if match:
        return match.group(group), True
    for name in meta_names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1), False
    return None, False


def get_metadata_(src, encoding=None):
    """
    Extract metadata from the HTML source ``src``.

    Each field is looked up first in an HTML comment (``<!-- TITLE="..." -->``
    and so on) and then in the patterns returned by ``get_meta_regexp_`` for
    a list of meta names.  Only the first 150000 characters are searched.

    :param src: HTML source; decoded first when it is not ``unicode``.
    :param encoding: encoding used for decoding (with ``"replace"``); when
        empty, ``xml_to_unicode`` is used instead.
    :return: a ``MetaInformation`` holding whatever fields were found.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    # Title
    title = _find_field_(
        src, "TITLE", ("DC.title", "DCTERMS.title", "Title"), backref=True)[0]
    if not title:
        # Last resort: the document's <title> element
        match = re.compile(
            "<title>([^<>]+?)</title>", re.IGNORECASE).search(src)
        if match:
            title = match.group(1)

    # Author
    author, from_comment = _find_field_(
        src, "AUTHOR",
        ("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"),
        backref=True)
    if from_comment:
        # Only the comment form has its commas turned into semicolons
        author = author.replace(",", ";")

    # Create MetaInformation with Title and Author
    # NOTE(review): the group in this pattern is optional and greedy, so
    # "&;" matches with group(1) == None -- confirm entity_to_unicode copes.
    ent_pat = re.compile(r"&(\S+)?;")
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = _find_field_(
        src, "PUBLISHER",
        ("Publisher", "DC.publisher", "DCTERMS.publisher"), backref=True)[0]
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = _find_field_(
        src, "ISBN",
        ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"))[0]
    if isbn:
        # Keep only digits and the X check character
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = _find_field_(
        src, "LANGUAGE", ("DC.language", "DCTERMS.language"))[0]
    if language:
        mi.language = language

    # PUBDATE
    pubdate = _find_field_(
        src, "PUBDATE",
        (
            "Pubdate",
            "Date of publication",
            "DC.date.published",
            "DC.date.publication",
            "DC.date.issued",
            "DCTERMS.issued",
        ))[0]
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # An unparseable date is ignored
            pass

    # TIMESTAMP
    timestamp = _find_field_(
        src, "TIMESTAMP",
        ("Timestamp", "Date of creation", "DC.date.created",
         "DC.date.creation", "DCTERMS.created"))[0]
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # An unparseable date is ignored
            pass

    # SERIES
    series = _find_field_(src, "SERIES", ("Series",))[0]
    if series:
        # An index may be embedded in the series name as "[n]"
        series_index = None
        match = re.compile(r"\[([.0-9]+)\]").search(series)
        if match is not None:
            try:
                series_index = float(match.group(1))
            except (TypeError, ValueError):
                pass
            series = series.replace(match.group(), "").strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = _find_field_(src, "RATING", ("Rating",))[0]
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            # Values above 5 are halved once; if still above 5 they are
            # discarded.
            if mi.rating > 5:
                mi.rating /= 2.0
            if mi.rating > 5:
                mi.rating = 0
        except Exception:
            pass

    # COMMENTS
    comments = _find_field_(src, "COMMENTS", ("Comments",))[0]
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = _find_field_(src, "TAGS", ("Tags",))[0]
    if tags:
        # Comma separated; blank entries are dropped
        mi.tags = [
            x.strip()
            for x in ent_pat.sub(entity_to_unicode, tags).split(",")
            if x.strip()
        ]

    # Ready to return MetaInformation
    return mi
Example #19
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle = changedtocs[linenums[0]][0]  # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle = unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle, defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags  # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi, add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in six.iteritems(prefs['custom_cols']):
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                           'EpubSplit Source Template Error',
                                           misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]" % self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        self.book_count = self.book_count + 1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _(
            'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.'
        )
        if prefs['editmetadata']:
            confirm(
                '\n' +
                _('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.'''
                  ) + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again', self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'),
                                             60000)

            mi = db.get_metadata(book_id, index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub,
                                     index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'),
                                             3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
        finally:
            QApplication.restoreOverrideCursor()

        if not prefs['editmetadata']:
            confirm(
                '<p>' + '</p><p>'.join([
                    _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.'
                      ) % (mi.title, ', '.join(mi.authors)),
                    _('EpubSplit now skips the Edit Metadata step by default.'
                      ), editconfig_txt
                ]) + '</p>', 'epubsplit_created_now_no_edit_again', self.gui)
Example #20
0
    def _start_merge(self, book_list):
        db = self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = filter(lambda x: not x['good'], book_list)
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.') % len(bad_list),
                             det_msg='\n'.join(
                                 map(lambda x: x['error'], bad_list)))
            d.exec_()
        else:
            d = OrderEPUBsDialog(
                self.gui,
                _('Order EPUBs to Merge'),
                prefs,
                self.qaction.icon(),
                book_list,
            )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()

            logger.debug("2:%s" % (time.time() - self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword'])
            mi = MetaInformation(deftitle, ["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = map(lambda x: x['series'],
                             filter(lambda x: x['series'] != None, book_list))
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = map(lambda x: x['authors'], book_list)
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set(map(lambda x: x['publisher'], book_list))
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = map(lambda x: x['tags'], book_list)
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = map(lambda x: x['languages'], book_list)
            mi.languages = [
                item for sublist in languageslists for item in sublist
            ]

            mi.series = ''

            # ======================= make book comments =========================

            if len(mi.authors) > 1:
                booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join(
                    x['authors']))
            else:
                booktitle = lambda x: x['title']

            mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword']

            if prefs['includecomments']:

                def bookcomments(x):
                    if x['comments']:
                        return '<b>%s</b>\n\n%s' % (booktitle(x),
                                                    x['comments'])
                    else:
                        return '<b>%s</b>\n' % booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join(
                                    [bookcomments(x)
                                     for x in book_list]) + '</div>')
            else:
                mi.comments += '\n'.join([booktitle(x) for x in book_list])

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi, add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s" % (time.time() - self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = map(lambda x: x['calibre_id'], book_list)

            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in prefs['custom_cols'].iteritems():
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping." % col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug(
                        "%s not a valid column type for %s, skipping." %
                        (col, action))
                    continue

                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first', 'last']:
                    value = db.get_custom(idslist[idx],
                                          label=label,
                                          index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]" % (
                            value,
                            db.get_custom_extra(
                                idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add', 'average', 'averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    if found and action in ('average', 'averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5  # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(
                                    db.get_custom(bid,
                                                  label=label,
                                                  index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if found and value != None:
                    db.set_custom(book_id, value, label=label, commit=False)

            db.commit()

            logger.debug("4:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()

            confirm(
                '\n' +
                _('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''
                  ), 'epubmerge_created_now_edit_again', self.gui)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum(map(lambda x: x['epub_size'], book_list))

            logger.debug("merging %s EPUBs totaling %s" %
                         (len(book_list), gethumanreadable(totalsize)))
            if len(book_list) > 100 or totalsize > 5 * 1024 * 1024:
                confirm(
                    '\n' +
                    _('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.'''
                      ) % (len(book_list), gethumanreadable(totalsize)),
                    'epubmerge_edited_now_merge_again', self.gui)

            self.gui.status_bar.show_message(
                _('Merging %s EPUBs...') % len(book_list), 60000)

            mi = db.get_metadata(book_id, index_is_id=True)

            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = map(lambda x: x['epub'], book_list)

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            self.do_merge(mergedepub,
                          epubstomerge,
                          authoropts=mi.authors,
                          titleopt=mi.title,
                          descopt=mi.comments,
                          tags=mi.tags,
                          languages=mi.languages,
                          titlenavpoints=prefs['titlenavpoints'],
                          flattentoc=prefs['flattentoc'],
                          printtimes=True,
                          coverjpgpath=coverjpgpath,
                          keepmetadatafiles=prefs['keepmeta'])

            logger.debug("6:%s" % (time.time() - self.t))
            logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name)
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub,
                                     index_is_id=True)

            logger.debug("7:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(
                _('Finished merging %s EPUBs.') % len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
Example #21
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  newspecs,
                  deftitle=None,
                  editmeta=True):
        """Create a new library book holding the selected part of an EPUB.

        A book entry is created with metadata copied from the source book
        according to ``prefs``. The split EPUB is then written to a temporary
        file and attached to the new entry as its EPUB format.

        Args:
            db: Library database. Used here for ``create_book_entry``,
                ``cover``/``set_cover``, ``get_custom``/``set_custom``,
                ``commit``, ``get_metadata`` and ``add_format_with_hooks``.
            source_id: Id of the book being split; used to read its cover
                and custom column values.
            misource: Metadata object of the source book.
            splitepub: Object whose ``write_split_epub`` produces the output.
            newspecs: Pair ``(linenums, changedtocs)``.
            deftitle: Optional title for the new book. When falsy and
                ``prefs['copytitle']`` is set, one is built from the source
                title.
            editmeta: Currently unused. The edit-metadata confirmation step
                it would control is disabled in this version; the parameter
                is kept so existing callers keep working.

        Returns:
            None.
        """
        linenums, changedtocs = newspecs

        self.t = time.time()

        defauthors = None

        if not deftitle and prefs['copytitle']:
            deftitle = _("نمونه %s") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle, defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = _("Split from:") + "\n\n" + misource.comments

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        self.t = time.time()

        # Carry over every configured custom column that exists in this
        # library and has a truthy value on the source book. Only the column
        # names are needed, so iterate the mapping's keys directly; this
        # works on both Python 2 and Python 3, unlike ``.iteritems()``.
        custom_columns = self.gui.library_view.model().custom_columns
        for col in prefs['custom_cols']:
            if col not in custom_columns:
                continue

            coldef = custom_columns[col]
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        self.t = time.time()

        # Optionally record where the split came from in a custom column.
        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error',
                                           misource)
            label = custom_columns[prefs['sourcecol']]['label']
            db.set_custom(book_id, val, label=label, commit=False)

        db.commit()

        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        self.t = time.time()

        # NOTE: no confirmation / Edit Metadata dialog is shown here; the
        # EPUB is generated straight away.

        self.t = time.time()
        self.gui.tags_view.recount()

        self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

        # Re-read the metadata as stored in the database.
        mi = db.get_metadata(book_id, index_is_id=True)

        outputepub = PersistentTemporaryFile(suffix='.epub')

        # No cover path is handed to the writer in this version, even when
        # the new book has a cover.
        coverjpgpath = None

        splitepub.write_split_epub(outputepub,
                                   linenums,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)

        self.t = time.time()
        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 outputepub, index_is_id=True)

        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
Example #22
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle=changedtocs[linenums[0]][0] # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle=unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle,defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>"+_("Split from:")+"</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id,index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in prefs['custom_cols'].iteritems():
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id,value,label=label,commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]"%self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        self.book_count = self.book_count+1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
        if prefs['editmetadata']:
            confirm('\n'+_('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.''')+'\n\n'+
                    editconfig_txt+'\n',
                    'epubsplit_created_now_edit_again',
                    self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub, index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
        finally:
            QApplication.restoreOverrideCursor()

        if not prefs['editmetadata']:
            confirm('<p>'+
                    '</p><p>'.join([_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')%(mi.title,', '.join(mi.authors)),
                                   _('EpubSplit now skips the Edit Metadata step by default.'),
                                   editconfig_txt])+
                    '</p>',
                    'epubsplit_created_now_no_edit_again',
                    self.gui)
Example #23
0
def get_metadata(stream, extract_cover=True):
    """Build a MetaInformation object from an OpenDocument file.

    ``stream`` is opened as a zip archive and its ``meta.xml`` member is
    parsed.  Standard fields (title, creator, description, language,
    keywords) are read first; ``user-defined`` entries whose names start
    with ``opf.`` can then override or extend them.  Unless the document
    sets ``opf.nocover``, ``read_cover`` is attempted on a best-effort
    basis, receiving ``extract_cover`` unchanged.
    """
    ws_run = re.compile(r'\s+')

    def collapse(text):
        # Squash every whitespace run to a single space and trim the ends.
        return ws_run.sub(' ', text).strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Normalized text of the first element registered for ``field``
            # in the module-level ``fields`` table, or None when absent.
            namespace, local_name = fields[field]
            hits = root.xpath('//ns0:{}'.format(local_name),
                              namespaces={'ns0': namespace})
            if not hits:
                return None
            text = tostring(hits[0], method='text', encoding='unicode',
                            with_tail=False)
            return collapse(text)

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        description = find('description')
        if description:
            mi.comments = description

        language = find('language')
        if language:
            canonical = canonicalize_lang(language)
            if canonical:
                mi.languages = [canonical]

        keywords = find('keyword') or find('keywords')
        if keywords:
            stripped = (piece.strip() for piece in keywords.split(','))
            mi.tags = [piece for piece in stripped if piece]

        # Gather the user-defined properties, keyed by lower-cased name.
        name_attr = '{%s}name' % METANS
        type_attr = '{%s}value-type' % METANS
        data = {}
        for element in root.xpath('//ns0:user-defined',
                                  namespaces={'ns0': fields['user-defined'][0]}):
            key = (element.get(name_attr) or '').lower()
            value = element.text
            if not (key and value):
                continue
            if (element.get(type_attr) or 'string') == 'boolean':
                value = value == 'true'
            data[key] = value

        # Both flags are needed later when deciding how to read the cover.
        opfmeta = bool(data.get('opf.metadata'))
        opfnocover = False
        if opfmeta:
            # custom metadata contains OPF information
            title_sort = data.get('opf.titlesort')
            if title_sort:
                mi.title_sort = title_sort

            opf_authors = data.get('opf.authors')
            if opf_authors:
                mi.authors = string_to_authors(opf_authors)

            author_sort = data.get('opf.authorsort')
            if author_sort:
                mi.author_sort = author_sort

            raw_isbn = data.get('opf.isbn')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher')
            if publisher:
                mi.publisher = publisher

            pubdate = data.get('opf.pubdate')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp into the 0..10 range.
                    mi.rating = max(0, min(float(rating), 10))
                except Exception:
                    pass

            series = data.get('opf.series')
            if series:
                mi.series = series
                series_index = data.get('opf.seriesindex')
                if series_index:
                    try:
                        mi.series_index = float(series_index)
                    except Exception:
                        mi.series_index = 1.0

            opf_language = data.get('opf.language')
            if opf_language:
                cl = canonicalize_lang(opf_language)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #24
0
    def _start_merge(self,book_list):
        """Create a merged EPUB from ``book_list`` and add it to the library.

        ``book_list`` is a sequence of dicts, one per source book.  The keys
        read here are 'good', 'error', 'title', 'series', 'authors',
        'publisher', 'tags', 'languages', 'comments', 'calibre_id',
        'epub_size' and 'epub'.

        If any entry has a false 'good' value an error dialog is shown and
        nothing else happens.  Otherwise the user orders the books, a new
        book entry is created with metadata combined from the sources
        (including custom columns as configured in prefs['custom_cols']),
        the Edit Metadata dialog is opened, and finally the merge is run with
        the (possibly edited) metadata and the result attached as the EPUB
        format of the new entry.

        NOTE(review): the results of filter()/map() are passed to len() and
        indexed, and dict.iteritems() is used, so this looks like Python 2
        code -- confirm before running under Python 3.
        """
        db=self.gui.current_db
        # Remembered so current_changed() can be told the previous index at the end.
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = filter(lambda x : not x['good'], book_list)
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.')%len(bad_list),
                             det_msg='\n'.join(map(lambda x : x['error'] , bad_list)))
            d.exec_()
        else:
            # Let the user choose the order in which the books are merged.
            d = OrderEPUBsDialog(self.gui,
                                 _('Order EPUBs to Merge'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
            d.exec_()
            if d.result() != d.Accepted:
                return

            # From here on book_list is in the user's chosen order.
            book_list = d.get_books()

            # Timing output: seconds elapsed since self.t was last reset.
            print("2:%s"%(time.time()-self.t))
            self.t = time.time()

            # Default title: first book's title followed by the configured merge word.
            deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
            mi = MetaInformation(deftitle,["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list))
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        # A differing series name was found: fall back to the default title.
                        mi.title = deftitle;
                        break

            # print("======================= mi.title:\n%s\n========================="%mi.title)

            # Collect authors from every book, keeping first-seen order and dropping duplicates.
            mi.authors = list()
            authorslists = map(lambda x : x['authors'], book_list)
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # print("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

            # print("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set(map(lambda x : x['publisher'], book_list))
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # print("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            # Tags: every tag from every book (duplicates are not removed here)
            # plus the comma-separated tags configured in prefs['mergetags'].
            tagslists = map(lambda x : x['tags'], book_list)
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # print("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # print("======================= m.tags:\n%s\n========================="%mi.tags)

            # Languages: flattened list from all books (duplicates are not removed here).
            languageslists = map(lambda x : x['languages'], book_list)
            mi.languages = [item for sublist in languageslists for item in sublist]

            mi.series = ''

            # ======================= make book comments =========================

            # With more than one distinct author overall, label each book "title by authors".
            if len(mi.authors) > 1:
                booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
            else:
                booktitle = lambda x : x['title']

            mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword']

            if prefs['includecomments']:
                def bookcomments(x):
                    # Bold title, followed by the book's own comments when it has any.
                    if x['comments']:
                        return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments'])
                    else:
                        return '<b>%s</b>\n'%booktitle(x)

                # One <div class="mergedbook"> per book, with an <hr> between books.
                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                                '</div>')
            else:
                mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] )

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            print("3:%s"%(time.time()-self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = map(lambda x : x['calibre_id'], book_list)

            custom_columns = self.gui.library_view.model().custom_columns
            # prefs['custom_cols'] maps a column key to the action used to combine its values.
            for col, action in prefs['custom_cols'].iteritems():
                #print("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    print("%s not an existing column, skipping."%col)
                    continue

                coldef = custom_columns[col]
                #print("coldef:%s"%coldef)

                # NOTE(review): the message is formatted with (col, action); the
                # wording suggests the action is what is invalid -- confirm order.
                if action not in permitted_values[coldef['datatype']]:
                    print("%s not a valid column type for %s, skipping."%(col,action))
                    continue

                label = coldef['label']

                # 'found' becomes True once at least one source value was read;
                # the column is only written when found is True and value is not None.
                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first','last']:
                    value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add','average','averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    # found implies count >= 1, so this division cannot be by zero.
                    if found and action in ('average','averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5 # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    # Keep the largest value seen across the books.
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    # Keep the smallest value seen across the books.
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        # Single-valued column: fall through to the 'concat' branch below.
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    # Space-separated concatenation of every book's value.
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if found and value != None:
                    # commit=False: all custom column writes are committed together below.
                    db.set_custom(book_id,value,label=label,commit=False)

            db.commit()

            print("4:%s"%(time.time()-self.t))
            self.t = time.time()

            # Tell the library view about the new book and select it so that the
            # Edit Metadata action below is invoked with it selected.
            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            print("5:%s"%(time.time()-self.t))
            self.t = time.time()

            confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                    'epubmerge_created_now_edit_again',
                    self.gui)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            # NOTE(review): this timing label repeats "5:" from the step above.
            print("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum(map(lambda x : x['epub_size'], book_list))

            print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
            # Show a notice before large merges (more than 100 books or more than 5 MiB in total).
            if len(book_list) > 100 or totalsize > 5*1024*1024:
                confirm('\n'+_('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
                        'epubmerge_edited_now_merge_again',
                        self.gui)

            self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

            # Re-read the metadata from the db so the merge uses what is stored
            # after the Edit Metadata step rather than the defaults built above.
            mi = db.get_metadata(book_id,index_is_id=True)

            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = map(lambda x : x['epub'] , book_list)

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

            self.do_merge( mergedepub,
                           epubstomerge,
                           authoropts=mi.authors,
                           titleopt=mi.title,
                           descopt=mi.comments,
                           tags=mi.tags,
                           languages=mi.languages,
                           titlenavpoints=prefs['titlenavpoints'],
                           flattentoc=prefs['flattentoc'],
                           printtimes=True,
                           coverjpgpath=coverjpgpath,
                           keepmetadatafiles=prefs['keepmeta'] )

            print("6:%s"%(time.time()-self.t))
            print(_("Merge finished, output in:\n%s")%mergedepub.name)
            self.t = time.time()
            # Attach the merged file to the new book entry as its EPUB format.
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub, index_is_id=True)

            print("7:%s"%(time.time()-self.t))
            self.t = time.time()

            # Refresh the GUI so the new format and metadata are displayed.
            self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
Example #25
0
def get_metadata_(src, encoding=None):
    """Extract book metadata from the source text of an HTML document.

    Each field is looked up in two places, in this order:

    1. an HTML comment of the form ``<!-- ... FIELD="value" ... -->``
    2. ``<meta>`` tags, matched with the patterns built by
       ``get_meta_regexp_`` for a list of accepted names

    The title additionally falls back to the ``<title>`` element.

    :param src: the document, as text or as bytes
    :param encoding: encoding used to decode ``src`` when it is bytes; when
        omitted, ``xml_to_unicode`` is used to decode it instead
    :return: a ``MetaInformation`` object holding whatever fields were found
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long
    ent_pat = re.compile(r'&(\S+)?;')

    def unescape(text):
        # Replace entity references using entity_to_unicode.
        return ent_pat.sub(entity_to_unicode, text)

    def comment_value(key, paired_quotes=False):
        # Value of KEY="..." inside an HTML comment, or None.  With
        # paired_quotes the closing quote must be the same character as the
        # opening one, so the value may contain the other quote character.
        if paired_quotes:
            template = r'<!--.*?%s=(?P<q>[\'"])(?P<value>.+?)(?P=q).*?-->'
        else:
            template = r'<!--.*?%s=[\'"](?P<value>[^"\']+)[\'"].*?-->'
        found = re.compile(template % key, re.DOTALL).search(src)
        if found is None:
            return None
        return found.group('value')

    def meta_value(*names):
        # Content of the first meta tag matching one of ``names``, or None.
        for name in names:
            found = get_meta_regexp_(name).search(src)
            if found:
                return found.group(1)
        return None

    def lookup(key, names, paired_quotes=False):
        # Comment form first, then the meta tags.
        value = comment_value(key, paired_quotes)
        if value is None:
            value = meta_value(*names)
        return value

    # Title
    title = lookup('TITLE', ('DC.title', 'DCTERMS.title', 'Title'),
                   paired_quotes=True)
    if not title:
        found = re.compile('<title>([^<>]+?)</title>',
                           re.IGNORECASE).search(src)
        if found:
            title = found.group(1)

    # Author
    author = comment_value('AUTHOR', paired_quotes=True)
    if author is not None:
        # Only the comment form has its commas rewritten to semicolons.
        author = author.replace(',', ';')
    else:
        author = meta_value('Author', 'DC.creator.aut', 'DCTERMS.creator.aut',
                            'DC.creator')

    # Create MetaInformation with Title and Author
    if title:
        title = unescape(title)
    if author:
        author = unescape(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup('PUBLISHER',
                       ('Publisher', 'DC.publisher', 'DCTERMS.publisher'),
                       paired_quotes=True)
    if publisher:
        mi.publisher = unescape(publisher)

    # ISBN
    isbn = lookup('ISBN',
                  ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))
    if isbn:
        # Keep only digits and the X check character.
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = lookup('LANGUAGE', ('DC.language', 'DCTERMS.language'))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup('PUBDATE',
                     ('Pubdate', 'Date of publication', 'DC.date.published',
                      'DC.date.publication', 'DC.date.issued',
                      'DCTERMS.issued'))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            pass  # best effort: an unparseable date is simply not recorded

    # TIMESTAMP
    timestamp = lookup('TIMESTAMP',
                       ('Timestamp', 'Date of creation', 'DC.date.created',
                        'DC.date.creation', 'DCTERMS.created'))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            pass  # best effort: an unparseable date is simply not recorded

    # SERIES
    series = lookup('SERIES', ('Series',))
    if series:
        # The index may be embedded in the series value as "[N]".
        series_index = None
        bracketed = re.compile(r'\[([.0-9]+)\]').search(series)
        if bracketed is not None:
            try:
                series_index = float(bracketed.group(1))
            except (TypeError, ValueError):
                pass
            series = series.replace(bracketed.group(), '').strip()

        mi.series = unescape(series)
        if series_index is None:
            raw_index = meta_value('Seriesnumber')
            if raw_index is not None:
                try:
                    series_index = float(raw_index)
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup('RATING', ('Rating',))
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            value = None
        if value is not None:
            if value < 0:
                value = 0
            if value > 5:
                # Values above 5 are halved once ...
                value /= 2.
            if value > 5:
                # ... and reset to 0 if they are still above 5.
                value = 0
            mi.rating = value

    # COMMENTS
    comments = lookup('COMMENTS', ('Comments',))
    if comments:
        mi.comments = unescape(comments)

    # TAGS
    tags = lookup('TAGS', ('Tags',))
    if tags:
        # Skip empty names such as those produced by "a,,b" or a trailing comma.
        mi.tags = [
            x.strip() for x in unescape(tags).split(",") if x.strip()
        ]

    # Ready to return MetaInformation
    return mi
Example #26
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Derive metadata from a file name using a regular expression.

    The extension (everything from the last ``.``) is dropped and
    underscores become spaces before matching.  The pattern may define any
    of the named groups ``title``, ``author``, ``series``, ``series_index``,
    ``isbn``, ``publisher``, ``published`` and ``comments``; groups that the
    pattern does not define are skipped.

    :param name: file name, as text or as bytes (bytes are decoded with
        ``filesystem_encoding``)
    :param pat: compiled pattern to search with; defaults to the pattern
        stored in ``prefs['filename_pattern']``
    :param fallback_pat: compiled pattern tried when ``pat`` does not match
    :return: a ``MetaInformation`` object; when no title was found the
        cleaned-up file name is used as the title
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    def swap(author):
        # Move the last part of the name to the front: split on the first
        # comma when there is one, otherwise on the first whitespace run.
        if ',' in author:
            parts = author.split(',', 1)
        else:
            parts = author.split(None, 1)
        if len(parts) > 1:
            parts = [parts[-1]] + parts[:-1]
        return ' '.join(parts)

    def copy_group(group, attr, ignored):
        # match.group() raises IndexError when the pattern has no group of
        # that name; such fields are simply left unset.
        try:
            setattr(mi, attr, match.group(group))
        except ignored:
            pass

    if match is not None:
        copy_group('title', 'title', IndexError)
        try:
            aus = string_to_authors(match.group('author'))
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        copy_group('series', 'series', IndexError)
        try:
            mi.series_index = float(match.group('series_index'))
        except (IndexError, ValueError, TypeError):
            # TypeError: the group exists but did not take part in the match.
            pass
        copy_group('isbn', 'isbn', (IndexError, ValueError))
        copy_group('publisher', 'publisher', (IndexError, ValueError))
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: a missing group or unparseable date leaves pubdate
            # unset, but KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        copy_group('comments', 'comments', (IndexError, ValueError))

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #27
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a MetaInformation object from the fields encoded in a file name.

    The extension is stripped from ``name``, underscores are replaced by
    spaces, and the result is searched with ``pat``. Named groups present in
    the match (``title``, ``author``, ``series``, ``series_index``, ``isbn``,
    ``publisher``, ``published``, ``comments``) are copied to the matching
    metadata fields; groups the pattern does not define are skipped.

    :param name: File name (text or bytes). Bytes are decoded with
        ``filesystem_encoding``, replacing undecodable bytes.
    :param pat: Compiled regular expression to search with. When ``None``
        the ``filename_pattern`` preference is compiled and used.
    :param fallback_pat: Optional compiled pattern tried when ``pat`` does
        not match.
    :return: The populated MetaInformation. Its title falls back to the
        cleaned-up file name when no title could be extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    # Strip the extension. rpartition() returns ('', '', name) when there is
    # no dot at all, so only keep the head when a separator was really found;
    # otherwise an extension-less name would collapse to the empty string.
    stem, dot, _ext = name.rpartition('.')
    if dot:
        name = stem
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define, so every field is extracted in its own try block and a
        # missing group simply leaves that field untouched.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:

                    def swap(a):
                        # Move the last component to the front:
                        # "Last, First" / "First Last" -> swapped order.
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            # float(None) raises TypeError when the group did not take part
            # in the match; a non-numeric string raises ValueError.
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Date parsing is best effort: an unparseable date must not stop
            # the other fields from being read. Catch Exception rather than
            # using a bare except so KeyboardInterrupt/SystemExit propagate.
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #28
0
    def _start_merge(self,book_list,tdir=None):
        """Create the library entry for a merged EPUB and queue the merge job.

        If any entry of ``book_list`` is not flagged 'good', an error dialog
        listing the collected errors is shown and nothing else happens.
        Otherwise the user orders the books in a dialog (returning early if
        that dialog is not accepted), default metadata for the merged book is
        derived from the inputs (title, authors, publisher, tags, languages,
        series, comments, cover and the configured custom columns), a new
        book entry is created, the metadata editor is opened, and finally
        ``do_merge_bg`` is dispatched through the GUI job manager with
        ``self.merge_done`` as the completion callback.

        :param book_list: list of dicts, one per input EPUB. Keys read here:
            'good', 'error', 'title', 'series', 'series_index', 'authors',
            'publisher', 'tags', 'languages', 'comments', 'calibre_id',
            'epub_size' and 'epub'.
        :param tdir: directory used for the temporary output EPUB and
            forwarded to the background job as 'tdir'.

        NOTE(review): ``self.t`` is read before it is assigned in this
        method, so it is presumably set by the caller -- confirm.
        """
        db=self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = [ x for x in book_list if not x['good'] ]
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.')%len(bad_list),
                             det_msg='\n'.join( [ x['error'] for x in bad_list ]))
            d.exec_()
        else:
            d = OrderEPUBsDialog(self.gui,
                                 _('Order EPUBs to Merge'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
            d.exec_()
            if d.result() != d.Accepted:
                return

            # From here on work with the list as returned by the dialog.
            book_list = d.get_books()

            logger.debug("2:%s"%(time.time()-self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
            mi = MetaInformation(deftitle,["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = [ x['series'] for x in book_list if x['series'] != None ]
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle;
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            # Collect every author once, keeping first-seen order.
            mi.authors = list()
            authorslists = [ x['authors'] for x in book_list ]
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ])

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set([ x['publisher'] for x in book_list ])
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            # Tags of all books are concatenated (duplicates are not removed
            # here), then the comma separated 'mergetags' pref is appended.
            tagslists = [ x['tags'] for x in book_list ]
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = [ x['languages'] for x in book_list ]
            mi.languages = [item for sublist in languageslists for item in sublist]

            # Series is only carried over from the first book, and only when
            # the 'firstseries' pref is enabled.
            mi.series = ''
            if prefs['firstseries'] and book_list[0]['series']:
                mi.series = book_list[0]['series']
                mi.series_index = book_list[0]['series_index']

            # ======================= make book comments =========================

            # With more than one distinct author overall, each listed title
            # is followed by that book's own authors.
            if len(mi.authors) > 1:
                booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
            else:
                booktitle = lambda x : x['title']

            mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword']

            if prefs['includecomments']:
                def bookcomments(x):
                    if x['comments']:
                        return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments'])
                    else:
                        return '<b>%s</b><br/>'%booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                                '</div>')
            else:
                mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] )

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s"%(time.time()-self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = [ x['calibre_id'] for x in book_list ]

            custom_columns = self.gui.library_view.model().custom_columns
            # prefs['custom_cols'] maps a custom column key to the action
            # used to combine that column's values across the merged books.
            for col, action in six.iteritems(prefs['custom_cols']):
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping."%col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug("%s not a valid column type for %s, skipping."%(col,action))
                    continue

                label = coldef['label']

                # 'found' records whether any source value was read; the
                # column is only written at the end when it is True and
                # 'value' is not None.
                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first','last']:
                    value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add','average','averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    # 'found' is only set together with a count increment,
                    # so count is at least 1 whenever this division runs.
                    if found and action in ('average','averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5 # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            # 'not value' is true for any falsy value, not
                            # only for the initial None.
                            if not value or  ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        # Reassigning 'action' makes the 'concat' branch
                        # below handle columns that are not multi-valued.
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if action == 'now':
                    value = datetime.now()
                    found = True
                    logger.debug("now: %s"%value)

                if found and value != None:
                    logger.debug("value: %s"%value)
                    db.set_custom(book_id,value,label=label,commit=False)

            # set_custom() was called with commit=False above; commit once
            # after all columns have been processed.
            db.commit()

            logger.debug("4:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()

            confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                    'epubmerge_created_now_edit_again',
                    self.gui,
                    title=_("EpubMerge"),
                    show_cancel_button=False)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum([ x['epub_size'] for x in book_list ])
            logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
            confirm('\n'+_('''EpubMerge will be done in a Background job.  The merged EPUB will not appear in the Library until finished.

You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)),
                    'epubmerge_background_merge_again',
                    self.gui,
                    title=_("EpubMerge"),
                    show_cancel_button=False)

            # if len(book_list) > 100 or totalsize > 5*1024*1024:
            #     confirm('\n'+_('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
            #             'epubmerge_edited_now_merge_again',
            #             self.gui)

            self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

            # Re-read the metadata from the db so the job options below use
            # what is stored after the metadata dialog, not the defaults
            # computed above.
            mi = db.get_metadata(book_id,index_is_id=True)

            mergedepub = PersistentTemporaryFile(prefix="output_",
                                                 suffix='.epub',
                                                 dir=tdir)
            epubstomerge = [ x['epub'] for x in book_list ]
            epubtitles = {}
            for x in book_list:
                # save titles indexed by epub for reporting from BG
                epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors']))

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')


            # Arguments for the job: module name, function name, then a
            # tuple of (options dict, cpus).
            func = 'arbitrary_n'
            cpus = self.gui.job_manager.server.pool_size
            args = ['calibre_plugins.epubmerge.jobs',
                    'do_merge_bg',
                    ({'book_id':book_id,
                      'book_count':len(book_list),
                      'tdir':tdir,
                      'outputepubfn':mergedepub.name,
                      'inputepubfns':epubstomerge, # already .name'ed
                      'epubtitles':epubtitles, # for reporting
                      'authoropts':mi.authors,
                      'titleopt':mi.title,
                      'descopt':mi.comments,
                      'tags':mi.tags,
                      'languages':mi.languages,
                      'titlenavpoints':prefs['titlenavpoints'],
                      'originalnavpoints':prefs['originalnavpoints'],
                      'flattentoc':prefs['flattentoc'],
                      'printtimes':True,
                      'coverjpgpath':coverjpgpath,
                      'keepmetadatafiles':prefs['keepmeta']
                      },
                     cpus)]
            desc = _('EpubMerge: %s')%mi.title
            job = self.gui.job_manager.run_job(
                self.Dispatcher(self.merge_done),
                func, args=args,
                description=desc)

            self.gui.jobs_pointer.start()
            self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)