Example #1
0
def get_metadata(stream, extract_cover=True):
    '''
    Build a MetaInformation object from the ``meta.xml`` member of the zip
    archive in ``stream``.

    The plain fields (title, creator, description, language, keywords) are
    applied first. When the ``opf.metadata`` field equals ``'true'`` the
    ``opf.*`` fields are applied on top of them; ``opf.authors`` then
    replaces any authors taken from the creator fields.

    :param stream: file-like object opened with ``zipfile.ZipFile``; it is
        also handed to ``read_cover``.
    :param extract_cover: passed through unchanged to ``read_cover``.
    :return: the populated MetaInformation object.
    '''
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # Prefer the initial creator; fall back to the creator field
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            # The index is only meaningful when a series name is present
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass

    return mi
Example #2
0
File: odt.py Project: sss/calibre
def get_metadata(stream, extract_cover=True):
    '''
    Build a MetaInformation object from the ``meta.xml`` member of the zip
    archive in ``stream``.

    The plain fields (title, creator, description, language, keywords) are
    applied first. When the ``opf.metadata`` field equals ``'true'`` the
    ``opf.*`` fields are applied on top of them; ``opf.authors`` then
    replaces any authors taken from the creator fields.

    :param stream: file-like object opened with ``zipfile.ZipFile``; it is
        also handed to ``read_cover``.
    :param extract_cover: passed through unchanged to ``read_cover``.
    :return: the populated MetaInformation object.
    '''
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # ``in`` instead of dict.has_key(): has_key() does not exist on Python 3
    if 'title' in data:
        mi.title = data['title']
    # Prefer the initial creator; fall back to the creator field
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            # The index is only meaningful when a series name is present
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The sections after the first are scanned for one whose header type is
    DATATYPE_METADATA. Its sub-records supply the character set, author,
    title and publication date. When no such section exists the default
    ('Unknown') metadata is returned. ``extract_cover`` is accepted but not
    used here.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    try:
        record_count, = struct.unpack_from('>H', section_data, 0)
    except struct.error:
        # Section too short to even hold the record count
        record_count = 0
    adv = 0
    title = None
    author = None
    pubdate = 0
    # Each sub-record is read as: 2-byte type, 2-byte length counted in
    # 2-byte words of payload, then the payload itself.
    # NOTE(review): layout taken from the Plucker DB format description;
    # confirm that the length excludes the 4-byte header. The previous code
    # advanced by the payload size only and read a single byte for the
    # author/title payloads.
    for _record in range(record_count):
        payload_start = 6 + adv
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
            payload_end = payload_start + 2 * length

            # CharSet
            if rec_type == 1:
                val, = struct.unpack_from('>H', section_data, payload_start)
                default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # Author
            elif rec_type == 4:
                author = section_data[payload_start:payload_end]
            # Title
            elif rec_type == 5:
                title = section_data[payload_start:payload_end]
            # Publication Date
            elif rec_type == 6:
                pubdate, = struct.unpack_from('>I', section_data, payload_start)
        except struct.error:
            # Truncated record: keep what has been read so far
            break

        adv += 4 + 2 * length

    if title:
        # Bytes literals so the NUL stripping works on bytes data as well
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        names = [a.strip() for a in author.split(',') if a.strip()]
        if names:
            mi.authors = names
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #4
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The sections after the first are scanned for one whose header type is
    DATATYPE_METADATA. Its sub-records supply the character set, author,
    title and publication date. When no such section exists the default
    ('Unknown') metadata is returned. ``extract_cover`` is accepted but not
    used here.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    try:
        record_count, = struct.unpack_from('>H', section_data, 0)
    except struct.error:
        # Section too short to even hold the record count
        record_count = 0
    adv = 0
    title = None
    author = None
    pubdate = 0
    # Each sub-record is read as: 2-byte type, 2-byte length counted in
    # 2-byte words of payload, then the payload itself.
    # NOTE(review): layout taken from the Plucker DB format description;
    # confirm that the length excludes the 4-byte header. The previous code
    # advanced by the payload size only and read a single byte for the
    # author/title payloads.
    # range() rather than xrange(): the count is a 16-bit value and
    # xrange() does not exist on Python 3.
    for _record in range(record_count):
        payload_start = 6 + adv
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
            payload_end = payload_start + 2 * length

            # CharSet
            if rec_type == 1:
                val, = struct.unpack_from('>H', section_data, payload_start)
                default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # Author
            elif rec_type == 4:
                author = section_data[payload_start:payload_end]
            # Title
            elif rec_type == 5:
                title = section_data[payload_start:payload_end]
            # Publication Date
            elif rec_type == 6:
                pubdate, = struct.unpack_from('>I', section_data, payload_start)
        except struct.error:
            # Truncated record: keep what has been read so far
            break

        adv += 4 + 2 * length

    if title:
        # Bytes literals so the NUL stripping works on bytes data as well
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        names = [a.strip() for a in author.split(',') if a.strip()]
        if names:
            mi.authors = names
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #5
0
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Merge the metadata options in ``opts`` into a copy of ``mi`` and write
    the result to ``stream`` with ``set_metadata``.

    :param opts: options object; unset options are expected to be None
        (or missing) and are skipped.
    :param mi: metadata to start from; it is copied, not modified.
    :param stream: book stream passed to ``set_metadata``.
    :param stream_type: format identifier passed to ``set_metadata``.
    '''
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Context manager so the OPF file handle is not leaked
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options that need conversion are skipped here and handled one by one
    # below; everything else is copied over verbatim.
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)
    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author sort overrides the one derived from the authors
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry is "type:value"; split on the first colon only
        val = {
            k.strip(): v.strip()
            for k, v in (x.partition(':')[0::2] for x in opts.identifiers)
        }
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # items() works on both Python 2 and 3; entries with an empty
            # key or value are dropped
            val = {k: v for k, v in orig.items() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
Example #6
0
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Merge the metadata options in ``opts`` into a copy of ``mi`` and write
    the result to ``stream`` with ``set_metadata``.

    :param opts: options object; unset options are expected to be None
        (or missing) and are skipped.
    :param mi: metadata to start from; it is copied, not modified.
    :param stream: book stream passed to ``set_metadata``.
    :param stream_type: format identifier passed to ``set_metadata``.
    '''
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Context manager so the OPF file handle is not leaked
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options that need conversion are skipped here and handled one by one
    # below; everything else is copied over verbatim.
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)
    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author sort overrides the one derived from the authors
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry is "type:value"; split on the first colon only
        val = {k.strip():v.strip() for k, v in (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # Entries with an empty key or value are dropped
            val = {k:v for k, v in iteritems(orig) if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
 def populate(self, entries, browser, verbose=False, api_key=''):
     '''
     Append one MetaInformation object to ``self`` for every usable entry.

     For each entry the id URL and title are read first. The full record
     is then fetched through ``browser`` to fill in comments, tags, ISBN,
     publisher and publication date. If that fetch fails, those fields
     are taken from the original entry instead.

     :param entries: iterable of feed entries.
     :param browser: object whose ``open(url).read()`` returns the raw feed.
     :param verbose: when true, failures are printed.
     :param api_key: appended to the id URL as ``?apikey=`` when not empty.
     '''
     for x in entries:
         try:
             id_url = entry_id(x)[0].text
             title = self.get_title(x)
         except Exception:
             report(verbose)
             # Without an id URL and title nothing below can work; falling
             # through would use undefined or stale values from a previous
             # entry.
             continue
         mi = MetaInformation(title, self.get_authors(x))
         try:
             if api_key != '':
                 id_url = id_url + "?apikey=" + api_key
             raw = browser.open(id_url).read()
             feed = etree.fromstring(raw)
             x = entry(feed)[0]
         except Exception as e:
             if verbose:
                 print('Failed to get all details for an entry')
                 print(e)
         mi.comments = self.get_description(x, verbose)
         mi.tags = self.get_tags(x, verbose)
         mi.isbn = self.get_isbn(x, verbose)
         mi.publisher = self.get_publisher(x, verbose)
         mi.pubdate = self.get_date(x, verbose)
         self.append(mi)
Example #8
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    '''
    Extract metadata from a file name using a regular expression.

    The extension is removed and underscores are replaced by spaces before
    matching. The named groups ``title``, ``author``, ``series``,
    ``series_index``, ``isbn``, ``publisher``, ``published`` and
    ``comments`` are used when the pattern defines them. If no title is
    found, the cleaned-up name becomes the title.

    :param name: file name (bytes are decoded with ``filesystem_encoding``).
    :param pat: compiled pattern; defaults to the ``filename_pattern``
        preference.
    :param fallback_pat: compiled pattern tried when ``pat`` does not match.
    :return: a MetaInformation object.
    '''
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    # Strip the extension. A name without any dot is kept as it is;
    # rpartition()[0] alone would turn it into an empty string.
    stem, dot = name.rpartition('.')[:2]
    if dot:
        name = stem
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    def swap(a):
        # Move the last name part ("Last, First" or "First Last") to the front
        if ',' in a:
            parts = a.split(',', 1)
        else:
            parts = a.split(None, 1)
        if len(parts) > 1:
            t = parts[-1]
            parts = parts[:-1]
            parts.insert(0, t)
        return ' '.join(parts)

    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define, hence one try block per field.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: an unparseable date is ignored, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #9
0
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata

        The result is stored in ``self.comic_md_in_calibre_format``; nothing
        is returned. If that attribute is already set, the method does
        nothing.
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            try:
                mi.series_index = float(co.issue)
            except (TypeError, ValueError):
                # Not a plain number. A single numeric character (such as a
                # vulgar fraction) still has a value; numeric() raises
                # TypeError for anything longer than one character and
                # ValueError for non-numeric characters.
                try:
                    mi.series_index = unicodedata.numeric(co.issue)
                except (TypeError, ValueError):
                    pass
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                # Mid-month (and June when the month is unknown)
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except Exception:
                # Best effort: an invalid year/month leaves pubdate unset
                pass

        # custom columns
        update_column = partial(
            update_custom_column,
            calibre_metadata=mi,
            custom_cols=self.db.field_metadata.custom_field_metadata())
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['genre_column'], co.genre)
        ensure_int(co.issueCount, update_column, prefs['count_column'],
                   co.issueCount)
        ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
        if prefs['auto_count_pages']:
            update_column(prefs['pages_column'], self.count_pages())
        else:
            update_column(prefs['pages_column'], co.pageCount)
        if prefs['get_image_sizes']:
            update_column(prefs['image_size_column'], self.get_picture_size())
        # NOTE(review): the link is built even when co.webLink is empty;
        # confirm how update_custom_column should treat that case.
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))
        update_column(prefs['manga_column'], co.manga)

        self.comic_md_in_calibre_format = mi
Example #10
0
def _html_meta_value(src, comment_key, meta_names, same_quote=False):
    '''
    Look up a single metadata value in ``src``.

    An HTML comment of the form ``<!-- ... KEY="value" ... -->`` is tried
    first. If it is absent, each name in ``meta_names`` is tried in order
    with the pattern returned by ``get_meta_regexp_``.

    :param comment_key: upper-case key searched for inside a comment.
    :param meta_names: meta tag names to fall back to, in priority order.
    :param same_quote: when true the value must be closed by the quote
        character that opened it (so it may contain the other kind);
        otherwise it ends at the first quote of either kind.
    :return: ``(value, from_comment)``; ``value`` is None if nothing matched.
    '''
    if same_quote:
        pat = re.compile(
            r'<!--.*?' + comment_key + r'=(?P<q>[\'"])(.+?)(?P=q).*?-->',
            re.DOTALL)
        value_group = 2
    else:
        pat = re.compile(
            r'<!--.*?' + comment_key + r'=[\'"]([^"\']+)[\'"].*?-->',
            re.DOTALL)
        value_group = 1
    match = pat.search(src)
    if match:
        return match.group(value_group), True
    for meta_name in meta_names:
        match = get_meta_regexp_(meta_name).search(src)
        if match:
            return match.group(1), False
    return None, False


def get_metadata_(src, encoding=None):
    '''
    Extract metadata from HTML source.

    Every field is looked up first in an HTML comment
    (``<!-- TITLE="..." -->``) and then in a list of meta tag names. The
    title additionally falls back to the ``<title>`` element. Only the
    first 150000 characters of the source are searched.

    :param src: HTML as unicode text or as a byte string.
    :param encoding: encoding used to decode a byte string; when not given,
        ``xml_to_unicode`` is used instead.
    :return: a MetaInformation object.
    '''
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    # Title
    title = _html_meta_value(
        src, 'TITLE', ('DC.title', 'DCTERMS.title', 'Title'), True)[0]
    if not title:
        pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE)
        match = pat.search(src)
        if match:
            title = match.group(1)

    # Author
    author, from_comment = _html_meta_value(
        src, 'AUTHOR',
        ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut', 'DC.creator'),
        True)
    if from_comment:
        # Only the comment form has its commas turned into semicolons
        author = author.replace(',', ';')

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r'&(\S+)?;')
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = _html_meta_value(
        src, 'PUBLISHER',
        ('Publisher', 'DC.publisher', 'DCTERMS.publisher'), True)[0]
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = _html_meta_value(
        src, 'ISBN',
        ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))[0]
    if isbn:
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = _html_meta_value(
        src, 'LANGUAGE', ('DC.language', 'DCTERMS.language'))[0]
    if language:
        mi.language = language

    # PUBDATE
    pubdate = _html_meta_value(
        src, 'PUBDATE',
        ('Pubdate', 'Date of publication', 'DC.date.published',
         'DC.date.publication', 'DC.date.issued', 'DCTERMS.issued'))[0]
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: an unparseable date is ignored
            pass

    # TIMESTAMP
    timestamp = _html_meta_value(
        src, 'TIMESTAMP',
        ('Timestamp', 'Date of creation', 'DC.date.created',
         'DC.date.creation', 'DCTERMS.created'))[0]
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Best effort: an unparseable date is ignored
            pass

    # SERIES
    series = _html_meta_value(src, 'SERIES', ("Series",))[0]
    if series:
        # A trailing "[n]" inside the series name carries the index
        pat = re.compile(r'\[([.0-9]+)\]')
        match = pat.search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except (TypeError, ValueError):
                pass
            series = series.replace(match.group(), '').strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            pat = get_meta_regexp_("Seriesnumber")
            match = pat.search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = _html_meta_value(src, 'RATING', ("Rating",))[0]
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            pass
        else:
            if value < 0:
                value = 0
            # Values above 5 are halved once; if still above 5 the rating
            # is reset to 0
            if value > 5:
                value /= 2.
            if value > 5:
                value = 0
            mi.rating = value

    # COMMENTS
    comments = _html_meta_value(src, 'COMMENTS', ("Comments",))[0]
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = _html_meta_value(src, 'TAGS', ("Tags",))[0]
    if tags:
        mi.tags = [
            x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")
        ]

    # Ready to return MetaInformation
    return mi
Example #11
0
def get_metadata_(src, encoding=None):
    """Extract book metadata from the beginning of a markup document.

    Every field is looked up first in a comment of the form
    ``<!-- FIELD="value" -->`` and, failing that, with the patterns that
    ``get_meta_regexp_`` builds for a list of known names (presumably
    ``<meta>`` tags -- confirm against ``get_meta_regexp_``). The title
    additionally falls back to the ``<title>`` element.

    Meta data definitions as in
    http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    :param src: The document, either already decoded or as raw bytes.
    :param encoding: Encoding used to decode ``src`` when it is not
        unicode. When falsy, ``xml_to_unicode`` does the decoding.
    :return: A ``MetaInformation`` object carrying the fields that were
        found. Values that cannot be parsed (dates, numbers) are skipped.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")

    # Searching shouldn't take too long, so only the head is scanned.
    src = src[:150000]

    def lookup(field, meta_names, backref_quote=False):
        # Returns (value, came_from_comment); value is None if nothing
        # matched anywhere.
        if backref_quote:
            # Closing quote must equal the opening one, so the value may
            # itself contain the other quote character.
            template, group = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->', 2
        else:
            template, group = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->', 1
        match = re.compile(template % field, re.DOTALL).search(src)
        if match:
            return match.group(group), True
        for name in meta_names:
            match = get_meta_regexp_(name).search(src)
            if match:
                return match.group(1), False
        return None, False

    # Title
    title = lookup("TITLE", ("DC.title", "DCTERMS.title", "Title"), True)[0]
    if not title:
        match = re.compile("<title>([^<>]+?)</title>", re.IGNORECASE).search(src)
        if match:
            title = match.group(1)

    # Author
    author, from_comment = lookup(
        "AUTHOR",
        ("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"),
        True,
    )
    if from_comment:
        # Only the comment form has its commas turned into semicolons.
        author = author.replace(",", ";")

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r"&(\S+)?;")
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup(
        "PUBLISHER", ("Publisher", "DC.publisher", "DCTERMS.publisher"), True
    )[0]
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = lookup(
        "ISBN", ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN")
    )[0]
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = lookup("LANGUAGE", ("DC.language", "DCTERMS.language"))[0]
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup(
        "PUBDATE",
        (
            "Pubdate",
            "Date of publication",
            "DC.date.published",
            "DC.date.publication",
            "DC.date.issued",
            "DCTERMS.issued",
        ),
    )[0]
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: an unparseable date leaves the field unset.
            pass

    # TIMESTAMP
    timestamp = lookup(
        "TIMESTAMP",
        (
            "Timestamp",
            "Date of creation",
            "DC.date.created",
            "DC.date.creation",
            "DCTERMS.created",
        ),
    )[0]
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Best effort, as for the publication date.
            pass

    # SERIES
    series = lookup("SERIES", ("Series",))[0]
    if series:
        series_index = None
        # An index may be embedded in the series name as "[1.5]".
        match = re.compile(r"\[([.0-9]+)\]").search(series)
        if match is not None:
            try:
                series_index = float(match.group(1))
            except (TypeError, ValueError):
                pass
            series = series.replace(match.group(), "").strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup("RATING", ("Rating",))[0]
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            value = None
        if value is not None:
            if value < 0:
                value = 0
            if value > 5:
                # Values above 5 are halved once ...
                value /= 2.0
            if value > 5:
                # ... and discarded if they are still out of range.
                value = 0
            mi.rating = value

    # COMMENTS
    comments = lookup("COMMENTS", ("Comments",))[0]
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = lookup("TAGS", ("Tags",))[0]
    if tags:
        stripped = [
            x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")
        ]
        # Drop empty entries produced by stray or trailing commas.
        mi.tags = [x for x in stripped if x]

    # Ready to return MetaInformation
    return mi
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata.

        The resulting MetaInformation object is stored in
        self.comic_md_in_calibre_format; nothing is returned. If that
        attribute is already truthy the method returns without doing
        anything.

        comic_metadata: object providing the attributes read below
        (title, series, issue, tags, credits, year, month, ...).
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # synonyms for artists
        WRITER = ['writer', 'plotter', 'scripter']
        PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
        INKER = ['inker', 'artist', 'finishes']
        COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
        LETTERER = ['letterer']
        COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
        EDITOR = ['editor']

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            try:
                # float() handles plain numbers, also multi-digit text
                # such as u"12" that unicodedata.numeric() would reject.
                mi.series_index = float(co.issue)
            except (TypeError, ValueError):
                try:
                    # Single numeric characters float() cannot read,
                    # e.g. vulgar fractions.
                    mi.series_index = unicodedata.numeric(co.issue)
                except (TypeError, ValueError):
                    # Non-numeric issue (e.g. "1A"): leave the index unset
                    # rather than aborting the whole conversion.
                    pass
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                # Day is fixed to the 15th; month defaults to June.
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except Exception:
                # Best effort: an invalid year/month leaves pubdate unset.
                pass

        # custom columns
        custom_cols = self.db.field_metadata.custom_field_metadata()
        update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['volume_column'], co.volume)
        update_column(prefs['genre_column'], co.genre)

        self.comic_md_in_calibre_format = mi
Example #13
0
def get_metadata(stream, extract_cover=True):
    """Read metadata from the ``meta.xml`` member of a zip container.

    (Presumably an OpenDocument file -- confirm with the callers.)

    :param stream: File-like object handed to ``ZipFile``.
    :param extract_cover: Passed through unchanged to ``read_cover``.
    :return: A ``MetaInformation`` object filled from the standard fields
        and, when the ``opf.metadata`` user-defined entry is set, from the
        ``opf.*`` user-defined entries as well.
    """
    ws_run = re.compile(r'\s+')

    def normalize(text):
        # Collapse every whitespace run to one space and trim the ends.
        return ws_run.sub(' ', text).strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Text of the first element for ``field``, or None if absent.
            ns, tag = fields[field]
            hits = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if not hits:
                return None
            raw = tostring(hits[0],
                           method='text',
                           encoding='unicode',
                           with_tail=False)
            return normalize(raw)

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        desc = find('description')
        if desc:
            mi.comments = desc

        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]

        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [part.strip() for part in kw.split(',') if part.strip()]

        # Collect the user-defined entries, keyed by lower-cased name.
        data = {}
        name_attr = '{%s}name' % METANS
        vtype_attr = '{%s}value-type' % METANS
        user_ns = {'ns0': fields['user-defined'][0]}
        for elem in root.xpath('//ns0:user-defined', namespaces=user_ns):
            name = (elem.get(name_attr) or '').lower()
            vtype = elem.get(vtype_attr) or 'string'
            val = elem.text
            if not (name and val):
                continue
            data[name] = (val == 'true') if vtype == 'boolean' else val

        # Both flags are needed later for the cover.
        opfmeta = False
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True

            title_sort = data.get('opf.titlesort', '')
            if title_sort:
                mi.title_sort = title_sort

            authors = data.get('opf.authors', '')
            if authors:
                mi.authors = string_to_authors(authors)

            author_sort = data.get('opf.authorsort', '')
            if author_sort:
                mi.author_sort = author_sort

            raw_isbn = data.get('opf.isbn', '')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher', '')
            if publisher:
                mi.publisher = publisher

            pubdate = data.get('opf.pubdate', '')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp into the 0..10 range.
                    mi.rating = max(0, min(float(rating), 10))
                except Exception:
                    pass

            series = data.get('opf.series', '')
            if series:
                mi.series = series
                series_index = data.get('opf.seriesindex', '')
                if series_index:
                    try:
                        mi.series_index = float(series_index)
                    except Exception:
                        mi.series_index = 1.0

            opf_lang = data.get('opf.language', '')
            if opf_lang:
                cl = canonicalize_lang(opf_lang)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #14
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle=changedtocs[linenums[0]][0] # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle=unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle,defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>"+_("Split from:")+"</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id,index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in prefs['custom_cols'].iteritems():
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id,value,label=label,commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]"%self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        self.book_count = self.book_count+1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
        if prefs['editmetadata']:
            confirm('\n'+_('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.''')+'\n\n'+
                    editconfig_txt+'\n',
                    'epubsplit_created_now_edit_again',
                    self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub, index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
        finally:
            QApplication.restoreOverrideCursor()

        if not prefs['editmetadata']:
            confirm('<p>'+
                    '</p><p>'.join([_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')%(mi.title,', '.join(mi.authors)),
                                   _('EpubSplit now skips the Edit Metadata step by default.'),
                                   editconfig_txt])+
                    '</p>',
                    'epubsplit_created_now_no_edit_again',
                    self.gui)
Example #15
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  newspecs,
                  deftitle=None,
                  editmeta=True):
        """Create a library entry for a split EPUB, then write the EPUB into it.

        :param db: Library database object; used to create the book entry,
            copy the cover and custom columns, and attach the EPUB format.
        :param source_id: Id of the source book in ``db``.
        :param misource: Metadata object of the source book.
        :param splitepub: Object whose ``write_split_epub`` produces the
            output file.
        :param newspecs: ``(linenums, changedtocs)``.
        :param deftitle: Title for the new book; when falsy and the
            ``copytitle`` preference is set, one is derived from the
            source title.
        :param editmeta: Currently unused; the interactive "edit metadata"
            step it used to control is disabled in this variant. Kept so
            existing callers keep working.
        :return: ``None``.
        """
        linenums, changedtocs = newspecs
        self.t = time.time()

        if not deftitle and prefs['copytitle']:
            deftitle = _("نمونه %s") % misource.title

        defauthors = misource.authors if prefs['copyauthors'] else None

        mi = MetaInformation(deftitle, defauthors)

        # Plain attribute copies, each guarded by its own preference.
        for pref_key, field in (('copytags', 'tags'),
                                ('copylanguages', 'languages'),
                                ('copyseries', 'series'),
                                ('copydate', 'timestamp'),
                                ('copyrating', 'rating'),
                                ('copypubdate', 'pubdate'),
                                ('copypublisher', 'publisher')):
            if prefs[pref_key]:
                setattr(mi, field, getattr(misource, field))

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = _("Split from:") + "\n\n" + misource.comments

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        # Only the column names are needed, so iterate the mapping itself;
        # this also avoids the Python-2-only iteritems().
        for col in prefs['custom_cols']:
            if col not in custom_columns:
                continue
            label = custom_columns[col]['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        self.t = time.time()

        sourcecol = prefs['sourcecol']
        if sourcecol != '' and sourcecol in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error',
                                           misource)
            label = custom_columns[sourcecol]['label']
            db.set_custom(book_id, val, label=label, commit=False)

        db.commit()

        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        # No confirmation dialog / Edit Metadata step here: the EPUB is
        # written straight away.
        self.t = time.time()
        self.gui.tags_view.recount()

        self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

        mi = db.get_metadata(book_id, index_is_id=True)

        outputepub = PersistentTemporaryFile(suffix='.epub')

        # This variant never passes a cover image path to the writer.
        coverjpgpath = None

        splitepub.write_split_epub(outputepub,
                                   linenums,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)

        self.t = time.time()
        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 outputepub, index_is_id=True)

        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
Example #16
0
    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle = changedtocs[linenums[0]][0]  # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle = unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle, defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags  # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi, add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in six.iteritems(prefs['custom_cols']):
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                           'EpubSplit Source Template Error',
                                           misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]" % self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        self.book_count = self.book_count + 1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _(
            'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.'
        )
        if prefs['editmetadata']:
            confirm(
                '\n' +
                _('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.'''
                  ) + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again', self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'),
                                             60000)

            mi = db.get_metadata(book_id, index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub,
                                     index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'),
                                             3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
        finally:
            QApplication.restoreOverrideCursor()

        if not prefs['editmetadata']:
            confirm(
                '<p>' + '</p><p>'.join([
                    _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.'
                      ) % (mi.title, ', '.join(mi.authors)),
                    _('EpubSplit now skips the Edit Metadata step by default.'
                      ), editconfig_txt
                ]) + '</p>', 'epubsplit_created_now_no_edit_again', self.gui)
Example #17
0
def _swap_author_name(author):
    """Return *author* with its two name parts exchanged.

    A name containing a comma is split on the first comma, any other name on
    the first run of whitespace. The two halves are swapped and joined with a
    single space; a name with only one part comes back as that part.
    """
    if ',' in author:
        parts = author.split(',', 1)
    else:
        parts = author.split(None, 1)
    # Strip each half so 'Last, First' yields 'First Last' rather than
    # ' First Last', and drop an empty half left behind by a trailing comma.
    parts = [p.strip() for p in parts]
    return ' '.join(p for p in reversed(parts) if p)


def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a ``MetaInformation`` from the fields encoded in a file name.

    The extension is removed, underscores are replaced by spaces and the
    result is searched with *pat*. Every named group the pattern defines
    among ``title``, ``author``, ``series``, ``series_index``, ``isbn``,
    ``publisher``, ``published`` and ``comments`` is copied onto the
    metadata object; groups the pattern does not define are skipped.

    :param name: File name, as text or as bytes (bytes are decoded with
        ``filesystem_encoding``, replacing undecodable bytes).
    :param pat: Compiled regular expression with named groups. When ``None``
        it is compiled from the ``filename_pattern`` preference.
    :param fallback_pat: Optional compiled pattern tried when *pat* does not
        match.
    :return: The populated ``MetaInformation``; its title falls back to the
        cleaned-up file name when no title was extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    # Drop the extension. A name without any '.' is kept whole: a plain
    # rpartition('.')[0] would reduce it to '' and lose the title fallback.
    stem, dot, _ext = name.rpartition('.')
    if dot:
        name = stem
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define; each field is therefore extracted independently.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            authors = string_to_authors(match.group('author'))
            if authors:
                mi.authors = authors
                if prefs['swap_author_names'] and mi.authors:
                    mi.authors = [_swap_author_name(a) for a in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            # TypeError covers an optional group that did not participate
            # in the match (group() returns None).
            mi.series_index = float(match.group('series_index'))
        except (IndexError, ValueError, TypeError):
            pass
        try:
            mi.isbn = match.group('isbn')
        except (IndexError, ValueError):
            pass
        try:
            mi.publisher = match.group('publisher')
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Date parsing stays best-effort, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as with a bare ``except:``.
            pass
        try:
            mi.comments = match.group('comments')
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #18
0
def _swap_author_name(author):
    """Return *author* with its two name parts exchanged.

    A name containing a comma is split on the first comma, any other name on
    the first run of whitespace. The two halves are swapped and joined with a
    single space; a name with only one part comes back as that part.
    """
    if "," in author:
        parts = author.split(",", 1)
    else:
        parts = author.split(None, 1)
    # Strip each half so "Last, First" yields "First Last" rather than
    # " First Last", and drop an empty half left behind by a trailing comma.
    parts = [p.strip() for p in parts]
    return " ".join(p for p in reversed(parts) if p)


def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a ``MetaInformation`` from the fields encoded in a file name.

    The extension is removed, underscores are replaced by spaces and the
    result is searched with *pat*. Every named group the pattern defines
    among ``title``, ``author``, ``series``, ``series_index``, ``isbn``,
    ``publisher`` and ``published`` is copied onto the metadata object;
    groups the pattern does not define are skipped.

    :param name: File name, as text or as bytes (bytes are decoded with
        ``filesystem_encoding``, replacing undecodable bytes).
    :param pat: Compiled regular expression with named groups. When ``None``
        it is compiled from the ``filename_pattern`` preference.
    :param fallback_pat: Optional compiled pattern tried when *pat* does not
        match.
    :return: The populated ``MetaInformation``; its title falls back to the
        cleaned-up file name when no title was extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    # Drop the extension. A name without any "." is kept whole: a plain
    # rpartition(".")[0] would reduce it to "" and lose the title fallback.
    stem, dot, _ext = name.rpartition(".")
    if dot:
        name = stem
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define; each field is therefore extracted independently.
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            authors = string_to_authors(match.group("author"))
            if authors:
                mi.authors = authors
                if prefs["swap_author_names"] and mi.authors:
                    mi.authors = [_swap_author_name(a) for a in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            # TypeError covers an optional group that did not participate
            # in the match (group() returns None).
            mi.series_index = float(match.group("series_index"))
        except (IndexError, ValueError, TypeError):
            pass
        try:
            mi.isbn = match.group("isbn")
        except (IndexError, ValueError):
            pass
        try:
            mi.publisher = match.group("publisher")
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date

                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Date parsing stays best-effort, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as with a bare ``except:``.
            pass

    if mi.is_null("title"):
        mi.title = name
    return mi