Example #1
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
Example #2
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.decode('cp1252', 'replace').split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except Exception:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
Example #3
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(io.BytesIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Example #4
0
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
                                     METADATA_PRIORITIES[path_to_ext(y)]))
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
Example #5
0
File: odt.py Project: sss/calibre
def get_metadata(stream, extract_cover=True):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if data.has_key('title'):
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif data.has_key('creator'):
        mi.authors = string_to_authors(data['creator'])
    if data.has_key('description'):
        mi.comments = data['description']
    if data.has_key('language'):
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata','') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass # Do not let an error reading the cover prevent reading other data

    return mi
Example #6
0
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream,
                                     stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
Example #7
0
def get_metadata(stream, extract_cover=True):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata','') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #8
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = u''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if line == '':
            break
        else:
            mdata += line

    mdata = mdata[:100]

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
Example #9
0
File: snb.py Project: Farb/calibre
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Example #10
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None,
                     data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #11
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = ''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if not line:
            break
        else:
            mdata += line

    mdata = mdata[:1024]

    mo = re.search(
        '(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$',
        mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
Example #12
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if stream.read(10) not in MAGIC:
            print >>sys.stderr, u'Couldn\'t read IMP header from file'
            return mi

        def cString(skip=0):
            result = ''
            while 1:
                data = stream.read(1)
                if data == '\x00':
                    if not skip: return result
                    skip -= 1
                    result, data = '', ''
                result += data

        stream.read(38) # skip past some uninteresting headers
        _, category, title, author = cString(), cString(), cString(1), cString(2)

        if title:
            mi.title = title
        if author:
            mi.authors = string_to_authors(author)
            mi.author = author
        if category:
            mi.category = category
    except Exception as err:
        msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
    return mi
Example #13
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data  = mh.section_data(int(cover_index))
    else:
        try:
            data  = mh.section_data(mh.first_image_index)
        except:
            data = ''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #14
0
def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/'
                        +isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode) for
                    x in h2]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
               series = html.tostring(h, method='text', encoding=unicode)
               match = re.search(r'(.+) \((.+)\)', series)
               if match is not None:
                   break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
Example #15
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #16
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in xrange(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #17
0
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    db.import_book(mi, [])
    write_dirtied(db)
    send_message()
Example #18
0
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    db.import_book(mi, [])
    write_dirtied(db)
    send_message()
Example #19
0
    def add_document(self, document):
        from calibre.ebooks.metadata import MetaInformation

        mi = MetaInformation('', [_('Unknown')])
        mi.title = document['title']
        mi.authors = document['authors']
        mi.tags = ["Mendeley"]

        mendeley_id = {}
        mendeley_id['mendeley'] = document['mendeley_id']

        mi.identifiers = mendeley_id
        mi.series_index = 1  # needed?

        self.db.add_books([document['path']], ['pdf'], [mi], False, True)

        os.remove(document['path'])
Example #20
0
    def add_document(self,document):
        from calibre.ebooks.metadata import MetaInformation

        mi = MetaInformation('', [_('Unknown')])
        mi.title = document['title']
        mi.authors = document['authors']
        mi.tags = ["Mendeley"]

        mendeley_id = {}
        mendeley_id['mendeley'] = document['mendeley_id']

        mi.identifiers = mendeley_id
        mi.series_index = 1 # needed?

        self.db.add_books([document['path']], ['pdf'], [mi], False, True)

        os.remove(document['path'])
Example #21
0
File: meta.py Project: Farb/calibre
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.
    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except:
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except:
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
Example #22
0
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.
    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip() + ', ' + lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except:
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except:
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
Example #23
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print >> sys.stderr, u'Couldn\'t read RB header from file'
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print >> sys.stderr, u'Couldn\'t find INFO from RB file'
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            key, value = line.split('=')
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s' % (
            mi.title, unicode(err))
        print >> sys.stderr, msg.encode('utf8')
        raise
    return mi
Example #24
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pml = b''
    if stream.name.endswith('.pmlz'):
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)

            pmls = glob.glob(os.path.join(tdir, '*.pml'))
            for p in pmls:
                with open(p, 'r+b') as p_stream:
                    pml += p_stream.read()
            if extract_cover:
                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)
    else:
        pml = stream.read()
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))

    for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
        m = re.search(br'TITLE="(.*?)"', comment)
        if m:
            mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(br'AUTHOR="(.*?)"', comment)
        if m:
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
        m = re.search(br'PUBLISHER="(.*?)"', comment)
        if m:
            mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(br'COPYRIGHT="(.*?)"', comment)
        if m:
            mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(br'ISBN="(.*?)"', comment)
        if m:
            mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))

    return mi
Example #25
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pml = ''
    if stream.name.endswith('.pmlz'):
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)

            pmls = glob.glob(os.path.join(tdir, '*.pml'))
            for p in pmls:
                with open(p, 'r+b') as p_stream:
                    pml += p_stream.read()
            if extract_cover:
                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)
    else:
        pml = stream.read()
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))

    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        m = re.search(r'TITLE="(.*?)"', comment)
        if m:
            mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'AUTHOR="(.*?)"', comment)
        if m:
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
        m = re.search(r'PUBLISHER="(.*?)"', comment)
        if m:
            mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'COPYRIGHT="(.*?)"', comment)
        if m:
            mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'ISBN="(.*?)"', comment)
        if m:
            mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))

    return mi
Example #26
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print(u'Couldn\'t read RB header from file', file=sys.stderr)
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print(u'Couldn\'t find INFO from RB file', file=sys.stderr)
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            key, value = line.split('=')
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
        print(msg.encode('utf8'), file=sys.stderr)
        raise
    return mi
Example #27
0
def do_add_empty(dbctx, title, authors, isbn, tags, series, series_index,
                 cover, identifiers, languages):
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    if cover:
        mi.cover = cover
    if languages:
        mi.languages = languages
    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join(map(str, ids)))
Example #28
0
def do_add_empty(
    dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers,
    languages
):
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    if cover:
        mi.cover = cover
    if languages:
        mi.languages = languages
    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join(map(str, ids)))
Example #29
0
File: odt.py Project: Eksmo/calibre
def get_metadata(stream):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if data.has_key('title'):
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif data.has_key('creator'):
        mi.authors = string_to_authors(data['creator'])
    if data.has_key('description'):
        mi.comments = data['description']
    if data.has_key('language'):
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = data['keywords'].split(',')

    return mi
Example #30
0
def get_metadata(stream):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if data.has_key('title'):
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif data.has_key('creator'):
        mi.authors = string_to_authors(data['creator'])
    if data.has_key('description'):
        mi.comments = data['description']
    if data.has_key('language'):
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = data['keywords'].split(',')

    return mi
Example #31
0
    def add(self, id, opf, cover, name):
        formats = self.ids.pop(id)
        if opf.endswith('.error'):
            mi = MetaInformation('', [_('Unknown')])
            self.critical[name] = open(opf,
                                       'rb').read().decode('utf-8', 'replace')
        else:
            try:
                mi = OPF(opf).to_book_metadata()
            except:
                import traceback
                mi = MetaInformation('', [_('Unknown')])
                self.critical[name] = traceback.format_exc()
        formats = self.process_formats(opf, formats)
        if not mi.title:
            mi.title = os.path.splitext(name)[0]
        mi.title = mi.title if isinstance(mi.title, unicode) else \
                   mi.title.decode(preferred_encoding, 'replace')
        if mi.application_id == '__calibre_dummy__':
            mi.application_id = None
        if self.db is not None:
            if cover:
                with open(cover, 'rb') as f:
                    cover = f.read()
            orig_formats = formats
            formats = [f for f in formats if not f.lower().endswith('.opf')]
            if prefs['add_formats_to_existing']:  #automerge is on
                identical_book_list = self.db.find_identical_books(mi)
                if identical_book_list:  # books with same author and nearly same title exist in db
                    self.merged_books.add(mi.title)
                    seen_fmts = set([])

                    for identical_book in identical_book_list:
                        ib_fmts = self.db.formats(identical_book,
                                                  index_is_id=True)
                        if ib_fmts:
                            seen_fmts |= set(ib_fmts.split(','))
                        replace = gprefs['automerge'] == 'overwrite'
                        self.add_formats(identical_book,
                                         formats,
                                         replace=replace)
                    if gprefs['automerge'] == 'new record':
                        incoming_fmts = \
                            set([os.path.splitext(path)[-1].replace('.',
                                '').upper() for path in formats])
                        if incoming_fmts.intersection(seen_fmts):
                            # There was at least one duplicate format
                            # so create a new record and put the
                            # incoming formats into it
                            # We should arguably put only the duplicate
                            # formats, but no real harm is done by having
                            # all formats
                            id_ = self.db.create_book_entry(
                                mi, cover=cover, add_duplicates=True)
                            self.number_of_books_added += 1
                            self.add_formats(id_, formats)

                else:
                    # books with same author and nearly same title do not exist in db
                    id_ = self.db.create_book_entry(mi,
                                                    cover=cover,
                                                    add_duplicates=True)
                    self.number_of_books_added += 1
                    self.add_formats(id_, formats)

            else:  #automerge is off
                id_ = self.db.create_book_entry(mi,
                                                cover=cover,
                                                add_duplicates=False)
                if id_ is None:
                    self.duplicates.append((mi, cover, orig_formats))
                else:
                    self.add_formats(id_, formats)
                    self.number_of_books_added += 1
        else:
            self.names.append(name)
            self.paths.append(formats[0])
            self.infos.append(mi)
        return mi.title
Example #32
0
    def add(self, id, opf, cover, name):
        formats = self.ids.pop(id)
        if opf.endswith(".error"):
            mi = MetaInformation("", [_("Unknown")])
            self.critical[name] = open(opf, "rb").read().decode("utf-8", "replace")
        else:
            try:
                mi = OPF(opf).to_book_metadata()
            except:
                import traceback

                mi = MetaInformation("", [_("Unknown")])
                self.critical[name] = traceback.format_exc()
        formats = self.process_formats(opf, formats)
        if not mi.title:
            mi.title = os.path.splitext(name)[0]
        mi.title = mi.title if isinstance(mi.title, unicode) else mi.title.decode(preferred_encoding, "replace")
        if mi.application_id == "__calibre_dummy__":
            mi.application_id = None
        if self.db is not None:
            if cover:
                with open(cover, "rb") as f:
                    cover = f.read()
            orig_formats = formats
            formats = [f2 for f2 in formats if not f2.lower().endswith(".opf")]
            if prefs["add_formats_to_existing"]:  # automerge is on
                identical_book_list = self.db.find_identical_books(mi)
                if identical_book_list:  # books with same author and nearly same title exist in db
                    self.merged_books.add((mi.title, " & ".join(mi.authors)))
                    seen_fmts = set([])

                    for identical_book in identical_book_list:
                        ib_fmts = self.db.formats(identical_book, index_is_id=True)
                        if ib_fmts:
                            seen_fmts |= set(ib_fmts.split(","))
                        replace = gprefs["automerge"] == "overwrite"
                        self.add_formats(identical_book, formats, replace=replace)
                    if gprefs["automerge"] == "new record":
                        incoming_fmts = set([os.path.splitext(path)[-1].replace(".", "").upper() for path in formats])
                        if incoming_fmts.intersection(seen_fmts):
                            # There was at least one duplicate format
                            # so create a new record and put the
                            # incoming formats into it
                            # We should arguably put only the duplicate
                            # formats, but no real harm is done by having
                            # all formats
                            id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                            self.number_of_books_added += 1
                            self.add_formats(id_, formats)

                else:
                    # books with same author and nearly same title do not exist in db
                    id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                    self.number_of_books_added += 1
                    self.add_formats(id_, formats)

            else:  # automerge is off
                id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
                if id_ is None:
                    self.duplicates.append((mi, cover, orig_formats))
                else:
                    self.add_formats(id_, formats)
                    self.auto_convert_books.add(id_)
                    self.number_of_books_added += 1
        else:
            self.names.append(name)
            self.paths.append(formats[0])
            self.infos.append(mi)
        return mi.title
Example #33
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #34
0
    def _start_merge(self,book_list):
        db=self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = filter(lambda x : not x['good'], book_list)
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.')%len(bad_list),
                             det_msg='\n'.join(map(lambda x : x['error'] , bad_list)))
            d.exec_()
        else:
            d = OrderEPUBsDialog(self.gui,
                                 _('Order EPUBs to Merge'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()
            
            print("2:%s"%(time.time()-self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
            mi = MetaInformation(deftitle,["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list))
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle;
                        break
                
            # print("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = map(lambda x : x['authors'], book_list)
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # print("======================= mi.authors:\n%s\n========================="%mi.authors)
            
            #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

            # print("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set(map(lambda x : x['publisher'], book_list))
            if len(publishers) == 1:
                mi.publisher = publishers.pop()
            
            # print("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = map(lambda x : x['tags'], book_list)
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # print("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # print("======================= m.tags:\n%s\n========================="%mi.tags)
            
            languageslists = map(lambda x : x['languages'], book_list)
            mi.languages = [item for sublist in languageslists for item in sublist]

            mi.series = ''

            # ======================= make book comments =========================
            
            if len(mi.authors) > 1:
                booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
            else:
                booktitle = lambda x : x['title']
                
            mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword']
            
            if prefs['includecomments']:
                def bookcomments(x):
                    if x['comments']:
                        return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments'])
                    else:
                        return '<b>%s</b>\n'%booktitle(x)
                    
                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                                '</div>')
            else:
                mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] )
                
            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)
            
            # ======================= custom columns ===================

            print("3:%s"%(time.time()-self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = map(lambda x : x['calibre_id'], book_list)
            
            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in prefs['custom_cols'].iteritems():
                #print("col: %s action: %s"%(col,action))
                
                if col not in custom_columns:
                    print("%s not an existing column, skipping."%col)
                    continue
                
                coldef = custom_columns[col]
                #print("coldef:%s"%coldef)
                
                if action not in permitted_values[coldef['datatype']]:
                    print("%s not a valid column type for %s, skipping."%(col,action))
                    continue
                
                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first','last']:
                    value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add','average','averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1
                                
                    if found and action in ('average','averageall'):
                        value = value / count
                        
                    if coldef['datatype'] == 'int':
                        value += 0.5 # so int rounds instead of truncs.
                
                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                
                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                
                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    
                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    
                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass
                        
                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()
                    
                if found and value != None:
                    db.set_custom(book_id,value,label=label,commit=False)
                
            db.commit()
            
            print("4:%s"%(time.time()-self.t))
            self.t = time.time()
            
            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])
            
            print("5:%s"%(time.time()-self.t))
            self.t = time.time()
            
            confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                    'epubmerge_created_now_edit_again',
                    self.gui)
            
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            print("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum(map(lambda x : x['epub_size'], book_list))

            print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
            if len(book_list) > 100 or totalsize > 5*1024*1024:
                confirm('\n'+_('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
                        'epubmerge_edited_now_merge_again',
                        self.gui)
            
            self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)
            
            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = map(lambda x : x['epub'] , book_list)
            
            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')
                
            self.do_merge( mergedepub,
                           epubstomerge,
                           authoropts=mi.authors,
                           titleopt=mi.title,
                           descopt=mi.comments,
                           tags=mi.tags,
                           languages=mi.languages,
                           titlenavpoints=prefs['titlenavpoints'],
                           flattentoc=prefs['flattentoc'],
                           printtimes=True,
                           coverjpgpath=coverjpgpath,
                           keepmetadatafiles=prefs['keepmeta'] )
                 
            print("6:%s"%(time.time()-self.t))
            print(_("Merge finished, output in:\n%s")%mergedepub.name)
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub, index_is_id=True)
            
            print("7:%s"%(time.time()-self.t))
            self.t = time.time()
            
            self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
Example #35
0
def get_metadata(stream, extract_cover=True):
    whitespace = re.compile(r'\s+')

    def normalize(s):
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            ns, tag = fields[field]
            ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if ans:
                return normalize(
                    tostring(ans[0],
                             method='text',
                             encoding='unicode',
                             with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [x.strip() for x in kw.split(',') if x.strip()]
        data = {}
        for tag in root.xpath('//ns0:user-defined',
                              namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val
        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
                if data.get('opf.seriesindex', ''):
                    try:
                        mi.series_index = float(data['opf.seriesindex'])
                    except Exception:
                        mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #36
0
    def _start_merge(self,book_list,tdir=None):
        db=self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = [ x for x in book_list if not x['good'] ]
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.')%len(bad_list),
                             det_msg='\n'.join( [ x['error'] for x in bad_list ]))
            d.exec_()
        else:
            d = OrderEPUBsDialog(self.gui,
                                 _('Order EPUBs to Merge'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()

            logger.debug("2:%s"%(time.time()-self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
            mi = MetaInformation(deftitle,["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = [ x['series'] for x in book_list if x['series'] != None ]
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle;
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = [ x['authors'] for x in book_list ]
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ])

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set([ x['publisher'] for x in book_list ])
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = [ x['tags'] for x in book_list ]
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = [ x['languages'] for x in book_list ]
            mi.languages = [item for sublist in languageslists for item in sublist]

            mi.series = ''
            if prefs['firstseries'] and book_list[0]['series']:
                mi.series = book_list[0]['series']
                mi.series_index = book_list[0]['series_index']

            # ======================= make book comments =========================

            if len(mi.authors) > 1:
                booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
            else:
                booktitle = lambda x : x['title']

            mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword']

            if prefs['includecomments']:
                def bookcomments(x):
                    if x['comments']:
                        return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments'])
                    else:
                        return '<b>%s</b><br/>'%booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                                '</div>')
            else:
                mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] )

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s"%(time.time()-self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = [ x['calibre_id'] for x in book_list ]

            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in six.iteritems(prefs['custom_cols']):
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping."%col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug("%s not a valid column type for %s, skipping."%(col,action))
                    continue

                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first','last']:
                    value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add','average','averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    if found and action in ('average','averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5 # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid, label=label, index_is_id=True)
                            if not value or  ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if action == 'now':
                    value = datetime.now()
                    found = True
                    logger.debug("now: %s"%value)

                if found and value != None:
                    logger.debug("value: %s"%value)
                    db.set_custom(book_id,value,label=label,commit=False)

            db.commit()

            logger.debug("4:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()

            confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                    'epubmerge_created_now_edit_again',
                    self.gui,
                    title=_("EpubMerge"),
                    show_cancel_button=False)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum([ x['epub_size'] for x in book_list ])
            logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
            confirm('\n'+_('''EpubMerge will be done in a Background job.  The merged EPUB will not appear in the Library until finished.

You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)),
                    'epubmerge_background_merge_again',
                    self.gui,
                    title=_("EpubMerge"),
                    show_cancel_button=False)

            # if len(book_list) > 100 or totalsize > 5*1024*1024:
            #     confirm('\n'+_('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
            #             'epubmerge_edited_now_merge_again',
            #             self.gui)

            self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)

            mergedepub = PersistentTemporaryFile(prefix="output_",
                                                 suffix='.epub',
                                                 dir=tdir)
            epubstomerge = [ x['epub'] for x in book_list ]
            epubtitles = {}
            for x in book_list:
                # save titles indexed by epub for reporting from BG
                epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors']))

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')


            func = 'arbitrary_n'
            cpus = self.gui.job_manager.server.pool_size
            args = ['calibre_plugins.epubmerge.jobs',
                    'do_merge_bg',
                    ({'book_id':book_id,
                      'book_count':len(book_list),
                      'tdir':tdir,
                      'outputepubfn':mergedepub.name,
                      'inputepubfns':epubstomerge, # already .name'ed
                      'epubtitles':epubtitles, # for reporting
                      'authoropts':mi.authors,
                      'titleopt':mi.title,
                      'descopt':mi.comments,
                      'tags':mi.tags,
                      'languages':mi.languages,
                      'titlenavpoints':prefs['titlenavpoints'],
                      'originalnavpoints':prefs['originalnavpoints'],
                      'flattentoc':prefs['flattentoc'],
                      'printtimes':True,
                      'coverjpgpath':coverjpgpath,
                      'keepmetadatafiles':prefs['keepmeta']
                      },
                     cpus)]
            desc = _('EpubMerge: %s')%mi.title
            job = self.gui.job_manager.run_job(
                self.Dispatcher(self.merge_done),
                func, args=args,
                description=desc)

            self.gui.jobs_pointer.start()
            self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)
Example #37
0
def get_social_metadata(title,
                        authors,
                        publisher,
                        isbn,
                        username=None,
                        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/' +
                                  isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [
                html.tostring(x, method='text', encoding=unicode) for x in h2
            ]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
                series = html.tostring(h, method='text', encoding=unicode)
                match = re.search(r'(.+) \((.+)\)', series)
                if match is not None:
                    break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
            '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # synonyms for artists
        WRITER = ['writer', 'plotter', 'scripter']
        PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
        INKER = ['inker', 'artist', 'finishes']
        COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
        LETTERER = ['letterer']
        COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
        EDITOR = ['editor']

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            if isinstance(co.issue, unicode):
                mi.series_index = unicodedata.numeric(co.issue)
            else:
                mi.series_index = float(co.issue)
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        custom_cols = self.db.field_metadata.custom_field_metadata()
        update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['volume_column'], co.volume)
        update_column(prefs['genre_column'], co.genre)

        self.comic_md_in_calibre_format = mi
Example #39
0
File: mobi.py Project: sss/calibre
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        import Image as PILImage

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except:
            data = ''
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
    except:
        log.exception('Failed to read MOBI cover')
    else:
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi
Example #40
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    name = name.rpartition(".")[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            au = match.group("author")
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs["swap_author_names"] and mi.authors:

                    def swap(a):
                        if "," in a:
                            parts = a.split(",", 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return " ".join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            si = match.group("series_index")
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group("isbn")
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group("publisher")
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date

                mi.pubdate = parse_only_date(pubdate)
        except:
            pass

    if mi.is_null("title"):
        mi.title = name
    return mi
Example #41
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:

                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Example #42
0
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            try:
                if not python3 and isinstance(co.issue, unicode):
                    mi.series_index = unicodedata.numeric(co.issue)
                else:
                    mi.series_index = float(co.issue)
            except ValueError:
                pass
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        update_column = partial(
            update_custom_column,
            calibre_metadata=mi,
            custom_cols=self.db.field_metadata.custom_field_metadata())
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['genre_column'], co.genre)
        ensure_int(co.issueCount, update_column, prefs['count_column'],
                   co.issueCount)
        ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
        if prefs['auto_count_pages']:
            update_column(prefs['pages_column'], self.count_pages())
        else:
            update_column(prefs['pages_column'], co.pageCount)
        if prefs['get_image_sizes']:
            update_column(prefs['image_size_column'], self.get_picture_size())
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))
        update_column(prefs['manga_column'], co.manga)

        self.comic_md_in_calibre_format = mi
Example #43
0
    def _start_merge(self, book_list):
        db = self.gui.current_db
        self.previous = self.gui.library_view.currentIndex()
        # if any bad, bail.
        bad_list = filter(lambda x: not x['good'], book_list)
        if len(bad_list) > 0:
            d = error_dialog(self.gui,
                             _('Cannot Merge Epubs'),
                             _('%s books failed.') % len(bad_list),
                             det_msg='\n'.join(
                                 map(lambda x: x['error'], bad_list)))
            d.exec_()
        else:
            d = OrderEPUBsDialog(
                self.gui,
                _('Order EPUBs to Merge'),
                prefs,
                self.qaction.icon(),
                book_list,
            )
            d.exec_()
            if d.result() != d.Accepted:
                return

            book_list = d.get_books()

            logger.debug("2:%s" % (time.time() - self.t))
            self.t = time.time()

            deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword'])
            mi = MetaInformation(deftitle, ["Temp Author"])

            # if all same series, use series for name.  But only if all.
            serieslist = map(lambda x: x['series'],
                             filter(lambda x: x['series'] != None, book_list))
            if len(serieslist) == len(book_list):
                mi.title = serieslist[0]
                for sr in serieslist:
                    if mi.title != sr:
                        mi.title = deftitle
                        break

            # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

            mi.authors = list()
            authorslists = map(lambda x: x['authors'], book_list)
            for l in authorslists:
                for a in l:
                    if a not in mi.authors:
                        mi.authors.append(a)
            #mi.authors = [item for sublist in authorslists for item in sublist]

            # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

            #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

            # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

            # set publisher if all from same publisher.
            publishers = set(map(lambda x: x['publisher'], book_list))
            if len(publishers) == 1:
                mi.publisher = publishers.pop()

            # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

            tagslists = map(lambda x: x['tags'], book_list)
            mi.tags = [item for sublist in tagslists for item in sublist]
            mi.tags.extend(prefs['mergetags'].split(','))

            # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
            # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

            languageslists = map(lambda x: x['languages'], book_list)
            mi.languages = [
                item for sublist in languageslists for item in sublist
            ]

            mi.series = ''

            # ======================= make book comments =========================

            if len(mi.authors) > 1:
                booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join(
                    x['authors']))
            else:
                booktitle = lambda x: x['title']

            mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword']

            if prefs['includecomments']:

                def bookcomments(x):
                    if x['comments']:
                        return '<b>%s</b>\n\n%s' % (booktitle(x),
                                                    x['comments'])
                    else:
                        return '<b>%s</b>\n' % booktitle(x)

                mi.comments += ('<div class="mergedbook">' +
                                '<hr></div><div class="mergedbook">'.join(
                                    [bookcomments(x)
                                     for x in book_list]) + '</div>')
            else:
                mi.comments += '\n'.join([booktitle(x) for x in book_list])

            # ======================= make book entry =========================

            book_id = db.create_book_entry(mi, add_duplicates=True)

            # set default cover to same as first book
            coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True)
            if coverdata:
                db.set_cover(book_id, coverdata)

            # ======================= custom columns ===================

            logger.debug("3:%s" % (time.time() - self.t))
            self.t = time.time()

            # have to get custom from db for each book.
            idslist = map(lambda x: x['calibre_id'], book_list)

            custom_columns = self.gui.library_view.model().custom_columns
            for col, action in prefs['custom_cols'].iteritems():
                #logger.debug("col: %s action: %s"%(col,action))

                if col not in custom_columns:
                    logger.debug("%s not an existing column, skipping." % col)
                    continue

                coldef = custom_columns[col]
                #logger.debug("coldef:%s"%coldef)

                if action not in permitted_values[coldef['datatype']]:
                    logger.debug(
                        "%s not a valid column type for %s, skipping." %
                        (col, action))
                    continue

                label = coldef['label']

                found = False
                value = None
                idx = None
                if action == 'first':
                    idx = 0

                if action == 'last':
                    idx = -1

                if action in ['first', 'last']:
                    value = db.get_custom(idslist[idx],
                                          label=label,
                                          index_is_id=True)
                    if coldef['datatype'] == 'series' and value != None:
                        # get the number-in-series, too.
                        value = "%s [%s]" % (
                            value,
                            db.get_custom_extra(
                                idslist[idx], label=label, index_is_id=True))
                    found = True

                if action in ('add', 'average', 'averageall'):
                    value = 0.0
                    count = 0
                    for bid in idslist:
                        try:
                            value += db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            found = True
                            # only count ones with values unless averageall
                            count += 1
                        except:
                            # if not set, it's None and fails.
                            # only count ones with values unless averageall
                            if action == 'averageall':
                                count += 1

                    if found and action in ('average', 'averageall'):
                        value = value / count

                    if coldef['datatype'] == 'int':
                        value += 0.5  # so int rounds instead of truncs.

                if action == 'and':
                    value = True
                    for bid in idslist:
                        try:
                            value = value and db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'or':
                    value = False
                    for bid in idslist:
                        try:
                            value = value or db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'newest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue > value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'oldest':
                    value = None
                    for bid in idslist:
                        try:
                            ivalue = db.get_custom(bid,
                                                   label=label,
                                                   index_is_id=True)
                            if not value or ivalue < value:
                                value = ivalue
                                found = True
                        except:
                            # if not set, it's None and fails.
                            pass

                if action == 'union':
                    if not coldef['is_multiple']:
                        action = 'concat'
                    else:
                        value = set()
                        for bid in idslist:
                            try:
                                value = value.union(
                                    db.get_custom(bid,
                                                  label=label,
                                                  index_is_id=True))
                                found = True
                            except:
                                # if not set, it's None and fails.
                                pass

                if action == 'concat':
                    value = ""
                    for bid in idslist:
                        try:
                            value = value + ' ' + db.get_custom(
                                bid, label=label, index_is_id=True)
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass
                    value = value.strip()

                if found and value != None:
                    db.set_custom(book_id, value, label=label, commit=False)

            db.commit()

            logger.debug("4:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.library_view.model().books_added(1)
            self.gui.library_view.select_rows([book_id])

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()

            confirm(
                '\n' +
                _('''The book for the new Merged EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''
                  ), 'epubmerge_created_now_edit_again', self.gui)

            self.gui.iactions['Edit Metadata'].edit_metadata(False)

            logger.debug("5:%s" % (time.time() - self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            totalsize = sum(map(lambda x: x['epub_size'], book_list))

            logger.debug("merging %s EPUBs totaling %s" %
                         (len(book_list), gethumanreadable(totalsize)))
            if len(book_list) > 100 or totalsize > 5 * 1024 * 1024:
                confirm(
                    '\n' +
                    _('''You're merging %s EPUBs totaling %s.  Calibre will be locked until the merge is finished.'''
                      ) % (len(book_list), gethumanreadable(totalsize)),
                    'epubmerge_edited_now_merge_again', self.gui)

            self.gui.status_bar.show_message(
                _('Merging %s EPUBs...') % len(book_list), 60000)

            mi = db.get_metadata(book_id, index_is_id=True)

            mergedepub = PersistentTemporaryFile(suffix='.epub')
            epubstomerge = map(lambda x: x['epub'], book_list)

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            self.do_merge(mergedepub,
                          epubstomerge,
                          authoropts=mi.authors,
                          titleopt=mi.title,
                          descopt=mi.comments,
                          tags=mi.tags,
                          languages=mi.languages,
                          titlenavpoints=prefs['titlenavpoints'],
                          flattentoc=prefs['flattentoc'],
                          printtimes=True,
                          coverjpgpath=coverjpgpath,
                          keepmetadatafiles=prefs['keepmeta'])

            logger.debug("6:%s" % (time.time() - self.t))
            logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name)
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     mergedepub,
                                     index_is_id=True)

            logger.debug("7:%s" % (time.time() - self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(
                _('Finished merging %s EPUBs.') % len(book_list), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
Example #44
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        import Image as PILImage


    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data  = mh.section_data(int(cover_index))
    else:
        try:
            data  = mh.section_data(mh.first_image_index)
        except:
            data = ''
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
    except:
        log.exception('Failed to read MOBI cover')
    else:
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi