Example #1
0
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream,
                                     stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
def get_metadata_from_reader(rdr):
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0])

    title = rdr.title
    try:
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
Example #3
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = u''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if line == '':
            break
        else:
            mdata += line

    mdata = mdata[:100]

    mo = re.search(
        '(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$',
        mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
Example #4
0
    def do_one_isbn_add(self):
        try:
            db = self.gui.library_view.model().db

            try:
                x = self.isbn_books.pop(0)
            except IndexError:
                self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
                self.isbn_add_dialog.accept()
                self.gui.iactions['Edit Metadata'].download_metadata(
                    ids=self.add_by_isbn_ids, ensure_fields=frozenset(['title',
                        'authors']))
                return

            mi = MetaInformation(None)
            mi.isbn = x['isbn']
            if self.isbn_add_tags:
                mi.tags = list(self.isbn_add_tags)
            fmts = [] if x['path'] is None else [x['path']]
            self.add_by_isbn_ids.add(db.import_book(mi, fmts))
            self.isbn_add_dialog.value += 1
            QTimer.singleShot(10, self.do_one_isbn_add)
        except:
            self.isbn_add_dialog.accept()
            raise
Example #5
0
 def _get_metadata(self, id, args, kwargs):
     from calibre.ebooks.metadata.meta import get_metadata
     try:
         mi = get_metadata(*args, **kwargs)
     except:
         mi = MetaInformation('', [_('Unknown')])
     self.metadata.emit(id, mi)
Example #6
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    for meta in opf.metadata.xpath(
                            '//*[local-name()="meta" and @name="cover"]'):
                        val = meta.get('content')
                        if val.rpartition('.')[2].lower() in {
                                'jpeg', 'jpg', 'png'
                        }:
                            cover_href = val
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1],
                                         zf.read(cover_href))
                    except Exception:
                        pass
    except Exception:
        return mi
    return mi
Example #7
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui,
                              self.gui.library_view.model().db, author,
                              series)
     if dlg.exec_() == dlg.Accepted:
         num = dlg.qty_to_add
         series = dlg.selected_series
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(_('Unknown'), dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             ids.append(db.import_book(mi, []))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
Example #8
0
 def _from_formats(self, id, args, kwargs):
     from calibre.ebooks.metadata.meta import metadata_from_formats
     try:
         mi = metadata_from_formats(*args, **kwargs)
     except:
         mi = MetaInformation('', [_('Unknown')])
     self.metadataf.emit(id, mi)
Example #9
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(io.BytesIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Example #10
0
def get_comic_metadata(stream, stream_type):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation

    comment = None

    mi = MetaInformation(None, None)

    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(stream)
        comment = zf.comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import RARFile
        f = RARFile(stream, get_comment=True)
        comment = f.comment

    if comment:
        import json
        m = json.loads(comment)
        if hasattr(m, 'iterkeys'):
            for cat in m.iterkeys():
                if cat.startswith('ComicBookInfo'):
                    get_comic_book_info(m[cat], mi)
                    break
    return mi
Example #11
0
    def add_empty(self, *args):
        '''
        Add an empty book item to the library. This does not import any formats
        from a book file.
        '''
        author = series = title = None
        index = self.gui.library_view.currentIndex()
        if index.isValid():
            raw = index.model().db.authors(index.row())
            if raw:
                authors = [a.strip().replace('|', ',') for a in raw.split(',')]
                if authors:
                    author = authors[0]
            series = index.model().db.series(index.row())
            title = index.model().db.title(index.row())
        dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                                 author, series, dup_title=title)
        if dlg.exec_() == dlg.Accepted:
            temp_files = []
            num = dlg.qty_to_add
            series = dlg.selected_series
            title = dlg.selected_title or _('Unknown')
            db = self.gui.library_view.model().db
            ids, orig_fmts = [], []
            if dlg.duplicate_current_book:
                origmi = db.get_metadata(index.row(), get_cover=True, cover_as_data=True)
                if dlg.copy_formats.isChecked():
                    book_id = db.id(index.row())
                    orig_fmts = tuple(db.new_api.format(book_id, fmt, as_path=True) for fmt in db.new_api.formats(book_id))

            for x in xrange(num):
                if dlg.duplicate_current_book:
                    mi = origmi
                else:
                    mi = MetaInformation(title, dlg.selected_authors)
                    if series:
                        mi.series = series
                        mi.series_index = db.get_next_series_num_for(series)
                fmts = []
                empty_format = gprefs.get('create_empty_format_file', '')
                if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                    fmts = orig_fmts
                elif empty_format:
                    from calibre.ebooks.oeb.polish.create import create_book
                    pt = PersistentTemporaryFile(suffix='.' + empty_format)
                    pt.close()
                    temp_files.append(pt.name)
                    create_book(mi, pt.name, fmt=empty_format)
                    fmts = [pt.name]
                ids.append(db.import_book(mi, fmts))
            tuple(map(os.remove, orig_fmts))
            self.gui.library_view.model().books_added(num)
            self.gui.refresh_cover_browser()
            self.gui.tags_view.recount()
            if ids:
                ids.reverse()
                self.gui.library_view.select_rows(ids)
            for path in temp_files:
                os.remove(path)
    def _start_splitmerge(self,book_list, tdir=None, db=None):
        # logger.debug(book_list)

        em = self.get_epubmerge_plugin()
        es = self.get_epubsplit_plugin()

        good_list = [ b for b in book_list if b['good'] ]

        tmp = PersistentTemporaryFile(prefix='merge-',
                                      suffix='.epub',
                                      dir=tdir)
        if len(good_list) == 1:
            deftitle = "New "+good_list[0]['title']
            defauthors = good_list[0]['authors']
        else:
            deftitle = "New Chapters Anthology"
            defauthors = ["Various Authors"]

        mi = MetaInformation(deftitle,defauthors)
        tagslists = [ x['tags'] for x in good_list ]
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.comments = "<p>New Chapters from:</p>"
        mi.comments += '<br/>'.join( [ "%s by %s"%(x['title'],", ".join(x['authors'])) for x in good_list ] )
    
        em.do_merge(tmp,
                    [b['splittmp'] for b in good_list],
                    authoropts=mi.authors,
                    titleopt=mi.title,
                    descopt=mi.comments,
                    tags=mi.tags,
                    keepmetadatafiles=False,
                    )

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 tmp, index_is_id=True)
        
        self.gui.library_view.model().books_added(1)
        self.gui.library_view.model().refresh_ids([book_id])
        # self.gui.iactions['Edit Metadata'].edit_metadata(False)
        self.gui.tags_view.recount()
        
        ## run word counts
        cp_plugin = self.gui.iactions['Count Pages']
        cp_plugin.count_statistics([book_id],['WordCount'])
        
        ## run auto convert
        self.gui.iactions['Convert Books'].auto_convert_auto_add([book_id])

        ## add to FFF update lists
        self.gui.library_view.select_rows([book_id])
        fff_plugin = self.gui.iactions['FanFicFare']
        fff_plugin.update_lists()
        
        remove_dir(tdir)
Example #13
0
    def add_annotation_to_library(self, db, db_id, annotation):
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        bm = annotation
        ignore_tags = set(['Catalog', 'Clippings'])

        if bm.type == 'kindle_bookmark':
            mi = db.get_metadata(db_id, index_is_id=True)
            user_notes_soup = self.generate_annotation_html(bm.value)
            if mi.comments:
                a_offset = mi.comments.find('<div class="user_annotations">')
                ad_offset = mi.comments.find(
                    '<hr class="annotations_divider" />')

                if a_offset >= 0:
                    mi.comments = mi.comments[:a_offset]
                if ad_offset >= 0:
                    mi.comments = mi.comments[:ad_offset]
                if set(mi.tags).intersection(ignore_tags):
                    return
                if mi.comments:
                    hrTag = Tag(user_notes_soup, 'hr')
                    hrTag['class'] = 'annotations_divider'
                    user_notes_soup.insert(0, hrTag)

                mi.comments += unicode(user_notes_soup.prettify())
            else:
                mi.comments = unicode(user_notes_soup.prettify())
            # Update library comments
            db.set_comment(db_id, mi.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(db_id,
                                     bm.value.bookmark_extension,
                                     bm.value.path,
                                     index_is_id=True)
        elif bm.type == 'kindle_clippings':
            # Find 'My Clippings' author=Kindle in database, or add
            last_update = 'Last modified %s' % strftime(
                u'%x %X', bm.value['timestamp'].timetuple())
            mc_id = list(
                db.data.search_getting_ids('title:"My Clippings"',
                                           '',
                                           sort_results=False))
            if mc_id:
                db.add_format_with_hooks(mc_id[0],
                                         'TXT',
                                         bm.value['path'],
                                         index_is_id=True)
                mi = db.get_metadata(mc_id[0], index_is_id=True)
                mi.comments = last_update
                db.set_metadata(mc_id[0], mi)
            else:
                mi = MetaInformation('My Clippings', authors=['Kindle'])
                mi.tags = ['Clippings']
                mi.comments = last_update
                db.add_books([bm.value['path']], ['txt'], [mi])
Example #14
0
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

    # fallback for book_title
    if book_title:
        book_title = unicode(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    try:
        _parse_cover(root, mi)
    except:
        pass
    try:
        _parse_comments(root, mi)
    except:
        pass
    try:
        _parse_tags(root, mi)
    except:
        pass
    try:
        _parse_series(root, mi)
    except:
        pass
    try:
        _parse_isbn(root, mi)
    except:
        pass
    try:
        _parse_publisher(root, mi)
    except:
        pass
    try:
        _parse_pubdate(root, mi)
    except:
        pass
    #try:
    #    _parse_timestamp(root, mi)
    #except:
    #    pass

    try:
        _parse_language(root, mi)
    except:
        pass
    #_parse_uuid(root, mi)

    #if DEBUG:
    #   prints(mi)
    return mi
Example #15
0
def set_metadata(stream,
                 mi,
                 apply_null=False,
                 update_timestamp=False,
                 force_identifiers=False):
    stream.seek(0)
    reader = get_zip_reader(stream, root=os.getcwdu())
    raster_cover = reader.opf.raster_cover
    mi = MetaInformation(mi)
    new_cdata = None
    replacements = {}
    try:
        new_cdata = mi.cover_data[1]
        if not new_cdata:
            raise Exception('no cover')
    except:
        try:
            new_cdata = open(mi.cover, 'rb').read()
        except:
            pass
    new_cover = cpath = None
    if new_cdata and raster_cover:
        try:
            cpath = posixpath.join(posixpath.dirname(reader.opf_path),
                                   raster_cover)
            cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \
                    os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg')
            if cover_replacable:
                new_cover = _write_new_cover(new_cdata, cpath)
                replacements[cpath] = open(new_cover.name, 'rb')
        except:
            import traceback
            traceback.print_exc()

    update_metadata(reader.opf,
                    mi,
                    apply_null=apply_null,
                    update_timestamp=update_timestamp,
                    force_identifiers=force_identifiers)

    newopf = StringIO(reader.opf.render())
    if isinstance(reader.archive, LocalZipFile):
        reader.archive.safe_replace(reader.container[OPF.MIMETYPE],
                                    newopf,
                                    extra_replacements=replacements)
    else:
        safe_replace(stream,
                     reader.container[OPF.MIMETYPE],
                     newopf,
                     extra_replacements=replacements)
    try:
        if cpath is not None:
            replacements[cpath].close()
            os.remove(replacements[cpath].name)
    except:
        pass
Example #16
0
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object
    """
    stream.seek(0)
    if stream.read(5) != br'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    title_match = title_pat.search(block)
    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
    else:
        title = _('Unknown')
    author_match = author_pat.search(block)
    if author_match is not None:
        author = decode(author_match.group(1).strip(), cpg)
    else:
        author = None
    mi = MetaInformation(title)
    if author:
        mi.authors = [x.strip() for x in author.split(',')]

    comment_match = comment_pat.search(block)
    if comment_match is not None:
        comment = decode(comment_match.group(1).strip(), cpg)
        mi.comments = comment
    tags_match = tags_pat.search(block)
    if tags_match is not None:
        tags = decode(tags_match.group(1).strip(), cpg)
        mi.tags = list(filter(None, (x.strip() for x in tags.split(','))))
    publisher_match = publisher_pat.search(block)
    if publisher_match is not None:
        publisher = decode(publisher_match.group(1).strip(), cpg)
        mi.publisher = publisher

    return mi
Example #17
0
File: pdf.py Project: kmshi/calibre
def get_metadata(stream, cover=True):
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    # if isbn is not None:
    #    mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
Example #18
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #19
0
    def __init__(self, raw, codec, title):
        self.doctype = raw[:4]
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
        self.has_fake_cover = True
        self.start_offset = None
        left = self.num_items
        self.kf8_header = None
        self.uuid = self.cdetype = None

        while left > 0:
            left -= 1
            idx, size = struct.unpack('>LL', raw[pos:pos + 8])
            content = raw[pos + 8:pos + size]
            pos += size
            if idx >= 100 and idx < 200:
                self.process_metadata(idx, content, codec)
            elif idx == 203:
                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
            elif idx == 201:
                co, = struct.unpack('>L', content)
                if co < NULL_INDEX:
                    self.cover_offset = co
            elif idx == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
            elif idx == 501:
                try:
                    self.cdetype = content.decode('ascii')
                except UnicodeDecodeError:
                    self.cdetype = None
                # cdetype
                if content == b'EBSP':
                    if not self.mi.tags:
                        self.mi.tags = []
                    self.mi.tags.append(_('Sample Book'))
            elif idx == 502:
                # last update time
                pass
            elif idx == 503: # Long title
                # Amazon seems to regard this as the definitive book title
                # rather than the title from the PDB header. In fact when
                # sending MOBI files through Amazon's email service if the
                # title contains non ASCII chars or non filename safe chars
                # they are messed up in the PDB header
                try:
                    title = content.decode(codec)
                except:
                    pass
            #else:
            #    print 'unknown record', idx, repr(content)
        if title:
            self.mi.title = replace_entities(title)
Example #20
0
def parse_comic_comment(comment, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json
    mi = MetaInformation(None, None)
    m = json.loads(comment)
    if isinstance(m, dict):
        for cat in m:
            if cat.startswith('ComicBookInfo'):
                get_comic_book_info(m[cat], mi, series_index=series_index)
                break
    return mi
Example #21
0
def do_set_metadata(opts, mi, stream, stream_type):
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata()
        mi.smart_update(opf_mi)

    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)
    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        val = {
            k.strip(): v.strip()
            for k, v in (x.partition(':')[0::2] for x in opts.identifiers)
        }
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            val = {k: v for k, v in orig.iteritems() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        mi.cover_data = (ext, open(opts.cover, 'rb').read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
Example #22
0
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(get_fb2_data(stream)[0])
    ctx = Context(root)
    book_title = _parse_book_title(root, ctx)
    authors = _parse_authors(root, ctx) or [_('Unknown')]

    # fallback for book_title
    if book_title:
        book_title = unicode_type(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    try:
        _parse_cover(root, mi, ctx)
    except:
        pass
    try:
        _parse_comments(root, mi, ctx)
    except:
        pass
    try:
        _parse_tags(root, mi, ctx)
    except:
        pass
    try:
        _parse_series(root, mi, ctx)
    except:
        pass
    try:
        _parse_isbn(root, mi, ctx)
    except:
        pass
    try:
        _parse_publisher(root, mi, ctx)
    except:
        pass
    try:
        _parse_pubdate(root, mi, ctx)
    except:
        pass

    try:
        _parse_language(root, mi, ctx)
    except:
        pass

    return mi
Example #23
0
def _get_metadata(stream,
                  stream_type,
                  use_libprs_metadata,
                  force_read_metadata=False,
                  pattern=None):
    if stream_type:
        stream_type = stream_type.lower()
    if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
        stream_type = 'html'
    if stream_type in ('mobi', 'prc', 'azw'):
        stream_type = 'mobi'
    if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
        stream_type = 'odt'

    opf = None
    if hasattr(stream, 'name'):
        c = os.path.splitext(stream.name)[0] + '.opf'
        if os.access(c, os.R_OK):
            opf = opf_metadata(os.path.abspath(c))

    if use_libprs_metadata and getattr(opf, 'application_id',
                                       None) is not None:
        return opf

    mi = MetaInformation(None, None)
    name = os.path.basename(getattr(stream, 'name', ''))
    base = metadata_from_filename(name, pat=pattern)
    if force_read_metadata or prefs['read_file_metadata']:
        mi = get_file_type_metadata(stream, stream_type)
    if base.title == os.path.splitext(name)[0] and \
            base.is_null('authors') and base.is_null('isbn'):
        # Assume that there was no metadata in the file and the user set pattern
        # to match meta info from the file name did not match.
        # The regex is meant to match the standard format filenames are written
        # in the library title - author.extension
        base.smart_update(
            metadata_from_filename(
                name,
                re.compile(r'^(?P<title>.+)[ _]-[ _](?P<author>[^-]+)$')))
        if base.title:
            base.title = base.title.replace('_', ' ')
        if base.authors:
            base.authors = [a.replace('_', ' ').strip() for a in base.authors]
    if not base.authors:
        base.authors = [_('Unknown')]
    if not base.title:
        base.title = _('Unknown')
    base.smart_update(mi)
    if opf is not None:
        base.smart_update(opf)

    return base
 def get_clippings_cid(self, title):
     '''
     Find or create cid for title
     '''
     cid = None
     try:
         cid = list(self.parent.opts.gui.current_db.data.parse('title:"%s" and tag:Clippings' % title))[0]
     except:
         mi = MetaInformation(title, authors = ['Various'])
         mi.tags = ['Clippings']
         cid = self.parent.opts.gui.current_db.create_book_entry(mi, cover=None,
             add_duplicates=False, force_id=None)
     return cid
Example #25
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """

    pheader = PdbHeaderReader(stream)

    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
        return MetaInformation(pheader.title, [_('Unknown')])

    return MetadataReader(stream, extract_cover)
Example #26
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui,
                              self.gui.library_view.model().db, author,
                              series)
     if dlg.exec_() == dlg.Accepted:
         temp_files = []
         num = dlg.qty_to_add
         series = dlg.selected_series
         title = dlg.selected_title or _('Unknown')
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(title, dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             fmts = []
             empty_format = gprefs.get('create_empty_format_file', '')
             if empty_format:
                 from calibre.ebooks.oeb.polish.create import create_book
                 pt = PersistentTemporaryFile(suffix='.' + empty_format)
                 pt.close()
                 temp_files.append(pt.name)
                 create_book(mi, pt.name, fmt=empty_format)
                 fmts = [pt.name]
             ids.append(db.import_book(mi, fmts))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.beginResetModel(
             ), self.gui.db_images.endResetModel()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
         for path in temp_files:
             os.remove(path)
Example #27
0
def get_metadata(f):
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == b'ftypLRX2':
        offset = 0
        while True:
            offset += word_be(buf[:4])
            try:
                buf = read(offset, 8)
            except:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == b'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = safe_xml_fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        author = author.text
        publisher = bi.find('Publisher')
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    elif buf[4:8] == b'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
Example #28
0
def get_cbz_metadata(stream):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata import MetaInformation
    import json

    zf = ZipFile(stream)
    mi = MetaInformation(None, None)
    if zf.comment:
        m = json.loads(zf.comment)
        if hasattr(m, 'keys'):
            for cat in m.keys():
                if cat.startswith('ComicBookInfo'):
                    get_comic_book_info(m[cat], mi)
    return mi
Example #29
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """

    pheader = PdbHeaderReader(stream)

    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
        t = pheader.title
        if isinstance(t, bytes):
            t = t.decode('utf-8', 'replace')
        return MetaInformation(t, [_('Unknown')])

    return MetadataReader(stream, extract_cover)
Example #30
0
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    db.import_book(mi, [])
    write_dirtied(db)
    send_message()