Example #1
1
    def do_one_isbn_add(self):
        try:
            db = self.gui.library_view.model().db

            try:
                x = self.isbn_books.pop(0)
            except IndexError:
                self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
                self.isbn_add_dialog.accept()
                self.gui.iactions['Edit Metadata'].download_metadata(
                    ids=self.add_by_isbn_ids, ensure_fields=frozenset(['title',
                        'authors']))
                return

            mi = MetaInformation(None)
            mi.isbn = x['isbn']
            if self.isbn_add_tags:
                mi.tags = list(self.isbn_add_tags)
            fmts = [] if x['path'] is None else [x['path']]
            self.add_by_isbn_ids.add(db.import_book(mi, fmts))
            self.isbn_add_dialog.value += 1
            QTimer.singleShot(10, self.do_one_isbn_add)
        except:
            self.isbn_add_dialog.accept()
            raise
Example #2
0
def get_metadata_from_reader(rdr):
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0])

    title = rdr.title
    try:
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
Example #3
0
    def do_one_isbn_add(self):
        try:
            db = self.gui.library_view.model().db

            try:
                x = self.isbn_books.pop(0)
            except IndexError:
                self.gui.library_view.model().books_added(
                    self.isbn_add_dialog.value)
                self.isbn_add_dialog.accept()
                self.gui.iactions['Edit Metadata'].download_metadata(
                    ids=self.add_by_isbn_ids,
                    ensure_fields=frozenset(['title', 'authors']))
                return

            mi = MetaInformation(None)
            mi.isbn = x['isbn']
            if self.isbn_add_tags:
                mi.tags = list(self.isbn_add_tags)
            fmts = [] if x['path'] is None else [x['path']]
            self.add_by_isbn_ids.add(db.import_book(mi, fmts))
            self.isbn_add_dialog.value += 1
            QTimer.singleShot(10, self.do_one_isbn_add)
        except:
            self.isbn_add_dialog.accept()
            raise
Example #4
0
def get_metadata(stream, cpath=None):
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
        lambda x,y:x,  args=[pt.name, cpath])
    server.add_job(job)
    while not job.is_finished:
        time.sleep(0.1)
        job.update()

    job.update()
    server.close()
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        print 'Failed to extract cover:'
        print job.details
    if title == '_':
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if os.path.exists(pt.name): os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
Example #5
0
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
                                     METADATA_PRIORITIES[path_to_ext(y)]))
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
Example #6
0
def get_djvu_metadata(stream, cover=True):
    with TemporaryDirectory('_djvu_metadata_read') as djvupath:
        stream.seek(0)
        with open(os.path.join(djvupath, 'src.djvu'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre_plugins.djvu_metadata.djvu', 'get_djvu_metadata_worker', (djvupath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run djvused')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if info is None:
            raise ValueError('Could not read metadata from djvu')
        covpath = os.path.join(djvupath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)

    if cdata:
        mi.cover_data = ('jpg', cdata)

    return mi
Example #7
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    for meta in opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]'):
                        val = meta.get('content')
                        if val.rpartition('.')[2].lower() in {'jpeg', 'jpg', 'png'}:
                            cover_href = val
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
                    except Exception:
                        pass
    except Exception:
        return mi
    return mi
Example #8
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = ''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if not line:
            break
        else:
            mdata += line

    mdata = mdata[:1024]

    mo = re.search(
        '(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$',
        mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
Example #9
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    for meta in opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]'):
                        val = meta.get('content')
                        if val.rpartition('.')[2].lower() in {'jpeg', 'jpg', 'png'}:
                            cover_href = val
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
                    except Exception:
                        pass
    except Exception:
        return mi
    return mi
Example #10
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                              author, series)
     if dlg.exec_() == dlg.Accepted:
         num = dlg.qty_to_add
         series = dlg.selected_series
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(_('Unknown'), dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             ids.append(db.import_book(mi, []))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
Example #11
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = u''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if line == '':
            break
        else:
            mdata += line

    mdata = mdata[:100]

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
Example #12
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui,
                              self.gui.library_view.model().db, author,
                              series)
     if dlg.exec_() == dlg.Accepted:
         num = dlg.qty_to_add
         series = dlg.selected_series
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(_('Unknown'), dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             ids.append(db.import_book(mi, []))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
Example #13
0
    def add_empty(self, *args):
        '''
        Add an empty book item to the library. This does not import any formats
        from a book file.
        '''
        author = series = title = None
        index = self.gui.library_view.currentIndex()
        if index.isValid():
            raw = index.model().db.authors(index.row())
            if raw:
                authors = [a.strip().replace('|', ',') for a in raw.split(',')]
                if authors:
                    author = authors[0]
            series = index.model().db.series(index.row())
            title = index.model().db.title(index.row())
        dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                                 author, series, dup_title=title)
        if dlg.exec_() == dlg.Accepted:
            temp_files = []
            num = dlg.qty_to_add
            series = dlg.selected_series
            title = dlg.selected_title or _('Unknown')
            db = self.gui.library_view.model().db
            ids, orig_fmts = [], []
            if dlg.duplicate_current_book:
                origmi = db.get_metadata(index.row(), get_cover=True, cover_as_data=True)
                if dlg.copy_formats.isChecked():
                    book_id = db.id(index.row())
                    orig_fmts = tuple(db.new_api.format(book_id, fmt, as_path=True) for fmt in db.new_api.formats(book_id))

            for x in xrange(num):
                if dlg.duplicate_current_book:
                    mi = origmi
                else:
                    mi = MetaInformation(title, dlg.selected_authors)
                    if series:
                        mi.series = series
                        mi.series_index = db.get_next_series_num_for(series)
                fmts = []
                empty_format = gprefs.get('create_empty_format_file', '')
                if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                    fmts = orig_fmts
                elif empty_format:
                    from calibre.ebooks.oeb.polish.create import create_book
                    pt = PersistentTemporaryFile(suffix='.' + empty_format)
                    pt.close()
                    temp_files.append(pt.name)
                    create_book(mi, pt.name, fmt=empty_format)
                    fmts = [pt.name]
                ids.append(db.import_book(mi, fmts))
            tuple(map(os.remove, orig_fmts))
            self.gui.library_view.model().books_added(num)
            self.gui.refresh_cover_browser()
            self.gui.tags_view.recount()
            if ids:
                ids.reverse()
                self.gui.library_view.select_rows(ids)
            for path in temp_files:
                os.remove(path)
Example #14
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data  = mh.section_data(int(cover_index))
    else:
        try:
            data  = mh.section_data(mh.first_image_index)
        except:
            data = ''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #15
0
    def add_empty(self, *args):
        '''
        Add an empty book item to the library. This does not import any formats
        from a book file.
        '''
        author = series = title = None
        index = self.gui.library_view.currentIndex()
        if index.isValid():
            raw = index.model().db.authors(index.row())
            if raw:
                authors = [a.strip().replace('|', ',') for a in raw.split(',')]
                if authors:
                    author = authors[0]
            series = index.model().db.series(index.row())
            title = index.model().db.title(index.row())
        dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                                 author, series, dup_title=title)
        if dlg.exec_() == dlg.Accepted:
            temp_files = []
            num = dlg.qty_to_add
            series = dlg.selected_series
            title = dlg.selected_title or _('Unknown')
            db = self.gui.library_view.model().db
            ids, orig_fmts = [], []
            if dlg.duplicate_current_book:
                origmi = db.get_metadata(index.row(), get_cover=True, cover_as_data=True)
                if dlg.copy_formats.isChecked():
                    book_id = db.id(index.row())
                    orig_fmts = tuple(db.new_api.format(book_id, fmt, as_path=True) for fmt in db.new_api.formats(book_id))

            for x in xrange(num):
                if dlg.duplicate_current_book:
                    mi = origmi
                else:
                    mi = MetaInformation(title, dlg.selected_authors)
                    if series:
                        mi.series = series
                        mi.series_index = db.get_next_series_num_for(series)
                fmts = []
                empty_format = gprefs.get('create_empty_format_file', '')
                if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                    fmts = orig_fmts
                elif empty_format:
                    from calibre.ebooks.oeb.polish.create import create_book
                    pt = PersistentTemporaryFile(suffix='.' + empty_format)
                    pt.close()
                    temp_files.append(pt.name)
                    create_book(mi, pt.name, fmt=empty_format)
                    fmts = [pt.name]
                ids.append(db.import_book(mi, fmts))
            tuple(map(os.remove, orig_fmts))
            self.gui.library_view.model().books_added(num)
            self.gui.refresh_cover_browser()
            self.gui.tags_view.recount()
            if ids:
                ids.reverse()
                self.gui.library_view.select_rows(ids)
            for path in temp_files:
                os.remove(path)
Example #16
0
    def add_annotation_to_library(self, db, db_id, annotation):
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        bm = annotation
        ignore_tags = set(['Catalog', 'Clippings'])

        if bm.type == 'kindle_bookmark':
            mi = db.get_metadata(db_id, index_is_id=True)
            user_notes_soup = self.generate_annotation_html(bm.value)
            if mi.comments:
                a_offset = mi.comments.find('<div class="user_annotations">')
                ad_offset = mi.comments.find(
                    '<hr class="annotations_divider" />')

                if a_offset >= 0:
                    mi.comments = mi.comments[:a_offset]
                if ad_offset >= 0:
                    mi.comments = mi.comments[:ad_offset]
                if set(mi.tags).intersection(ignore_tags):
                    return
                if mi.comments:
                    hrTag = Tag(user_notes_soup, 'hr')
                    hrTag['class'] = 'annotations_divider'
                    user_notes_soup.insert(0, hrTag)

                mi.comments += unicode(user_notes_soup.prettify())
            else:
                mi.comments = unicode(user_notes_soup.prettify())
            # Update library comments
            db.set_comment(db_id, mi.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(db_id,
                                     bm.value.bookmark_extension,
                                     bm.value.path,
                                     index_is_id=True)
        elif bm.type == 'kindle_clippings':
            # Find 'My Clippings' author=Kindle in database, or add
            last_update = 'Last modified %s' % strftime(
                u'%x %X', bm.value['timestamp'].timetuple())
            mc_id = list(
                db.data.search_getting_ids('title:"My Clippings"',
                                           '',
                                           sort_results=False))
            if mc_id:
                db.add_format_with_hooks(mc_id[0],
                                         'TXT',
                                         bm.value['path'],
                                         index_is_id=True)
                mi = db.get_metadata(mc_id[0], index_is_id=True)
                mi.comments = last_update
                db.set_metadata(mc_id[0], mi)
            else:
                mi = MetaInformation('My Clippings', authors=['Kindle'])
                mi.tags = ['Clippings']
                mi.comments = last_update
                db.add_books([bm.value['path']], ['txt'], [mi])
    def _start_splitmerge(self,book_list, tdir=None, db=None):
        # logger.debug(book_list)

        em = self.get_epubmerge_plugin()
        es = self.get_epubsplit_plugin()

        good_list = [ b for b in book_list if b['good'] ]

        tmp = PersistentTemporaryFile(prefix='merge-',
                                      suffix='.epub',
                                      dir=tdir)
        if len(good_list) == 1:
            deftitle = "New "+good_list[0]['title']
            defauthors = good_list[0]['authors']
        else:
            deftitle = "New Chapters Anthology"
            defauthors = ["Various Authors"]

        mi = MetaInformation(deftitle,defauthors)
        tagslists = [ x['tags'] for x in good_list ]
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.comments = "<p>New Chapters from:</p>"
        mi.comments += '<br/>'.join( [ "%s by %s"%(x['title'],", ".join(x['authors'])) for x in good_list ] )
    
        em.do_merge(tmp,
                    [b['splittmp'] for b in good_list],
                    authoropts=mi.authors,
                    titleopt=mi.title,
                    descopt=mi.comments,
                    tags=mi.tags,
                    keepmetadatafiles=False,
                    )

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 tmp, index_is_id=True)
        
        self.gui.library_view.model().books_added(1)
        self.gui.library_view.model().refresh_ids([book_id])
        # self.gui.iactions['Edit Metadata'].edit_metadata(False)
        self.gui.tags_view.recount()
        
        ## run word counts
        cp_plugin = self.gui.iactions['Count Pages']
        cp_plugin.count_statistics([book_id],['WordCount'])
        
        ## run auto convert
        self.gui.iactions['Convert Books'].auto_convert_auto_add([book_id])

        ## add to FFF update lists
        self.gui.library_view.select_rows([book_id])
        fff_plugin = self.gui.iactions['FanFicFare']
        fff_plugin.update_lists()
        
        remove_dir(tdir)
Example #18
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #19
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in xrange(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Example #20
0
    def __init__(self, raw, codec, title):
        self.doctype = raw[:4]
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
        self.has_fake_cover = True
        self.start_offset = None
        left = self.num_items
        self.kf8_header = None
        self.uuid = self.cdetype = None

        while left > 0:
            left -= 1
            idx, size = struct.unpack('>LL', raw[pos:pos + 8])
            content = raw[pos + 8:pos + size]
            pos += size
            if idx >= 100 and idx < 200:
                self.process_metadata(idx, content, codec)
            elif idx == 203:
                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
            elif idx == 201:
                co, = struct.unpack('>L', content)
                if co < NULL_INDEX:
                    self.cover_offset = co
            elif idx == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
            elif idx == 501:
                try:
                    self.cdetype = content.decode('ascii')
                except UnicodeDecodeError:
                    self.cdetype = None
                # cdetype
                if content == b'EBSP':
                    if not self.mi.tags:
                        self.mi.tags = []
                    self.mi.tags.append(_('Sample Book'))
            elif idx == 502:
                # last update time
                pass
            elif idx == 503: # Long title
                # Amazon seems to regard this as the definitive book title
                # rather than the title from the PDB header. In fact when
                # sending MOBI files through Amazon's email service if the
                # title contains non ASCII chars or non filename safe chars
                # they are messed up in the PDB header
                try:
                    title = content.decode(codec)
                except:
                    pass
            #else:
            #    print 'unknown record', idx, repr(content)
        if title:
            self.mi.title = replace_entities(title)
 def get_clippings_cid(self, title):
     '''
     Find or create cid for title
     '''
     cid = None
     try:
         cid = list(self.parent.opts.gui.current_db.data.parse('title:"%s" and tag:Clippings' % title))[0]
     except:
         mi = MetaInformation(title, authors = ['Various'])
         mi.tags = ['Clippings']
         cid = self.parent.opts.gui.current_db.create_book_entry(mi, cover=None,
             add_duplicates=False, force_id=None)
     return cid
 def get_clippings_cid(self, title):
     '''
     Find or create cid for title
     '''
     cid = None
     try:
         cid = list(self.parent.opts.gui.current_db.data.parse('title:"%s" and tag:Clippings' % title))[0]
     except:
         mi = MetaInformation(title, authors = ['Various'])
         mi.tags = ['Clippings']
         cid = self.parent.opts.gui.current_db.create_book_entry(mi, cover=None,
             add_duplicates=False, force_id=None)
     return cid
Example #23
0
File: pdf.py Project: Eksmo/calibre
def get_metadata(stream, cover=True):
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    #if isbn is not None:
    #    mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
Example #24
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.decode('cp1252', 'replace').split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except Exception:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
Example #25
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui,
                              self.gui.library_view.model().db, author,
                              series)
     if dlg.exec_() == dlg.Accepted:
         temp_files = []
         num = dlg.qty_to_add
         series = dlg.selected_series
         title = dlg.selected_title or _('Unknown')
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(title, dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             fmts = []
             empty_format = gprefs.get('create_empty_format_file', '')
             if empty_format:
                 from calibre.ebooks.oeb.polish.create import create_book
                 pt = PersistentTemporaryFile(suffix='.' + empty_format)
                 pt.close()
                 temp_files.append(pt.name)
                 create_book(mi, pt.name, fmt=empty_format)
                 fmts = [pt.name]
             ids.append(db.import_book(mi, fmts))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.beginResetModel(
             ), self.gui.db_images.endResetModel()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
         for path in temp_files:
             os.remove(path)
Example #26
0
    def add_annotation_to_library(self, db, db_id, annotation):
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        bm = annotation
        ignore_tags = set(['Catalog', 'Clippings'])

        if bm.type == 'kindle_bookmark':
            mi = db.get_metadata(db_id, index_is_id=True)
            user_notes_soup = self.generate_annotation_html(bm.value)
            if mi.comments:
                a_offset = mi.comments.find('<div class="user_annotations">')
                ad_offset = mi.comments.find('<hr class="annotations_divider" />')

                if a_offset >= 0:
                    mi.comments = mi.comments[:a_offset]
                if ad_offset >= 0:
                    mi.comments = mi.comments[:ad_offset]
                if set(mi.tags).intersection(ignore_tags):
                    return
                if mi.comments:
                    hrTag = Tag(user_notes_soup,'hr')
                    hrTag['class'] = 'annotations_divider'
                    user_notes_soup.insert(0, hrTag)

                mi.comments += unicode(user_notes_soup.prettify())
            else:
                mi.comments = unicode(user_notes_soup.prettify())
            # Update library comments
            db.set_comment(db_id, mi.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
                                            bm.value.path, index_is_id=True)
        elif bm.type == 'kindle_clippings':
            # Find 'My Clippings' author=Kindle in database, or add
            last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
            mc_id = list(db.data.search_getting_ids('title:"My Clippings"', '', sort_results=False))
            if mc_id:
                db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
                        index_is_id=True)
                mi = db.get_metadata(mc_id[0], index_is_id=True)
                mi.comments = last_update
                db.set_metadata(mc_id[0], mi)
            else:
                mi = MetaInformation('My Clippings', authors=['Kindle'])
                mi.tags = ['Clippings']
                mi.comments = last_update
                db.add_books([bm.value['path']], ['txt'], [mi])
Example #27
0
File: lrx.py Project: AEliu/calibre
def get_metadata(f):
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == 'ftypLRX2':
        offset = 0
        while True:
            offset += word_be(buf[:4])
            try:
                buf = read(offset, 8)
            except:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == 'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = etree.fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        author = author.text
        publisher = bi.find('Publisher')
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in  bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    elif buf[4:8] == 'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
Example #28
0
 def _get_metadata(self, id, args, kwargs):
     from calibre.ebooks.metadata.meta import get_metadata
     try:
         mi = get_metadata(*args, **kwargs)
     except:
         mi = MetaInformation('', [_('Unknown')])
     self.emit(SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'), id, mi)
Example #29
0
 def _from_formats(self, id, args, kwargs):
     from calibre.ebooks.metadata.meta import metadata_from_formats
     try:
         mi = metadata_from_formats(*args, **kwargs)
     except:
         mi = MetaInformation('', [_('Unknown')])
     self.metadataf.emit(id, mi)
Example #30
0
 def _get_metadata(self, id, args, kwargs):
     from calibre.ebooks.metadata.meta import get_metadata
     try:
         mi = get_metadata(*args, **kwargs)
     except:
         mi = MetaInformation('', [_('Unknown')])
     self.metadata.emit(id, mi)
Example #31
0
def get_comic_metadata(stream, stream_type, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation

    comment = None

    mi = MetaInformation(None, None)

    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(stream)
        comment = zf.comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import comment as get_comment
        comment = get_comment(stream)

    if comment:
        import json
        m = json.loads(comment)
        if hasattr(m, 'iterkeys'):
            for cat in m.iterkeys():
                if cat.startswith('ComicBookInfo'):
                    get_comic_book_info(m[cat], mi, series_index=series_index)
                    break
    return mi
Example #32
0
    def add_annotation_to_library(self, db, db_id, annotation):
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation

        bm = annotation
        ignore_tags = set(["Catalog", "Clippings"])

        if bm.type == "kindle_bookmark":
            mi = db.get_metadata(db_id, index_is_id=True)
            user_notes_soup = self.generate_annotation_html(bm.value)
            if mi.comments:
                a_offset = mi.comments.find('<div class="user_annotations">')
                ad_offset = mi.comments.find('<hr class="annotations_divider" />')

                if a_offset >= 0:
                    mi.comments = mi.comments[:a_offset]
                if ad_offset >= 0:
                    mi.comments = mi.comments[:ad_offset]
                if set(mi.tags).intersection(ignore_tags):
                    return
                if mi.comments:
                    hrTag = Tag(user_notes_soup, "hr")
                    hrTag["class"] = "annotations_divider"
                    user_notes_soup.insert(0, hrTag)

                mi.comments += unicode(user_notes_soup.prettify())
            else:
                mi.comments = unicode(user_notes_soup.prettify())
            # Update library comments
            db.set_comment(db_id, mi.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(db_id, bm.value.bookmark_extension, bm.value.path, index_is_id=True)
        elif bm.type == "kindle_clippings":
            # Find 'My Clippings' author=Kindle in database, or add
            last_update = "Last modified %s" % strftime(u"%x %X", bm.value["timestamp"].timetuple())
            mc_id = list(db.data.search_getting_ids('title:"My Clippings"', ""))
            if mc_id:
                db.add_format_with_hooks(mc_id[0], "TXT", bm.value["path"], index_is_id=True)
                mi = db.get_metadata(mc_id[0], index_is_id=True)
                mi.comments = last_update
                db.set_metadata(mc_id[0], mi)
            else:
                mi = MetaInformation("My Clippings", authors=["Kindle"])
                mi.tags = ["Clippings"]
                mi.comments = last_update
                db.add_books([bm.value["path"]], ["txt"], [mi])
Example #33
0
 def add_empty(self, *args):
     '''
     Add an empty book item to the library. This does not import any formats
     from a book file.
     '''
     author = series = None
     index = self.gui.library_view.currentIndex()
     if index.isValid():
         raw = index.model().db.authors(index.row())
         if raw:
             authors = [a.strip().replace('|', ',') for a in raw.split(',')]
             if authors:
                 author = authors[0]
         series = index.model().db.series(index.row())
     dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                              author, series)
     if dlg.exec_() == dlg.Accepted:
         temp_files = []
         num = dlg.qty_to_add
         series = dlg.selected_series
         title = dlg.selected_title or _('Unknown')
         db = self.gui.library_view.model().db
         ids = []
         for x in xrange(num):
             mi = MetaInformation(title, dlg.selected_authors)
             if series:
                 mi.series = series
                 mi.series_index = db.get_next_series_num_for(series)
             fmts = []
             empty_format = gprefs.get('create_empty_format_file', '')
             if empty_format:
                 from calibre.ebooks.oeb.polish.create import create_book
                 pt = PersistentTemporaryFile(suffix='.' + empty_format)
                 pt.close()
                 temp_files.append(pt.name)
                 create_book(mi, pt.name, fmt=empty_format)
                 fmts = [pt.name]
             ids.append(db.import_book(mi, fmts))
         self.gui.library_view.model().books_added(num)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel()
         self.gui.tags_view.recount()
         if ids:
             ids.reverse()
             self.gui.library_view.select_rows(ids)
         for path in temp_files:
             os.remove(path)
Example #34
0
def set_metadata_opf2(root, cover_prefix, mi, opf_version,
                      cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        opf.set_identifiers({k:v for k, v in orig.iteritems() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                [x.getparent().remove(x) for x in opf.root.xpath('//*[local-name()="meta" and @name="cover"]')]
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover'), m.set('content', i.get('id'))
            else:
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
Example #35
0
def set_metadata_opf2(root, cover_prefix, mi, opf_version,
                      cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        opf.set_identifiers({k:v for k, v in orig.items() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                [x.getparent().remove(x) for x in opf.root.xpath('//*[local-name()="meta" and @name="cover"]')]
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover'), m.set('content', i.get('id'))
            else:
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
Example #36
0
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

    # fallback for book_title
    if book_title:
        book_title = unicode(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    try:
        _parse_cover(root, mi)
    except:
        pass
    try:
        _parse_comments(root, mi)
    except:
        pass
    try:
        _parse_tags(root, mi)
    except:
        pass
    try:
        _parse_series(root, mi)
    except:
        pass
    try:
        _parse_isbn(root, mi)
    except:
        pass
    try:
        _parse_publisher(root, mi)
    except:
        pass
    try:
        _parse_pubdate(root, mi)
    except:
        pass
    #try:
    #    _parse_timestamp(root, mi)
    #except:
    #    pass

    try:
        _parse_language(root, mi)
    except:
        pass
    #_parse_uuid(root, mi)

    #if DEBUG:
    #   prints(mi)
    return mi
Example #37
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
Example #38
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(io.BytesIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Example #39
0
def set_metadata(stream,
                 mi,
                 apply_null=False,
                 update_timestamp=False,
                 force_identifiers=False):
    stream.seek(0)
    reader = get_zip_reader(stream, root=os.getcwdu())
    raster_cover = reader.opf.raster_cover
    mi = MetaInformation(mi)
    new_cdata = None
    replacements = {}
    try:
        new_cdata = mi.cover_data[1]
        if not new_cdata:
            raise Exception('no cover')
    except:
        try:
            new_cdata = open(mi.cover, 'rb').read()
        except:
            pass
    new_cover = cpath = None
    if new_cdata and raster_cover:
        try:
            cpath = posixpath.join(posixpath.dirname(reader.opf_path),
                                   raster_cover)
            cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \
                    os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg')
            if cover_replacable:
                new_cover = _write_new_cover(new_cdata, cpath)
                replacements[cpath] = open(new_cover.name, 'rb')
        except:
            import traceback
            traceback.print_exc()

    update_metadata(reader.opf,
                    mi,
                    apply_null=apply_null,
                    update_timestamp=update_timestamp,
                    force_identifiers=force_identifiers)

    newopf = StringIO(reader.opf.render())
    if isinstance(reader.archive, LocalZipFile):
        reader.archive.safe_replace(reader.container[OPF.MIMETYPE],
                                    newopf,
                                    extra_replacements=replacements)
    else:
        safe_replace(stream,
                     reader.container[OPF.MIMETYPE],
                     newopf,
                     extra_replacements=replacements)
    try:
        if cpath is not None:
            replacements[cpath].close()
            os.remove(replacements[cpath].name)
    except:
        pass
Example #40
0
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object
    """
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    title_match = title_pat.search(block)
    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
    else:
        title = _('Unknown')
    author_match = author_pat.search(block)
    if author_match is not None:
        author = decode(author_match.group(1).strip(), cpg)
    else:
        author = None
    mi = MetaInformation(title)
    if author:
        mi.authors = string_to_authors(author)

    comment_match = comment_pat.search(block)
    if comment_match is not None:
        comment = decode(comment_match.group(1).strip(), cpg)
        mi.comments = comment
    tags_match = tags_pat.search(block)
    if tags_match is not None:
        tags = decode(tags_match.group(1).strip(), cpg)
        mi.tags = list(filter(None, (x.strip() for x in tags.split(','))))
    publisher_match = publisher_pat.search(block)
    if publisher_match is not None:
        publisher = decode(publisher_match.group(1).strip(), cpg)
        mi.publisher = publisher

    return mi
Example #41
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print >> sys.stderr, u'Couldn\'t read RB header from file'
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print >> sys.stderr, u'Couldn\'t find INFO from RB file'
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            key, value = line.split('=')
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s' % (
            mi.title, unicode(err))
        print >> sys.stderr, msg.encode('utf8')
        raise
    return mi
Example #42
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
    except:
        return mi
    return mi
Example #43
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print(u'Couldn\'t read RB header from file', file=sys.stderr)
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print(u'Couldn\'t find INFO from RB file', file=sys.stderr)
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            key, value = line.split('=')
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
        print(msg.encode('utf8'), file=sys.stderr)
        raise
    return mi
Example #44
0
def get_metadata_quick(raw):
    p = podofo.PDFDoc()
    p.load(raw)
    title = p.title
    if not title:
        title = '_'
    author = p.author
    authors = string_to_authors(author) if author else [_('Unknown')]
    creator = p.creator
    try:
        tags = [x.strip() for x in p.keywords.split(u',')]
        tags = [x for x in tags if x]
    except:
        tags = []

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    return mi
Example #45
0
def get_metadata_quick(raw):
    p = podofo.PDFDoc()
    p.load(raw)
    title = p.title
    if not title:
        title = '_'
    author = p.author
    authors = string_to_authors(author) if author else  [_('Unknown')]
    creator = p.creator
    try:
        tags = [x.strip() for x in p.keywords.split(u',')]
        tags = [x for x in tags if x]
    except:
        tags = []

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    return mi
Example #46
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1],
                                     zf.read(cover_href))
    except:
        return mi
    return mi
Example #47
0
def parse_comic_comment(comment, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json
    mi = MetaInformation(None, None)
    m = json.loads(comment)
    if isinstance(m, dict):
        for cat in m:
            if cat.startswith('ComicBookInfo'):
                get_comic_book_info(m[cat], mi, series_index=series_index)
                break
    return mi
Example #48
0
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream,
                                     stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
Example #49
0
def get_metadata_from_reader(rdr):
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(
        xml_to_unicode(raw, strip_encoding_pats=True,
                       resolve_entities=True)[0])

    title = rdr.title
    try:
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
Example #50
0
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if stream.read(10) not in MAGIC:
            print >>sys.stderr, u'Couldn\'t read IMP header from file'
            return mi

        def cString(skip=0):
            result = ''
            while 1:
                data = stream.read(1)
                if data == '\x00':
                    if not skip: return result
                    skip -= 1
                    result, data = '', ''
                result += data

        stream.read(38) # skip past some uninteresting headers
        _, category, title, author = cString(), cString(), cString(1), cString(2)

        if title:
            mi.title = title
        if author:
            mi.authors = string_to_authors(author)
            mi.author = author
        if category:
            mi.category = category
    except Exception as err:
        msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
    return mi
Example #51
0
File: pdf.py Project: kmshi/calibre
def get_metadata(stream, cover=True):
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    # if isbn is not None:
    #    mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
Example #52
0
File: snb.py Project: Farb/calibre
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Example #53
0
def get_metadata(stream):
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None,
                     data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #54
0
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(get_fb2_data(stream)[0])
    ctx = Context(root)
    book_title = _parse_book_title(root, ctx)
    authors = _parse_authors(root, ctx) or [_('Unknown')]

    # fallback for book_title
    if book_title:
        book_title = unicode_type(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    try:
        _parse_cover(root, mi, ctx)
    except:
        pass
    try:
        _parse_comments(root, mi, ctx)
    except:
        pass
    try:
        _parse_tags(root, mi, ctx)
    except:
        pass
    try:
        _parse_series(root, mi, ctx)
    except:
        pass
    try:
        _parse_isbn(root, mi, ctx)
    except:
        pass
    try:
        _parse_publisher(root, mi, ctx)
    except:
        pass
    try:
        _parse_pubdate(root, mi, ctx)
    except:
        pass

    try:
        _parse_language(root, mi, ctx)
    except:
        pass

    return mi
Example #55
0
def do_add_empty(
    dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers,
    languages
):
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    if cover:
        mi.cover = cover
    if languages:
        mi.languages = languages
    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join(map(str, ids)))
Example #56
0
def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/'
                        +isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode) for
                    x in h2]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
               series = html.tostring(h, method='text', encoding=unicode)
               match = re.search(r'(.+) \((.+)\)', series)
               if match is not None:
                   break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
Example #57
0
 def read_user_metadata(self):
     '''
     Read all metadata specified by the user. Command line options override
     metadata from a specified OPF file.
     '''
     from calibre.ebooks.metadata import MetaInformation
     from calibre.ebooks.metadata.opf2 import OPF
     mi = MetaInformation(None, [])
     if self.opts.read_metadata_from_opf is not None:
         self.opts.read_metadata_from_opf = os.path.abspath(
                                         self.opts.read_metadata_from_opf)
         opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'),
                   os.path.dirname(self.opts.read_metadata_from_opf))
         mi = opf.to_book_metadata()
     self.opts_to_mi(mi)
     if mi.cover:
         if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
             mi.cover = self.download_cover(mi.cover)
         ext = mi.cover.rpartition('.')[-1].lower().strip()
         if ext not in ('png', 'jpg', 'jpeg', 'gif'):
             ext = 'jpg'
         mi.cover_data = (ext, open(mi.cover, 'rb').read())
         mi.cover = None
     self.user_metadata = mi
Example #58
0
def get_metadata(stream, cover=True):
    with TemporaryDirectory("_pdf_metadata_read") as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, "src.pdf"), "wb") as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job("calibre.ebooks.metadata.pdf", "read_info", (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError("Failed to run pdfinfo")
        info = res["result"]
        with open(res["stdout_stderr"], "rb") as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError("Could not read info dict from PDF")
        covpath = os.path.join(pdfpath, "cover.jpg")
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, "rb") as f:
                cdata = f.read()

    title = info.get("Title", None)
    au = info.get("Author", None)
    if au is None:
        au = [_("Unknown")]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    # if isbn is not None:
    #    mi.isbn = isbn

    creator = info.get("Creator", None)
    if creator:
        mi.book_producer = creator

    keywords = info.get("Keywords", None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(",")]
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get("Subject", None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ("jpeg", cdata)

    return mi