def get_metadata(self, stream, ftype):
    """
    Read cover image and comic metadata from a CBR/CBZ archive.

    :param stream: file-like object holding the archive. When it offers both
        ``seek`` and ``tell`` its first three bytes are sniffed to override
        ``ftype``.
    :param ftype: declared file type (``'cbr'`` or ``'cbz'``); replaced by the
        sniffed type when the signature is recognised.
    :return: a ``MetaInformation`` object, with ``cover_data`` set to
        ``(extension, data)`` when a cover image could be extracted.
    """
    if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
        # Peek at the signature and restore the position afterwards, so the
        # extractor below sees the stream exactly where the caller left it.
        pos = stream.tell()
        id_ = stream.read(3)
        stream.seek(pos)
        if id_ == b'Rar':
            ftype = 'cbr'
        elif id_.startswith(b'PK'):
            ftype = 'cbz'
    if ftype == 'cbr':
        from ebook_converter.utils.unrar import extract_cover_image
    else:
        from ebook_converter.libunzip import extract_cover_image
    from ebook_converter.ebooks.metadata import MetaInformation
    ret = extract_cover_image(stream)
    mi = MetaInformation(None, None)
    stream.seek(0)
    if ftype in {'cbr', 'cbz'}:
        # The plugin customization selects which field feeds the series
        # index; anything other than 'volume'/'issue' falls back to 'volume'.
        series_index = self.site_customization
        if series_index not in {'volume', 'issue'}:
            series_index = 'volume'
        try:
            mi.smart_update(
                get_comic_metadata(stream,
                                   ftype,
                                   series_index=series_index))
        except Exception:
            # Embedded comic metadata is best-effort: a failure to read it
            # must not prevent returning the cover. Only Exception is caught
            # so KeyboardInterrupt/SystemExit still propagate.
            pass
    if ret is not None:
        path, data = ret
        # Extension of the archive member, without the leading dot.
        ext = os.path.splitext(path)[1][1:]
        mi.cover_data = (ext.lower(), data)
    return mi
Exemple #2
0
def get_metadata(stream, cover=True):
    """
    Build a MetaInformation object from the PDF held in ``stream``.

    The stream is copied to ``src.pdf`` inside a temporary directory, which
    is handed to ``read_info``; the returned info dict supplies the title,
    authors, book producer, tags and identifiers.

    :param stream: seekable file-like object whose content is copied out
        as the PDF to inspect.
    :param cover: when truthy, ``cover.jpg`` from the temporary directory
        (if it exists once ``read_info`` has run) is attached as cover data.
    :return: the populated MetaInformation object.
    :raises ValueError: if ``read_info`` returns ``None``.
    """
    with TemporaryDirectory('_pdf_metadata_read') as workdir:
        stream.seek(0)
        with open(os.path.join(workdir, 'src.pdf'), 'wb') as pdf_copy:
            shutil.copyfileobj(stream, pdf_copy)
        info = read_info(workdir, bool(cover))
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        cover_file = os.path.join(workdir, 'cover.jpg')
        cover_bytes = None
        if cover and os.path.exists(cover_file):
            # Must be read before the temporary directory is removed.
            with open(cover_file, 'rb') as cover_in:
                cover_bytes = cover_in.read()

    title = info.get('Title') or 'Unknown'
    raw_author = info.get('Author')
    authors = (['Unknown'] if raw_author is None
               else string_to_authors(raw_author))
    mi = MetaInformation(title, authors)

    producer = info.get('Creator')
    if producer:
        mi.book_producer = producer

    keywords = info.get('Keywords')
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(',')]
        # The first keyword that validates as an ISBN becomes the book's
        # ISBN; keywords validating to that same value are dropped from tags.
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject')
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from ebook_converter.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Identifiers the XMP metadata did not provide are searched for in the
    # remaining values of the info dict; the first valid hit per scheme wins.
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme in mi.get_identifiers():
            continue
        for key, value in info.items():
            if key == 'xmp_metadata':
                continue
            val = check_func(value)
            if val:
                mi.set_identifier(scheme, val)
                break

    if cover_bytes:
        mi.cover_data = ('jpeg', cover_bytes)
    return mi
Exemple #3
0
def get_metadata(stream):
    """
    Read the basic meta-data of an LRF file into a L{MetaInformation} object.

    @param stream: A file like object or an instance of L{LRFMetaFile};
                   anything that is not an L{LRFMetaFile} is wrapped in one.
    @return: The populated L{MetaInformation}. Title, author, category and
             publisher are reset to C{None} when they are empty or contain
             "unknown" in any letter case (the publisher also when it
             contains "some publisher").
    """
    def is_placeholder(text):
        # Empty values and "unknown"-style fillers both count as missing.
        return not text or 'unknown' in text.lower()

    if isinstance(stream, LRFMetaFile):
        lrf = stream
    else:
        lrf = LRFMetaFile(stream)

    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()

    # Category and classification are merged into one comma separated
    # string, which also provides the tag list.
    mi.category = ', '.join((lrf.category.strip(),
                             lrf.classification.strip()))
    stripped = (piece.strip() for piece in mi.category.split(','))
    tags = [piece for piece in stripped if piece]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        # Both parts were empty, only the separator is left.
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()

    # The "reading" fields are optional: any failure while fetching one
    # simply leaves the corresponding sort field as it is.
    for sort_field, reading_field in (('title_sort', 'title_reading'),
                                      ('author_sort', 'author_reading')):
        try:
            setattr(mi, sort_field, getattr(lrf, reading_field).strip())
            if not getattr(mi, sort_field):
                setattr(mi, sort_field, None)
        except Exception:
            pass

    if is_placeholder(mi.title):
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if is_placeholder(mi.author):
        mi.author = None
    if is_placeholder(mi.category):
        mi.category = None
    if is_placeholder(mi.publisher) or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi