Ejemplo n.º 1
0
def process_mobi_metadata(metadata):
    """Build an EbookMetadata object from parsed MOBI metadata.

    The title and language are taken from the MOBI header; when an EXTH
    section is present its records supply authors, publisher, description,
    ISBN, tags, publication date, rights and an overriding title.

    Args:
        metadata: parsed file metadata. It must expose ``filetype`` and
            support ``in`` tests; when it contains a 'mobi' section,
            ``metadata.mobi`` is read (and ``metadata.pdb.name`` is used
            as a title fallback).

    Returns:
        The populated EbookMetadata. If there is no 'mobi' section the
        freshly created, otherwise untouched, object is returned.

    Side effects:
        Prints a warning to stdout when the declared text encoding is not
        one of the known codepages.
    """
    ebook = EbookMetadata(metadata.filetype)

    if 'mobi' not in metadata:
        return ebook

    # Determine the codec used for strings. Defaults to 'cp1252'
    codec = 'cp1252'
    if 'text_encoding' in metadata.mobi:
        codepages = {1252: 'cp1252', 65001: 'utf-8'}
        try:
            codec = codepages[metadata.mobi.text_encoding]
        except (IndexError, KeyError):
            # Parenthesised single-argument form: prints the same text
            # whether `print` is a statement (Python 2) or a function.
            print("Unknown codepage %d. Assuming '%s'" % (metadata.mobi.text_encoding, codec))

    try:
        ebook.title = metadata.mobi.fullname.decode(codec, 'replace')
    except AttributeError:
        # No usable full name: fall back to the PDB name with NUL bytes
        # removed and runs of characters outside the allowed set replaced
        # by a single underscore.
        ebook.title = re.sub('[^-A-Za-z0-9\"";:., ]+', '_', metadata.pdb.name.replace('\x00', ''))

    ebook.setdefault('languages', []).append(mobi2iana_language(metadata.mobi.locale))

    if 'exth' in metadata.mobi:
        for record in metadata.mobi.exth.records:
            if record.type == 100:
                # authors
                from biblio.ebook import parse_ebook_authors
                authors = parse_ebook_authors(record.data.decode(codec, 'ignore').strip())
                ebook.setdefault('authors', []).extend(authors)
            elif record.type == 101:
                ebook.publisher = record.data.decode(codec, 'ignore').strip()
            elif record.type == 103:
                ebook.description = record.data.decode(codec, 'ignore')
            elif record.type == 104:
                # ISBN is stored without hyphens.
                isbn = record.data.decode(codec, 'ignore').strip().replace('-', '')
                ebook.setdefault('identifiers', {})['isbn'] = isbn
            elif record.type == 105:
                # Semicolon-separated list of tags.
                tags = [t.strip() for t in record.data.decode(codec, 'ignore').split(';')]
                ebook.setdefault('tags', []).extend(tags)
                # Remove duplicates while keeping first-seen order, so the
                # resulting list is deterministic (a plain set() round
                # trip would leave the tags in arbitrary order).
                seen = set()
                unique_tags = []
                for tag in ebook.tags:
                    if tag not in seen:
                        seen.add(tag)
                        unique_tags.append(tag)
                ebook.tags = unique_tags
            elif record.type == 106:
                from biblio.ebook import parse_ebook_date
                ebook.date_published = parse_ebook_date(record.data.decode(codec, 'ignore'))
            elif record.type == 108:
                pass # contributor
            elif record.type == 109:
                ebook.rights = record.data.decode(codec, 'ignore')
            elif record.type == 503:
                # Overrides the title taken from the header above.
                ebook.title = record.data.decode(codec, 'ignore')

    if ebook.title:
        ebook.title = replace_entities(ebook.title, codec)

    return ebook
Ejemplo n.º 2
0
def process_opf_metadata(metadata, ebook):
    """Copy Dublin Core and calibre metadata from parsed OPF data into ebook.

    Args:
        metadata: object whose ``metadata`` attribute iterates over
            ``(tag, attribs, text)`` triples, where ``tag`` is a
            namespace-qualified element name, ``attribs`` is a mapping of
            attribute names to values and ``text`` is the element text
            (may be None or empty).
        ebook: metadata container supporting ``setdefault`` and attribute
            assignment; it is updated in place.

    Returns:
        The same ``ebook`` object that was passed in.

    Raises:
        ValueError: if a ``calibre:series_index`` value cannot be
            converted to a float.
    """
    dc = '{http://purl.org/dc/elements/1.1/}'
    opf = '{http://www.idpf.org/2007/opf}'
    opf_role = opf + 'role'

    for tag, attribs, text in metadata.metadata:
        # Normalise the text once. A missing (None) text is treated like
        # an empty string so that every branch tolerates text-less
        # elements instead of failing on None.strip().
        value = text.strip() if text else ''

        if tag == dc + 'title':
            ebook.title = value
        elif tag == dc + 'publisher':
            ebook.publisher = value
        elif tag == dc + 'date':
            from biblio.ebook import parse_ebook_date
            ebook.date_published = parse_ebook_date(value)
        elif tag == dc + 'description':
            ebook.description = value
        elif tag == dc + 'rights':
            ebook.rights = value
        elif tag == dc + 'language':
            ebook.setdefault('languages', []).append(value)
        elif tag == dc + 'subject':
            # Comma-separated list of tags; blank subjects are skipped.
            if value:
                ebook.setdefault('tags', []).extend([x.strip() for x in text.split(',')])
        elif tag == dc + 'identifier':
            if value:
                # The identifier type comes from any attribute whose name
                # ends in 'scheme'. items() works on Python 2 and 3 alike.
                for attr, val in attribs.items():
                    if attr.endswith('scheme'):
                        ebook.setdefault('identifiers', {})[val.lower()] = value
        elif tag == dc + 'creator':
            # A creator counts as an author when either role attribute is
            # 'aut', or when no role attribute is given at all.
            roles = [attribs[key] for key in ('role', opf_role) if key in attribs]
            if not roles or 'aut' in roles:
                from biblio.ebook import parse_ebook_authors
                # NOTE(review): the raw text is forwarded unchanged;
                # confirm parse_ebook_authors copes with None/blank text.
                ebook.setdefault('authors', []).extend(parse_ebook_authors(text))
        elif tag == opf + 'meta':
            if not ('name' in attribs and 'content' in attribs):
                continue
            name = attribs['name']
            content = attribs['content']
            if name == 'calibre:series':
                ebook.series = content.strip()
            elif name == 'calibre:series_index':
                ebook.series_index = float(content.strip())

    return ebook