def read_oeb_metadata(metadata):
    """Print a dump of the OEB-style metadata element.

    If *metadata* has a direct child tagged ``dc-metadata``, the first such
    child is dumped; otherwise *metadata* itself is dumped.  Nothing is
    extracted from the element: the function has no return statement, so
    callers always receive ``None``.
    """
    # Fall back to the element itself when there is no dc-metadata child.
    target = next(
        (node for node in metadata if node.tag == 'dc-metadata'),
        metadata,
    )
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement.
    print('OEB:')
    dump(target)
def _opf_local_name(qualified):
    """Return the part of *qualified* after '}', or None if it has no '}'."""
    index = qualified.find('}')
    if index == -1:
        return None
    return qualified[index + 1:]


def _opf_text(element):
    """Return the element's text stripped of whitespace ('' if it has none)."""
    return (element.text or '').strip()


def _opf_creator_role(creator):
    """Return the creator's ``role`` attribute value, defaulting to 'aut'.

    The attribute is matched by local name, so both a namespace-qualified
    ``{...}role`` key and a plain ``role`` key are recognised.
    """
    for key, value in creator.attrib.items():
        if key == 'role' or key.endswith('}role'):
            return value
    return 'aut'


def read_opf_metadata(metadata):
    """Extract book information from an OPF ``metadata`` element.

    metadata -- the element whose namespace-qualified children (title,
                language, subject, description, creator) are read.  When its
                first child's tag ends with ``dc-metadata`` that child is
                read instead.

    Returns ``None`` when *metadata* has no children, otherwise a dict with
    the keys ``title``, ``language``, ``authors``, ``series``, ``tags`` and
    ``annotation``.  ``series`` is always an empty list here; ``tags`` holds
    each distinct subject once, in order of first appearance.

    Only creators whose role is ``aut`` (or who declare no role) are
    reported as authors.  Elements without text yield ``''`` for
    title/language/annotation and are skipped for tags/authors.
    """
    # Debug output kept from the original implementation.  The parenthesised
    # form prints the same text under the Python 2 print statement.
    print('OPF:')
    dump(metadata)
    if len(metadata) == 0:
        return

    title = None
    language = None
    authors = []
    series = []
    tags = []
    annotation = None

    if metadata[0].tag.endswith('dc-metadata'):
        metadata = metadata[0]

    for child in metadata:
        name = _opf_local_name(child.tag)
        if name is None:
            # skipped unqualified names
            continue
        if name == 'title':
            title = _opf_text(child)
        elif name == 'language':
            language = _opf_text(child)
        elif name == 'subject':
            subject = _opf_text(child)
            # De-duplicate while keeping the order the subjects appear in.
            if subject and subject not in tags:
                tags.append(subject)
        elif name == 'description':
            annotation = _opf_text(child)
        elif name == 'creator':
            # The original looked up the literal key '{%s}role' (the
            # namespace was never interpolated), so every creator was
            # treated as an author regardless of the declared role.
            if _opf_creator_role(child) == 'aut':
                author = _opf_text(child)
                if author:
                    authors.append(author)

    result = {
        'title': title,
        'language': language,
        'authors': authors,
        'series': series,
        'tags': tags,
        'annotation': annotation,
    }
    LOG.debug('read_opf_metadata output: %s', result)
    return result
def read(path):
    """Read the metadata of the zip-based book at *path*.

    The archive's container entry (``CONTAINER``) is parsed to find the
    ``rootfile`` element, whose ``full-path`` attribute names the OPF entry.
    The first direct child of the OPF root tagged ``metadata`` (with or
    without a namespace) is then handed to ``read_oeb_metadata`` when the tag
    is unqualified, or to ``read_opf_metadata`` otherwise.

    Returns whatever the selected reader returns, or ``None`` (after logging
    an error) when the container, the rootfile reference, the OPF entry or
    the metadata element is missing or cannot be parsed.
    """
    LOG.debug("reading %s", path)
    # NOTE(review): the archive is never closed in this function; whether it
    # should be depends on what get_good_zip returns -- confirm.
    archive = get_good_zip(path)
    try:
        container = archive.read(CONTAINER)
    except KeyError:
        LOG.error('%s has no container', path)
        return

    container = parse_xml(container)
    if container is None:
        LOG.error('%s has invalid container', path)
        return

    # A container without a rootfile element (or without a full-path
    # attribute on it) used to raise AttributeError / pass None to
    # archive.read(); report it like the other malformed-archive cases.
    rootfile = container.find('.//{%s}rootfile' % CONTAINER_NS)
    opf_name = rootfile.get('full-path') if rootfile is not None else None
    if not opf_name:
        LOG.error('%s has no rootfile in its container', path)
        return

    try:
        opf = archive.read(opf_name)
    except KeyError:
        LOG.error('Could not open opf file (%s)', opf_name.encode('utf-8'))
        return

    opf = parse_xml(opf)
    if opf is None:
        LOG.error('%s has invalid opf file', path)
        return

    metadata = None
    for child in opf:
        if child.tag == 'metadata' or child.tag.endswith('}metadata'):
            metadata = child
            break
    if metadata is None:
        LOG.error('Could not find metadata in the opf file')
        return

    # An unqualified tag selects the OEB reader; a namespaced one, OPF.
    if metadata.tag == 'metadata':
        return read_oeb_metadata(metadata)
    return read_opf_metadata(metadata)
def get_root(data, path):
    """Parse *data* and return its root element.

    data -- the document content handed to ``parse_xml``
    path -- where the data came from; passed to ``parse_xml`` and used in
            log messages

    Returns the root element when parsing succeeds and the root's tag equals
    ``ROOT_ELEM``.  Otherwise an error is logged and ``None`` is returned.
    """
    parsed = parse_xml(data, True, path)
    if parsed is None:
        LOG.error('Invalid data in %s', path)
        return None
    if parsed.tag == ROOT_ELEM:
        return parsed
    LOG.error('%s has wrong root element: %s (expected %s)',
              path, parsed.tag, ROOT_ELEM)
    return None
def _fb2_cover(root, coverpage):
    """Return the image referenced by *coverpage*, or None if unavailable.

    The first child of *coverpage* must be an ``IMAGE_ELEM`` carrying an
    ``XLINK_HREF`` attribute.  The href (minus a leading '#') must match the
    ``id`` of exactly one ``BINARY_ELEM`` found by ``root.findall``, and that
    element's text is decoded with ``decodestring`` and opened as an image.
    """
    if not len(coverpage):
        return None
    image = coverpage[0]
    if image.tag != IMAGE_ELEM or XLINK_HREF not in image.attrib:
        return None
    href = image.attrib[XLINK_HREF]
    # startswith() copes with an empty href, where href[0] would raise.
    if href.startswith('#'):
        href = href[1:]
    matches = [candidate for candidate in root.findall(BINARY_ELEM)
               if candidate.attrib.get('id') == href]
    # A binary element without text has nothing to decode.
    if len(matches) != 1 or not matches[0].text:
        return None
    return Image.open(StringIO(decodestring(matches[0].text)))


def read(path):
    """Read a FB2 file and return extracted meta-information.

    Returns ``None`` when the file cannot be read, its root element is
    rejected by ``get_root``, or no title-info element with children is
    found.  Otherwise returns a dict with the keys ``title``, ``language``,
    ``authors``, ``series`` (a list of ``(name, number)`` tuples), ``tags``
    (passed through ``normalize_tags``), ``annotation`` and ``cover``.
    """
    LOG.debug("reading %s", path)
    data = read_fb2_file(path)
    if data is None:
        return

    root = get_root(data, path)
    if root is None:
        # get_root returns None for unusable documents; without this guard
        # the find() below would raise AttributeError.
        return

    title_info = root.find('.//%s' % TITLE_INFO_ELEM)
    # Spelled out instead of "not title_info": a missing element and one
    # without children are both rejected, exactly as truth-testing did.
    if title_info is None or len(title_info) == 0:
        LOG.error('Could not find title-info for %s', path)
        # The original message had no placeholder for its argument.
        LOG.error(' elem: %s', TITLE_INFO_ELEM)
        dump(root, stream=sys.stderr)
        return

    title = None
    language = None
    authors = []
    series = []
    tags = []
    # NOTE(review): nothing below assigns annotation, so the returned
    # 'annotation' is always None -- confirm whether it should be parsed.
    annotation = None
    cover = None

    for child in title_info:
        if child.tag == AUTHOR_ELEM:
            authors.append(person2str(child))
        elif child.tag == TITLE_ELEM:
            title = strip_text(child.text)
        elif child.tag == TAG_ELEM:
            tag = tag2tag(child)
            if tag:  # add only non-empty tags
                tags.append(tag)
        elif child.tag == LANG_ELEM:
            language = strip_text(child.text)
        elif child.tag == SERIES_ELEM:
            # A series is recorded only when both attributes are non-empty.
            series_name = child.attrib.get('name')
            series_number = child.attrib.get('number', 0)
            if series_name and series_number:
                series.append((series_name, series_number))
        elif child.tag == COVER_ELEM:
            found = _fb2_cover(root, child)
            if found is not None:
                cover = found

    return {
        'title': title,
        'language': language,
        'authors': authors,
        'series': series,
        'tags': normalize_tags(tags),
        'annotation': annotation,
        'cover': cover,
    }