def __init__(self, collection=False, title=''):
    """
    Set up an empty navigation state.

    Parameters
    ----------
    collection : bool
        Stored unchanged on ``self.collection``.
    title : str
        Stored unchanged on ``self.title``.
    """
    self.collection = collection

    #Only limited metadata may reach the navigation document: both of
    #these are used for EPUB2, only the title is used for EPUB3
    self.title = title
    self.contributors = OrderedSet()

    #Article bookkeeping
    self.article = self.article_doi = None
    self.all_dois = []  # Used to create UID

    #Special navigation structures: List of Equations/Figures/Tables
    self.equations_list = []
    self.figures_list = []
    self.tables_list = []

    #The nav structure is a list of navpoint trees, each navpoint may
    #have children navpoints. It gets converted to the appropriate
    #xml/xhtml structure and written to file when required.
    self.nav = []
    self.nav_depth = self._play_order = self._auto_id = 0
def __init__(self, collection=False, title=''):
    """
    Set up an empty package state.

    Parameters
    ----------
    collection : bool
        Stored on ``self.collection``; when true, ``title`` is used as the
        package title.
    title : str
        Title assigned to the package in collection mode only; otherwise
        ``self.title`` starts as None.
    """
    self.collection = collection
    #Collections receive assigned titles, anything else starts untitled
    self.title = title if collection else None  # 1 Title of publication

    #Article bookkeeping
    self.spine_list = []
    self.article = self.article_doi = None
    self.all_dois = []  # Used to create unique id and rights in collections

    #Scalar metadata elements
    self.pub_id = None
    self.format = 'application/epub+zip'  # 1 Always epub
    self.type = 'text'  # 1 Always text
    self.rights_associations = {}  # Keeps track per-article

    #Set-valued metadata elements, each starts out as its own empty set.
    #coverage, relation and source are not used yet.
    for set_name in ('contributors', 'coverage', 'dates', 'descriptions',
                     'languages', 'publishers', 'relation', 'rights',
                     'source', 'subjects'):
        setattr(self, set_name, OrderedSet())
class Package(object):
    """
    Collects article metadata and spine entries, and renders them to an EPUB
    Package Document (``EPUB/package.opf``) in EPUB2 or EPUB3 form.
    """

    #Maps lower-cased file extensions to manifest media types
    _MEDIA_TYPES = {'.jpg': 'image/jpeg',
                    '.jpeg': 'image/jpeg',
                    '.xml': 'application/xhtml+xml',
                    '.png': 'image/png',
                    '.css': 'text/css',
                    '.ncx': 'application/x-dtbncx+xml',
                    '.gif': 'image/gif',
                    '.tif': 'image/tiff',
                    '.pdf': 'application/pdf',
                    '.xhtml': 'application/xhtml+xml',
                    '.ttf': 'application/vnd.ms-opentype',
                    '.otf': 'application/vnd.ms-opentype'}

    #Image extensions whose manifest ids are derived from their directory
    _IMAGE_EXTENSIONS = ('.jpg', '.png', '.tif', '.jpeg')

    #Media type used when a file extension is not in _MEDIA_TYPES
    _FALLBACK_MEDIA_TYPE = 'application/octet-stream'

    def __init__(self, collection=False, title=''):
        """
        Set up an empty package state.

        Parameters
        ----------
        collection : bool
            When true, more than one article may be processed and ``title``
            is used as the package title.
        title : str
            Title assigned to the package in collection mode only.
        """
        self.collection = collection
        self.spine_list = []
        self.article = None
        self.article_doi = None
        self.all_dois = []  # Used to create unique id and rights in collections
        #Metadata elements
        self.pub_id = None
        self.contributors = OrderedSet()  # 0+ Authors/Editors/Reviewers
        self.coverage = OrderedSet()  # 0+ Not used yet
        self.dates = OrderedSet()  # 0+ Publication date (probably)
        self.descriptions = OrderedSet()  # 0+ Long descriptions (abstracts)
        self.format = 'application/epub+zip'  # 1 Always epub
        self.languages = OrderedSet()  # 1+ All languages present in doc
        self.publishers = OrderedSet()  # 0+ All publishers of content
        self.relation = OrderedSet()  # 0+ Not used yet
        self.rights = OrderedSet()  # 1 License, details TBD
        self.rights_associations = {}  # Keeps track per-article
        self.source = OrderedSet()  # 0+ Not used yet
        self.subjects = OrderedSet()  # 0+ Subjects covered in doc
        self.title = None  # 1 Title of publication
        self.type = 'text'  # 1 Always text
        if self.collection:  # Collections receive assigned titles
            self.title = title

    def process(self, article):
        """
        Ingests an article and processes it for metadata and elements to
        provide proper references in the EPUB spine.

        This method may only be called once unless the Package was
        instantiated in collection mode using ``Package(collection=True)``.
        It places entries in an internal spine list for the Main Content
        Document, the Bibliographic Content Document (if there are ref
        elements in Back), and the Tables Content Document (if the publisher
        reports out-of-flow tables). It then employs the publisher specific
        methods for extracting article metadata using the article's
        publisher attribute.

        Parameters
        ----------
        article : openaccess_epub.article.Article instance
            An article to be included in the EPUB, to be processed for
            metadata and appropriate content document references.

        Returns
        -------
        False when a second article is refused outside of collection mode,
        None in every other case (including success).
        """
        if self.article is not None and not self.collection:
            log.warning('Could not process additional article. Package only '
                        'handles one article unless collection mode is set.')
            return False

        if article.publisher is None:
            log.error('Package cannot be generated for an Article '
                      'without a publisher!')
            return

        self.article = article
        self.article_doi = self.article.doi.split('/')[1]
        self.all_dois.append(self.article.doi)

        #Analyze the article to add entries to the spine
        dash_doi = self.article_doi.replace('.', '-')

        #Entry for the main content document
        main_idref = 'main-{0}-xhtml'.format(dash_doi)
        self.spine_list.append(spine_item(main_idref, True))

        #Entry for the biblio content document
        biblio_idref = 'biblio-{0}-xhtml'.format(dash_doi)
        if self.article.root.xpath('./back/ref-list/ref'):
            self.spine_list.append(spine_item(biblio_idref, True))

        #Entry for the tables content document
        tables_idref = 'tables-{0}-xhtml'.format(dash_doi)
        if self.article.publisher.has_out_of_flow_tables():
            self.spine_list.append(spine_item(tables_idref, False))

        self.acquire_metadata()

    def acquire_metadata(self):
        """
        Handles the acquisition of metadata for both collection mode and
        single mode, uses the metadata methods belonging to the article's
        publisher attribute.
        """
        #For space economy
        publisher = self.article.publisher

        if self.collection:  # collection mode metadata gathering
            pass
        else:  # single mode metadata gathering
            self.pub_id = publisher.package_identifier()
            self.title = publisher.package_title()
            for date in publisher.package_date():
                self.dates.add(date)

        #Common metadata gathering
        for lang in publisher.package_language():
            self.languages.add(lang)  # languages
        for contributor in publisher.package_contributors():  # contributors
            self.contributors.add(contributor)
        self.publishers.add(publisher.package_publisher())  # publisher names
        desc = publisher.package_description()
        if desc is not None:
            self.descriptions.add(desc)
        for subj in publisher.package_subject():
            self.subjects.add(subj)  # subjects

        #Rights: remember which DOIs were published under which license
        art_rights = publisher.package_rights()
        self.rights.add(art_rights)
        self.rights_associations.setdefault(art_rights, []).append(
            self.article.doi)

    def file_manifest(self, location):
        """
        An iterator through the files in a location which yields item
        elements suitable for insertion into the package manifest.

        The working directory of the process is left untouched; paths are
        computed relative to ``location`` instead. A ``package.opf`` found
        directly in ``location`` is skipped, so that rendering again over an
        existing output directory does not list the package document in its
        own manifest. Files with an unrecognized extension get a generic
        media type and a logged warning.

        Parameters
        ----------
        location : str
            Directory to walk recursively.

        Raises
        ------
        OSError
            If ``location`` is not an existing directory.
        """
        if not os.path.isdir(location):
            raise OSError(
                'Manifest location is not a directory: {0}'.format(location))

        for dirpath, _dirnames, filenames in os.walk(location):
            rel_dir = os.path.relpath(dirpath, location)
            #The top directory itself contributes no href prefix
            if rel_dir == os.curdir:
                rel_dir = ''
            else:
                rel_dir = rel_dir.replace(os.sep, '/')

            for fn in filenames:
                if not rel_dir and fn == 'package.opf':
                    continue  # Never list the package document itself
                fn_ext = os.path.splitext(fn)[-1].lower()
                item = etree.Element('item')
                #Here we set three attributes: href, media-type, and id
                if not rel_dir:
                    item.attrib['href'] = fn
                else:
                    item.attrib['href'] = '/'.join([rel_dir, fn])

                media_type = self._MEDIA_TYPES.get(fn_ext)
                if media_type is None:
                    log.warning('No known media type for %s, using %s',
                                item.attrib['href'],
                                self._FALLBACK_MEDIA_TYPE)
                    media_type = self._FALLBACK_MEDIA_TYPE
                item.attrib['media-type'] = media_type

                #Special handling for common image types
                if fn_ext in self._IMAGE_EXTENSIONS:
                    #the following lines assume we are using the convention
                    #where the article doi is prefixed by 'images-'
                    item.attrib['id'] = '-'.join([rel_dir[7:],
                                                  fn.replace('.', '-')])
                else:
                    item.attrib['id'] = fn.replace('.', '-')
                yield item

    def make_element(self, tagname, doc, attrs=None, text=''):
        """
        Create a detached element, resolving ``prefix:name`` forms of the tag
        and of the attribute names against the namespaces of ``doc``.

        Parameters
        ----------
        tagname : str
            Tag name, optionally with a namespace prefix.
        doc : element tree
            Document whose root provides the namespace map.
        attrs : dict or None
            Attributes to set; entries whose value is None are omitted.
        text : str
            Text content of the new element.
        """
        new_element = etree.Element(self.ns_rectify(tagname, doc))
        for kwd, val in (attrs or {}).items():
            if val is None:  # None values will not become attributes
                continue
            new_element.attrib[self.ns_rectify(kwd, doc)] = val
        new_element.text = text
        return new_element

    def ns_rectify(self, tagname, document):
        """
        Convert ``prefix:tag`` to ``{namespace-uri}tag`` using the namespace
        map of the document root; names without a prefix pass through.
        """
        if ':' not in tagname:
            return tagname
        #Split on the first colon only, the remainder is the local name
        ns, tag = tagname.split(':', 1)
        return '{' + document.getroot().nsmap[ns] + '}' + tag

    def _init_package_doc(self, version):
        """
        Return a new element tree holding an empty ``package`` root element
        for the given EPUB ``version`` string.
        """
        root = etree.XML(
            '<?xml version="1.0"?>\n'
            '<package\n'
            '   xmlns="http://www.idpf.org/2007/opf"\n'
            '   xmlns:dc="http://purl.org/dc/elements/1.1/"\n'
            '   xmlns:opf="http://www.idpf.org/2007/opf"\n'
            '   xmlns:dcterms="http://purl.org/dc/terms/"\n'
            '   version="{0}"\n'
            '   unique-identifier="pub-identifier">'
            '</package>'.format(version))
        document = etree.ElementTree(root)
        return document

    @staticmethod
    def _date_text(date):
        """
        Format a date as YYYY, YYYY-MM or YYYY-MM-DD depending on which of
        its ``month`` and ``day`` attributes are set.
        """
        #str coercion just to be safe, in case someone returns ints;
        #months and days are zero-padded to two digits
        text = str(date.year)
        if date.month:
            text = '-'.join([text, str(date.month).zfill(2)])
            if date.day:
                text = '-'.join([text, str(date.day).zfill(2)])
        return text

    def _rights_text(self):
        """
        Build the text of the dc:rights element without modifying
        ``self.rights``, so that the package can be rendered more than once.
        """
        if not self.collection or len(self.rights) == 1:
            #Exactly one license is expected here; read it, do not pop it
            only_license = next(iter(self.rights), '')
            if not self.collection:
                return only_license
            return ''.join(['All articles in this collection published '
                            'according to the following license:\n',
                            only_license])

        #More than one, we need to refer to rights_associations
        rights_text = ('Articles in this collection were published according '
                       'to different licenses. Each unique license will be '
                       'listed below, preceded by every article DOI to which '
                       'it applies.')
        for lic, doi_list in self.rights_associations.items():
            doi_line = ','.join(doi_list)
            rights_text = '\n'.join([rights_text, doi_line, lic])
        return rights_text

    def _write_package_doc(self, document, location):
        """Serialize ``document`` to ``<location>/EPUB/package.opf``."""
        opf_path = os.path.join(location, 'EPUB', 'package.opf')
        with open(opf_path, 'wb') as output:
            output.write(etree.tostring(document, encoding='utf-8',
                                        pretty_print=True))

    def render_EPUB2(self, location):
        """
        Render the EPUB2 Package Document to ``<location>/EPUB/package.opf``.

        Parameters
        ----------
        location : str
            Root directory of the EPUB being built; its ``EPUB``
            subdirectory is scanned for the manifest and receives the output.
        """
        log.info('Rendering Package Document for EPUB2')
        document = self._init_package_doc(version='2.0')
        package = document.getroot()

        #Make the Metadata
        metadata = etree.SubElement(package, 'metadata')

        #Metadata: Identifier
        if not self.collection:  # Identifier for single article
            ident = self.make_element('dc:identifier',
                                      document,
                                      {'id': 'pub-identifier',
                                       'opf:scheme': self.pub_id.scheme},
                                      self.pub_id.value)
        else:  # Identifier for collection
            ident = self.make_element('dc:identifier',
                                      document,
                                      {'id': 'pub-identifier',
                                       'opf:scheme': 'DOI'},
                                      ','.join(self.all_dois))
        metadata.append(ident)

        #Metadata: Title
        #Divergence between single articles and collections for titles is
        #handled during initiation and selective metadata acquisition
        title = self.make_element('dc:title', document, text=self.title)
        metadata.append(title)

        #Metadata: Languages
        for lang in self.languages:
            lang_el = self.make_element('dc:language', document, text=lang)
            metadata.append(lang_el)

        #Metadata: Contributors/Creators
        #The EPUB2 OPF spec indicates a distinction between primary authors
        #(contained in dc:creator) and secondary authors (contained in
        #dc:contributor, along with all the other options in
        # http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#TOC2.2.6).
        #All contributors with the 'aut' role are made <dc:creator>s
        for contrib in self.contributors:
            tag = 'dc:creator' if contrib.role == 'aut' else 'dc:contributor'
            metadata.append(self.make_element(tag,
                                              document,
                                              {'opf:role': contrib.role,
                                               'opf:file-as': contrib.file_as},
                                              contrib.name))

        #Metadata: Descriptions
        for description in self.descriptions:
            metadata.append(self.make_element('dc:description',
                                              document,
                                              text=description))

        #Metadata: Subjects
        for subject in self.subjects:
            metadata.append(self.make_element('dc:subject',
                                              document,
                                              text=subject))

        #Metadata: Format
        metadata.append(self.make_element('dc:format',
                                          document,
                                          text=self.format))

        #Metadata: Publishers
        for publisher in self.publishers:
            metadata.append(self.make_element('dc:publisher',
                                              document,
                                              text=publisher))

        #Metadata: Dates
        for date in self.dates:
            metadata.append(self.make_element('dc:date',
                                              document,
                                              {'opf:event': date.event},
                                              self._date_text(date)))

        #Metadata: Rights
        metadata.append(self.make_element('dc:rights',
                                          document,
                                          text=self._rights_text()))

        #Not Implemented Metadata: Source, Type, Coverage, Relation

        #Make the Manifest
        manifest = etree.SubElement(package, 'manifest')
        for item in self.file_manifest(os.path.join(location, 'EPUB')):
            if item.attrib['id'] == 'toc-ncx':
                item.attrib['id'] = 'ncx'  # Special id for toc.ncx
            manifest.append(item)

        #Make the Spine
        spine = etree.SubElement(package, 'spine')
        spine.attrib['toc'] = 'ncx'
        for item in self.spine_list:
            itemref = etree.SubElement(spine, 'itemref')
            itemref.attrib['idref'] = item.idref
            itemref.attrib['linear'] = 'yes' if item.linear else 'no'

        self._write_package_doc(document, location)

    def render_EPUB3(self, location):
        """
        Render the EPUB3 Package Document to ``<location>/EPUB/package.opf``.

        Parameters
        ----------
        location : str
            Root directory of the EPUB being built; its ``EPUB``
            subdirectory is scanned for the manifest and receives the output.

        Raises
        ------
        ValueError
            In single mode, if the publication identifier has a scheme other
            than None or 'DOI'.
        """
        log.info('Rendering Package Document for EPUB3')
        document = self._init_package_doc(version='3.0')
        package = document.getroot()

        #Make the Metadata
        metadata = etree.SubElement(package, 'metadata')

        #Metadata: Identifier
        today = datetime.date.today().strftime('%Y.%m.%d')
        if not self.collection:  # Identifier for single article
            ident_text = '.'.join([self.pub_id.value, today])
        else:  # Identifier for collection
            ident_text = ','.join(self.all_dois) + '.' + today
        metadata.append(self.make_element('dc:identifier',
                                          document,
                                          {'id': 'pub-identifier'},
                                          ident_text))

        #Metadata: Identifier Refinement
        meta = self.make_element('meta',
                                 document,
                                 {'refines': '#pub-identifier',
                                  'property': 'identifier-type',
                                  'scheme': 'onix:codelist5'})
        if self.collection:  # Collections are always DOIs currently
            meta.text = '06'
            metadata.append(meta)
        elif self.pub_id.scheme is not None:
            if self.pub_id.scheme == 'DOI':
                meta.text = '06'
                metadata.append(meta)
            else:  # We could do an ONIXlist lookup map here
                raise ValueError('Unhandled id scheme!')

        #Metadata: Title
        #Divergence between single articles and collections for titles is
        #handled during initiation and selective metadata acquisition
        title = self.make_element('dc:title',
                                  document,
                                  {'id': 'pub-title'},
                                  text=self.title)
        metadata.append(title)

        #Metadata: Title Refinement
        meta = self.make_element('meta',
                                 document,
                                 {'refines': '#pub-title',
                                  'property': 'title-type'},
                                 'main')
        metadata.append(meta)

        #Metadata: Languages
        for lang in self.languages:
            lang_el = self.make_element('dc:language', document, text=lang)
            metadata.append(lang_el)

        #Metadata: Contributors/Creators
        #All contributors with the 'aut' role are made <dc:creator>s, the
        #rest become <dc:contributor>s
        for contrib_count, contrib in enumerate(self.contributors):
            tag = 'dc:creator' if contrib.role == 'aut' else 'dc:contributor'
            contrib_id = 'contrib{0}'.format(contrib_count)
            metadata.append(self.make_element(tag,
                                              document,
                                              {'id': contrib_id},
                                              text=contrib.name))
            #Metadata: Contributors/Creators Refinement
            #MARC Relators: http://www.loc.gov/marc/relators/relaterm.html
            #MARC Relators: http://www.loc.gov/marc/relators/relacode.html
            role_meta = self.make_element('meta',
                                          document,
                                          {'refines': '#' + contrib_id,
                                           'property': 'role',
                                           'scheme': 'marc:relators'})
            if contrib.role is not None:
                role_meta.text = contrib.role
                metadata.append(role_meta)
            file_as_meta = self.make_element('meta',
                                             document,
                                             {'refines': '#' + contrib_id,
                                              'property': 'file-as'})
            if contrib.file_as is not None:
                file_as_meta.text = contrib.file_as
                metadata.append(file_as_meta)

        #Metadata: Descriptions
        for description in self.descriptions:
            metadata.append(self.make_element('dc:description',
                                              document,
                                              text=description))

        #Metadata: Subjects
        for subject in self.subjects:
            metadata.append(self.make_element('dc:subject',
                                              document,
                                              text=subject))

        #Metadata: Format
        metadata.append(self.make_element('dc:format',
                                          document,
                                          text=self.format))

        #Metadata: Publishers
        for publisher in self.publishers:
            metadata.append(self.make_element('dc:publisher',
                                              document,
                                              text=publisher))

        #Metadata: Dates
        #EPUB3 differs significantly from EPUB2, only one dc:date is allowed
        #and it must be the date of EPUB publication
        #Must also be of proper format: http://www.w3.org/TR/NOTE-datetime
        simple_date = datetime.date.today().strftime('%Y-%m-%d')
        metadata.append(self.make_element('dc:date',
                                          document,
                                          {'id': 'pub-date'},
                                          simple_date))

        #Must have meta with dcterms:modified
        now = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
        metadata.append(self.make_element('meta',
                                          document,
                                          {'property': 'dcterms:modified'},
                                          now))

        #Metadata: Dates Refinement
        #values are dateAccepted, dateCopyrighted, dateSubmitted
        accepted = self.make_element('meta',
                                     document,
                                     {'refines': '#pub-date',
                                      'property': 'dcterms:dateAccepted'})
        copyrighted = self.make_element('meta',
                                        document,
                                        {'refines': '#pub-date',
                                         'property': 'dcterms:dateCopyrighted'})
        submitted = self.make_element('meta',
                                      document,
                                      {'refines': '#pub-date',
                                       'property': 'dcterms:dateSubmitted'})
        for date in self.dates:
            if date.event == 'accepted':
                accepted.text = self._date_text(date)
                metadata.append(accepted)
            elif date.event == 'copyrighted':
                copyrighted.text = self._date_text(date)
                metadata.append(copyrighted)
            elif date.event == 'submitted':
                submitted.text = self._date_text(date)
                metadata.append(submitted)

        #Metadata: Rights
        metadata.append(self.make_element('dc:rights',
                                          document,
                                          text=self._rights_text()))

        #Not Implemented Metadata: Source, Type, Coverage, Relation

        #Make the Manifest
        manifest = etree.SubElement(package, 'manifest')
        for item in self.file_manifest(os.path.join(location, 'EPUB')):
            if item.attrib['id'] == 'nav-xhtml':
                item.attrib['id'] = 'htmltoc'  # Special id for nav.xhtml
                item.attrib['properties'] = 'nav'
            if item.attrib['id'] == 'toc-ncx':
                item.attrib['id'] = 'ncx'  # Special id for toc.ncx
            manifest.append(item)

        #Make the Spine
        spine = etree.SubElement(package, 'spine')
        for item in self.spine_list:
            itemref = etree.SubElement(spine, 'itemref')
            itemref.attrib['idref'] = item.idref
            itemref.attrib['linear'] = 'yes' if item.linear else 'no'

        self._write_package_doc(document, location)
def __init__(self, collection=False, title=''):
    """
    Set up an empty package state.

    Parameters
    ----------
    collection : bool
        Stored on ``self.collection``; when true, ``title`` becomes the
        package title.
    title : str
        Title assigned in collection mode only; ``self.title`` is None
        otherwise.
    """
    self.collection = collection

    #Article bookkeeping
    self.article = self.article_doi = None
    self.spine_list = []
    self.all_dois = []  # Used to create unique id and rights in collections

    #Metadata elements with a fixed or single value
    self.pub_id = None
    self.format = 'application/epub+zip'  # 1 Always epub
    self.type = 'text'  # 1 Always text
    #Collections receive assigned titles
    self.title = title if collection else None  # 1 Title of publication

    #Metadata elements that accumulate values, one fresh set apiece
    accumulating = (
        'contributors',  # 0+ Authors/Editors/Reviewers
        'coverage',  # 0+ Not used yet
        'dates',  # 0+ Publication date (probably)
        'descriptions',  # 0+ Long descriptions (abstracts)
        'languages',  # 1+ All languages present in doc
        'publishers',  # 0+ All publishers of content
        'relation',  # 0+ Not used yet
        'rights',  # 1 License, details TBD
        'source',  # 0+ Not used yet
        'subjects',  # 0+ Subjects covered in doc
    )
    for attr_name in accumulating:
        setattr(self, attr_name, OrderedSet())
    self.rights_associations = {}  # Keeps track per-article
class Package(object): """ The Package class """ def __init__(self, collection=False, title=''): self.collection = collection self.spine_list = [] self.article = None self.article_doi = None self.all_dois = [ ] # Used to create unique id and rights in collections #self.all_articles = [] #Metadata elements self.pub_id = None self.contributors = OrderedSet() # 0+ Authors/Editors/Reviewers self.coverage = OrderedSet() # 0+ Not used yet self.dates = OrderedSet() # 0+ Publication date (probably) self.descriptions = OrderedSet() # 0+ Long descriptions (abstracts) self.format = 'application/epub+zip' # 1 Always epub self.languages = OrderedSet() # 1+ All languages present in doc self.publishers = OrderedSet() # 0+ All publishers of content self.relation = OrderedSet() # 0+ Not used yet self.rights = OrderedSet() # 1 License, details TBD self.rights_associations = {} # Keeps track per-article self.source = OrderedSet() # 0+ Not used yet self.subjects = OrderedSet() # 0+ Subjects covered in doc self.title = None # 1 Title of publication self.type = 'text' # 1 Always text if self.collection: # Collections receive assigned titles self.title = title def process(self, article): """ Ingests an article and processes it for metadata and elements to provide proper references in the EPUB spine. This method may only be called once unless the Package was instantiated in collection mode using ``Package(collection=True)``. It places entries in an internal spine list for the Main Content Document, the Bibliographic Content Document (if there are ref elements in Back), and the Tables Content Document (if there are table elements). It then employs the publisher specific methods for extracting article metadata using the article's publisher attribute (an instance of a Publisher class). Parameters ---------- article : openaccess_epub.article.Article instance An article to be included in the EPUB, to be processed for metadata and appropriate content document references. 
""" if self.article is not None and not self.collection: log.warning('Could not process additional article. Package only \ handles one article unless collection mode is set.') return False if article.publisher is None: log.error('''Package cannot be generated for an Article \ without a publisher!''') return self.article = article self.article_doi = self.article.doi.split('/')[1] self.all_dois.append(self.article.doi) #Analyze the article to add entries to the spine dash_doi = self.article_doi.replace('.', '-') #Entry for the main content document main_idref = 'main-{0}-xhtml'.format(dash_doi) self.spine_list.append(spine_item(main_idref, True)) #Entry for the biblio content document biblio_idref = 'biblio-{0}-xhtml'.format(dash_doi) if self.article.root.xpath('./back/ref-list/ref'): self.spine_list.append(spine_item(biblio_idref, True)) #Entry for the tables content document tables_idref = 'tables-{0}-xhtml'.format(dash_doi) if self.article.publisher.has_out_of_flow_tables(): self.spine_list.append(spine_item(tables_idref, False)) self.acquire_metadata() def acquire_metadata(self): """ Handles the acquisition of metadata for both collection mode and single mode, uses the metadata methods belonging to the article's publisher attribute. 
""" #For space economy publisher = self.article.publisher if self.collection: # collection mode metadata gathering pass else: # single mode metadata gathering self.pub_id = publisher.package_identifier() self.title = publisher.package_title() for date in publisher.package_date(): self.dates.add(date) #Common metadata gathering for lang in publisher.package_language(): self.languages.add(lang) # languages for contributor in publisher.package_contributors(): # contributors self.contributors.add(contributor) self.publishers.add(publisher.package_publisher()) # publisher names desc = publisher.package_description() if desc is not None: self.descriptions.add(desc) for subj in publisher.package_subject(): self.subjects.add(subj) # subjects #Rights art_rights = publisher.package_rights() self.rights.add(art_rights) if art_rights not in self.rights_associations: self.rights_associations[art_rights] = [self.article.doi] else: self.rights_associations[art_rights].append(self.article.doi) def file_manifest(self, location): """ An iterator through the files in a location which yields item elements suitable for insertion into the package manifest. 
""" #Maps file extensions to mimetypes mimetypes = { '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.xml': 'application/xhtml+xml', '.png': 'image/png', '.css': 'text/css', '.ncx': 'application/x-dtbncx+xml', '.gif': 'image/gif', '.tif': 'image/tif', '.pdf': 'application/pdf', '.xhtml': 'application/xhtml+xml', '.ttf': 'application/vnd.ms-opentype', '.otf': 'application/vnd.ms-opentype' } current_dir = os.getcwd() os.chdir(location) for dirpath, _dirnames, filenames in os.walk('.'): dirpath = dirpath[2:] # A means to avoid dirpath prefix of './' for fn in filenames: fn_ext = os.path.splitext(fn)[-1] item = etree.Element('item') #Here we set three attributes: href, media-type, and id if not dirpath: item.attrib['href'] = fn else: item.attrib['href'] = '/'.join([dirpath, fn]) item.attrib['media-type'] = mimetypes[fn_ext] #Special handling for common image types if fn_ext in ['.jpg', '.png', '.tif', '.jpeg']: #the following lines assume we are using the convention #where the article doi is prefixed by 'images-' item.attrib['id'] = '-'.join( [dirpath[7:], fn.replace('.', '-')]) else: item.attrib['id'] = fn.replace('.', '-') yield item os.chdir(current_dir) def make_element(self, tagname, doc, attrs={}, text=''): new_element = etree.Element(self.ns_rectify(tagname, doc)) for kwd, val in list(attrs.items()): if val is None: # None values will not become attributes continue new_element.attrib[self.ns_rectify(kwd, doc)] = val new_element.text = text return new_element def ns_rectify(self, tagname, document): if ':' not in tagname: return tagname else: ns, tag = tagname.split(':') return '{' + document.getroot().nsmap[ns] + '}' + tag def _init_package_doc(self, version): root = etree.XML('''\ <?xml version="1.0"?> <package xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" version="{0}" unique-identifier="pub-identifier">\ </package>'''.format(version)) document = 
etree.ElementTree(root) return document def render_EPUB2(self, location): log.info('Rendering Package Document for EPUB2') document = self._init_package_doc(version='2.0') package = document.getroot() #Make the Metadata metadata = etree.SubElement(package, 'metadata') #Metadata: Identifier if not self.collection: # Identifier for single article ident = self.make_element('dc:identifier', document, { 'id': 'pub-identifier', 'opf:scheme': self.pub_id.scheme }, self.pub_id.value) metadata.append(ident) else: # Identifier for collection ident = self.make_element('dc:identifier', document, { 'id': 'pub-identifier', 'opf:scheme': 'DOI' }, ','.join(self.all_dois)) metadata.append(ident) #Metadata: Title #Divergence between single articles and collections for titles is #handled during initiation and selective metadata acquisition, not here title = self.make_element('dc:title', document, text=self.title) metadata.append(title) #Metadata: Languages for lang in self.languages: lang_el = self.make_element('dc:language', document, text=lang) metadata.append(lang_el) #So here's the deal about creators/contributors: #The EPUB2 OPF spec indicates a distinction between primary authors #(contained in dc:creator) and secondary authors (contained in #dc:contributor, along with all the other options in # http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#TOC2.2.6). As far #as I can think there is no real use case in academic articles for #<dc:contributor role="aut">... 
We'll just make all contributors with #the 'aut' role as <dc:creator>s for contrib in self.contributors: tag = 'dc:creator' if contrib.role == 'aut' else 'dc:contributor' metadata.append( self.make_element(tag, document, { 'opf:role': contrib.role, 'opf:file-as': contrib.file_as }, contrib.name)) #Metadata: Descriptions for description in self.descriptions: metadata.append( self.make_element('dc:description', document, text=description)) #Metadata: Subjects for subject in self.subjects: metadata.append( self.make_element('dc:subject', document, text=subject)) #Metadata: Format metadata.append( self.make_element('dc:format', document, text=self.format)) #Metadata: Publishers for publisher in self.publishers: metadata.append( self.make_element('dc:publisher', document, text=publisher)) #Metadata: Dates for date in self.dates: #I use str coercion just to be safe, in case someone returns ints date_text = str(date.year) if date.month: date_text = '-'.join([date_text, str(date.month)]) if date.day: date_text = '-'.join([date_text, str(date.day)]) metadata.append( self.make_element('dc:date', document, {'opf:event': date.event}, date_text)) #Metadata: Rights if self.collection: if len(self.rights) == 1: # Only one license string present rights_text = '''\ All articles in this collection published according to the following license: ''' rights_text = ''.join([rights_text, self.rights.pop()]) else: # More than one, we need to refer to rights_associations rights_text = '''\ Articles in this collection were published according to different licenses. 
Each unique license will be listed below, preceded by every article DOI to which it applies.''' for lic, doi_list in list(self.rights_associations.items()): doi_line = ','.join(doi_list) rights_text = '\n'.join([rights_text, doi_line, lic]) metadata.append( self.make_element('dc:rights', document, text=rights_text)) else: metadata.append( self.make_element('dc:rights', document, text=self.rights.pop())) #Not Implemented Metadata: Source, Type, Coverage, Relation #Make the Manifest manifest = etree.SubElement(package, 'manifest') for item in self.file_manifest(os.path.join(location, 'EPUB')): if item.attrib['id'] == 'toc-ncx': item.attrib['id'] = 'ncx' # Special id for toc.ncx manifest.append(item) #Make the Spine spine = etree.SubElement(package, 'spine') spine.attrib['toc'] = 'ncx' for item in self.spine_list: itemref = etree.SubElement(spine, 'itemref') itemref.attrib['idref'] = item.idref itemref.attrib['linear'] = 'yes' if item.linear else 'no' with open(os.path.join(location, 'EPUB', 'package.opf'), 'wb') as output: output.write( etree.tostring(document, encoding='utf-8', pretty_print=True)) def render_EPUB3(self, location): log.info('Rendering Package Document for EPUB3') document = self._init_package_doc(version='3.0') package = document.getroot() #Make the Metadata metadata = etree.SubElement(package, 'metadata') #Metadata: Identifier today = datetime.date.today().strftime('%Y.%m.%d') if not self.collection: # Identifier for single article ident = self.make_element('dc:identifier', document, {'id': 'pub-identifier'}, '.'.join([self.pub_id.value, today])) metadata.append(ident) else: # Identifier for collection ident = self.make_element('dc:identifier', document, {'id': 'pub-identifier'}, ','.join(self.all_dois) + '.' 
+ today) metadata.append(ident) #Metadata: Identifier Refinement meta = self.make_element( 'meta', document, { 'refines': '#pub-identifier', 'property': 'identifier-type', 'scheme': 'onix:codelist5' }) if self.collection: # Collections are always DOIs currently meta.text = '06' metadata.append(meta) else: if self.pub_id.scheme is not None: if self.pub_id.scheme == 'DOI': meta.text = '06' metadata.append(meta) else: # We could do an ONIXlist lookup map here raise ValueError('Unhandled id scheme!') #Metadata: Title #Divergence between single articles and collections for titles is #handled during initiation and selective metadata acquisition, not here title = self.make_element('dc:title', document, {'id': 'pub-title'}, text=self.title) metadata.append(title) #Metadata: Title Refinement meta = self.make_element('meta', document, { 'refines': '#pub-title', 'property': 'title-type' }, 'main') metadata.append(meta) #Metadata: Languages for lang in self.languages: lang_el = self.make_element('dc:language', document, text=lang) metadata.append(lang_el) #Metadata: Contributors/Creators #So here's the deal about creators/contributors: #The EPUB2 OPF spec indicates a distinction between primary authors #(contained in dc:creator) and secondary authors (contained in #dc:contributor, along with all the other options in # http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#TOC2.2.6). As far #as I can think there is no real use case in academic articles for #<dc:contributor role="aut">... 
We'll just make all contributors with #the 'aut' role as <dc:creator>s contrib_count = 0 for contrib in self.contributors: tag = 'dc:creator' if contrib.role == 'aut' else 'dc:contributor' contrib_id = 'contrib{0}'.format(contrib_count) metadata.append( self.make_element(tag, document, {'id': contrib_id}, text=contrib.name)) #Metadata: Contributors/Creators Refinement #MARC Relators: http://www.loc.gov/marc/relators/relaterm.html #MARC Relators: http://www.loc.gov/marc/relators/relacode.html role_meta = self.make_element( 'meta', document, { 'refines': '#' + contrib_id, 'property': 'role', 'scheme': 'marc:relators' }) if contrib.role is not None: role_meta.text = contrib.role metadata.append(role_meta) file_as_meta = self.make_element('meta', document, { 'refines': '#' + contrib_id, 'property': 'file-as' }) if contrib.file_as is not None: file_as_meta.text = contrib.file_as metadata.append(file_as_meta) contrib_count += 1 #Metadata: Descriptions for description in self.descriptions: metadata.append( self.make_element('dc:description', document, text=description)) #Metadata: Subjects for subject in self.subjects: metadata.append( self.make_element('dc:subject', document, text=subject)) #Metadata: Format metadata.append( self.make_element('dc:format', document, text=self.format)) #Metadata: Publishers for publisher in self.publishers: metadata.append( self.make_element('dc:publisher', document, text=publisher)) #Metadata: Dates #EPUB3 differs significantly from EPUB2, only one dc:date is allowed #and it must be the date of EPUB publication #Must also be of proper format: http://www.w3.org/TR/NOTE-datetime simple_date = datetime.date.today().strftime('%Y-%m-%d') metadata.append( self.make_element('dc:date', document, {'id': 'pub-date'}, simple_date)) #Must have meta with dcterms:modified now = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ') metadata.append( self.make_element('meta', document, {'property': 'dcterms:modified'}, now)) #Metadata: Dates 
Refinement #values are dateAccepted, dateCopyrighted, dateSubmitted accepted = self.make_element('meta', document, { 'refines': '#pub-date', 'property': 'dcterms:dateAccepted' }) copyrighted = self.make_element('meta', document, { 'refines': '#pub-date', 'property': 'dcterms:dateCopyrighted' }) submitted = self.make_element('meta', document, { 'refines': '#pub-date', 'property': 'dcterms:dateSubmitted' }) def date_text(date): text = str(date.year) if date.month: text = '-'.join([text, str(date.month)]) if date.day: text = '-'.join([text, str(date.day)]) return text for date in self.dates: if date.event == 'accepted': accepted.text = date_text(date) metadata.append(accepted) elif date.event == 'copyrighted': copyrighted.text = date_text(date) metadata.append(copyrighted) elif date.event == 'submitted': submitted.text = date_text(date) metadata.append(submitted) #Metadata: Rights if self.collection: if len(self.rights) == 1: # Only one license string present rights_text = '''\ All articles in this collection published according to the following license: ''' rights_text = ''.join([rights_text, self.rights.pop()]) else: # More than one, we need to refer to rights_associations rights_text = '''\ Articles in this collection were published according to different licenses. 
Each unique license will be listed below, preceded by every article DOI to which it applies.''' for lic, doi_list in list(self.rights_associations.items()): doi_line = ','.join(doi_list) rights_text = '\n'.join([rights_text, doi_line, lic]) metadata.append( self.make_element('dc:rights', document, text=rights_text)) else: metadata.append( self.make_element('dc:rights', document, text=self.rights.pop())) #Not Implemented Metadata: Source, Type, Coverage, Relation #Make the Manifest manifest = etree.SubElement(package, 'manifest') for item in self.file_manifest(os.path.join(location, 'EPUB')): if item.attrib['id'] == 'nav-xhtml': item.attrib['id'] = 'htmltoc' # Special id for nav.xhtml item.attrib['properties'] = 'nav' if item.attrib['id'] == 'toc-ncx': item.attrib['id'] = 'ncx' # Special id for toc.ncx manifest.append(item) #Make the Spine spine = etree.SubElement(package, 'spine') for item in self.spine_list: itemref = etree.SubElement(spine, 'itemref') itemref.attrib['idref'] = item.idref itemref.attrib['linear'] = 'yes' if item.linear else 'no' with open(os.path.join(location, 'EPUB', 'package.opf'), 'wb') as output: output.write( etree.tostring(document, encoding='utf-8', pretty_print=True))
class Navigation(object):
    """
    Collects the navigation structure of one article (or, in collection
    mode, several articles) and renders it as an EPUB2 ``toc.ncx`` file or
    an EPUB3 ``nav.xhtml`` file.

    Articles are ingested with :meth:`process`; the resulting tree of
    navpoints is kept in ``self.nav`` while figures and tables are kept in
    the flat ``self.figures_list`` and ``self.tables_list``.
    """

    def __init__(self, collection=False, title=''):
        """
        Parameters
        ----------
        collection : bool
            When True, more than one article may be processed and the
            supplied `title` is kept instead of being replaced by the
            article's own navigation title.
        title : str
            Title of the navigation document; only retained in collection
            mode.
        """
        self.collection = collection

        #Special navigation structures: List of Equations/Figures/Tables
        self.equations_list = []
        self.figures_list = []
        self.tables_list = []

        self.article = None
        self.article_doi = None
        self.all_dois = []  # Used to create UID

        #These are the limited forms of metadata that might make it in to the
        #navigation document. Both are used for EPUB2, only the title is used
        #for EPUB3
        self.title = title
        self.contributors = OrderedSet()

        #The nav structure is a list of navpoint trees. Each navpoint may have
        #children navpoints. This structure will be converted to the
        #appropriate xml/xhtml structure and written to file when required.
        self.nav = []
        self.nav_depth = 0

        #Counters backing the play_order and auto_id properties
        self._play_order = 0
        self._auto_id = 0

    def process(self, article):
        """
        Ingests an Article to create navigation structures and parse global
        metadata.

        Parameters
        ----------
        article : Article
            Must provide ``publisher``, ``doi``, ``body`` and ``root``.

        Returns
        -------
        False if an article was already processed and collection mode is
        off; None otherwise (including when the article has no publisher,
        in which case an error is logged and nothing is recorded).
        """
        if self.article is not None and not self.collection:
            log.warning('Could not process additional article. Navigation '
                        'only handles one article unless collection mode is '
                        'set.')
            return False

        if article.publisher is None:
            log.error('Navigation cannot be generated for an Article '
                      'without a publisher!')
            return

        self.article = article
        #Only the part after the first slash is used for ids and file names
        self.article_doi = self.article.doi.split('/')[1]
        self.all_dois.append(self.article.doi)

        #Collections keep the title they were constructed with
        if not self.collection:
            self.title = self.article.publisher.nav_title()
        for author in self.article.publisher.nav_contributors():
            self.contributors.add(author)

        #Analyze the structure of the article to create internal mapping
        self.map_navigation()

    def map_navigation(self):
        """
        This is a wrapper for depth-first recursive analysis of the article.

        Adds a title navpoint for the current article to ``self.nav``, then
        the navpoints found in the article body and, when ``./back/ref``
        matches anything, a "References" navpoint.
        """
        #All articles should have titles
        title_id = 'titlepage-{0}'.format(self.article_doi)
        title_label = self.article.publisher.nav_title()
        title_source = 'main.{0}.xhtml#title'.format(self.article_doi)
        title_navpoint = navpoint(title_id, title_label, self.play_order,
                                  title_source, [])
        self.nav.append(title_navpoint)

        #When processing a collection of articles, we will want all subsequent
        #navpoints for this article to be located under the title
        if self.collection:
            nav_insertion = title_navpoint.children
        else:
            nav_insertion = self.nav

        #If the article has a body, we'll need to parse it for navigation
        if self.article.body is not None:
            #Here is where we invoke the recursive parsing!
            nav_insertion.extend(
                self.recursive_article_navmap(self.article.body))

        #Add a navpoint to the references if appropriate
        if self.article.root.xpath('./back/ref'):
            ref_id = 'references-{0}'.format(self.article_doi)
            ref_label = 'References'
            ref_source = 'biblio.{0}.xhtml#references'.format(
                self.article_doi)
            ref_navpoint = navpoint(ref_id, ref_label, self.play_order,
                                    ref_source, [])
            nav_insertion.append(ref_navpoint)

    def recursive_article_navmap(self, src_element, depth=0, first=True):
        """
        This function recursively traverses the content of an input article
        to add the correct elements to the NCX file's navMap and Lists.

        Only direct children tagged ``sec``, ``fig`` or ``table-wrap`` that
        have a non-empty immediate ``title`` child are considered. Sections
        are returned as navpoints (with their subsections as children);
        figures and tables are appended to ``self.figures_list`` and
        ``self.tables_list`` instead.

        Parameters
        ----------
        src_element : element
            Element whose direct children are inspected.
        depth : int
            Current nesting depth; ``self.nav_depth`` tracks the maximum.
        first : bool
            Unused; retained so existing callers keep working.

        Returns
        -------
        list
            Navpoints for the ``sec`` children of `src_element`.
        """
        if depth > self.nav_depth:
            self.nav_depth = depth
        navpoints = []
        tagnames = ['sec', 'fig', 'table-wrap']
        for child in src_element:
            #Children lacking a tag, or with another tag, are ignored
            tagname = getattr(child, 'tag', None)
            if tagname not in tagnames:
                continue

            #Safely handle missing id attributes
            if 'id' not in child.attrib:
                child.attrib['id'] = self.auto_id

            #If in collection mode, we'll prepend the article DOI to avoid
            #collisions
            if self.collection:
                child_id = '-'.join([self.article_doi, child.attrib['id']])
            else:
                child_id = child.attrib['id']

            #Attempt to infer the correct text as a label
            #Skip the element if we cannot
            child_title = child.find('title')
            if child_title is None:
                continue  # If there is no immediate title, skip this element
            label = element_methods.all_text(child_title)
            if not label:
                continue  # If no text in the title, skip this element
            source = 'main.{0}.xhtml#{1}'.format(self.article_doi,
                                                 child.attrib['id'])
            if tagname == 'sec':
                #Claim the play order before descending, so that a section
                #is ordered ahead of its own subsections in reading order
                order = self.play_order
                children = self.recursive_article_navmap(child,
                                                         depth=depth + 1)
                navpoints.append(
                    navpoint(child_id, label, order, source, children))
            #figs and table-wraps do not have children
            elif tagname == 'fig':  # Add navpoints to list_of_figures
                self.figures_list.append(
                    navpoint(child.attrib['id'], label, None, source, []))
            elif tagname == 'table-wrap':  # Add navpoints to list_of_tables
                self.tables_list.append(
                    navpoint(child.attrib['id'], label, None, source, []))
        return navpoints

    def render_EPUB2(self, location):
        """
        Creates the NCX specified file for EPUB2.

        Writes ``<location>/EPUB/toc.ncx`` containing the head metadata,
        docTitle, docAuthor entries, the navMap built from ``self.nav`` and
        navLists for figures and tables when any were collected.

        Parameters
        ----------
        location : str
            Directory that contains the ``EPUB`` output directory.
        """
        #Function-scope import: only needed to escape the uid attribute
        from xml.sax.saxutils import escape

        def make_navlabel(text):
            """
            Creates and returns a navLabel element with the supplied text.
            """
            navlabel = etree.Element('navLabel')
            navlabel_text = etree.SubElement(navlabel, 'text')
            navlabel_text.text = text
            return navlabel

        def make_navMap(nav=None):
            """
            Returns the navMap element when called without arguments, or
            the navPoint element (with nested children) for `nav`.
            """
            if nav is None:
                nav_element = etree.Element('navMap')
                for nav_point in self.nav:
                    nav_element.append(make_navMap(nav=nav_point))
            else:
                nav_element = etree.Element('navPoint')
                nav_element.attrib['id'] = nav.id
                nav_element.attrib['playOrder'] = nav.playOrder
                nav_element.append(make_navlabel(nav.label))
                content_element = etree.SubElement(nav_element, 'content')
                content_element.attrib['src'] = nav.source
                for child in nav.children:
                    nav_element.append(make_navMap(nav=child))
            return nav_element

        def make_navlist(parent, heading, targets):
            """
            Appends a navList titled `heading` to `parent`, holding one
            navTarget per navpoint in `targets`.
            """
            navlist = etree.SubElement(parent, 'navList')
            navlist.append(make_navlabel(heading))
            for nav_pt in targets:
                navtarget = etree.SubElement(navlist, 'navTarget')
                navtarget.attrib['id'] = nav_pt.id
                #make_navlabel is a local function, not a method of self
                navtarget.append(make_navlabel(nav_pt.label))
                content = etree.SubElement(navtarget, 'content')
                content.attrib['src'] = nav_pt.source

        #The uid lands inside a double-quoted attribute, so markup
        #characters and double quotes in a DOI must be escaped
        uid = escape(','.join(self.all_dois), {'"': '&quot;'})
        root = etree.XML('''\
<?xml version="1.0"?>\
<ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">\
<head>\
<meta name="dtb:uid" content="{uid}"/>\
<meta name="dtb:depth" content="{depth}"/>\
<meta name="dtb:totalPageCount" content="0"/>\
<meta name="dtb:maxPageNumber" content="0"/>\
<meta name="dtb:generator" content="OpenAccess_EPUB {version}"/>\
</head>\
</ncx>'''.format(uid=uid, depth=self.nav_depth, version=__version__))
        document = etree.ElementTree(root)
        ncx = document.getroot()

        #Create the docTitle element
        doctitle = etree.SubElement(ncx, 'docTitle')
        doctitle_text = etree.SubElement(doctitle, 'text')
        doctitle_text.text = self.title

        #Create the docAuthor elements
        #NOTE(review): only the role 'author' is matched here - confirm this
        #is the value produced by the publisher's nav_contributors()
        for contributor in self.contributors:
            if contributor.role == 'author':
                docauthor = etree.SubElement(ncx, 'docAuthor')
                docauthor_text = etree.SubElement(docauthor, 'text')
                docauthor_text.text = contributor.name

        #Create the navMap element
        ncx.append(make_navMap())

        if self.figures_list:
            make_navlist(ncx, 'List of Figures', self.figures_list)
        if self.tables_list:
            make_navlist(ncx, 'List of Tables', self.tables_list)

        with open(os.path.join(location, 'EPUB', 'toc.ncx'), 'wb') as output:
            output.write(etree.tostring(document, encoding='utf-8',
                                        pretty_print=True))

    def render_EPUB3(self, location):
        """
        Creates the XHTML navigation document for EPUB3.

        Writes ``<location>/EPUB/nav.xhtml`` containing the table of
        contents built from ``self.nav`` and, when any were collected,
        separate lists of figures and tables.

        Parameters
        ----------
        location : str
            Directory that contains the ``EPUB`` output directory.
        """

        def make_nav(nav=None):
            """
            Returns the top-level ol element when called without arguments,
            or the li element (with a nested ol for children) for `nav`.
            """
            if nav is None:
                nav_element = etree.Element('ol')
                for nav_point in self.nav:
                    nav_element.append(make_nav(nav=nav_point))
            else:
                nav_element = etree.Element('li')
                a = etree.SubElement(nav_element, 'a')
                a.attrib['href'] = nav.source
                a.text = nav.label
                if nav.children:
                    ol = etree.SubElement(nav_element, 'ol')
                    for child in nav.children:
                        ol.append(make_nav(nav=child))
            return nav_element

        def make_list_nav(parent, heading, entries):
            """
            Appends a nav element titled `heading` to `parent`, holding a
            flat ordered list of links for the navpoints in `entries`.
            """
            list_nav = etree.SubElement(parent, 'nav')
            list_h2 = etree.SubElement(list_nav, 'h2')
            list_h2.text = heading
            ol = etree.SubElement(list_nav, 'ol')
            for nav_pt in entries:
                li = etree.SubElement(ol, 'li')
                a = etree.SubElement(li, 'a')
                a.attrib['href'] = nav_pt.source
                a.text = nav_pt.label

        root = etree.XML('''\
<?xml version="1.0"?>\
<!DOCTYPE html>\
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">\
<head>\
<link rel="stylesheet" type="text/css" href="css/default.css" />\
</head>\
</html>''')

        document = etree.ElementTree(root)
        html = document.getroot()
        #html[0] is the head element from the template above
        title = etree.SubElement(html[0], 'title')
        title.text = self.title

        body = etree.SubElement(html, 'body')  # Create the body element
        #Create the primary nav element
        nav = etree.SubElement(body, 'nav')
        nav.attrib['{http://www.idpf.org/2007/ops}type'] = 'toc'
        nav.attrib['id'] = 'toc'

        #Create the title
        h2 = etree.SubElement(nav, 'h2')
        h2.text = 'Table of Contents'

        #Insert the nested table of contents
        nav.append(make_nav())

        if self.figures_list:
            make_list_nav(body, 'List of Figures', self.figures_list)
        if self.tables_list:
            #The table list must be built from tables_list, not figures_list
            make_list_nav(body, 'List of Tables', self.tables_list)

        with open(os.path.join(location, 'EPUB', 'nav.xhtml'),
                  'wb') as output:
            output.write(etree.tostring(document, encoding='utf-8',
                                        pretty_print=True))

    @property
    def play_order(self):
        """
        Increments the internal counter on every access and returns the
        new value as a string (first access returns '1').
        """
        self._play_order += 1
        return str(self._play_order)

    @property
    def auto_id(self):
        """
        Increments the internal counter on every access and returns a
        generated id of the form 'OAE-<n>', logging the assignment.
        """
        self._auto_id += 1
        id_gen = 'OAE-{0}'.format(self._auto_id)
        log.debug('Navigation element missing ID: assigned {0}'.format(id_gen))
        return id_gen
class Navigation(object):
    """
    Collects the navigation structure of one article (or, in collection
    mode, several articles) and renders it as an EPUB2 ``toc.ncx`` file or
    an EPUB3 ``nav.xhtml`` file.

    Articles are ingested with :meth:`process`; the resulting tree of
    navpoints is kept in ``self.nav`` while figures and tables are kept in
    the flat ``self.figures_list`` and ``self.tables_list``.
    """

    def __init__(self, collection=False, title=''):
        """
        Parameters
        ----------
        collection : bool
            When True, more than one article may be processed and the
            supplied `title` is kept instead of being replaced by the
            article's own navigation title.
        title : str
            Title of the navigation document; only retained in collection
            mode.
        """
        self.collection = collection

        #Special navigation structures: List of Equations/Figures/Tables
        self.equations_list = []
        self.figures_list = []
        self.tables_list = []

        self.article = None
        self.article_doi = None
        self.all_dois = []  # Used to create UID

        #These are the limited forms of metadata that might make it in to the
        #navigation document. Both are used for EPUB2, only the title is used
        #for EPUB3
        self.title = title
        self.contributors = OrderedSet()

        #The nav structure is a list of navpoint trees. Each navpoint may have
        #children navpoints. This structure will be converted to the
        #appropriate xml/xhtml structure and written to file when required.
        self.nav = []
        self.nav_depth = 0

        #Counters backing the play_order and auto_id properties
        self._play_order = 0
        self._auto_id = 0

    def process(self, article):
        """
        Ingests an Article to create navigation structures and parse global
        metadata.

        Parameters
        ----------
        article : Article
            Must provide ``publisher``, ``doi``, ``body`` and ``root``.

        Returns
        -------
        False if an article was already processed and collection mode is
        off; None otherwise (including when the article has no publisher,
        in which case an error is logged and nothing is recorded).
        """
        already_loaded = self.article is not None
        if already_loaded and not self.collection:
            log.warning('Could not process additional article. Navigation '
                        'only handles one article unless collection mode is '
                        'set.')
            return False

        publisher = article.publisher
        if publisher is None:
            log.error('Navigation cannot be generated for an Article '
                      'without a publisher!')
            return

        self.article = article
        #Only the part after the first slash is used for ids and file names
        self.article_doi = article.doi.split('/')[1]
        self.all_dois.append(article.doi)

        #Collections keep the title they were constructed with
        if not self.collection:
            self.title = publisher.nav_title()
        for author in publisher.nav_contributors():
            self.contributors.add(author)

        #Analyze the structure of the article to create internal mapping
        self.map_navigation()

    def map_navigation(self):
        """
        This is a wrapper for depth-first recursive analysis of the article.

        Adds a title navpoint for the current article to ``self.nav``, then
        the navpoints found in the article body and, when ``./back/ref``
        matches anything, a "References" navpoint.
        """
        doi = self.article_doi

        #All articles should have titles
        title_navpoint = navpoint('titlepage-{0}'.format(doi),
                                  self.article.publisher.nav_title(),
                                  self.play_order,
                                  'main.{0}.xhtml#title'.format(doi),
                                  [])
        self.nav.append(title_navpoint)

        #When processing a collection of articles, we will want all subsequent
        #navpoints for this article to be located under the title
        if self.collection:
            nav_insertion = title_navpoint.children
        else:
            nav_insertion = self.nav

        #If the article has a body, we'll need to parse it for navigation
        if self.article.body is not None:
            #Here is where we invoke the recursive parsing!
            nav_insertion.extend(
                self.recursive_article_navmap(self.article.body))

        #Add a navpoint to the references if appropriate
        if self.article.root.xpath('./back/ref'):
            ref_navpoint = navpoint(
                'references-{0}'.format(doi),
                'References',
                self.play_order,
                'biblio.{0}.xhtml#references'.format(doi),
                [])
            nav_insertion.append(ref_navpoint)

    def recursive_article_navmap(self, src_element, depth=0, first=True):
        """
        This function recursively traverses the content of an input article
        to add the correct elements to the NCX file's navMap and Lists.

        Only direct children tagged ``sec``, ``fig`` or ``table-wrap`` that
        have a non-empty immediate ``title`` child are considered. Sections
        are returned as navpoints (with their subsections as children);
        figures and tables are appended to ``self.figures_list`` and
        ``self.tables_list`` instead.

        Parameters
        ----------
        src_element : element
            Element whose direct children are inspected.
        depth : int
            Current nesting depth; ``self.nav_depth`` tracks the maximum.
        first : bool
            Unused; retained so existing callers keep working.

        Returns
        -------
        list
            Navpoints for the ``sec`` children of `src_element`.
        """
        if depth > self.nav_depth:
            self.nav_depth = depth
        navpoints = []
        tagnames = ['sec', 'fig', 'table-wrap']
        for child in src_element:
            #Children lacking a tag, or with another tag, are ignored
            tagname = getattr(child, 'tag', None)
            if tagname not in tagnames:
                continue

            #Safely handle missing id attributes
            if 'id' not in child.attrib:
                child.attrib['id'] = self.auto_id
            element_id = child.attrib['id']

            #If in collection mode, we'll prepend the article DOI to avoid
            #collisions
            if self.collection:
                child_id = '-'.join([self.article_doi, element_id])
            else:
                child_id = element_id

            #Attempt to infer the correct text as a label
            #Skip the element if we cannot
            child_title = child.find('title')
            if child_title is None:
                continue  # If there is no immediate title, skip this element
            label = element_methods.all_text(child_title)
            if not label:
                continue  # If no text in the title, skip this element
            source = 'main.{0}.xhtml#{1}'.format(self.article_doi,
                                                 element_id)
            if tagname == 'sec':
                #Claim the play order before descending, so that a section
                #is ordered ahead of its own subsections in reading order
                order = self.play_order
                children = self.recursive_article_navmap(child,
                                                         depth=depth + 1)
                navpoints.append(
                    navpoint(child_id, label, order, source, children))
            #figs and table-wraps do not have children
            elif tagname == 'fig':  # Add navpoints to list_of_figures
                self.figures_list.append(
                    navpoint(element_id, label, None, source, []))
            elif tagname == 'table-wrap':  # Add navpoints to list_of_tables
                self.tables_list.append(
                    navpoint(element_id, label, None, source, []))
        return navpoints

    def render_EPUB2(self, location):
        """
        Creates the NCX specified file for EPUB2.

        Writes ``<location>/EPUB/toc.ncx`` containing the head metadata,
        docTitle, docAuthor entries, the navMap built from ``self.nav`` and
        navLists for figures and tables when any were collected.

        Parameters
        ----------
        location : str
            Directory that contains the ``EPUB`` output directory.
        """
        #Function-scope import: only needed to escape the uid attribute
        from xml.sax.saxutils import escape

        def make_navlabel(text):
            """
            Creates and returns a navLabel element with the supplied text.
            """
            navlabel = etree.Element('navLabel')
            navlabel_text = etree.SubElement(navlabel, 'text')
            navlabel_text.text = text
            return navlabel

        def make_navMap(nav=None):
            """
            Returns the navMap element when called without arguments, or
            the navPoint element (with nested children) for `nav`.
            """
            if nav is None:
                nav_element = etree.Element('navMap')
                for nav_point in self.nav:
                    nav_element.append(make_navMap(nav=nav_point))
            else:
                nav_element = etree.Element('navPoint')
                nav_element.attrib['id'] = nav.id
                nav_element.attrib['playOrder'] = nav.playOrder
                nav_element.append(make_navlabel(nav.label))
                content_element = etree.SubElement(nav_element, 'content')
                content_element.attrib['src'] = nav.source
                for child in nav.children:
                    nav_element.append(make_navMap(nav=child))
            return nav_element

        def make_navlist(parent, heading, targets):
            """
            Appends a navList titled `heading` to `parent`, holding one
            navTarget per navpoint in `targets`.
            """
            navlist = etree.SubElement(parent, 'navList')
            navlist.append(make_navlabel(heading))
            for nav_pt in targets:
                navtarget = etree.SubElement(navlist, 'navTarget')
                navtarget.attrib['id'] = nav_pt.id
                #make_navlabel is a local function, not a method of self
                navtarget.append(make_navlabel(nav_pt.label))
                content = etree.SubElement(navtarget, 'content')
                content.attrib['src'] = nav_pt.source

        #The uid lands inside a double-quoted attribute, so markup
        #characters and double quotes in a DOI must be escaped
        uid = escape(','.join(self.all_dois), {'"': '&quot;'})
        root = etree.XML('''\
<?xml version="1.0"?>\
<ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">\
<head>\
<meta name="dtb:uid" content="{uid}"/>\
<meta name="dtb:depth" content="{depth}"/>\
<meta name="dtb:totalPageCount" content="0"/>\
<meta name="dtb:maxPageNumber" content="0"/>\
<meta name="dtb:generator" content="OpenAccess_EPUB {version}"/>\
</head>\
</ncx>'''.format(uid=uid, depth=self.nav_depth, version=__version__))
        document = etree.ElementTree(root)
        ncx = document.getroot()

        #Create the docTitle element
        doctitle = etree.SubElement(ncx, 'docTitle')
        doctitle_text = etree.SubElement(doctitle, 'text')
        doctitle_text.text = self.title

        #Create the docAuthor elements
        #NOTE(review): only the role 'author' is matched here - confirm this
        #is the value produced by the publisher's nav_contributors()
        for contributor in self.contributors:
            if contributor.role == 'author':
                docauthor = etree.SubElement(ncx, 'docAuthor')
                docauthor_text = etree.SubElement(docauthor, 'text')
                docauthor_text.text = contributor.name

        #Create the navMap element
        ncx.append(make_navMap())

        if self.figures_list:
            make_navlist(ncx, 'List of Figures', self.figures_list)
        if self.tables_list:
            make_navlist(ncx, 'List of Tables', self.tables_list)

        with open(os.path.join(location, 'EPUB', 'toc.ncx'), 'wb') as output:
            output.write(
                etree.tostring(document, encoding='utf-8', pretty_print=True))

    def render_EPUB3(self, location):
        """
        Creates the XHTML navigation document for EPUB3.

        Writes ``<location>/EPUB/nav.xhtml`` containing the table of
        contents built from ``self.nav`` and, when any were collected,
        separate lists of figures and tables.

        Parameters
        ----------
        location : str
            Directory that contains the ``EPUB`` output directory.
        """

        def make_nav(nav=None):
            """
            Returns the top-level ol element when called without arguments,
            or the li element (with a nested ol for children) for `nav`.
            """
            if nav is None:
                nav_element = etree.Element('ol')
                for nav_point in self.nav:
                    nav_element.append(make_nav(nav=nav_point))
            else:
                nav_element = etree.Element('li')
                a = etree.SubElement(nav_element, 'a')
                a.attrib['href'] = nav.source
                a.text = nav.label
                if nav.children:
                    ol = etree.SubElement(nav_element, 'ol')
                    for child in nav.children:
                        ol.append(make_nav(nav=child))
            return nav_element

        def make_list_nav(parent, heading, entries):
            """
            Appends a nav element titled `heading` to `parent`, holding a
            flat ordered list of links for the navpoints in `entries`.
            """
            list_nav = etree.SubElement(parent, 'nav')
            list_h2 = etree.SubElement(list_nav, 'h2')
            list_h2.text = heading
            ol = etree.SubElement(list_nav, 'ol')
            for nav_pt in entries:
                li = etree.SubElement(ol, 'li')
                a = etree.SubElement(li, 'a')
                a.attrib['href'] = nav_pt.source
                a.text = nav_pt.label

        root = etree.XML('''\
<?xml version="1.0"?>\
<!DOCTYPE html>\
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">\
<head>\
<link rel="stylesheet" type="text/css" href="css/default.css" />\
</head>\
</html>''')

        document = etree.ElementTree(root)
        html = document.getroot()
        #html[0] is the head element from the template above
        title = etree.SubElement(html[0], 'title')
        title.text = self.title

        body = etree.SubElement(html, 'body')  # Create the body element
        #Create the primary nav element
        nav = etree.SubElement(body, 'nav')
        nav.attrib['{http://www.idpf.org/2007/ops}type'] = 'toc'
        nav.attrib['id'] = 'toc'

        #Create the title
        h2 = etree.SubElement(nav, 'h2')
        h2.text = 'Table of Contents'

        #Insert the nested table of contents
        nav.append(make_nav())

        if self.figures_list:
            make_list_nav(body, 'List of Figures', self.figures_list)
        if self.tables_list:
            #The table list must be built from tables_list, not figures_list
            make_list_nav(body, 'List of Tables', self.tables_list)

        with open(os.path.join(location, 'EPUB', 'nav.xhtml'),
                  'wb') as output:
            output.write(
                etree.tostring(document, encoding='utf-8', pretty_print=True))

    @property
    def play_order(self):
        """
        Increments the internal counter on every access and returns the
        new value as a string (first access returns '1').
        """
        self._play_order += 1
        return str(self._play_order)

    @property
    def auto_id(self):
        """
        Increments the internal counter on every access and returns a
        generated id of the form 'OAE-<n>', logging the assignment.
        """
        self._auto_id += 1
        id_gen = 'OAE-{0}'.format(self._auto_id)
        log.debug('Navigation element missing ID: assigned {0}'.format(id_gen))
        return id_gen