def enclose_and_change_self_size(self, outer_xpath, size_attribute, tag, change_tag):
    """Wrap every element matched by ``outer_xpath`` in a new ``tag`` element.

    For each match the ``meTypesetSize`` attribute is set to
    ``size_attribute``.  The match is renamed to ``change_tag`` unless it, or
    one of its direct children, already is a TEI ``change_tag`` element; in
    that case it is tagged ``REMOVE`` and stripped with ``etree.strip_tags``
    after it has been handed to ``Manipulate.append_safe``.  Finally any
    ``bold`` marker is removed from the match's ``rend`` attribute.

    Args:
        outer_xpath: XPath (may use the ``tei`` prefix) selecting the elements.
        size_attribute: value stored in the ``meTypesetSize`` attribute.
        tag: tag name of the wrapper element created for each match.
        change_tag: tag name the matched element is renamed to.
    """
    qualified_change_tag = '{http://www.tei-c.org/ns/1.0}' + change_tag
    tree = self.load_dom_tree()

    # search the tree and grab the parent
    for child in tree.xpath(outer_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
        self.debug.print_debug(self, u'Enclosing and changing size: {0} to {1}'.format(child.tag, change_tag))
        new_element = etree.Element(tag)
        child.attrib[u'meTypesetSize'] = size_attribute

        already_tagged = child.tag == qualified_change_tag or any(
            sub_element.tag == qualified_change_tag for sub_element in child)
        child.tag = 'REMOVE' if already_tagged else change_tag

        child.addnext(new_element)
        # NOTE(review): append_safe is defined elsewhere; presumably it moves
        # ``child`` inside ``new_element`` - confirm.
        Manipulate.append_safe(new_element, child, self)

        if child.tag == 'REMOVE':
            etree.strip_tags(child.getparent(), 'REMOVE')

        # ``attrib[...]`` raises KeyError for a missing attribute and never
        # yields None, so look the value up with ``get`` instead.
        rend = child.attrib.get(u'rend')
        if rend is not None and u'bold' in rend:
            child.attrib[u'rend'] = rend.replace(u'bold', u'')

    self.save_tree(tree)
def enclose_and_change_self_size(self, outer_xpath, size_attribute, tag, change_tag):
    """Enclose each ``outer_xpath`` match in a ``tag`` element and retag the match.

    Each match receives ``meTypesetSize=size_attribute``.  It is renamed to
    ``change_tag``, except when the match or one of its direct children is
    already a TEI ``change_tag`` element: then the match is marked ``REMOVE``
    and removed again via ``etree.strip_tags`` once it has been passed to
    ``Manipulate.append_safe``.  A ``bold`` marker in the match's ``rend``
    attribute is dropped.

    Args:
        outer_xpath: XPath (``tei`` prefix available) selecting the elements.
        size_attribute: value written to the ``meTypesetSize`` attribute.
        tag: tag of the enclosing element created per match.
        change_tag: new tag for the matched element.
    """
    tei_prefix = '{http://www.tei-c.org/ns/1.0}'
    tree = self.load_dom_tree()

    for child in tree.xpath(outer_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
        self.debug.print_debug(self, u'Enclosing and changing size: {0} to {1}'.format(child.tag, change_tag))
        new_element = etree.Element(tag)
        child.attrib[u'meTypesetSize'] = size_attribute

        if child.tag == tei_prefix + change_tag:
            child.tag = 'REMOVE'
        else:
            for sub_element in child:
                if sub_element.tag == tei_prefix + change_tag:
                    child.tag = 'REMOVE'
                    break
            if child.tag != 'REMOVE':
                child.tag = change_tag

        child.addnext(new_element)
        # NOTE(review): append_safe lives outside this block; it presumably
        # re-parents ``child`` under ``new_element`` - confirm.
        Manipulate.append_safe(new_element, child, self)

        if child.tag == 'REMOVE':
            etree.strip_tags(child.getparent(), 'REMOVE')

        # A missing ``rend`` attribute used to raise KeyError here because
        # subscripting ``attrib`` never returns None.
        rend = child.attrib.get(u'rend')
        if rend is not None and u'bold' in rend:
            child.attrib[u'rend'] = rend.replace(u'bold', u'')

    self.save_tree(tree)
def find_or_create_element(self, tree, element_tag, add_xpath, is_sibling):
    """Return the first ``element_tag`` element of ``tree``, creating it if absent.

    Example: ``find_or_create_element(tree, 'back', '//body', True)``.

    Args:
        tree: parsed document that is searched and, if needed, modified.
        element_tag: tag looked up with ``//element_tag`` and used for the
            new element when nothing is found.
        add_xpath: XPath of the anchor element used when creating; the
            ``tei`` prefix is available.
        is_sibling: when true the new element is added directly after the
            anchor, otherwise it is passed to ``Manipulate.append_safe`` with
            the anchor as target.

    Returns:
        The existing or the newly created element.

    Raises:
        IndexError: if an element must be created and ``add_xpath`` matches
            nothing.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

    try:
        matches = tree.xpath(u'//' + element_tag, namespaces=tei_ns)
    except etree.XPathError:
        # best effort: a tag that does not form a valid XPath counts as
        # "not found" instead of aborting the whole run
        matches = []

    if matches:
        self.debug.print_debug(self, u'Found existing {0}. Using it.'.format(element_tag))
        return matches[0]

    self.debug.print_debug(self, u'Unable to find an existing {0} element.'.format(element_tag))
    self.debug.print_debug(self, u'Creating new {0} element.'.format(element_tag))

    # The namespace map is required here as well: callers in this file pass
    # prefixed anchors such as '//tei:body', which cannot be evaluated
    # without it.
    anchor = tree.xpath(add_xpath, namespaces=tei_ns)[0]
    new_element = etree.Element(element_tag)

    if is_sibling:
        anchor.addnext(new_element)
    else:
        Manipulate.append_safe(anchor, new_element, self)

    return new_element
def search_and_copy(last_node, move_node, nested_sibling, new_element, node, node_parent):
    """Move everything that follows ``node`` into ``new_element``.

    The following siblings of ``node`` (elements and text results) are walked
    in document order.  Elements are handed to ``Manipulate.append_safe`` with
    ``new_element`` as target.  The first text result seen before any element
    becomes ``new_element.text`` or, when ``move_node`` is true, the tail of
    ``node``, which is then itself handed to ``append_safe``.  Later text
    results become the tail of whatever was handled last.  ``new_element`` is
    then added after ``nested_sibling`` or, if that is None, after
    ``node_parent`` (whose tail is cleared).

    Args:
        last_node: ignored on input; present for call compatibility.
        move_node: whether ``node`` itself is relocated into ``new_element``.
        nested_sibling: element after which ``new_element`` is inserted, or None.
        new_element: container receiving the copied nodes.
        node: node whose following siblings are processed.
        node_parent: fallback insertion anchor when ``nested_sibling`` is None.

    Returns:
        ``new_element`` when ``move_node`` is false (the tail of ``node`` is
        cleared in that case), otherwise None.
    """
    previous = None

    for sibling in node.xpath('following-sibling::node()'):
        is_text = type(sibling) is etree._ElementStringResult or type(sibling) is etree._ElementUnicodeResult

        if not is_text:
            Manipulate.append_safe(new_element, sibling, None)
            previous = sibling
        elif previous is not None:
            previous.tail = sibling
        else:
            # first text chunk, nothing has been appended yet
            previous = new_element
            if move_node:
                node.tail = sibling
                Manipulate.append_safe(previous, node, None)
            else:
                previous.text = sibling

    if nested_sibling is None:
        node_parent.addnext(new_element)
        node_parent.tail = ''
    else:
        nested_sibling.addnext(new_element)

    if move_node:
        return None

    # remove the original tail (all text after the line break, for example)
    # <!--meTypeset:br-->A third break <-- "a third break" is the tail
    node.tail = ''
    return new_element
def tag_bibliography_refs(self):
    """Collect reference paragraphs into ``back/ref-list`` as ``ref`` elements.

    Nothing happens when ``//back/ref-list`` already exists.  Otherwise
    ``back`` and ``ref-list`` are looked up or created through
    ``find_or_create_element``.  Inside ``sec[@reflist="yes"]`` every
    ``title`` is removed and every ``p[@rend="ref"]`` (direct, one level
    down, or inside ``listitem``) is renamed to ``ref``, given a fresh
    ``ID<uuid4>`` id, stripped of ``rend`` and handed to
    ``Manipulate.append_safe`` with the ref-list as target.  The tree is
    saved afterwards.
    """
    tree = self.load_dom_tree()

    if tree.xpath('//back/ref-list'):
        return

    self.find_or_create_element(tree, 'back', '//body', True)
    ref_list = self.find_or_create_element(tree, 'ref-list', '//back', False)

    # change this to find <reflist> elements after we're more certain of how to identify them
    for refs in tree.xpath('//sec[@reflist="yes"]/p[@rend="ref"] | //sec[@reflist="yes"]/title '
                           '| //sec[@reflist="yes"]/*/listitem/p[@rend="ref"] | '
                           '//sec[@reflist="yes"]/*/p[@rend="ref"]'):
        if refs.tag == 'title':
            self.debug.print_debug(self, u'Removing title element from reference item')
            refs.getparent().remove(refs)
            continue

        self.debug.print_debug(self, u'Tagging element "{0}" as reference item'.format(refs.tag))
        refs.tag = 'ref'
        # ``format`` already stringifies the UUID; the former ``unicode()``
        # wrapper does not exist on Python 3.
        refs.attrib['id'] = u'ID{0}'.format(uuid.uuid4())

        if 'rend' in refs.attrib:
            del refs.attrib['rend']

        Manipulate.append_safe(ref_list, refs, self)

    self.save_tree(tree)
def drop_addin_json(self, xpath, start_text, replace_tag, attribute, caller):
    """Replace add-in elements by a ``replace_tag`` element without the JSON prefix.

    Every element matched by ``xpath`` whose text starts with ``start_text``
    is replaced by a new ``replace_tag`` element carrying ``rel=attribute``.
    The new element's text is the old text with everything up to the last
    ``}`` (plus one optional whitespace character) removed.  Child elements
    of the old element are handed to ``Manipulate.append_safe`` with the new
    element as target; the old element is then removed and the tree saved.

    Args:
        xpath: XPath (``tei`` prefix available) selecting candidate elements.
        start_text: prefix identifying a known add-in.
        replace_tag: tag of the replacement element.
        attribute: value of the replacement's ``rel`` attribute.
        caller: unused by this method.
    """
    tree = self.load_dom_tree()

    for child in tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
        text = child.text

        # only known add-ins are handled
        if text is None or not text.startswith(start_text):
            continue

        remainder = re.sub(r'.+}\s?', '', text)

        replacement = etree.Element(replace_tag, rel=attribute)
        replacement.text = remainder
        child.addnext(replacement)

        for subchild in child:
            if type(subchild) is etree._Element:
                Manipulate.append_safe(replacement, subchild, self)

        child.getparent().remove(child)

    self.save_tree(tree)
def do_list_bibliography(self, xpath):
    """Convert a trailing list into bibliography paragraphs when it looks like one.

    For each element of ``xpath``: if its tag ends in ``list`` the stripped
    text of its first child is tested, if it ends in ``item`` its own
    stripped text is tested.  When the text contains a year (19xx/20xx with
    an optional letter) or ``n.d.``, the list (for an item: its parent) is
    renamed to a TEI ``div`` with ``rend="Bibliography"``.  Each of its
    children gets a new ``p rend="Bibliography"`` sibling, is handed to
    ``Manipulate.append_safe`` with that ``p`` as target, and is renamed to a
    TEI ``ref`` with ``target="None"``.

    Args:
        xpath: iterable of candidate elements (already evaluated results).

    Returns:
        True if at least one list was converted, otherwise False.
    """
    tei_prefix = '{http://www.tei-c.org/ns/1.0}'
    # raw string: '\d' inside a plain literal is an invalid escape sequence
    year_test = re.compile(r'((19|20)\d{2}[a-z]?)|(n\.d\.)')
    found = False

    for last_list in xpath:
        if last_list.tag.endswith('list'):
            if len(last_list) == 0:
                continue
            sample = last_list[0]
            via_item = False
            message = u'Found a list as last element. Treating as bibliography.'
        elif last_list.tag.endswith('item'):
            sample = last_list
            via_item = True
            message = u'Found a list as last element via item. Treating as bibliography.'
        else:
            self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                         u'bibliography.'.format(xpath[0].tag))
            continue

        if not year_test.findall(self.get_stripped_text(sample)):
            continue

        # it is a list, so change to reference list
        self.debug.print_debug(self, message)
        self.gv.used_list_method = True
        container = last_list.getparent() if via_item else last_list
        found = True
        container.tag = tei_prefix + 'div'
        container.attrib['rend'] = u'Bibliography'

        # now convert each line; iterate over a snapshot because the items
        # are re-parented while we walk them
        for list_item in list(container):
            new_element = etree.Element('p')
            new_element.attrib['rend'] = u'Bibliography'
            list_item.addnext(new_element)
            Manipulate.append_safe(new_element, list_item, self)
            list_item.tag = tei_prefix + 'ref'
            list_item.attrib['target'] = 'None'

    return found
def do_list_bibliography(self, xpath):
    """Turn a trailing list into bibliography paragraphs if it contains a year.

    Each element of ``xpath`` is inspected.  A ``...list`` element is judged
    by the stripped text of its first child, a ``...item`` element by its own
    stripped text.  If that text contains a four-digit number starting with
    1 or 2 (optionally followed by a letter) or ``n.d.``, the list (for an
    item: its parent) becomes a TEI ``div`` with ``rend="Bibliography"``.
    Every child of it gets a new ``p rend="Bibliography"`` sibling, is passed
    to ``Manipulate.append_safe`` with that ``p`` as target, and is renamed to
    a TEI ``ref`` with ``target="None"``.

    Args:
        xpath: iterable of candidate elements (already evaluated results).

    Returns:
        True if at least one list was converted, otherwise False.
    """
    div_tag = '{http://www.tei-c.org/ns/1.0}div'
    ref_tag = '{http://www.tei-c.org/ns/1.0}ref'
    # raw string: '\d' inside a plain literal is an invalid escape sequence
    year_test = re.compile(r'((1|2)\d{3}[a-z]?)|(n\.d\.)')
    found = False

    def convert_children(container):
        # wrap every child in its own bibliography paragraph; a snapshot is
        # iterated because the children are re-parented inside the loop
        for list_item in list(container):
            new_element = etree.Element('p')
            new_element.attrib['rend'] = u'Bibliography'
            list_item.addnext(new_element)
            Manipulate.append_safe(new_element, list_item, self)
            list_item.tag = ref_tag
            list_item.attrib['target'] = 'None'

    for last_list in xpath:
        if last_list.tag.endswith('list'):
            if len(last_list) > 0 and year_test.findall(self.get_stripped_text(last_list[0])):
                # it is a list, so change to reference list
                self.debug.print_debug(self, u'Found a list as last element. Treating as bibliography.')
                self.gv.used_list_method = True
                found = True
                last_list.tag = div_tag
                last_list.attrib['rend'] = u'Bibliography'
                convert_children(last_list)
        elif last_list.tag.endswith('item'):
            if year_test.findall(self.get_stripped_text(last_list)):
                # the item belongs to a list, so change that list to a reference list
                self.debug.print_debug(self, u'Found a list as last element via item. Treating as bibliography.')
                self.gv.used_list_method = True
                container = last_list.getparent()
                found = True
                container.tag = div_tag
                container.attrib['rend'] = u'Bibliography'
                convert_children(container)
        else:
            self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                         u'bibliography.'.format(xpath[0].tag))

    return found
def process_node_for_tags(self, nested_sibling, node, search_xpath, tag_name, new_tag='SAME'):
    """Split the content that follows ``node`` off into a new element.

    A new element (``new_tag``, or ``tag_name`` when ``new_tag`` is
    ``'SAME'``) is created and the first node following ``search_xpath`` in
    the document decides what happens:

    * an element: it is handed to ``Manipulate.append_safe`` with the new
      element as target;
    * text equal to ``node.tail``: the siblings after ``node`` are copied via
      ``search_and_copy``; when the parent of ``node`` is not a ``tag_name``
      element, ``handle_nested_elements`` first rebuilds the nesting;
    * any other text: the tail of ``node`` becomes the tail of the new element.

    Unless ``search_and_copy`` returned None, the new element is added after
    the last node and ``node`` is removed from its parent.

    Args:
        nested_sibling: element after which the new element should be
            inserted by ``search_and_copy``, or None.
        node: marker node at which the split happens.
        search_xpath: XPath fragment locating the marker; used as
            ``//<search_xpath>/following-sibling::node()``.
        tag_name: tag of the element being split.
        new_tag: tag of the new element; ``'SAME'`` means ``tag_name``.

    Returns:
        None.
    """
    target_tag = tag_name if new_tag == 'SAME' else new_tag

    last_node = node
    new_element = etree.Element(target_tag)
    new_element.text = ''

    nodes_to_copy = node.xpath('//{0}/following-sibling::node()'.format(search_xpath))
    if not nodes_to_copy:
        return

    self.debug.print_debug(self, u'Found {0} nodes to copy: {1}'.format(len(nodes_to_copy), nodes_to_copy))

    # only the first following node is examined
    element = nodes_to_copy[0]

    # noinspection PyProtectedMember
    if type(element) is etree._Element:
        Manipulate.append_safe(new_element, element, self)
    elif node.tail == element:
        # this should handle cases where a tag spans the break
        # for example: <p>Some text <italic>some italic text<!--meTypeset:br-->
        # more italic text</italic> more text</p>
        node_parent = node.getparent()
        move_node = False

        if node_parent.tag != tag_name:
            # the element here is nested (bold etc), so the tail has to move to the outermost
            # element and "node" has to be switched so that its siblings are the rest of the
            # element; the nesting found on the way up is rebuilt inside the new copy
            move_node, node, node_parent = self.handle_nested_elements(
                node_parent, move_node, node, node_parent, None, tag_name, [], [])

        # search for all siblings and copy them into a new element below
        last_node = self.search_and_copy(last_node, move_node, nested_sibling, new_element, node, node_parent)
    else:
        new_element.tail = node.tail
        node.tail = ''

    if last_node is not None:
        last_node.addnext(new_element)
        node.getparent().remove(node)
def process_node_for_tags(self, nested_sibling, node, search_xpath, tag_name, new_tag='SAME'):
    """Move what follows the marker ``node`` into a freshly created element.

    The new element is tagged ``new_tag`` (``tag_name`` if ``new_tag`` is
    ``'SAME'``).  Only the first node returned by
    ``//<search_xpath>/following-sibling::node()`` is examined:

    * if it is an element, it is passed to ``Manipulate.append_safe`` with
      the new element as target;
    * if it is text identical to ``node.tail``, ``search_and_copy`` moves the
      following siblings; ``handle_nested_elements`` is called first when the
      parent of ``node`` is not a ``tag_name`` element;
    * otherwise the tail of ``node`` is transferred to the new element.

    When the resulting last node is not None, the new element is inserted
    after it and ``node`` is removed.

    Args:
        nested_sibling: insertion anchor forwarded to ``search_and_copy``.
        node: marker node where the content is split.
        search_xpath: XPath fragment that locates the marker.
        tag_name: tag of the enclosing element that is being split.
        new_tag: tag for the new element; ``'SAME'`` reuses ``tag_name``.

    Returns:
        None.
    """
    if new_tag == 'SAME':
        new_tag = tag_name

    new_element = etree.Element(new_tag)
    new_element.text = ''
    last_node = node

    query = '//{0}/following-sibling::node()'.format(search_xpath)
    nodes_to_copy = node.xpath(query)

    if len(nodes_to_copy) == 0:
        return

    self.debug.print_debug(self, u'Found {0} nodes to copy: {1}'.format(len(nodes_to_copy), nodes_to_copy))

    first = nodes_to_copy[0]
    # noinspection PyProtectedMember
    first_is_element = type(first) is etree._Element

    if first_is_element:
        Manipulate.append_safe(new_element, first, self)
    elif node.tail != first:
        new_element.tail = node.tail
        node.tail = ''
    else:
        # a tag may span the break, for example:
        # <p>Some text <italic>some italic text<!--meTypeset:br-->
        # more italic text</italic> more text</p>
        node_parent = node.getparent()
        iter_node = node_parent
        tail_stack = []
        tail_stack_objects = []
        move_node = False
        outer_node = None

        if node_parent.tag != tag_name:
            # nested styling (bold etc): rebuild the chain of styling elements for the moved
            # text and continue from the outermost rebuilt element
            move_node, node, node_parent = self.handle_nested_elements(iter_node, move_node, node, node_parent,
                                                                       outer_node, tag_name, tail_stack,
                                                                       tail_stack_objects)

        # search for all siblings and copy them into a new element below
        last_node = self.search_and_copy(last_node, move_node, nested_sibling, new_element, node, node_parent)

    if last_node is None:
        return

    last_node.addnext(new_element)
    node.getparent().remove(node)
def enclose(self, start_xpath, select_xpath):
    """Gather the ``select_xpath`` matches in a new ``div`` before ``start_xpath``.

    A ``div`` element is inserted immediately before the first match of
    ``start_xpath``; every match of ``select_xpath`` is then handed to
    ``Manipulate.append_safe`` with that ``div`` as target and the tree is
    saved.

    Args:
        start_xpath: XPath (``tei`` prefix available) of the insertion point.
        select_xpath: XPath (``tei`` prefix available) of the elements to move.

    Raises:
        IndexError: if ``start_xpath`` matches nothing.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    tree = self.load_dom_tree()

    anchor = tree.xpath(start_xpath, namespaces=tei_ns)[0]
    wrapper = etree.Element('div')
    anchor.addprevious(wrapper)

    self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(select_xpath))

    # grab the selected elements and move them
    for element in tree.xpath(select_xpath, namespaces=tei_ns):
        Manipulate.append_safe(wrapper, element, self)

    self.save_tree(tree)
def handle_nested_elements(iter_node, move_node, node, node_parent, outer_node, tag_name, tail_stack, tail_stack_objects):
    """Rebuild the nesting around ``node`` so that its tail keeps its styling.

    Starting at ``iter_node`` the ancestors are climbed until an element
    tagged ``tag_name`` is reached; the tags and elements passed on the way
    are pushed onto ``tail_stack`` / ``tail_stack_objects`` (both are then
    reversed in place).  A matching chain of fresh elements is created, the
    outermost one inserted after the outermost recorded ancestor, and the
    innermost one receiving ``node.tail`` as text.  ``node`` is then removed
    from its parent.

    Args:
        iter_node: element at which the upward search starts.
        move_node: ignored on input; present for call compatibility.
        node: marker node whose tail is re-wrapped.
        node_parent: ignored on input; present for call compatibility.
        outer_node: returned unchanged when no ancestor had to be recorded.
        tag_name: tag at which the upward search stops.
        tail_stack: list receiving the recorded tags (mutated).
        tail_stack_objects: list receiving the recorded elements (mutated).

    Returns:
        ``(True, outermost_new_element, matching_ancestor)``, or
        ``(None, None, None)`` when no ``tag_name`` ancestor exists.
    """
    ancestor = iter_node
    while True:
        if ancestor is None:
            return None, None, None
        if ancestor.tag == tag_name:
            break
        tail_stack.append(ancestor.tag)
        tail_stack_objects.append(ancestor)
        ancestor = ancestor.getparent()

    # outermost recorded ancestor first
    tail_stack.reverse()
    tail_stack_objects.reverse()

    innermost = len(tail_stack) - 1
    append_location = None

    # rebuild the styled tree on a set of subelements
    for position, tag in enumerate(tail_stack):
        sub_element = etree.Element(tag)

        if position == innermost:
            # the tail (of the comment) ends up inside the innermost style
            sub_element.text = node.tail
        if position == 0:
            outer_node = sub_element

        if append_location is None:
            tail_stack_objects[0].addnext(sub_element)
        else:
            Manipulate.append_safe(append_location, sub_element, None)
        append_location = sub_element

    # remove the old node (this is in the element above)
    node.getparent().remove(node)

    # the outermost new node becomes the search node so that siblings can be found
    return True, outer_node, ancestor
def do_cit_bibliography(self, xpath):
    """Convert a trailing run of TEI ``cit`` elements into bibliography entries.

    For every ``cit`` element in ``xpath`` the element itself and all ``cit``
    elements directly preceding it are converted: each gets a new
    ``p rend="Bibliography"`` sibling, is handed to ``Manipulate.append_safe``
    with that ``p`` as target, and is renamed to a TEI ``ref`` with
    ``target="None"``.

    Args:
        xpath: iterable of candidate elements (already evaluated results).

    Returns:
        True if at least one ``cit`` element was found, otherwise False.
    """
    cit_tag = '{http://www.tei-c.org/ns/1.0}cit'
    ref_tag = '{http://www.tei-c.org/ns/1.0}ref'
    found = False

    def wrap_as_reference(element):
        # enclose the element in a bibliography paragraph and retag it
        new_element = etree.Element('p')
        new_element.attrib['rend'] = u'Bibliography'
        element.addnext(new_element)
        Manipulate.append_safe(new_element, element, self)
        element.tag = ref_tag
        element.attrib['target'] = 'None'

    for last_list in xpath:
        if last_list.tag != cit_tag:
            self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                         u'bibliography.'.format(xpath[0].tag))
            continue

        # it is a list, so change to reference list
        self.debug.print_debug(self, u'Found a cit as last element. Treating as bibliography.')
        found = True

        sibling = last_list.getprevious()
        # getprevious() returns None at the first child; without the None
        # check a run of cit elements reaching the start raised AttributeError
        while sibling is not None and sibling.tag == cit_tag:
            # fetch the predecessor before the sibling is moved
            next_sibling = sibling.getprevious()
            wrap_as_reference(sibling)
            sibling = next_sibling

        wrap_as_reference(last_list)

    return found
def do_cit_bibliography(self, xpath):
    """Treat trailing TEI ``cit`` elements as a bibliography.

    Each ``cit`` element found in ``xpath`` is converted together with the
    uninterrupted run of ``cit`` siblings before it.  Converting means: a new
    ``p rend="Bibliography"`` is added after the element, the element is
    passed to ``Manipulate.append_safe`` with that ``p`` as target, and the
    element is renamed to a TEI ``ref`` with ``target="None"``.

    Args:
        xpath: iterable of candidate elements (already evaluated results).

    Returns:
        True if at least one ``cit`` element was found, otherwise False.
    """
    tei_prefix = '{http://www.tei-c.org/ns/1.0}'
    found = False

    for last_list in xpath:
        if last_list.tag == tei_prefix + 'cit':
            # it is a list, so change to reference list
            self.debug.print_debug(self, u'Found a cit as last element. Treating as bibliography.')
            found = True

            # collect the element and the cit siblings directly before it;
            # stop at the start of the parent, where getprevious() is None
            # (the unguarded ``sibling.tag`` used to raise AttributeError)
            sibling_tag = last_list.tag
            pending = []
            sibling = last_list.getprevious()
            while sibling is not None and sibling.tag == sibling_tag:
                pending.append(sibling)
                sibling = sibling.getprevious()
            pending.append(last_list)

            for entry in pending:
                new_element = etree.Element('p')
                new_element.attrib['rend'] = u'Bibliography'
                entry.addnext(new_element)
                Manipulate.append_safe(new_element, entry, self)
                entry.tag = tei_prefix + 'ref'
                entry.attrib['target'] = 'None'
        else:
            self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                         u'bibliography.'.format(xpath[0].tag))

    return found
def enclose_all(self, start_xpath, new_enclose, start_index):
    """Gather every ``start_xpath`` match inside one new ``new_enclose`` element.

    The new element is inserted immediately before the parent of the first
    match; every match is then handed to ``Manipulate.append_safe`` with the
    new element as target and the tree is saved.  When nothing matches, the
    tree is saved unchanged.

    Args:
        start_xpath: XPath (``tei`` prefix available) selecting the elements.
        new_enclose: tag of the enclosing element.
        start_index: unused by this method.
    """
    tree = self.load_dom_tree()

    self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(start_xpath))

    # search the tree and grab the elements
    matches = tree.xpath(start_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})
    wrapper = etree.Element(new_enclose)

    # move the elements
    for position, element in enumerate(matches):
        if position == 0:
            # the wrapper is placed once, before the first match's parent
            element.getparent().addprevious(wrapper)
        Manipulate.append_safe(wrapper, element, self)

    self.save_tree(tree)
def find_or_create_element(self, tree, element_tag, add_xpath, is_sibling):
    """Look up the first ``element_tag`` element in ``tree`` or create one.

    Example: ``find_or_create_element(tree, 'back', '//body', True)``.

    Args:
        tree: parsed document that is searched and possibly extended.
        element_tag: tag searched with ``//element_tag``; also the tag of the
            element created when the search finds nothing.
        add_xpath: XPath of the anchor element for creation; may use the
            ``tei`` prefix.
        is_sibling: true to add the new element right after the anchor,
            false to pass it to ``Manipulate.append_safe`` with the anchor as
            target.

    Returns:
        The element that was found or created.

    Raises:
        IndexError: if creation is required and ``add_xpath`` has no match.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    ret = None

    try:
        ret = tree.xpath(u'//' + element_tag, namespaces=tei_ns)[0]
    except (IndexError, etree.XPathError):
        # no match, or a tag that cannot be evaluated as an XPath step:
        # both simply mean the element has to be created
        self.debug.print_debug(self, u'Unable to find an existing {0} element.'.format(element_tag))
    else:
        self.debug.print_debug(self, u'Found existing {0}. Using it.'.format(element_tag))

    if ret is None:
        self.debug.print_debug(self, u'Creating new {0} element.'.format(element_tag))
        # The anchor lookup needs the namespace map too; callers in this
        # file pass prefixed paths such as '//tei:body'.
        anchor = tree.xpath(add_xpath, namespaces=tei_ns)[0]
        ret = etree.Element(element_tag)

        if is_sibling:
            anchor.addnext(ret)
        else:
            Manipulate.append_safe(anchor, ret, self)

    return ret
def drop_addin_json(self, xpath, start_text, replace_tag, attribute, caller):
    """Swap add-in elements for ``replace_tag`` elements holding the visible text.

    An element matched by ``xpath`` counts as an add-in when its text starts
    with ``start_text``.  It is replaced by a new ``replace_tag`` element with
    ``rel=attribute`` whose text is the old text minus everything up to the
    last ``}`` and one optional whitespace character.  Child elements of the
    add-in are passed to ``Manipulate.append_safe`` with the new element as
    target before the add-in is removed.  The tree is saved at the end.

    Args:
        xpath: XPath (``tei`` prefix available) selecting candidate elements.
        start_text: text prefix that marks a known add-in.
        replace_tag: tag of the element that replaces the add-in.
        attribute: value for the ``rel`` attribute of the replacement.
        caller: unused by this method.
    """
    # load the DOM
    tree = self.load_dom_tree()
    candidates = tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})

    for child in candidates:
        is_addin = child.text is not None and child.text.startswith(start_text)
        if not is_addin:
            continue

        tag_to_parse = re.sub(r'.+}\s?', '', child.text)

        new_element = etree.Element(replace_tag, rel=attribute)
        new_element.text = tag_to_parse
        child.addnext(new_element)

        # carry real child elements over to the replacement
        for subchild in child:
            if type(subchild) is etree._Element:
                Manipulate.append_safe(new_element, subchild, self)

        child.getparent().remove(child)

    self.save_tree(tree)
def tag_bibliography_refs(self):
    """Move reference paragraphs into ``back/ref-list`` as ``ref`` elements.

    Returns immediately when ``//back/ref-list`` already exists.  Otherwise
    ``back`` and ``ref-list`` are obtained through
    ``find_or_create_element``.  Within ``sec[@reflist="yes"]`` each ``title``
    is removed, and each ``p[@rend="ref"]`` (direct child, one level down, or
    inside a ``listitem``) is renamed to ``ref``, given an ``ID<uuid4>`` id,
    loses its ``rend`` attribute and is passed to ``Manipulate.append_safe``
    with the ref-list as target.  The tree is saved afterwards.
    """
    tree = self.load_dom_tree()

    if len(tree.xpath('//back/ref-list')) > 0:
        return

    self.find_or_create_element(tree, 'back', '//body', True)
    ref_list = self.find_or_create_element(tree, 'ref-list', '//back', False)

    # change this to find <reflist> elements after we're more certain of how to identify them
    candidates = tree.xpath(
        '//sec[@reflist="yes"]/p[@rend="ref"] | //sec[@reflist="yes"]/title '
        '| //sec[@reflist="yes"]/*/listitem/p[@rend="ref"] | '
        '//sec[@reflist="yes"]/*/p[@rend="ref"]')

    for refs in candidates:
        if refs.tag == 'title':
            self.debug.print_debug(self, u'Removing title element from reference item')
            refs.getparent().remove(refs)
            continue

        self.debug.print_debug(self, u'Tagging element "{0}" as reference item'.format(refs.tag))

        refs.tag = 'ref'
        refs.attrib['id'] = u'ID{0}'.format(uuid.uuid4())
        if 'rend' in refs.attrib:
            del refs.attrib['rend']

        Manipulate.append_safe(ref_list, refs, self)

    self.save_tree(tree)
def search_and_copy(last_node, move_node, nested_sibling, new_element, node, node_parent):
    """Transfer the nodes following ``node`` into ``new_element``.

    The following siblings of ``node`` are visited in order.  An element is
    passed to ``Manipulate.append_safe`` with ``new_element`` as target.  A
    text result becomes the tail of the previously handled node; if nothing
    was handled yet it becomes ``new_element.text``, or - when ``move_node``
    is true - the tail of ``node``, which is then passed to ``append_safe``
    as well.  Afterwards ``new_element`` is inserted after ``nested_sibling``
    or, when that is None, after ``node_parent`` (clearing its tail).

    Args:
        last_node: ignored on input; present for call compatibility.
        move_node: whether ``node`` itself moves into ``new_element``.
        nested_sibling: element after which ``new_element`` is inserted, or None.
        new_element: container that receives the nodes.
        node: node whose following siblings are transferred.
        node_parent: insertion anchor used when ``nested_sibling`` is None.

    Returns:
        None when ``move_node`` is true; otherwise ``new_element``, after the
        tail of ``node`` has been cleared.
    """
    text_types = (etree._ElementStringResult, etree._ElementUnicodeResult) \
        if False else None  # placeholder never used; type checks stay explicit below
    following = node.xpath('following-sibling::node()')
    last_append = None

    for new_node in following:
        if type(new_node) is etree._ElementStringResult or type(new_node) is etree._ElementUnicodeResult:
            if last_append is not None:
                last_append.tail = new_node
                continue

            last_append = new_element
            if move_node:
                node.tail = new_node
                Manipulate.append_safe(last_append, node, None)
            else:
                last_append.text = new_node
        else:
            Manipulate.append_safe(new_element, new_node, None)
            last_append = new_node

    if nested_sibling is not None:
        nested_sibling.addnext(new_element)
    else:
        node_parent.addnext(new_element)
        node_parent.tail = ''

    if not move_node:
        # remove the original tail (all text after the line break, for example)
        # <!--meTypeset:br-->A third break <-- "a third break" is the tail
        node.tail = ''
        return new_element

    return None
def tag_bibliography_non_csl(self, xpath, start_text, caller):
    """Turn the section around a ``start_text`` marker into bibliography entries.

    Every element matched by ``xpath`` whose text starts with ``start_text``
    has that marker removed from its text; the grandparent of the last such
    element is the section to convert.  The section is renamed to ``div``, a
    new ``div`` is added after it and the section is passed to
    ``Manipulate.append_safe`` with the new ``div`` as target.  TEI ``head``
    children of the section are dropped; TEI ``p`` children are renamed to
    ``ref`` (``target="None"``), wrapped in a ``p rend="Bibliography"`` and
    passed on to the new ``div``.  The old section is then removed from the
    new ``div`` and the tree is saved.

    Args:
        xpath: XPath (``tei`` prefix available) selecting marker candidates.
        start_text: marker text identifying the bibliography section.
        caller: unused by this method.
    """
    tei_prefix = '{http://www.tei-c.org/ns/1.0}'

    # load the DOM
    tree = self.load_dom_tree()
    change_element = None

    for child in tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
        try:
            if child.text.startswith(start_text):
                child.text = child.text.replace(start_text, '')
                change_element = child.getparent().getparent()
        except (AttributeError, TypeError):
            # best effort, as before: skip matches without text or without a
            # parent, but no longer hide unrelated errors
            continue

    if change_element is not None:
        # change the "sec" above to "div"
        change_element.tag = 'div'
        new_element = etree.Element('div')
        change_element.addnext(new_element)
        Manipulate.append_safe(new_element, change_element, self)

        # change all sub-elements to ref; a snapshot is iterated because the
        # children are removed or re-parented inside the loop
        for element in list(change_element):
            if element.tag == tei_prefix + 'head':
                self.debug.print_debug(
                    self, u'Dropping head element: {0}'.format(etree.tostring(element, encoding="unicode")))
                change_element.remove(element)
            elif element.tag == tei_prefix + 'p':
                outer = etree.Element('p')
                outer.attrib['rend'] = 'Bibliography'
                element.tag = 'ref'
                element.attrib['target'] = 'None'
                Manipulate.append_safe(outer, element, self)
                Manipulate.append_safe(new_element, outer, self)

        new_element.remove(change_element)

    self.save_tree(tree)
def tag_bibliography_non_csl(self, xpath, start_text, caller):
    """Convert the section that carries a ``start_text`` marker into references.

    Elements matched by ``xpath`` whose text starts with ``start_text`` lose
    that marker; the grandparent of the last one is taken as the section.
    The section is renamed to ``div``, a new ``div`` is added after it and
    the section is passed to ``Manipulate.append_safe`` with the new ``div``
    as target.  TEI ``head`` children are dropped; TEI ``p`` children become
    ``ref`` elements with ``target="None"``, each wrapped in a
    ``p rend="Bibliography"`` that is passed on to the new ``div``.  Finally
    the old section is removed from the new ``div`` and the tree is saved.

    Args:
        xpath: XPath (``tei`` prefix available) selecting marker candidates.
        start_text: marker text identifying the bibliography section.
        caller: unused by this method.
    """
    head_tag = '{http://www.tei-c.org/ns/1.0}head'
    para_tag = '{http://www.tei-c.org/ns/1.0}p'

    # load the DOM
    tree = self.load_dom_tree()
    change_element = None

    for child in tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
        text = child.text
        # matches without any text used to raise AttributeError here
        if text is None or not text.startswith(start_text):
            continue

        child.text = text.replace(start_text, '')

        try:
            change_element = child.getparent().getparent()
        except AttributeError:
            # the match has no parent; keep whatever was found before
            pass

    if change_element is not None:
        # change the "sec" above to "div"
        change_element.tag = 'div'
        new_element = etree.Element('div')
        change_element.addnext(new_element)
        Manipulate.append_safe(new_element, change_element, self)

        # change all sub-elements to ref; iterate over a snapshot since the
        # children are removed or moved while looping
        for element in list(change_element):
            if element.tag == head_tag:
                # encoding="unicode" yields text rather than bytes, so the
                # debug message shows the markup itself
                self.debug.print_debug(
                    self, u'Dropping head element: {0}'.format(etree.tostring(element, encoding="unicode")))
                change_element.remove(element)
            elif element.tag == para_tag:
                outer = etree.Element('p')
                outer.attrib['rend'] = 'Bibliography'
                element.tag = 'ref'
                element.attrib['target'] = 'None'
                Manipulate.append_safe(outer, element, self)
                Manipulate.append_safe(new_element, outer, self)

        new_element.remove(change_element)

    self.save_tree(tree)
def enclose_bibliography_tags(self, xpath, top_tag, sub_tag, attrib, attribvalue):
    """Gather the elements matched by ``xpath`` into a bibliography block.

    Example::

        tei_manipulator.enclose_bibliography_tags(
            '//tei:p[@rend="Bibliography"]', 'back', 'div', 'type', 'bibliogr')

    The ``back`` element is obtained via ``find_or_create_element``.  An
    existing ``tei:back/tei:div[@type="bibliogr"]`` is reused; otherwise a
    ``sub_tag`` element with ``attrib=attribvalue`` is created as first child
    of the top element (a new ``top_tag`` element if ``back`` does not carry
    that tag).  All ``xpath`` matches are passed to ``Manipulate.append_safe``
    with the block as target.  Afterwards, if the matches contain ``tei:ref``
    children, every ref whose ``target`` holds no ``:`` is turned into a
    ``p rend="Bibliography"`` that replaces its parent; if there are no such
    children the matches themselves are retagged as ``p rend="Bibliography"``.

    Args:
        xpath: XPath (``tei`` prefix available) selecting bibliography entries.
        top_tag: expected tag of the enclosing top-level element.
        sub_tag: tag of the bibliography block created when none exists.
        attrib: attribute name set on a newly created block.
        attribvalue: attribute value set on a newly created block.

    Returns:
        False when ``xpath`` matches nothing (the tree is not saved),
        True once the tree has been rewritten and saved.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    tei_prefix = '{http://www.tei-c.org/ns/1.0}'

    # load the DOM
    tree = self.load_dom_tree()

    if len(tree.xpath(xpath, namespaces=tei_ns)) == 0:
        return False

    # find the parent
    parent = self.find_or_create_element(tree, 'back', '//tei:body', True)
    parent_is_top = parent.tag in (top_tag, tei_prefix + top_tag)

    if parent_is_top:
        new_element = parent
    else:
        new_element = etree.Element(top_tag)
        self.debug.print_debug(self, u'Mismatch {0} {1}.'.format(parent.tag, top_tag))

    # explicit emptiness test instead of a bare ``except`` around ``[0]``
    existing_blocks = tree.xpath('//tei:back/tei:div[@type="bibliogr"]', namespaces=tei_ns)
    if existing_blocks:
        sub_element = existing_blocks[0]
        self.debug.print_debug(self, u'Found existing bibliography block. Using it.')
    else:
        self.debug.print_debug(self, u'Creating bibliography block.')
        sub_element = etree.Element(sub_tag)
        sub_element.attrib[attrib] = attribvalue
        new_element.insert(0, sub_element)

    if not parent_is_top:
        parent.addnext(new_element)

    for element in tree.xpath(xpath, namespaces=tei_ns):
        Manipulate.append_safe(sub_element, element, self)

    # remove all refs within
    inner_refs = tree.xpath(xpath + u'/tei:ref', namespaces=tei_ns)
    if inner_refs:
        for ref in inner_refs:
            # ensure that the ref is just a dud and not a valid link to a protocol schema
            if 'target' in ref.attrib and ':' not in ref.attrib['target']:
                ref.tag = 'p'
                ref.attrib['rend'] = 'Bibliography'
                del ref.attrib['target']

                ref_parent = ref.getparent()
                ref_parent.addnext(ref)

                # the parent may already be detached when it held several refs
                grandparent = ref_parent.getparent()
                if grandparent is not None:
                    grandparent.remove(ref_parent)
    else:
        for ref in tree.xpath(xpath, namespaces=tei_ns):
            ref.tag = 'p'
            ref.attrib['rend'] = 'Bibliography'

    self.save_tree(tree)

    self.debug.print_debug(self, u'Processed bibliography')
    return True
def enclose_bibliography_tags(self, xpath, top_tag, sub_tag, attrib, attribvalue):
    """Collect the ``xpath`` matches in the document's bibliography block.

    Example::

        tei_manipulator.enclose_bibliography_tags(
            '//tei:p[@rend="Bibliography"]', 'back', 'div', 'type', 'bibliogr')

    ``back`` is obtained via ``find_or_create_element``.  If a
    ``tei:back/tei:div[@type="bibliogr"]`` exists it is reused; otherwise a
    ``sub_tag`` element with ``attrib=attribvalue`` is inserted as first
    child of the top element (a fresh ``top_tag`` element when ``back`` has a
    different tag).  Every ``xpath`` match is passed to
    ``Manipulate.append_safe`` with that block as target.  If the matches
    then contain ``tei:ref`` children, each ref whose ``target`` has no ``:``
    becomes a ``p rend="Bibliography"`` replacing its parent; without such
    children the matches themselves are retagged ``p rend="Bibliography"``.

    Args:
        xpath: XPath (``tei`` prefix available) selecting bibliography entries.
        top_tag: expected tag of the enclosing top-level element.
        sub_tag: tag of the bibliography block created when none exists.
        attrib: attribute name set on a newly created block.
        attribvalue: attribute value set on a newly created block.

    Returns:
        False when ``xpath`` matches nothing (the tree is not saved),
        True once the tree has been rewritten and saved.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    qualified_top_tag = '{http://www.tei-c.org/ns/1.0}' + top_tag

    # load the DOM
    tree = self.load_dom_tree()

    if not tree.xpath(xpath, namespaces=tei_ns):
        return False

    # find the parent
    parent = self.find_or_create_element(tree, 'back', '//tei:body', True)
    mismatch = parent.tag != top_tag and parent.tag != qualified_top_tag

    if mismatch:
        new_element = etree.Element(top_tag)
        self.debug.print_debug(self, u'Mismatch {0} {1}.'.format(parent.tag, top_tag))
    else:
        new_element = parent

    try:
        sub_element = tree.xpath('//tei:back/tei:div[@type="bibliogr"]', namespaces=tei_ns)[0]
    except IndexError:
        # only "no such block" triggers creation; other errors propagate
        self.debug.print_debug(self, u'Creating bibliography block.')
        sub_element = etree.Element(sub_tag)
        sub_element.attrib[attrib] = attribvalue
        new_element.insert(0, sub_element)
    else:
        self.debug.print_debug(self, u'Found existing bibliography block. Using it.')

    if mismatch:
        parent.addnext(new_element)

    for element in tree.xpath(xpath, namespaces=tei_ns):
        Manipulate.append_safe(sub_element, element, self)

    # remove all refs within
    inner_refs = tree.xpath(xpath + u'/tei:ref', namespaces=tei_ns)

    if len(inner_refs) > 0:
        for ref in inner_refs:
            # ensure that the ref is just a dud and not a valid link to a protocol schema
            if 'target' not in ref.attrib or ':' in ref.attrib['target']:
                continue

            ref.tag = 'p'
            ref.attrib['rend'] = 'Bibliography'
            del ref.attrib['target']

            ref_parent = ref.getparent()
            ref_parent.addnext(ref)

            # A parent that held several refs is already detached after the
            # first one; getparent() is then None and the unguarded
            # ``.remove`` call used to raise AttributeError.
            if ref_parent.getparent() is not None:
                ref_parent.getparent().remove(ref_parent)
    else:
        for ref in tree.xpath(xpath, namespaces=tei_ns):
            ref.tag = 'p'
            ref.attrib['rend'] = 'Bibliography'

    self.save_tree(tree)

    self.debug.print_debug(self, u'Processed bibliography')
    return True