Exemple #1
0
    def enclose_and_change_self_size(self, outer_xpath, size_attribute, tag,
                                     change_tag):
        """Wrap each element matched by ``outer_xpath`` in a new ``tag`` element.

        Every match receives a ``meTypesetSize`` attribute. It is then renamed
        to ``change_tag`` unless it, or one of its direct children, already is
        a TEI ``change_tag`` element; in that case it is tagged ``REMOVE`` and
        dissolved with ``etree.strip_tags`` once it has been wrapped. Finally
        any ``bold`` token is dropped from its ``rend`` attribute, and the tree
        is written back with ``self.save_tree``.

        :param outer_xpath: XPath (may use the ``tei`` prefix) selecting the
            elements to process
        :param size_attribute: value stored in ``meTypesetSize``
        :param tag: tag name of the new enclosing element
        :param change_tag: local tag name the matched element is renamed to
        """
        tei_namespace = 'http://www.tei-c.org/ns/1.0'
        qualified_change_tag = '{http://www.tei-c.org/ns/1.0}' + change_tag
        tree = self.load_dom_tree()

        # search the tree and grab the parent
        for child in tree.xpath(outer_xpath,
                                namespaces={'tei': tei_namespace}):
            self.debug.print_debug(
                self, u'Enclosing and changing size: {0} to {1}'.format(
                    child.tag, change_tag))
            new_element = etree.Element(tag)
            child.attrib[u'meTypesetSize'] = size_attribute

            # an element that already is (or directly contains) the target
            # tag must not be nested inside another one: mark it for removal
            already_tagged = (child.tag == qualified_change_tag or any(
                sub_element.tag == qualified_change_tag
                for sub_element in child))
            child.tag = 'REMOVE' if already_tagged else change_tag

            child.addnext(new_element)
            Manipulate.append_safe(new_element, child, self)

            if child.tag == 'REMOVE':
                etree.strip_tags(child.getparent(), 'REMOVE')

            # .get() returns None for a missing attribute instead of raising
            # KeyError the way attrib['rend'] does
            rend = child.get('rend')
            if rend is not None and u'bold' in rend:
                child.attrib[u'rend'] = rend.replace(u'bold', u'')

        self.save_tree(tree)
    def enclose_and_change_self_size(self, outer_xpath, size_attribute, tag, change_tag):
        """Enclose every element selected by ``outer_xpath`` in a new ``tag`` element.

        For each match the method sets ``meTypesetSize`` to ``size_attribute``, renames the match to
        ``change_tag`` (or marks it ``REMOVE`` and strips it when it or a direct child already is a TEI
        ``change_tag`` element), wraps it in the new element and removes ``bold`` from its ``rend``
        attribute when one is present. The tree is saved through ``self.save_tree`` afterwards.

        :param outer_xpath: XPath expression, evaluated with the ``tei`` namespace prefix bound
        :param size_attribute: value written to the ``meTypesetSize`` attribute
        :param tag: tag of the wrapper element that is created
        :param change_tag: un-namespaced tag the match is renamed to
        """
        tree = self.load_dom_tree()
        tei_change_tag = '{http://www.tei-c.org/ns/1.0}' + change_tag

        # search the tree and grab the parent
        for child in tree.xpath(outer_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
            self.debug.print_debug(self, u'Enclosing and changing size: {0} to {1}'.format(child.tag, change_tag))
            new_element = etree.Element(tag)
            child.attrib[u'meTypesetSize'] = size_attribute

            if child.tag == tei_change_tag or any(sub_element.tag == tei_change_tag for sub_element in child):
                # the target tag is already present here; flag the element so it can be stripped below
                child.tag = 'REMOVE'
            else:
                child.tag = change_tag

            child.addnext(new_element)
            Manipulate.append_safe(new_element, child, self)

            if child.tag == 'REMOVE':
                etree.strip_tags(child.getparent(), 'REMOVE')

            # attrib['rend'] raises KeyError when the attribute is absent; get() yields None instead
            rend = child.get('rend')
            if rend is not None and u'bold' in rend:
                child.attrib[u'rend'] = rend.replace(u'bold', u'')

        self.save_tree(tree)
Exemple #3
0
    def find_or_create_element(self, tree, element_tag, add_xpath, is_sibling):
        """Return the first ``element_tag`` element of ``tree``, creating it if absent.

        Example: ``find_or_create_element(tree, 'back', '//body', True)``.

        :param tree: parsed document that is searched and, if needed, modified
        :param element_tag: tag looked up with ``//element_tag`` and used for
            the new element when nothing is found
        :param add_xpath: XPath of the anchor element used for insertion
        :param is_sibling: if true the new element is inserted directly after
            the anchor; otherwise it is passed to ``Manipulate.append_safe``
            together with the anchor
        :return: the existing or the newly created element
        :raises IndexError: if an element must be created but ``add_xpath``
            matches nothing
        """
        ret = None
        try:
            ret = tree.xpath(u'//' + element_tag,
                             namespaces={'tei':
                                         'http://www.tei-c.org/ns/1.0'})[0]
        except (IndexError, etree.XPathError):
            # no match, or an expression lxml could not evaluate: create the
            # element below. Other exceptions are no longer swallowed.
            self.debug.print_debug(
                self,
                u'Unable to find an existing {0} element.'.format(element_tag))
        else:
            self.debug.print_debug(
                self, u'Found existing {0}. Using it.'.format(element_tag))

        if ret is None:
            self.debug.print_debug(
                self, u'Creating new {0} element.'.format(element_tag))
            ret = tree.xpath(add_xpath)[0]
            new_element = etree.Element(element_tag)

            if is_sibling:
                ret.addnext(new_element)
            else:
                Manipulate.append_safe(ret, new_element, self)

            ret = new_element

        return ret
    @staticmethod
    def search_and_copy(last_node, move_node, nested_sibling, new_element, node, node_parent):
        """Move everything that follows ``node`` into ``new_element`` and attach ``new_element`` to the tree.

        Declared static because callers invoke it as ``self.search_and_copy(...)`` with exactly these six
        arguments and the function takes no ``self``.

        :param last_node: ignored on input; kept for signature compatibility
        :param move_node: when true, ``node`` itself is moved into ``new_element`` (with the first following
            text as its tail) instead of that text becoming ``new_element.text``
        :param nested_sibling: element after which ``new_element`` is inserted; when None it is inserted after
            ``node_parent`` and ``node_parent.tail`` is cleared
        :param new_element: element that receives the following siblings
        :param node: node whose following siblings (elements and text) are relocated
        :param node_parent: element used as insertion anchor when ``nested_sibling`` is None
        :return: ``new_element`` when ``move_node`` is false, otherwise None
        """
        last_append = None
        for new_node in node.xpath('following-sibling::node()'):
            # XPath returns text nodes as lxml "smart string" results rather than elements
            is_text = (type(new_node) is etree._ElementStringResult or
                       type(new_node) is etree._ElementUnicodeResult)

            if not is_text:
                Manipulate.append_safe(new_element, new_node, None)
                last_append = new_node
            elif last_append is not None:
                # text following an already copied element belongs to that element's tail
                last_append.tail = new_node
            else:
                # first piece of text: it opens the new element
                last_append = new_element
                if move_node:
                    node.tail = new_node
                    Manipulate.append_safe(new_element, node, None)
                else:
                    new_element.text = new_node

        if nested_sibling is None:
            node_parent.addnext(new_element)
            node_parent.tail = ''
        else:
            nested_sibling.addnext(new_element)

        if move_node:
            return None

        # remove the original tail (all text after the line break, for example)
        # <!--meTypeset:br-->A third break <-- "a third break" is the tail
        node.tail = ''
        return new_element
    def tag_bibliography_refs(self):
        """Collect reference paragraphs into a ``back/ref-list`` element.

        Does nothing when ``//back/ref-list`` already exists. Otherwise ``back`` and ``ref-list`` are obtained
        through ``find_or_create_element``; every ``p[@rend="ref"]`` found below a ``sec[@reflist="yes"]`` is
        renamed to ``ref``, given a fresh ``ID<uuid4>`` id, stripped of its ``rend`` attribute and handed to
        ``Manipulate.append_safe`` with the ref-list. ``title`` children of such sections are removed.
        The tree is saved only when changes were attempted.
        """
        tree = self.load_dom_tree()

        # a reference list is already present: leave the document untouched
        if tree.xpath('//back/ref-list'):
            return

        self.find_or_create_element(tree, 'back', '//body', True)
        ref_list = self.find_or_create_element(tree, 'ref-list', '//back', False)

        # change this to find <reflist> elements after we're more certain of how to identify them
        for refs in tree.xpath('//sec[@reflist="yes"]/p[@rend="ref"] | //sec[@reflist="yes"]/title '
                               '| //sec[@reflist="yes"]/*/listitem/p[@rend="ref"] | '
                               '//sec[@reflist="yes"]/*/p[@rend="ref"]'):

            if refs.tag == 'title':
                self.debug.print_debug(self, u'Removing title element from reference item')
                refs.getparent().remove(refs)
            else:
                self.debug.print_debug(self, u'Tagging element "{0}" as reference item'.format(refs.tag))
                refs.tag = 'ref'
                # str.format stringifies the UUID itself; the Python-2-only unicode() builtin is not needed
                refs.attrib['id'] = u'ID{0}'.format(uuid.uuid4())

                if 'rend' in refs.attrib:
                    del refs.attrib['rend']

                Manipulate.append_safe(ref_list, refs, self)

        self.save_tree(tree)
Exemple #6
0
    def drop_addin_json(self, xpath, start_text, replace_tag, attribute,
                        caller):
        """Replace add-in elements whose text starts with ``start_text``.

        Each matching element is swapped for a new ``replace_tag`` element
        carrying ``rel=attribute``. The new element's text is the original
        text with everything up to the last ``}`` (and one optional
        whitespace character) removed. Child elements of exact type
        ``etree._Element`` are handed to ``Manipulate.append_safe`` with the
        new element, then the original element is removed and the tree saved.

        :param xpath: XPath (``tei`` prefix available) selecting candidates
        :param start_text: prefix identifying a known add-in
        :param replace_tag: tag of the replacement element
        :param attribute: value of the replacement's ``rel`` attribute
        :param caller: not used by this method
        """
        # load the DOM
        tree = self.load_dom_tree()
        tei_namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}

        for child in tree.xpath(xpath, namespaces=tei_namespaces):
            text = child.text
            # skip anything that is not a known addin
            if text is None or not text.startswith(start_text):
                continue

            replacement = etree.Element(replace_tag, rel=attribute)
            replacement.text = re.sub(r'.+}\s?', '', text)
            child.addnext(replacement)

            for subchild in child:
                if type(subchild) is etree._Element:
                    Manipulate.append_safe(replacement, subchild, self)

            child.getparent().remove(child)

        self.save_tree(tree)
    def do_list_bibliography(self, xpath):
        """Convert a trailing list that looks like a bibliography into ``p``/``ref`` entries.

        For each element in ``xpath``: if its tag ends in ``list`` the first child's stripped text is tested,
        if it ends in ``item`` its own stripped text is tested and its parent is the list to convert. When the
        text contains a year (19xx/20xx with optional letter suffix) or ``n.d.``, the list is renamed to a TEI
        ``div`` with ``rend="Bibliography"``, and every child is wrapped in a new ``p rend="Bibliography"``
        and renamed to a TEI ``ref`` with ``target="None"``. ``self.gv.used_list_method`` is set on success.

        :param xpath: iterable of candidate elements (typically the result of an XPath query)
        :return: True if at least one list was converted, else False
        """
        found = False
        # raw string: '\d' and '\.' are invalid escape sequences in a normal string literal
        year_test = re.compile(r'((19|20)\d{2}[a-z]?)|(n\.d\.)')

        for last_list in xpath:
            if last_list.tag.endswith('list'):
                if len(last_list) == 0:
                    continue
                sample, container = last_list[0], last_list
                message = u'Found a list as last element. Treating as bibliography.'
            elif last_list.tag.endswith('item'):
                sample, container = last_list, last_list.getparent()
                message = u'Found a list as last element via item. Treating as bibliography.'
            else:
                self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                             u'bibliography.'.format(xpath[0].tag))
                continue

            if not year_test.search(self.get_stripped_text(sample)):
                continue

            # it is a list, so change to reference list
            self.debug.print_debug(self, message)
            self.gv.used_list_method = True
            found = True
            container.tag = '{http://www.tei-c.org/ns/1.0}div'
            container.attrib['rend'] = u'Bibliography'

            # now convert each line; iterate over a snapshot because the loop re-parents the children
            for list_item in list(container):
                new_element = etree.Element('p')
                new_element.attrib['rend'] = u'Bibliography'

                list_item.addnext(new_element)
                Manipulate.append_safe(new_element, list_item, self)
                list_item.tag = '{http://www.tei-c.org/ns/1.0}ref'
                list_item.attrib['target'] = 'None'

        return found
    def do_list_bibliography(self, xpath):
        """Turn a final list whose entries contain a year into bibliography markup.

        Candidates whose tag ends in ``list`` are judged by the stripped text of their first child; candidates
        whose tag ends in ``item`` are judged by their own stripped text and their parent is converted. A
        candidate qualifies when that text contains a four-digit number starting with 1 or 2 (optionally
        followed by a lowercase letter) or ``n.d.``. A qualifying list becomes a TEI ``div`` with
        ``rend="Bibliography"``; each of its children is wrapped in ``p rend="Bibliography"`` and renamed to
        a TEI ``ref`` with ``target="None"``. ``self.gv.used_list_method`` is set to True on conversion.

        :param xpath: iterable of candidate elements
        :return: True when any candidate was converted, otherwise False
        """
        found = False
        # raw string avoids the invalid '\d' / '\.' escape sequences of the plain literal
        year_test = re.compile(r'((1|2)\d{3}[a-z]?)|(n\.d\.)')

        for last_list in xpath:
            tag = last_list.tag

            if tag.endswith('list'):
                if len(last_list) == 0:
                    continue
                text_source = last_list[0]
                target_list = last_list
                message = u'Found a list as last element. Treating as bibliography.'
            elif tag.endswith('item'):
                text_source = last_list
                target_list = last_list.getparent()
                message = u'Found a list as last element via item. Treating as bibliography.'
            else:
                self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                             u'bibliography.'.format(xpath[0].tag))
                continue

            text = self.get_stripped_text(text_source)
            if year_test.search(text) is None:
                continue

            # it is a list, so change to reference list
            self.debug.print_debug(self, message)
            self.gv.used_list_method = True
            found = True
            target_list.tag = '{http://www.tei-c.org/ns/1.0}div'
            target_list.attrib['rend'] = u'Bibliography'

            # now convert each line (snapshot first: the children are moved while we loop)
            for list_item in list(target_list):
                new_element = etree.Element('p')
                new_element.attrib['rend'] = u'Bibliography'

                list_item.addnext(new_element)
                Manipulate.append_safe(new_element, list_item, self)
                list_item.tag = '{http://www.tei-c.org/ns/1.0}ref'
                list_item.attrib['target'] = 'None'

        return found
Exemple #9
0
    def process_node_for_tags(self,
                              nested_sibling,
                              node,
                              search_xpath,
                              tag_name,
                              new_tag='SAME'):
        """Split the content following ``node`` off into a new element.

        A new element (tagged ``new_tag``, or ``tag_name`` when ``new_tag``
        is ``'SAME'``) is created and filled according to the first node that
        ``//<search_xpath>/following-sibling::node()`` returns:

        * an element: it is passed to ``Manipulate.append_safe``;
        * text equal to ``node.tail``: siblings are relocated via
          ``search_and_copy`` (after ``handle_nested_elements`` when the
          parent of ``node`` is not a ``tag_name`` element);
        * any other text: ``node.tail`` becomes the new element's tail.

        Unless ``search_and_copy`` returned None, the new element is then
        inserted after ``last_node`` and ``node`` is removed from its parent.
        Returns None; returns early when the query yields nothing.

        NOTE(review): ``handle_nested_elements`` can return a triple of None;
        that case is passed on unchanged, as before - confirm it cannot occur.

        :param nested_sibling: forwarded to ``search_and_copy``
        :param node: marker node to process (the inline comments suggest a
            ``<!--meTypeset:br-->`` comment - confirm against callers)
        :param search_xpath: path fragment inserted into the sibling query
        :param tag_name: tag of the enclosing element the split operates on
        :param new_tag: tag for the new element, ``'SAME'`` to reuse
            ``tag_name``
        """
        target_tag = tag_name if new_tag == 'SAME' else new_tag

        last_node = node
        new_element = etree.Element(target_tag)
        new_element.text = ''

        sibling_query = '//{0}/following-sibling::node()'.format(search_xpath)
        nodes_to_copy = node.xpath(sibling_query)
        if len(nodes_to_copy) == 0:
            return

        self.debug.print_debug(
            self,
            u'Found {0} nodes to copy: {1}'.format(len(nodes_to_copy),
                                                   nodes_to_copy))

        # only the first following node decides how the split is done
        element = nodes_to_copy[0]
        # noinspection PyProtectedMember
        if type(element) is etree._Element:
            Manipulate.append_safe(new_element, element, self)
        elif node.tail != element:
            new_element.tail = node.tail
            node.tail = ''
        else:
            # this should handle cases where a tag spans the break
            # for example: <p>Some text <italic>some italic text<!--meTypeset:br-->
            # more italic text</italic> more text</p>
            node_parent = node.getparent()
            move_node = False

            if node_parent.tag != tag_name:
                # the node is nested inside styling elements (bold etc.):
                # rebuild that nesting for the tail and continue from the
                # outermost rebuilt element
                move_node, node, node_parent = self.handle_nested_elements(
                    node_parent, move_node, node, node_parent, None,
                    tag_name, [], [])

            # search for all siblings and copy them into a new element below
            last_node = self.search_and_copy(last_node, move_node,
                                             nested_sibling, new_element,
                                             node, node_parent)

        if last_node is not None:
            last_node.addnext(new_element)
            node.getparent().remove(node)
    def process_node_for_tags(self, nested_sibling, node, search_xpath, tag_name, new_tag='SAME'):
        """Move what follows ``node`` into a newly created element and drop ``node``.

        The new element is tagged ``new_tag`` (``tag_name`` when ``new_tag`` is ``'SAME'``). The first result
        of ``//<search_xpath>/following-sibling::node()`` selects the strategy: an element is passed to
        ``Manipulate.append_safe``; text identical to ``node.tail`` triggers ``search_and_copy`` (preceded by
        ``handle_nested_elements`` if the parent's tag differs from ``tag_name``); other text simply moves
        ``node.tail`` onto the new element. When ``last_node`` is still set afterwards, the new element is
        inserted after it and ``node`` is removed. Nothing happens if the query returns no nodes.

        NOTE(review): a (None, None, None) result from ``handle_nested_elements`` is not intercepted here,
        exactly as in the previous implementation - verify that callers never hit it.

        :param nested_sibling: passed through to ``search_and_copy``
        :param node: the marker node being processed
        :param search_xpath: XPath fragment used to locate following siblings
        :param tag_name: tag of the element the marker is expected to live in
        :param new_tag: tag of the element to create; ``'SAME'`` means ``tag_name``
        :return: None
        """
        if new_tag == 'SAME':
            created_tag = tag_name
        else:
            created_tag = new_tag

        new_element = etree.Element(created_tag)
        new_element.text = ''
        last_node = node

        nodes_to_copy = node.xpath('//{0}/following-sibling::node()'.format(search_xpath))
        count = len(nodes_to_copy)
        if count == 0:
            return

        self.debug.print_debug(self, u'Found {0} nodes to copy: {1}'.format(count, nodes_to_copy))

        first_following = nodes_to_copy[0]
        # noinspection PyProtectedMember
        first_is_element = type(first_following) is etree._Element

        if first_is_element:
            Manipulate.append_safe(new_element, first_following, self)
        elif node.tail == first_following:
            # this should handle cases where a tag spans the break
            # for example: <p>Some text <italic>some italic text<!--meTypeset:br-->
            # more italic text</italic> more text</p>
            node_parent = node.getparent()
            move_node = False

            if node_parent.tag != tag_name:
                # the element here is nested (bold etc), so the tail has to become the tail of the outermost
                # element and "node" is switched to that element so its siblings are the rest of the content
                ancestor_tags = []
                ancestor_elements = []
                move_node, node, node_parent = self.handle_nested_elements(node_parent, move_node, node,
                                                                           node_parent, None, tag_name,
                                                                           ancestor_tags, ancestor_elements)

            # search for all siblings and copy them into a new element below
            last_node = self.search_and_copy(last_node, move_node, nested_sibling, new_element, node,
                                             node_parent)
        else:
            new_element.tail = node.tail
            node.tail = ''

        if last_node is not None:
            last_node.addnext(new_element)
            node.getparent().remove(node)
    def enclose(self, start_xpath, select_xpath):
        """Insert a new ``div`` before the first ``start_xpath`` match and fill it with the ``select_xpath`` matches.

        Both expressions are evaluated with the ``tei`` prefix bound. Each selected element is handed to
        ``Manipulate.append_safe`` together with the new ``div``; the tree is saved afterwards.

        :param start_xpath: XPath whose first match marks the insertion point
        :param select_xpath: XPath selecting the elements to move
        :raises IndexError: if ``start_xpath`` matches nothing
        """
        tei_namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}
        tree = self.load_dom_tree()

        anchor = tree.xpath(start_xpath, namespaces=tei_namespaces)[0]
        wrapper = etree.Element('div')
        anchor.addprevious(wrapper)

        self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(select_xpath))

        # gather the elements first, then move them one by one
        selected = tree.xpath(select_xpath, namespaces=tei_namespaces)
        for element in selected:
            Manipulate.append_safe(wrapper, element, self)

        self.save_tree(tree)
Exemple #12
0
    def enclose(self, start_xpath, select_xpath):
        """Gather the elements matched by ``select_xpath`` in a new ``div``.

        The ``div`` is placed immediately before the first element matched by ``start_xpath``; every selected
        element is then passed to ``Manipulate.append_safe`` with that ``div``. The modified tree is written
        back with ``self.save_tree``.

        :param start_xpath: XPath (``tei`` prefix available) locating the insertion anchor
        :param select_xpath: XPath (``tei`` prefix available) locating the elements to enclose
        :raises IndexError: when ``start_xpath`` has no match
        """
        tree = self.load_dom_tree()

        def query(expression):
            # every lookup in this method uses the same TEI prefix mapping
            return tree.xpath(expression, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})

        first_match = query(start_xpath)[0]
        div = etree.Element('div')
        first_match.addprevious(div)

        self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(select_xpath))

        for element in query(select_xpath):
            Manipulate.append_safe(div, element, self)

        self.save_tree(tree)
Exemple #13
0
    def handle_nested_elements(iter_node, move_node, node, node_parent,
                               outer_node, tag_name, tail_stack,
                               tail_stack_objects):
        if iter_node is None:
            return None, None, None

        while iter_node.tag != tag_name:
            tail_stack.append(iter_node.tag)
            tail_stack_objects.append(iter_node)
            iter_node = iter_node.getparent()
            if iter_node is None:
                return None, None, None

        # get the tail (of the comment) and style it
        append_location = None
        tail_text = node.tail
        iterator = 0
        tail_stack.reverse()
        tail_stack_objects.reverse()
        # rebuild the styled tree on a set of subelements
        for node_to_add in tail_stack:
            sub_element = etree.Element(node_to_add)
            if iterator == len(tail_stack) - 1:
                sub_element.text = node.tail

            if iterator == 0:
                outer_node = sub_element

            iterator += 1
            if append_location is None:
                tail_stack_objects[0].addnext(sub_element)
                append_location = sub_element
            else:
                Manipulate.append_safe(append_location, sub_element, None)
                append_location = sub_element

        # remove the old node (this is in the element above)
        node.getparent().remove(node)
        # set the search node to the outermost node so that we can find siblings
        node_parent = iter_node
        node = outer_node
        move_node = True
        return move_node, node, node_parent
Exemple #14
0
    def do_cit_bibliography(self, xpath):
        """Convert a trailing run of TEI ``cit`` elements into bibliography entries.

        For every ``cit`` element in ``xpath``, that element and all ``cit``
        siblings directly preceding it are each wrapped in a new
        ``p rend="Bibliography"`` and renamed to a TEI ``ref`` with
        ``target="None"``.

        :param xpath: iterable of candidate elements
        :return: True if at least one ``cit`` element was found, else False
        """
        cit_tag = '{http://www.tei-c.org/ns/1.0}cit'
        found = False

        for last_list in xpath:
            if last_list.tag != cit_tag:
                self.debug.print_debug(
                    self, u'Last element in document was {0}. Not treating as '
                    u'bibliography.'.format(xpath[0].tag))
                continue

            # it is a list, so change to reference list
            self.debug.print_debug(
                self,
                u'Found a cit as last element. Treating as bibliography.')
            found = True

            # collect the preceding cit siblings before touching the tree;
            # getprevious() returns None at the first child, so stop there
            # instead of dereferencing None
            citations = []
            sibling = last_list.getprevious()
            while sibling is not None and sibling.tag == cit_tag:
                citations.append(sibling)
                sibling = sibling.getprevious()
            citations.append(last_list)

            for citation in citations:
                new_element = etree.Element('p')
                new_element.attrib['rend'] = u'Bibliography'

                citation.addnext(new_element)
                Manipulate.append_safe(new_element, citation, self)
                citation.tag = '{http://www.tei-c.org/ns/1.0}ref'
                citation.attrib['target'] = 'None'

        return found
    def handle_nested_elements(iter_node, move_node, node, node_parent, outer_node, tag_name, tail_stack,
                               tail_stack_objects):
        if iter_node is None:
            return None, None, None

        while iter_node.tag != tag_name:
            tail_stack.append(iter_node.tag)
            tail_stack_objects.append(iter_node)
            iter_node = iter_node.getparent()
            if iter_node is None:
                return None, None, None

        # get the tail (of the comment) and style it
        append_location = None
        tail_text = node.tail
        iterator = 0
        tail_stack.reverse()
        tail_stack_objects.reverse()
        # rebuild the styled tree on a set of subelements
        for node_to_add in tail_stack:
            sub_element = etree.Element(node_to_add)
            if iterator == len(tail_stack) - 1:
                sub_element.text = node.tail

            if iterator == 0:
                outer_node = sub_element

            iterator += 1
            if append_location is None:
                tail_stack_objects[0].addnext(sub_element)
                append_location = sub_element
            else:
                Manipulate.append_safe(append_location, sub_element, None)
                append_location = sub_element

        # remove the old node (this is in the element above)
        node.getparent().remove(node)
        # set the search node to the outermost node so that we can find siblings
        node_parent = iter_node
        node = outer_node
        move_node = True
        return move_node, node, node_parent
Exemple #16
0
    def do_cit_bibliography(self, xpath):
        """Treat a closing run of TEI ``cit`` elements as the bibliography.

        Each ``cit`` element found in ``xpath`` is converted together with the unbroken run of ``cit`` siblings
        in front of it: every one is wrapped in ``p rend="Bibliography"`` and renamed to a TEI ``ref`` with
        ``target="None"``.

        :param xpath: iterable of candidate elements
        :return: True when a ``cit`` element was encountered, otherwise False
        """
        found = False

        for last_list in xpath:

            if last_list.tag == '{http://www.tei-c.org/ns/1.0}cit':
                # it is a list, so change to reference list
                self.debug.print_debug(self, u'Found a cit as last element. Treating as bibliography.')
                found = True

                sibling_tag = last_list.tag
                to_convert = []

                # walk backwards over directly preceding cit elements; the None check stops the walk at the
                # parent's first child, where getprevious() has nothing left to return
                sibling = last_list.getprevious()
                while sibling is not None and sibling.tag == sibling_tag:
                    to_convert.append(sibling)
                    sibling = sibling.getprevious()

                to_convert.append(last_list)

                for entry in to_convert:
                    new_element = etree.Element('p')
                    new_element.attrib['rend'] = u'Bibliography'

                    entry.addnext(new_element)
                    Manipulate.append_safe(new_element, entry, self)
                    entry.tag = '{http://www.tei-c.org/ns/1.0}ref'
                    entry.attrib['target'] = 'None'

            else:
                self.debug.print_debug(self, u'Last element in document was {0}. Not treating as '
                                             u'bibliography.'.format(xpath[0].tag))
        return found
    def enclose_all(self, start_xpath, new_enclose, start_index):
        """Collect every element matched by ``start_xpath`` inside one new ``new_enclose`` element.

        The new element is inserted before the parent of the first match; each match is then passed to
        ``Manipulate.append_safe`` with it. With no matches the new element is never attached. The tree is
        saved in either case.

        :param start_xpath: XPath (``tei`` prefix available) selecting the elements to enclose
        :param new_enclose: tag of the enclosing element
        :param start_index: not used by this method
        """
        tree = self.load_dom_tree()

        self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(start_xpath))

        # search the tree and grab the elements
        matches = tree.xpath(start_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})
        wrapper = etree.Element(new_enclose)

        # move the elements
        for position, element in enumerate(matches):
            if position == 0:
                # attach the wrapper once, just ahead of the first match's parent
                element.getparent().addprevious(wrapper)

            Manipulate.append_safe(wrapper, element, self)

        self.save_tree(tree)
    def find_or_create_element(self, tree, element_tag, add_xpath, is_sibling):
        """Look up the first ``element_tag`` element in ``tree`` and create one if none exists.

        Usage example: ``find_or_create_element(tree, 'back', '//body', True)``.

        :param tree: parsed document to search and, if required, extend
        :param element_tag: tag searched for as ``//element_tag``; also the tag of a newly created element
        :param add_xpath: XPath whose first match is the anchor for a newly created element
        :param is_sibling: true inserts the new element right after the anchor; false passes anchor and new
            element to ``Manipulate.append_safe``
        :return: the element that was found or created
        :raises IndexError: when creation is required and ``add_xpath`` has no match
        """
        ret = None
        try:
            ret = tree.xpath(u'//' + element_tag, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})[0]
        except (IndexError, etree.XPathError):
            # only "nothing matched" / "lxml cannot evaluate this" lead to creation; unrelated errors propagate
            self.debug.print_debug(self, u'Unable to find an existing {0} element.'.format(element_tag))
        else:
            self.debug.print_debug(self, u'Found existing {0}. Using it.'.format(element_tag))

        if ret is None:
            self.debug.print_debug(self, u'Creating new {0} element.'.format(element_tag))
            ret = tree.xpath(add_xpath)[0]
            new_element = etree.Element(element_tag)

            if is_sibling:
                ret.addnext(new_element)
            else:
                Manipulate.append_safe(ret, new_element, self)

            ret = new_element

        return ret
Exemple #19
0
    def enclose_all(self, start_xpath, new_enclose, start_index):
        """Wrap all ``start_xpath`` matches in a single new element tagged ``new_enclose``.

        The wrapper is placed before the parent of the first matched element and every match is handed to
        ``Manipulate.append_safe`` with the wrapper. If nothing matches, the wrapper stays detached. The tree
        is always saved at the end.

        :param start_xpath: XPath evaluated with the ``tei`` prefix bound
        :param new_enclose: tag for the wrapper element
        :param start_index: accepted but not used
        """
        tree = self.load_dom_tree()

        self.debug.print_debug(self, u'Selecting for enclosure: {0}'.format(start_xpath))

        selected = tree.xpath(start_xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})
        enclosure = etree.Element(new_enclose)

        if selected:
            # the wrapper goes in front of the first match's parent, before anything is moved
            selected[0].getparent().addprevious(enclosure)

        for element in selected:
            Manipulate.append_safe(enclosure, element, self)

        self.save_tree(tree)
Exemple #20
0
    def drop_addin_json(self, xpath, start_text, replace_tag, attribute, caller):
        """Swap add-in elements (text beginning with ``start_text``) for ``replace_tag`` elements.

        The replacement carries ``rel=attribute`` and the original text minus everything up to the last ``}``
        plus one optional whitespace character. Children whose type is exactly ``etree._Element`` are passed
        to ``Manipulate.append_safe`` with the replacement; the original element is then removed from its
        parent. The tree is saved at the end.

        :param xpath: XPath (``tei`` prefix available) selecting candidate elements
        :param start_text: text prefix that marks a known add-in
        :param replace_tag: tag for the replacement element
        :param attribute: value for the replacement's ``rel`` attribute
        :param caller: unused
        """
        # load the DOM
        tree = self.load_dom_tree()

        candidates = tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})
        for child in candidates:
            original_text = child.text
            if original_text is None:
                continue
            # check that this is a known addin
            if not original_text.startswith(start_text):
                continue

            tag_to_parse = re.sub(r'.+}\s?', '', original_text)

            new_element = etree.Element(replace_tag, rel=attribute)
            new_element.text = tag_to_parse
            child.addnext(new_element)

            for subchild in child:
                if type(subchild) is etree._Element:
                    Manipulate.append_safe(new_element, subchild, self)

            parent = child.getparent()
            parent.remove(child)

        self.save_tree(tree)
Exemple #21
0
    def tag_bibliography_refs(self):
        """Move reference paragraphs of flagged sections into ``back/ref-list``.

        Returns immediately (without saving) if ``//back/ref-list`` already
        exists. Otherwise ``back`` and ``ref-list`` are obtained through
        ``find_or_create_element``. Inside every ``sec[@reflist="yes"]``,
        ``title`` children are removed and ``p[@rend="ref"]`` elements are
        renamed to ``ref``, given an ``ID<uuid4>`` id, stripped of ``rend``
        and passed to ``Manipulate.append_safe`` with the ref-list.
        """
        tree = self.load_dom_tree()

        if len(tree.xpath('//back/ref-list')) > 0:
            return

        self.find_or_create_element(tree, 'back', '//body', True)
        ref_list = self.find_or_create_element(tree, 'ref-list', '//back',
                                               False)

        # change this to find <reflist> elements after we're more certain of how to identify them
        candidate_query = (
            '//sec[@reflist="yes"]/p[@rend="ref"] | //sec[@reflist="yes"]/title '
            '| //sec[@reflist="yes"]/*/listitem/p[@rend="ref"] | '
            '//sec[@reflist="yes"]/*/p[@rend="ref"]')

        for refs in tree.xpath(candidate_query):
            if refs.tag == 'title':
                self.debug.print_debug(
                    self, u'Removing title element from reference item')
                refs.getparent().remove(refs)
                continue

            self.debug.print_debug(
                self,
                u'Tagging element "{0}" as reference item'.format(refs.tag))
            refs.tag = 'ref'
            refs.set('id', u'ID{0}'.format(uuid.uuid4()))

            # drop the marker attribute if it is there
            refs.attrib.pop('rend', None)

            Manipulate.append_safe(ref_list, refs, self)

        self.save_tree(tree)
Exemple #22
0
    @staticmethod
    def search_and_copy(last_node, move_node, nested_sibling, new_element,
                        node, node_parent):
        """Relocate the siblings following ``node`` into ``new_element``.

        Elements are passed to ``Manipulate.append_safe`` with
        ``new_element``. The first text result becomes ``new_element.text``
        (or, when ``move_node`` is true, the tail of ``node``, which is then
        moved into ``new_element`` itself). Later text results become the
        tail of the most recently handled element. ``new_element`` is finally
        inserted after ``nested_sibling``, or after ``node_parent`` (whose
        tail is cleared) when ``nested_sibling`` is None.

        Static: callers use ``self.search_and_copy(...)`` with these six
        arguments and the function has no ``self`` parameter.

        :param last_node: not read; present for signature compatibility
        :param move_node: whether ``node`` itself is moved into
            ``new_element``
        :param nested_sibling: insertion anchor, or None
        :param new_element: element receiving the relocated content
        :param node: node whose following siblings are relocated
        :param node_parent: fallback insertion anchor
        :return: None if ``move_node`` is true, else ``new_element``
        """
        last_append = None
        for new_node in node.xpath('following-sibling::node()'):
            # text nodes come back from XPath as lxml smart-string results
            if (type(new_node) is etree._ElementStringResult or
                    type(new_node) is etree._ElementUnicodeResult):
                if last_append is not None:
                    last_append.tail = new_node
                    continue

                last_append = new_element
                if move_node:
                    node.tail = new_node
                    Manipulate.append_safe(last_append, node, None)
                else:
                    last_append.text = new_node
            else:
                Manipulate.append_safe(new_element, new_node, None)
                last_append = new_node

        if nested_sibling is None:
            node_parent.addnext(new_element)
            node_parent.tail = ''
        else:
            nested_sibling.addnext(new_element)

        if move_node:
            return None

        # remove the original tail (all text after the line break, for example)
        # <!--meTypeset:br-->A third break <-- "a third break" is the tail
        node.tail = ''
        return new_element
Exemple #23
0
    def tag_bibliography_non_csl(self, xpath, start_text, caller):
        """Rebuild a bibliography section that is introduced by a text marker.

        Every node matched by ``xpath`` whose text starts with ``start_text``
        has all occurrences of the marker removed from its text, and its
        grandparent is remembered as the section to rebuild (the last match
        wins). That section is then processed as follows:

        * it is retagged ``div`` and a fresh ``div`` is inserted after it;
        * TEI ``head`` children are dropped (and logged);
        * TEI ``p`` children are retagged ``ref`` with ``target="None"``,
          each wrapped in a new ``<p rend="Bibliography">`` that is appended
          to the fresh ``div``;
        * the original section is finally removed, together with any other
          children it still holds.

        The tree is saved whether or not a section was found.

        :param xpath: XPath (``tei`` prefix bound to the TEI namespace)
            selecting candidate marker nodes.
        :param start_text: marker the node text has to start with.
        :param caller: unused by this method.
        """
        # load the DOM
        tree = self.load_dom_tree()
        change_element = None

        for child in tree.xpath(
                xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
            # results without text (empty elements, non-element results)
            # cannot carry the marker
            text = getattr(child, 'text', None)
            if text is None or not text.startswith(start_text):
                continue

            child.text = text.replace(start_text, '')

            # a marker without a parent leaves any earlier match untouched
            parent = child.getparent()
            if parent is not None:
                change_element = parent.getparent()

        if change_element is not None:
            # retag the enclosing section as a plain "div"
            change_element.tag = 'div'

            new_element = etree.Element('div')
            change_element.addnext(new_element)
            Manipulate.append_safe(new_element, change_element, self)

            # change all sub-elements to ref; iterate over a snapshot because
            # children are detached from change_element inside the loop
            for element in list(change_element):
                if element.tag == '{http://www.tei-c.org/ns/1.0}head':
                    self.debug.print_debug(
                        self, u'Dropping head element: {0}'.format(
                            etree.tostring(element, encoding="unicode")))
                    change_element.remove(element)
                elif element.tag == '{http://www.tei-c.org/ns/1.0}p':
                    outer = etree.Element('p')
                    outer.attrib['rend'] = 'Bibliography'
                    element.tag = 'ref'
                    element.attrib['target'] = 'None'

                    Manipulate.append_safe(outer, element, self)
                    Manipulate.append_safe(new_element, outer, self)

            # only the freshly wrapped entries stay in the new container
            new_element.remove(change_element)

        self.save_tree(tree)
Exemple #24
0
    def tag_bibliography_non_csl(self, xpath, start_text, caller):
        """Rebuild a bibliography section that is introduced by a text marker.

        Nodes matched by ``xpath`` whose text starts with ``start_text`` get
        every occurrence of the marker stripped from their text; the
        grandparent of the last such node is the section that gets rebuilt.
        Nodes without any text are skipped instead of raising.

        The section is retagged ``div``, a new ``div`` is inserted after it,
        TEI ``head`` children are dropped (and logged), and TEI ``p`` children
        are turned into ``<ref target="None">`` elements, each wrapped in a new
        ``<p rend="Bibliography">`` inside the new ``div``. The original
        section is removed afterwards. The tree is always saved.

        :param xpath: XPath (``tei`` prefix bound to the TEI namespace)
            selecting candidate marker nodes.
        :param start_text: marker the node text has to start with.
        :param caller: unused by this method.
        """
        # load the DOM
        tree = self.load_dom_tree()
        change_element = None

        for child in tree.xpath(xpath, namespaces={'tei': 'http://www.tei-c.org/ns/1.0'}):
            text = getattr(child, 'text', None)

            # elements without text have no startswith(); skip them
            if text is None or not text.startswith(start_text):
                continue

            child.text = text.replace(start_text, '')

            # a parentless marker keeps whatever section was found before it
            parent = child.getparent()
            if parent is not None:
                change_element = parent.getparent()

        if change_element is not None:
            # retag the enclosing section as a plain "div"
            change_element.tag = 'div'

            new_element = etree.Element('div')
            change_element.addnext(new_element)
            Manipulate.append_safe(new_element, change_element, self)

            # change all sub-elements to ref; work on a snapshot since the
            # loop detaches children from change_element
            for element in list(change_element):
                if element.tag == '{http://www.tei-c.org/ns/1.0}head':
                    # serialise to text so the message does not contain a bytes repr
                    self.debug.print_debug(self, u'Dropping head element: {0}'.format(etree.tostring(element, encoding="unicode")))
                    change_element.remove(element)
                elif element.tag == '{http://www.tei-c.org/ns/1.0}p':
                    outer = etree.Element('p')
                    outer.attrib['rend'] = 'Bibliography'
                    element.tag = 'ref'
                    element.attrib['target'] = 'None'

                    Manipulate.append_safe(outer, element, self)
                    Manipulate.append_safe(new_element, outer, self)

            # only the freshly wrapped entries stay in the new container
            new_element.remove(change_element)

        self.save_tree(tree)
Exemple #25
0
    def enclose_bibliography_tags(self, xpath, top_tag, sub_tag, attrib,
                                  attribvalue):
        """Move bibliography entries into a dedicated block and normalise them.

        Returns ``False`` without saving when ``xpath`` matches nothing.
        Otherwise:

        1. The container is obtained from
           ``self.find_or_create_element(tree, 'back', '//tei:body', True)``.
           If its tag is not ``top_tag`` (plain or TEI-namespaced), a new
           ``top_tag`` element is created and inserted after it.
        2. The first ``//tei:back/tei:div[@type="bibliogr"]`` is reused as the
           bibliography block; if there is none, a ``sub_tag`` element with
           ``attrib=attribvalue`` is inserted at the start of the container.
        3. Every ``xpath`` match is moved into the block via
           ``Manipulate.append_safe``.
        4. If ``xpath + '/tei:ref'`` matches anything, each such ``ref`` whose
           ``target`` attribute exists and contains no ``:`` is retagged ``p``
           with ``rend="Bibliography"``, loses its ``target``, is placed after
           its parent, and the parent is removed from its own parent (when it
           has one). Otherwise every ``xpath`` match is retagged ``p`` with
           ``rend="Bibliography"``.

        :param xpath: XPath (``tei`` prefix bound to the TEI namespace)
            selecting the bibliography entries.
        :param top_tag: expected tag of the container element.
        :param sub_tag: tag used when a bibliography block has to be created.
        :param attrib: attribute name set on a newly created block.
        :param attribvalue: value for ``attrib``.
        :return: ``True`` when entries were processed, ``False`` otherwise.
        """
        #tei_manipulator.enclose_bibliography_tags('//tei:p[@rend="Bibliography"]', 'back', 'div', 'type', 'bibliogr')
        tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

        # load the DOM
        tree = self.load_dom_tree()

        if not tree.xpath(xpath, namespaces=tei_ns):
            return False

        # find the parent
        parent = self.find_or_create_element(tree, 'back', '//tei:body', True)

        parent_is_top = parent.tag in (
            top_tag, '{http://www.tei-c.org/ns/1.0}' + top_tag)

        if parent_is_top:
            new_element = parent
        else:
            new_element = etree.Element(top_tag)
            self.debug.print_debug(
                self, u'Mismatch {0} {1}.'.format(parent.tag, top_tag))

        existing_blocks = tree.xpath(
            '//tei:back/tei:div[@type="bibliogr"]', namespaces=tei_ns)

        if existing_blocks:
            sub_element = existing_blocks[0]
            self.debug.print_debug(
                self, u'Found existing bibliography block. Using it.')
        else:
            self.debug.print_debug(self, u'Creating bibliography block.')
            sub_element = etree.Element(sub_tag)
            sub_element.attrib[attrib] = attribvalue
            new_element.insert(0, sub_element)

        if not parent_is_top:
            parent.addnext(new_element)

        for element in tree.xpath(xpath, namespaces=tei_ns):
            Manipulate.append_safe(sub_element, element, self)

        # remove all refs within; queried only after the entries were moved
        nested_refs = tree.xpath(xpath + u'/tei:ref', namespaces=tei_ns)

        if nested_refs:
            for ref in nested_refs:
                # ensure that the ref is just a dud and not a valid link to a protocol schema
                if 'target' not in ref.attrib or ':' in ref.attrib['target']:
                    continue

                ref.tag = 'p'
                ref.attrib['rend'] = 'Bibliography'
                del ref.attrib['target']

                # hoist the entry next to its wrapper, then drop the wrapper
                ref_parent = ref.getparent()
                ref_parent.addnext(ref)

                wrapper_parent = ref_parent.getparent()
                if wrapper_parent is not None:
                    wrapper_parent.remove(ref_parent)
        else:
            for ref in tree.xpath(xpath, namespaces=tei_ns):
                ref.tag = 'p'
                ref.attrib['rend'] = 'Bibliography'

        self.save_tree(tree)

        self.debug.print_debug(self, u'Processed bibliography')
        return True
Exemple #26
0
    def enclose_bibliography_tags(self, xpath, top_tag, sub_tag, attrib, attribvalue):
        """Collect bibliography entries in one block and turn them into paragraphs.

        When ``xpath`` matches nothing the method returns ``False`` and does
        not save. Otherwise the container comes from
        ``self.find_or_create_element(tree, 'back', '//tei:body', True)``; if
        its tag differs from ``top_tag`` (plain or TEI-namespaced) a new
        ``top_tag`` element is created and inserted after it. The first
        ``//tei:back/tei:div[@type="bibliogr"]`` is reused as the bibliography
        block, or a ``sub_tag`` element carrying ``attrib=attribvalue`` is
        inserted at the start of the container. All ``xpath`` matches are then
        moved into that block through ``Manipulate.append_safe``.

        Finally, if ``xpath + '/tei:ref'`` matches anything, every such ``ref``
        that has a ``target`` without a ``:`` becomes a
        ``<p rend="Bibliography">`` placed after its parent, and that parent is
        removed from its own parent; a parent that is not attached to anything
        is left alone instead of raising. If there are no such refs, every
        ``xpath`` match is retagged ``p`` with ``rend="Bibliography"``.

        :param xpath: XPath (``tei`` prefix bound to the TEI namespace)
            selecting the bibliography entries.
        :param top_tag: expected tag of the container element.
        :param sub_tag: tag used when a bibliography block has to be created.
        :param attrib: attribute name set on a newly created block.
        :param attribvalue: value for ``attrib``.
        :return: ``True`` when entries were processed, ``False`` otherwise.
        """
        #tei_manipulator.enclose_bibliography_tags('//tei:p[@rend="Bibliography"]', 'back', 'div', 'type', 'bibliogr')
        tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

        # load the DOM
        tree = self.load_dom_tree()

        if not tree.xpath(xpath, namespaces=tei_ns):
            return False

        # find the parent
        parent = self.find_or_create_element(tree, 'back', '//tei:body', True)

        parent_is_top = parent.tag in (top_tag, '{http://www.tei-c.org/ns/1.0}' + top_tag)

        if parent_is_top:
            new_element = parent
        else:
            new_element = etree.Element(top_tag)
            self.debug.print_debug(self, u'Mismatch {0} {1}.'.format(parent.tag, top_tag))

        existing_blocks = tree.xpath('//tei:back/tei:div[@type="bibliogr"]', namespaces=tei_ns)

        if existing_blocks:
            sub_element = existing_blocks[0]
            self.debug.print_debug(self, u'Found existing bibliography block. Using it.')
        else:
            self.debug.print_debug(self, u'Creating bibliography block.')
            sub_element = etree.Element(sub_tag)
            sub_element.attrib[attrib] = attribvalue
            new_element.insert(0, sub_element)

        if not parent_is_top:
            parent.addnext(new_element)

        for element in tree.xpath(xpath, namespaces=tei_ns):
            Manipulate.append_safe(sub_element, element, self)

        # remove all refs within; queried only after the entries were moved
        nested_refs = tree.xpath(xpath + u'/tei:ref', namespaces=tei_ns)

        if nested_refs:
            for ref in nested_refs:
                # ensure that the ref is just a dud and not a valid link to a protocol schema
                if 'target' not in ref.attrib or ':' in ref.attrib['target']:
                    continue

                ref.tag = 'p'
                ref.attrib['rend'] = 'Bibliography'
                del ref.attrib['target']

                # hoist the entry next to its wrapper, then drop the wrapper
                ref_parent = ref.getparent()
                ref_parent.addnext(ref)

                # a detached wrapper has nothing to be removed from
                wrapper_parent = ref_parent.getparent()
                if wrapper_parent is not None:
                    wrapper_parent.remove(ref_parent)
        else:
            for ref in tree.xpath(xpath, namespaces=tei_ns):
                ref.tag = 'p'
                ref.attrib['rend'] = 'Bibliography'

        self.save_tree(tree)

        self.debug.print_debug(self, u'Processed bibliography')
        return True