Esempio n. 1
0
    def _find_section_parent(self, page, load, sections, sections_number):
        """Attach each section to its parent, looked up by section number.

        For every section, two candidate parent numbers are derived from
        ``section.number`` by cutting it at a dot: one spelled the same way
        as the section itself (with or without a trailing dot) and one
        spelled the other way. The first candidate that is a key of
        ``sections_number`` becomes ``section.parent`` and the section is
        saved. Sections with no matching candidate are left untouched.

        ``page`` and ``load`` are accepted but not used here.

        :param sections: iterable of objects exposing ``number``, ``parent``
            and ``save()``.
        :param sections_number: mapping from a section number string to the
            section object carrying that number.
        """
        for section in sections:
            number = section.number
            # NOTE(review): find_list presumably returns the offsets of
            # every '.' in the number -- they are used as slice bounds below.
            dot_positions = find_list(number, '.')
            dot_count = len(dot_positions)
            trailing_dot = number.strip().endswith('.')

            if trailing_dot:
                # "2.3.4." -> try "2.3." first, then "2.3".
                if dot_count < 2:
                    continue
                cut = dot_positions[-2]
                candidates = (number[:cut + 1], number[:cut])
            else:
                # "2.3.4" -> try "2.3" first, then "2.3.".
                if dot_count == 0:
                    continue
                cut = dot_positions[-1]
                candidates = (number[:cut], number[:cut + 1])

            for candidate in candidates:
                if candidate in sections_number:
                    section.parent = sections_number[candidate]
                    section.save()
                    break
Esempio n. 2
0
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` surrounding ``element``'s text.

    ``element_text`` may occur several times in ``text_context``. To pick
    the occurrence that belongs to ``element``, the siblings that come
    before ``element`` in its parent are scanned: each earlier sibling whose
    normalized text contains ``element_text`` shifts the chosen occurrence
    by one.

    :param element: node exposing ``getparent()``; its parent must be
        iterable over child nodes.
    :param element_text: the text to locate.
    :param text_context: the larger text that is searched.
    :param xtext: callable extracting the text of a child node.
    :return: the result of ``find_sentence`` for the chosen occurrence, or
        ``''`` when ``element_text`` does not occur in ``text_context``.
    """
    # NOTE(review): find_list presumably returns the start offset of every
    # occurrence of element_text in text_context -- they are used as such.
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ''

    text_length = len(element_text)

    def sentence_at(start):
        # Offsets always refer to text_context, so that is what is searched.
        return find_sentence(text_context, start, start + text_length)

    if size == 1:
        return sentence_at(indexes[0])

    parent = element.getparent()
    if parent is None:
        # No parent means no siblings to disambiguate with (getparent() can
        # return None, e.g. for a root element); use the first occurrence
        # instead of failing while iterating over None.
        return sentence_at(indexes[0])

    preceding_matches = 0
    for child in parent:
        if child == element:
            break
        # An earlier sibling holds the same text, so the occurrence we
        # are after is at least one further along.
        if element_text in normalize(xtext(child)):
            preceding_matches += 1

    if preceding_matches < size:
        return sentence_at(indexes[preceding_matches])

    # More matching siblings than occurrences: something went wrong, fall
    # back to the first occurrence. The lookup must run on text_context
    # (not element_text), because indexes are offsets into text_context.
    return sentence_at(indexes[0])
Esempio n. 3
0
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` surrounding ``element``'s text.

    ``element_text`` may occur several times in ``text_context``. To pick
    the occurrence that belongs to ``element``, the siblings that come
    before ``element`` in its parent are scanned: each earlier sibling whose
    normalized text contains ``element_text`` shifts the chosen occurrence
    by one.

    :param element: node exposing ``getparent()``; its parent must be
        iterable over child nodes.
    :param element_text: the text to locate.
    :param text_context: the larger text that is searched.
    :param xtext: callable extracting the text of a child node.
    :return: the result of ``find_sentence`` for the chosen occurrence, or
        ``""`` when ``element_text`` does not occur in ``text_context``.
    """
    # NOTE(review): find_list presumably returns the start offset of every
    # occurrence of element_text in text_context -- they are used as such.
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ""

    text_length = len(element_text)

    def sentence_at(start):
        # Offsets always refer to text_context, so that is what is searched.
        return find_sentence(text_context, start, start + text_length)

    if size == 1:
        return sentence_at(indexes[0])

    parent = element.getparent()
    if parent is None:
        # No parent means no siblings to disambiguate with (getparent() can
        # return None, e.g. for a root element); use the first occurrence
        # instead of failing while iterating over None.
        return sentence_at(indexes[0])

    preceding_matches = 0
    for child in parent:
        if child == element:
            break
        # An earlier sibling holds the same text, so the occurrence we
        # are after is at least one further along.
        if element_text in normalize(xtext(child)):
            preceding_matches += 1

    if preceding_matches < size:
        return sentence_at(indexes[preceding_matches])

    # More matching siblings than occurrences: something went wrong, fall
    # back to the first occurrence. The lookup must run on text_context
    # (not element_text), because indexes are offsets into text_context.
    return sentence_at(indexes[0])