def _find_section_parent(self, page, load, sections, sections_number):
    """Link every section to its parent using dotted section numbers.

    For each section, two candidate parent numbers are derived from
    ``section.number`` by cutting it at a dot, once without and once with
    that dot. The candidates are tried in order of likelihood and the first
    one present in ``sections_number`` supplies the parent. A section for
    which neither candidate is known is left untouched.

    Args:
        page: Not used by this method.
        load: Not used by this method.
        sections: Iterable of objects exposing ``number``, ``parent`` and
            ``save()``.
        sections_number: Mapping from a section number string to the object
            that is assigned as ``parent``.
    """
    for section in sections:
        number = section.number
        dot_positions = find_list(number, '.')
        dot_count = len(dot_positions)
        trailing_dot = number.strip().endswith('.')

        if trailing_dot and dot_count > 1:
            # "2.3.4." -> most likely "2.3.", otherwise "2.3"
            cut = dot_positions[-2]
            candidates = (number[:cut + 1], number[:cut])
        elif not trailing_dot and dot_count > 0:
            # "2.3.4" -> most likely "2.3", otherwise "2.3."
            cut = dot_positions[-1]
            candidates = (number[:cut], number[:cut + 1])
        else:
            # Top-level number (e.g. "2" or "2."): no parent to look for.
            candidates = ()

        for candidate in candidates:
            if candidate in sections_number:
                section.parent = sections_number[candidate]
                section.save()
                break
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` that contains ``element``.

    ``element_text`` may occur several times in ``text_context``. To select
    the occurrence that belongs to ``element``, the children of its parent
    that come before it are scanned: each one whose normalized text contains
    ``element_text`` moves the selection to the next occurrence.

    Args:
        element: Node exposing ``getparent()``; the parent, when present,
            is iterated to visit its children.
        element_text: Text of ``element`` to look for in ``text_context``.
        text_context: Text in which ``element_text`` is searched.
        xtext: Callable returning the text of a child node.

    Returns:
        The result of ``find_sentence`` for the selected occurrence, or an
        empty string when ``element_text`` does not occur in
        ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ''

    occurrence = 0
    if size > 1:
        parent = element.getparent()
        # An element without a parent has no siblings to disambiguate with,
        # so the first occurrence is kept.
        siblings = parent if parent is not None else ()
        for child in siblings:
            if child == element:
                break
            # An earlier sibling with the same text means the current
            # occurrence belongs to it, not to ``element``.
            if element_text in normalize(xtext(child)):
                occurrence += 1
        if occurrence >= size:
            # Something went wrong: more matching siblings than occurrences.
            # Fall back to the first occurrence. The offsets in ``indexes``
            # refer to ``text_context``, so that is the text to search.
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` that contains ``element``.

    ``element_text`` may occur several times in ``text_context``. To select
    the occurrence that belongs to ``element``, the children of its parent
    that come before it are scanned: each one whose normalized text contains
    ``element_text`` moves the selection to the next occurrence.

    Args:
        element: Node exposing ``getparent()``; the parent, when present,
            is iterated to visit its children.
        element_text: Text of ``element`` to look for in ``text_context``.
        text_context: Text in which ``element_text`` is searched.
        xtext: Callable returning the text of a child node.

    Returns:
        The result of ``find_sentence`` for the selected occurrence, or an
        empty string when ``element_text`` does not occur in
        ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ""

    occurrence = 0
    if size > 1:
        parent = element.getparent()
        # An element without a parent has no siblings to disambiguate with,
        # so the first occurrence is kept.
        siblings = parent if parent is not None else ()
        for child in siblings:
            if child == element:
                break
            # An earlier sibling with the same text means the current
            # occurrence belongs to it, not to ``element``.
            if element_text in normalize(xtext(child)):
                occurrence += 1
        if occurrence >= size:
            # Something went wrong: more matching siblings than occurrences.
            # Fall back to the first occurrence. The offsets in ``indexes``
            # refer to ``text_context``, so that is the text to search.
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))