Example #1
0
def find_reference_section(docbody):
    """Locate the reference section of a document via its title line.

    Every title pattern is tried in turn. For each pattern the document is
    walked from its last line back to its first; a line matching the pattern
    is passed, together with up to five of the lines that follow it, to
    find_numeration(). The details returned are kept as the current choice
    unless a previously kept choice has a non-empty 'title' (or 'marker')
    that the new one lacks. Once any choice has been kept for a pattern, the
    remaining patterns are not tried.
    @param docbody: (list) of strings - the full document body.
    @return: (dictionary) - the details returned by find_numeration() for
        the chosen title line, extended here with:
        { 'start_line' : (integer) - index in docbody of the line that
                                      matched the title pattern,
          'title_string' : (string) - the 'title' group of that match.
        }
        The 'title' and 'marker' entries of the dictionary are consulted
        by this function; any other entries are defined by
        find_numeration().
         -- OR --
                (None) - when no reference section could be found.
    """
    best = None

    for pattern in get_reference_section_title_patterns():
        # Walk the document bottom-up, one line index at a time.
        for position in range(len(docbody) - 1, -1, -1):
            matched = pattern.match(docbody[position])
            if not matched:
                continue

            heading = matched.group('title')
            window = docbody[position:position + 6]
            candidate, found_title = find_numeration(window, heading)

            if candidate:
                # Never trade a choice that has a title/marker for one that
                # has none. Note that a rejected candidate also bypasses the
                # found_title check at the bottom of the loop.
                keeps_title = best and 'title' in best and best['title']
                if keeps_title and not candidate['title']:
                    continue
                keeps_marker = best and 'marker' in best and best['marker']
                if keeps_marker and not candidate['marker']:
                    continue

                best = candidate
                best['start_line'] = position
                best['title_string'] = heading

            if found_title:
                # find_numeration() signalled that this line is the title;
                # stop scanning with the current pattern.
                break

        if best:
            break

    return best
Example #2
0
def find_reference_section(docbody):
    """Locate the reference section of a document via its title line.

    The document is walked once, from its last line back to its first. Each
    line is tested against the whole list of title patterns with
    regex_match_list(); a matching line is passed, together with up to five
    of the lines that follow it, to find_numeration(). The details returned
    are kept as the current choice unless a previously kept choice has a
    non-empty 'title' (or 'marker') that the new one lacks.
    @param docbody: (list) of strings - the full document body.
    @return: (dictionary) - the details returned by find_numeration() for
        the chosen title line, extended here with:
        { 'start_line' : (integer) - index in docbody of the line that
                                      matched a title pattern,
          'title_string' : (string) - the 'title' group of that match.
        }
        The 'title' and 'marker' entries of the dictionary are consulted
        by this function; any other entries are defined by
        find_numeration().
         -- OR --
                (None) - when no reference section could be found.
    """
    best = None
    patterns = get_reference_section_title_patterns()

    # Walk the document bottom-up, one line index at a time.
    for position in range(len(docbody) - 1, -1, -1):
        matched = regex_match_list(docbody[position], patterns)
        if not matched:
            continue

        heading = matched.group('title')
        window = docbody[position:position + 6]
        candidate, found_title = find_numeration(window, heading)

        if candidate:
            # Never trade a choice that has a title/marker for one that has
            # none. Note that a rejected candidate also bypasses the
            # found_title check at the bottom of the loop.
            keeps_title = best and 'title' in best and best['title']
            if keeps_title and not candidate['title']:
                continue
            keeps_marker = best and 'marker' in best and best['marker']
            if keeps_marker and not candidate['marker']:
                continue

            best = candidate
            best['start_line'] = position
            best['title_string'] = heading

        if found_title:
            # find_numeration() signalled that this line is the title;
            # no need to look any further up the document.
            break

    return best
Example #3
0
 def test_reference_section_title_pattern(self):
     """Check that more than two reference-section title patterns exist."""
     patterns = refextract_re.get_reference_section_title_patterns()
     # assertTrue replaces the deprecated `assert_` alias, which was removed
     # from unittest.TestCase in Python 3.12.
     self.assertTrue(len(patterns) > 2)
 def test_reference_section_title_pattern(self):
     """Check that more than two reference-section title patterns exist."""
     patterns = refextract_re.get_reference_section_title_patterns()
     # assertTrue replaces the deprecated `assert_` alias, which was removed
     # from unittest.TestCase in Python 3.12.
     self.assertTrue(len(patterns) > 2)