Example #1
0
 def test_2_lines_together(self):
     marker_pattern = ur"\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
     refs = [
         u"[1] hello",
         u"hello2 [2] foo",
     ]
     rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
     self.assertEqual(rebuilt_refs, [
         u"[1] hello hello2",
         u"[2] foo",
     ])
 def test_2_lines_together(self):
     marker_pattern = ur"\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
     refs = [
         u"[1] hello",
         u"hello2 [2] foo",
     ]
     rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
     self.assertEqual(rebuilt_refs, [
         u"[1] hello hello2",
         u"[2] foo",
     ])
Example #3
0
 def test_pagination_non_removal(self):
     marker_pattern = ur"^\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
     refs = [
         u"[1] hello",
         u"hello2",
         u"[2]",
         u"foo",
     ]
     rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
     self.assertEqual(rebuilt_refs, [
         u"[1] hello hello2",
         u"[2] foo",
     ])
 def test_pagination_non_removal(self):
     marker_pattern = ur"^\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
     refs = [
         u"[1] hello",
         u"hello2",
         u"[2]",
         u"foo",
     ]
     rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
     self.assertEqual(rebuilt_refs, [
         u"[1] hello hello2",
         u"[2] foo",
     ])
Example #5
0
def extract_references_from_string_xml(source, is_only_references=True):
    """Extract references from a string.

    Parameters:
        source: the document text; it is split on newlines before
            processing.
        is_only_references: when true (the default), every line of
            ``source`` is passed to ``rebuild_reference_lines``, using the
            marker pattern reported by ``get_reference_section_beginning``
            or, failing that, by ``find_numeration_in_body``. When false,
            the reference lines are obtained from
            ``extract_references_from_fulltext`` instead.

    Returns:
        The result of ``parse_references`` applied to the reference lines
        (MARCXML, according to the original description of this function).
    """
    docbody = source.split("\n")
    if not is_only_references:
        reflines, dummy, dummy = extract_references_from_fulltext(docbody)
    else:
        refs_info = get_reference_section_beginning(docbody)
        if not refs_info:
            # No reference-section heading was found: fall back to the
            # numeration detected in the body and treat the whole input as
            # the reference section.
            refs_info, dummy = find_numeration_in_body(docbody)
            refs_info["start_line"] = 0
            # Store a plain int like ``start_line``; a stray trailing comma
            # previously turned this value into a 1-tuple.
            refs_info["end_line"] = len(docbody) - 1

        reflines = rebuild_reference_lines(docbody, refs_info["marker_pattern"])
    return parse_references(reflines)
Example #6
0
def extract_references_from_string_xml(source, is_only_references=True):
    """Extract references from a string.

    Parameters:
        source: the document text; it is split on newlines before
            processing.
        is_only_references: when true (the default), every line of
            ``source`` is passed to ``rebuild_reference_lines``, using the
            marker pattern reported by ``get_reference_section_beginning``
            or, failing that, by ``find_numeration_in_body``. When false,
            the reference lines are obtained from
            ``extract_references_from_fulltext`` instead.

    Returns:
        The result of ``parse_references`` applied to the reference lines
        (MARCXML, according to the original description of this function).
    """
    docbody = source.split('\n')
    if not is_only_references:
        reflines, dummy, dummy = extract_references_from_fulltext(docbody)
    else:
        refs_info = get_reference_section_beginning(docbody)
        if not refs_info:
            # No reference-section heading was found: fall back to the
            # numeration detected in the body and treat the whole input as
            # the reference section.
            refs_info, dummy = find_numeration_in_body(docbody)
            refs_info['start_line'] = 0
            # Store a plain int like ``start_line``; the original trailing
            # comma made this value a 1-tuple.
            refs_info['end_line'] = len(docbody) - 1

        reflines = rebuild_reference_lines(docbody, refs_info['marker_pattern'])
    return parse_references(reflines)