Beispiel #1
0
    def test_multiline_fields_split(self):
        """Continuation lines of the AF field are joined by the subdelimiter.

        The same input is parsed twice: once with the reader's defaults
        (expected joiner ``"; "``) and once with ``subdelimiter="##"``.
        """
        body = ("PT abc\n"
                "SO J.Whatever\n"
                "AF Here\n"
                "   be\n"
                "   dragons\n"
                "ER\n"
                "EF")
        stream = StringIO(preamble_s + body)

        default_reader = PlainTextReader(stream)
        want = {"PT": "abc", "SO": "J.Whatever", "AF": "Here; be; dragons"}
        assert_dict_equal(next(default_reader), want)

        # Rewind so the second reader sees the identical input.
        stream.seek(0)
        custom_reader = PlainTextReader(stream, subdelimiter="##")
        want["AF"] = "Here##be##dragons"
        assert_dict_equal(next(custom_reader), want)
Beispiel #2
0
    def test_multiline_fields_nosplit(self):
        """A continued SC field is expected back as one space-joined string.

        The continuation line ``"  be dragons; Yes"`` should be appended to
        the first line with a single space, not with a subdelimiter.
        """
        body = ("PT abc\n"
                "SC Here; there\n"
                "  be dragons; Yes\n"
                "ER\n"
                "EF")
        stream = StringIO(preamble_s + body)

        reader = PlainTextReader(stream)
        want = {"PT": "abc", "SC": "Here; there be dragons; Yes"}
        assert_dict_equal(next(reader), want)
Beispiel #3
0
    def test_multiple_records(self):
        """Two ER-terminated records in one input yield two dicts, in order."""
        body = ("PT abc\n"
                "AU xyz\n"
                "ER\n"
                "\n"
                "PT abc2\n"
                " AU xyz2\n"
                "AB abstract\n"
                "ER\n"
                "EF")
        stream = StringIO(preamble_s + body)

        # Exhaust the reader so the number of parsed records can be checked.
        parsed = list(PlainTextReader(stream))
        wanted = [
            {"PT": "abc", "AU": "xyz"},
            {"PT": "abc2", "AU": "xyz2", "AB": "abstract"},
        ]

        assert_equal(len(parsed), len(wanted))
        for got, want in zip(parsed, wanted):
            assert_dict_equal(got, want)
Beispiel #4
0
    def test_ignore_empty_lines(self):
        """A blank line between two fields must not affect the parsed record."""
        body = ("PT abc\n"
                "\n"
                "AU xyz\n"
                "ER\n"
                "EF")
        reader = PlainTextReader(StringIO(preamble_s + body))

        want = {"PT": "abc", "AU": "xyz"}
        assert_dict_equal(next(reader), want)
Beispiel #5
0
 def test_forgotten_EF(self):
     """Read every record from input whose closing ``EF`` line is missing."""
     # NOTE(review): nothing is asserted here; presumably an
     # expected-exception decorator applies outside this view -- confirm.
     body = ("PT abc\n"
             "AU xuz\n"
             "ER\n"
             "\n"
             "PT abc2\n"
             "ER")
     reader = PlainTextReader(StringIO(preamble_s + body))
     # Drain the reader completely; the individual records are not inspected.
     for _ in reader:
         pass
Beispiel #6
0
 def test_wrong_version(self):
     """Construct a reader on a header that declares ``VR 1.1``."""
     # NOTE(review): no assertion is made; presumably the constructor is
     # expected to reject this version via a decorator not visible here.
     header = ("FN Thomson Reuters Web of Science\n"
               "VR 1.1")
     PlainTextReader(StringIO(header))
Beispiel #7
0
 def test_wrong_format(self):
     """Construct a reader on a header whose first tag is ``XY``."""
     # NOTE(review): no assertion is made; presumably the constructor is
     # expected to reject this header via a decorator not visible here.
     header = ("XY Bla\n"
               "VR 1.0")
     PlainTextReader(StringIO(header))
Beispiel #8
0
 def test_wos_plaintext(self):
     """Smoke test: iterate over every record of the sample export file."""
     # The "utf-8-sig" codec decodes UTF-8 and drops a leading BOM if present.
     sample_path = "data/wos_plaintext.txt"
     with open(sample_path, "rt", encoding="utf-8-sig") as handle:
         # Only successful iteration matters; the records are discarded.
         for _record in PlainTextReader(handle):
             pass