def test_multiline_fields_split(self):
    """Check how continuation lines of a multi-line ``AF`` field are joined.

    The first reader uses its default settings and is expected to join the
    three ``AF`` lines with ``"; "``.  The second reader is given
    ``subdelimiter="##"`` and is expected to join them with ``"##"``.
    """
    record_text = (
        "PT abc\n"
        "SO J.Whatever\n"
        "AF Here\n"
        " be\n"
        " dragons\n"
        "ER\n"
        "EF"
    )
    stream = StringIO(preamble_s + record_text)

    # Default sub-delimiter.
    default_reader = PlainTextReader(stream)
    expected_default = {
        "PT": "abc",
        "SO": "J.Whatever",
        "AF": "Here; be; dragons",
    }
    assert_dict_equal(next(default_reader), expected_default)

    # Rewind so a second reader can parse the same input from the start.
    stream.seek(0)
    custom_reader = PlainTextReader(stream, subdelimiter="##")
    expected_custom = dict(expected_default, AF="Here##be##dragons")
    assert_dict_equal(next(custom_reader), expected_custom)
def test_multiline_fields_nosplit(self):
    """Check that a multi-line ``SC`` field is joined with a single space.

    The continuation line is expected to be appended to the first line
    with one space between them, leaving the ``"; "`` separators already
    present in the text untouched.
    """
    record_text = (
        "PT abc\n"
        "SC Here; there\n"
        " be dragons; Yes\n"
        "ER\n"
        "EF"
    )
    reader = PlainTextReader(StringIO(preamble_s + record_text))
    first_record = next(reader)
    assert_dict_equal(
        first_record,
        {"PT": "abc", "SC": "Here; there be dragons; Yes"},
    )
def test_multiple_records(self):
    """Check that two consecutive records are both yielded, in order.

    The input holds two ``ER``-terminated records separated by an empty
    line; the second record's ``AU`` line starts with a space.
    """
    records_text = (
        "PT abc\n"
        "AU xyz\n"
        "ER\n"
        "\n"
        "PT abc2\n"
        " AU xyz2\n"
        "AB abstract\n"
        "ER\n"
        "EF"
    )
    reader = PlainTextReader(StringIO(preamble_s + records_text))
    parsed = list(reader)
    wanted = [
        {"PT": "abc", "AU": "xyz"},
        {"PT": "abc2", "AU": "xyz2", "AB": "abstract"},
    ]

    # Compare lengths first: zip() below would silently stop at the shorter
    # of the two sequences.
    assert_equal(len(parsed), len(wanted))
    for actual_record, wanted_record in zip(parsed, wanted):
        assert_dict_equal(actual_record, wanted_record)
def test_ignore_empty_lines(self):
    """Check that an empty line between two fields adds nothing to the record."""
    stream = StringIO(preamble_s + "PT abc\n\nAU xyz\nER\nEF")
    reader = PlainTextReader(stream)
    first_record = next(reader)
    assert_dict_equal(first_record, {"PT": "abc", "AU": "xyz"})
def test_forgotten_EF(self):
    """Consume a reader whose input ends after ``ER`` with no ``EF`` line.

    NOTE(review): no assertion or expected-exception marker is visible on
    this test; presumably reading such input is meant to raise -- confirm
    and make the expectation explicit.
    """
    truncated_text = "PT abc\nAU xuz\nER\n\nPT abc2\nER"
    reader = PlainTextReader(StringIO(preamble_s + truncated_text))
    # Exhaust the reader; the resulting list itself is not inspected.
    list(reader)
def test_wrong_version(self):
    """Construct a reader on a header that declares ``VR 1.1``.

    NOTE(review): no assertion or expected-exception marker is visible on
    this test; presumably construction is meant to raise for this version
    -- confirm and make the expectation explicit.
    """
    header_text = "FN Thomson Reuters Web of Science\nVR 1.1"
    PlainTextReader(StringIO(header_text))
def test_wrong_format(self):
    """Construct a reader on a header whose first line is ``XY Bla``.

    NOTE(review): no assertion or expected-exception marker is visible on
    this test; presumably construction is meant to raise for this header
    -- confirm and make the expectation explicit.
    """
    header_text = "XY Bla\nVR 1.0"
    PlainTextReader(StringIO(header_text))
def test_wos_plaintext(self):
    """Smoke test: iterate over every record in ``data/wos_plaintext.txt``.

    Nothing is asserted about the records; the test only passes if the
    whole file can be read without an exception.
    """
    # The "utf-8-sig" codec decodes UTF-8 and skips a leading byte-order
    # mark when one is present.
    with open("data/wos_plaintext.txt", "rt", encoding="utf-8-sig") as sample:
        reader = PlainTextReader(sample)
        for _record in reader:
            pass