def test_repeated_multiblock(self):
    """Check that a nested parser captures a repeated multiblock section.

    The sample text holds two ``$top`` sections; within each, every
    integer line is followed by rows of floats.
    """
    from textwrap import dedent

    import pent

    from .testdata import mblock_repeated_result

    sample = dedent(
        """
        $top
        1 2 3
        0.2 0.3 0.4
        0.3 0.4 0.6
        4 5 6
        0.1 0.1 0.1
        0.5 0.5 0.5
        $top
        7 8 9
        0.2 0.2 0.2
        0.6 0.6 0.6
        1 2 3
        0.4 0.4 0.4
        0.8 0.8 0.8
        """
    )

    # Inner parser: one integer line, then the float rows to capture.
    block_prs = pent.Parser(head="#++i", body="#!+.f")
    # Outer parser: the ``$top`` marker, then the inner parser as body.
    section_prs = pent.Parser(head="@.$top", body=block_prs)

    captured = section_prs.capture_body(sample)
    self.assertEqual(captured, mblock_repeated_result)
def test_optional_single_line_tail(self):
    """Exercise a nested parser whose tail is one optional line.

    Both the body captures and the per-block tail captures are compared
    against the expected values from the test data module.
    """
    import pent

    from .testdata import opt_1line_tail_data as data
    from .testdata import opt_1line_tail_expect_block as expect_block
    from .testdata import opt_1line_tail_expect_struct as expect_struct

    prs = pent.Parser(
        head="@.HEAD",
        body=pent.Parser(head="#++i", body="#!+.f", tail="? @!.FOOT"),
    )

    # Body captures, one subtest per sample text.
    for idx, (text, expected) in enumerate(zip(data, expect_block)):
        with self.subTest("block_{}".format(idx)):
            self.assertEqual(prs.capture_body(text), expected)

    # Tail captures of each inner block, taken from the struct output.
    for idx, (text, expected) in enumerate(zip(data, expect_struct)):
        with self.subTest("struct_{}".format(idx)):
            tails = [
                block[ParserField.Tail]
                for block in prs.capture_struct(text)[ParserField.Body]
            ]
            self.assertEqual(tails, expected)
def test_absent_completely_optional_parser(self):
    """Check a match still occurs when an all-optional section is missing.

    The head is a nested parser consisting only of an optional line; the
    second sample text omits that line entirely.
    """
    optional_head = pent.Parser(body="? &!. &!.")
    block_body = pent.Parser(head="&!.", body="#!..i #!..i #!..i")
    prs = pent.Parser(head=optional_head, body=block_body)

    with_head = dedent(
        """
        a b
        c
        1 2 3
        d
        4 5 6
        """
    )
    without_head = dedent(
        """
        c
        1 2 3
        d
        4 5 6
        """
    )

    for idx, sample in enumerate([with_head, without_head]):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(sample)
            self.assertNotEqual([], struct)
def test_simple_multiblock(self):
    """Check body capture for a simple multiblock section.

    The outer parser keys on the ``$data`` marker and uses a nested
    parser for the numeric blocks that follow it.
    """
    from textwrap import dedent

    import pent

    from .testdata import mblock_result

    # Each numeric block is followed by a blank line; presumably this is
    # what the inner parser's empty tail pattern consumes.
    sample = dedent(
        """
        test

        more test

        $data
        1 2 3
        1 2.5 -3.5 0.8
        2 -1.2 8.1 -9.2

        4 5 6
        1 -0.1 3.5 8.1
        2 1.4 2.2 -4.7

        $next_data"""
    )

    block_prs = pent.Parser(head="#++i", body="#.+i #!+.f", tail="")
    section_prs = pent.Parser(head="@.$data", body=block_prs)

    captured = section_prs.capture_body(sample)
    self.assertEqual(captured, mblock_result)
def test_ORCA_CAS_state_results(self):
    """Check the body and head captures for the CAS state results.

    The body capture is compared against the stored test data; the head
    captures of every match are compared against a literal list.
    """
    import pent

    from .testdata import orca_cas_states

    data = self.get_orca_cas_file()

    head_expect = [[["1", "6", "4"]], [["2", "4", "4"]], [["3", "2", "4"]]]

    # Per-root parser, used as the body of the outer parser below.
    root_prs = pent.Parser(
        head="@.ROOT #x!.+i @.: @.E= #o!..f ~!",
        body="#!.+f @o.[ #x!.+i @.]: #!.+i",
    )

    block_head = (
        "@+-",
        "~ '@.FOR BLOCK' #!.+i @o.MULT= #!.+i @o.NROOTS= #!.+i",
        "@+-",
        "",
    )
    block_prs = pent.Parser(head=block_head, body=root_prs, tail=("", ""))

    self.assertEqual(block_prs.capture_body(data), orca_cas_states)

    head_result = [
        match[ParserField.Head] for match in block_prs.capture_struct(data)
    ]
    self.assertEqual(head_result, head_expect)
def test_orca_hess_column_stacked(self):
    """Check `column_stack_2d` on the captured ORCA Hessian blocks."""
    import pent

    from .testdata import orca_hess_hessian

    hess_prs = pent.Parser(
        head=("@.$hessian", "#.+i"),
        body=pent.Parser(head="#++i", body="#.+i #!+.f"),
    )

    data = self.get_orca_H2O_hess()

    # Only the first match's body blocks are stacked and compared.
    first_match = hess_prs.capture_body(data)[0]
    stacked = pent.column_stack_2d(first_match)

    self.assertEqual(orca_hess_hessian, stacked)
class SuperPent:
    """Common base for the test classes; bundles shared helper methods."""

    import pent

    # Parser instance shared at class level, built with an empty body.
    prs = pent.Parser(body="")

    @staticmethod
    def does_parse_match(re_pat, s):
        """Return True if regex pattern `re_pat` is found anywhere in `s`."""
        return re.search(re_pat, s) is not None

    @staticmethod
    def make_testname(v, n, s):
        """Build a test name from a value and a pattern Number/Sign.

        The three arguments are formatted and joined with underscores.
        """
        template = "{0}_{1}_{2}"
        return template.format(v, n, s)

    @staticmethod
    def get_file(fname):
        """Return the text contents of `fname` from ``pent/test``.

        The path is built relative to the current working directory.
        Names ending in ``.gz`` are opened through `gzip` in text mode;
        anything else is opened as a regular text file.
        """
        path = str(Path() / "pent" / "test" / fname)
        is_gzipped = fname.endswith(".gz")

        handle = gzip.open(path, "rt") if is_gzipped else open(path)
        with handle as f:
            return f.read()
def test_optional_lastline_tail(self):
    """Check a two-line tail: first line required, second optional."""
    prs = pent.Parser(tail=("@!.foo", "? @!.bar"), body="#!+.i")

    # (text, expected tail captures) pairs that must produce a match.
    good_texts = [
        ("1 2 3\nfoo", [["foo", None]]),
        ("1 2 3\nfoo\nbar", [["foo", "bar"]]),
        ("1 2 3\nfoo\nquux", [["foo", None]]),
    ]

    for idx, (text, tail_expect) in enumerate(good_texts):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertNotEqual([], struct)
            self.assertEqual(struct[pent.ParserField.Tail], tail_expect)

    # Texts that must not match at all.
    # NOTE(review): the first and third entries are identical -- confirm
    # whether a different case was intended for one of them.
    bad_texts = [
        "1 2 3\nbar",
        "1 2 3\nfar\nbar",
        "1 2 3\nbar",
        "1 2 3\n\nfoo\nbar",
    ]

    for idx, text in enumerate(bad_texts):
        with self.subTest("bad_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertEqual([], struct)
def test_optional_body_line(self):
    """Check that an optional line is accepted inside a multi-line body.

    The body pairs an integer line with an optional decimal line; the
    second sample text has integer lines without a decimal line after.
    """
    prs = pent.Parser(
        head="@.foo",
        body=("#!+.i", "? #!+.d"),
        tail="@.bar",
    )

    every_pair_complete = dedent(
        """
        foo
        1 2 3
        1. 2. 3.
        4 5 6
        4. 5. 6.
        bar
        """
    )
    some_pairs_partial = dedent(
        """
        foo
        1 2 3
        4 5 6
        7 8 9
        7. 8. 9.
        1 2 3
        bar
        """
    )

    samples = [every_pair_complete, some_pairs_partial]
    for idx, sample in enumerate(samples):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(sample)
            self.assertNotEqual([], struct)
def test_optional_firstline_head(self):
    """Check a two-line head: first line optional, second required."""
    prs = pent.Parser(head=("? @!.foo", "@!.bar"), body="#!+.i")

    # (text, expected head captures) pairs that must produce a match.
    good_texts = [
        ("bar\n1 2 3", [[None, "bar"]]),
        ("foo\nbar\n1 2 3", [["foo", "bar"]]),
        ("quuz\nbar\n1 2 3", [[None, "bar"]]),
    ]

    for idx, (text, head_expect) in enumerate(good_texts):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertNotEqual([], struct)
            self.assertEqual(struct[pent.ParserField.Head], head_expect)

    # Texts that must not match at all.
    bad_texts = [
        "foo\n1 2 3",
        "foo\nbaz\n1 2 3",
        "bar\n\n1 2 3",
        "foo\nbar\n\n1 2 3",
    ]

    for idx, text in enumerate(bad_texts):
        with self.subTest("bad_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertEqual([], struct)
def test_mwfn_di_data(self):
    """Check that the column-stacked DI capture equals the stored data."""
    import pent

    from .testdata import mwfn_di_data

    data = self.get_mwfn_li_di_elf()

    di_prs = pent.Parser(
        head="@+* &. @.delocalization ~",
        body=pent.Parser(head="#++i", body="#.+i #!++f"),
    )

    # Only the first match's body blocks are stacked and compared.
    first_match = di_prs.capture_body(data)[0]
    stacked = pent.column_stack_2d(first_match)

    self.assertEqual(stacked, mwfn_di_data)
def test_mwfn_attractor_data(self):
    """Check body and tail captures for the attractor/basin data."""
    import pent

    from .testdata import mwfn_attractor_data

    mwfn_num_grids = [["85130"]]

    data = self.get_mwfn_dens_elf()

    head_lines = (
        "~ '@.attractors after clustering:'",
        "@.Index '@.Average X,Y,Z' ~ @.Value",
    )
    prs = pent.Parser(
        head=head_lines,
        body="#.+i #!+.f #!.+f",
        tail="~ '@.interbasin grids:' #!.+i",
    )

    # Run both captures up front; each is checked in its own subtest.
    body_caps = prs.capture_body(data)
    tail_caps = prs.capture_struct(data)[pent.ParserField.Tail]

    with self.subTest("body"):
        self.assertEqual(body_caps, mwfn_attractor_data)

    with self.subTest("tail"):
        self.assertEqual(mwfn_num_grids, tail_caps)
def test_orca_hess_freq_parser(self):
    """Check pattern, head, tail and body results of the freq parser."""
    import pent

    from .testdata import orca_hess_freqs

    # The tail adds little here, but including it verifies that tail
    # handling works.
    freq_parser = pent.Parser(
        head=("@.$vibrational_frequencies", "#!.+i"),
        body="#.+i #!..f",
        tail=("~", "@.$normal_modes", "#!++i"),
    )

    data = self.get_orca_C2F4_hess()

    # The raw regex must match, and the match must span 22 newlines.
    match = re.search(freq_parser.pattern(), data)
    self.assertIsNotNone(match)
    self.assertEqual(match.group(0).count("\n"), 22)

    head_caps = freq_parser.capture_struct(data)[ParserField.Head]
    self.assertEqual(head_caps, [["18"]])

    tail_caps = freq_parser.capture_struct(data)[ParserField.Tail]
    self.assertEqual(tail_caps, [["18", "18"]])

    self.assertEqual(freq_parser.capture_body(data), orca_hess_freqs)
def test_gamess_hess(self):
    """Check the body capture of the GAMESS force constant matrix."""
    import pent

    from .testdata import gamess_hess_split

    data = self.get_gamess_file()

    section_head = (
        "@+-",
        "'@.CARTESIAN FORCE CONSTANT MATRIX'",
        "@+-",
    )
    # Each matrix block opens with four header lines, the first blank.
    block_prs = pent.Parser(
        head=("", "#.+i #.+i", "&. &.", "@.X @.Y @.Z @.X @.Y @.Z"),
        body="~ &o. #o!..f #o!..f #o!..f #o!..f #o!..f #o!..f",
    )
    hess_prs = pent.Parser(head=section_head, body=block_prs)

    self.assertEqual(gamess_hess_split, hess_prs.capture_body(data))
def test_body_cleared_after_init(self):
    """Check `pattern` raises SectionError once `body` is set to None."""
    import pent

    parser = pent.Parser(body="#..i")

    # Clear the body after construction, then ask for the pattern.
    parser.body = None

    with self.assertRaises(pent.SectionError):
        parser.pattern()
def test_optional_space_after_literal(self):
    """Check optional-space matching after a string literal.

    The last ``VALUE=`` line has no space before its number, so only the
    parser using the ``o`` flag on the literal captures all three values.
    """
    from textwrap import dedent

    import pent

    sample = dedent(
        """\
        1 2 3 4 5
        VALUE= 1
        VALUE= 2
        VALUE=10"""
    )

    expected = [[["1"], ["2"], ["10"]]]

    strict_prs = pent.Parser(head="#++i", body="@.VALUE= #!..i")
    lenient_prs = pent.Parser(head="#++i", body="@o.VALUE= #!..i")

    self.assertNotEqual(expected, strict_prs.capture_body(sample))
    self.assertEqual(expected, lenient_prs.capture_body(sample))
def test_optional_space_after_number(self):
    """Check optional-space matching after a number.

    Half of the sample lines put the trailing ``.`` directly against the
    number, so only the parser using the ``o`` flag on the number token
    captures all four values.
    """
    from textwrap import dedent

    import pent

    sample = dedent(
        """
        1 2 3 4 5
        23 .
        23.
        -3e4 .
        -3e4.
        """
    )

    expected = [[["23"], ["23"], ["-3e4"], ["-3e4"]]]

    lenient_prs = pent.Parser(head="#++i", body="#o!..g @..")
    strict_prs = pent.Parser(head="#++i", body="#!..g @..")

    self.assertNotEqual(expected, strict_prs.capture_body(sample))
    self.assertEqual(expected, lenient_prs.capture_body(sample))
def test_multiline_body_parser(self):
    """Check body capture when the body pattern spans several lines.

    Empty-string entries in the pattern line up with the blank lines of
    the sample text.
    """
    import pent

    expected = [[["1", "2", "4"]]]

    sample = "\n1\n\n2\n\n\n4"
    body_lines = ("", "#!.+i", "", "#!.+i", "", "", "#!.+i")

    parser = pent.Parser(body=body_lines)

    self.assertEqual(parser.capture_body(sample), expected)
def test_orca_opt_trajectory(self):
    """Check body capture across the multiple-xyz ``.trj`` file."""
    import pent

    from .testdata import orca_opt_trajectory

    # Head: an integer line followed by one free-form line.
    xyz_prs = pent.Parser(head=("#..i", "~"), body="&!. #!+.f")

    data = self.get_orca_trj()
    captured = xyz_prs.capture_body(data)

    self.assertEqual(captured, orca_opt_trajectory)
def timetest():
    """Run timing test with data/code of #61.

    Reads the gzipped ``isosorbide_NO3_02.out.gz`` file from
    ``pent/test`` (relative to the current working directory), captures
    the body with the parser below, and column-stacks the captured
    blocks as float arrays. Nothing is returned; the call exists only
    for the work it performs.
    """
    from pathlib import Path

    # Build the path from components so it resolves on any OS; the
    # previous backslash-joined literal only worked on Windows.
    path = str(Path() / "pent" / "test" / "isosorbide_NO3_02.out.gz")
    with gzip.open(path, "rt") as f:
        data = f.read()

    prs = pent.Parser(
        head=("'@.REDUCED MASS:' #+.f", "'@.IR INTENSITY:' #+.f", ""),
        body="~ #!+.f",
    )

    cap = prs.capture_body(data)

    # column_stack requires a real sequence: NumPy deprecated passing a
    # generator to the stacking functions and newer releases reject it.
    np.column_stack([np.array(block, dtype=float) for block in cap])
def test_orca_hess_dipders_parser(self):
    """Check the 2-D single-block capture of ORCA dipole derivatives."""
    import pent

    from .testdata import orca_hess_dipders

    dipders_prs = pent.Parser(
        head=("@.$dipole_derivatives", "#.+i"),
        body="#!+.f",
    )

    data = self.get_orca_C2F4_hess()
    captured = dipders_prs.capture_body(data)

    self.assertEqual(captured, orca_hess_dipders)
def test_gamess_modes(self):
    """Check the body capture of the GAMESS normal modes list."""
    import pent

    from .testdata import gamess_modes_split

    data = self.get_gamess_file()

    # Head: the two labelled value lines followed by a blank line.
    head_lines = (
        "'@.REDUCED MASS:' #+.f",
        "'@.IR INTENSITY:' #+.f",
        "",
    )
    modes_prs = pent.Parser(head=head_lines, body="~ #!+.f")

    self.assertEqual(gamess_modes_split, modes_prs.capture_body(data))
def test_mwfn_li_data(self):
    """Check that the flattened LI capture equals the stored data.

    The first match's captured rows are flattened, and any token
    containing a colon is dropped before comparison.
    """
    import pent

    from .testdata import mwfn_li_data

    data = self.get_mwfn_li_di_elf()

    li_prs = pent.Parser(head="'@.Total localization index:'", body="&!+")

    first_match = li_prs.capture_body(data)[0]
    tokens = itt.chain.from_iterable(first_match)
    res = [tok for tok in tokens if ":" not in tok]

    self.assertEqual(res, mwfn_li_data)
def test_gamess_geometry(self):
    """Check the body capture of the GAMESS geometry table."""
    import pent

    from .testdata import gamess_geometry

    data = self.get_gamess_file()

    # The two header lines that precede the coordinate rows.
    table_head = (
        "@.ATOM @.ATOMIC '@.COORDINATES (BOHR)'",
        "@.CHARGE @.X @.Y @.Z",
    )
    geom_prs = pent.Parser(head=table_head, body="&!. #!.+f #!+.f")

    self.assertEqual(gamess_geometry, geom_prs.capture_body(data))
def test_ORCA_CAS_orbital_ranges(self):
    """Check body and tail captures of the CAS orbital-ranges section."""
    import pent

    data = self.get_orca_cas_file()

    ranges_prs = pent.Parser(
        head="~ '@.orbital ranges:'",
        body="~ #!.+i @.- #!.+i @.( #!.+i @.orbitals)",
        tail="'@.Number of rotation parameters' @+. #!.+i",
    )

    tail_val = [["1799"]]
    body_result = [
        [["0", "14", "15"], ["15", "21", "7"], ["22", "98", "77"]],
    ]

    self.assertEqual(body_result, ranges_prs.capture_body(data))

    tail_caps = ranges_prs.capture_struct(data)[ParserField.Tail]
    self.assertEqual(tail_val, tail_caps)
def test_optional_1line_tail(self):
    """Check parsers whose tail is a single optional line."""
    prs = pent.Parser(tail="? @!.foo", body="#!+.i")

    # Several of these match only because the optional tail line lets
    # the trailing "footer" line go unmatched.
    good_texts = [
        ("1 2 3", []),
        ("1 2 3\n4 5 6", []),
        ("1 2 3\nfoo", [["foo"]]),
        ("\n1 2 3\n", []),
        ("1 2 3\n\nfoo", []),
        ("1 2 3\nfoobar", []),
    ]

    for idx, (text, tail_expect) in enumerate(good_texts):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertNotEqual([], struct)
            self.assertEqual(struct[pent.ParserField.Tail], tail_expect)
def test_optional_1line_head(self):
    """Check parsers whose head is a single optional line."""
    prs = pent.Parser(head="? @!.foo", body="#!+.i")

    # Several of these match only because the optional head line lets
    # the leading "header" line go unmatched.
    # NOTE(review): an earlier comment here asked for a check on the
    # head captures; the second assertion below appears to provide it.
    good_texts = [
        ("1 2 3", []),
        ("1 2 3\n4 5 6", []),
        ("foo\n1 2 3", [["foo"]]),
        ("\n1 2 3", []),
        ("foo\n\n\n1 2 3", []),
        ("foobar\n1 2 3", []),
    ]

    for idx, (text, head_expect) in enumerate(good_texts):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertNotEqual([], struct)
            self.assertEqual(struct[pent.ParserField.Head], head_expect)
def test_gamess_gradient(self):
    """Check the body capture of the GAMESS energy gradient table."""
    import pent

    from .testdata import gamess_gradient

    data = self.get_gamess_file()

    # Five header lines: a ruled title banner, a blank, then the units.
    banner = (
        "@+-",
        "'@.ENERGY GRADIENT'",
        "@+-",
        "",
        "'@.UNITS ARE HARTREE/BOHR' ~",
    )
    grad_prs = pent.Parser(head=banner, body="#.+i &. #!+.f")

    self.assertEqual(gamess_gradient, grad_prs.capture_body(data))
def test_gamess_freqs(self):
    """Check the body capture of the GAMESS frequencies list."""
    import pent

    from .testdata import gamess_freqs

    data = self.get_gamess_file()

    # Six header lines; the empty strings stand for blank lines.
    preamble = (
        "'@.REFERENCE ON SAYVETZ' ~",
        "",
        "'@.NOTE - THE MODES' ~",
        "'@.SUM ON I' ~",
        "",
        "'@.MODE FREQ(CM**-1)' ~",
    )
    freqs_prs = pent.Parser(head=preamble, body="#.+i #!..f &. #+.f")

    self.assertEqual(gamess_freqs, freqs_prs.capture_body(data))
def test_optional_2line_head(self):
    """Check parsers whose head consists of two optional lines."""
    prs = pent.Parser(head=("? @!.foo", "? @!.bar"), body="#!+.i")

    # As with the one-line case, some of these match only because the
    # entire header goes unmatched.
    good_texts = [
        ("1 2 3", []),
        ("1 2 3\n4 5", []),
        ("foo\n1 2 3", [["foo", None]]),
        ("bar\n1 2 3", [[None, "bar"]]),
        ("foobar\n1 2 3", [[None, None]]),
        ("foo\n\n\n1 2 3", [[None, None]]),
        ("foo\nbar\n1 2 3", [["foo", "bar"]]),
        ("foo\n\n1 2 3", [["foo", None]]),
    ]

    for idx, (text, head_expect) in enumerate(good_texts):
        with self.subTest("good_{}".format(idx)):
            struct = prs.capture_struct(text)
            self.assertNotEqual([], struct)
            self.assertEqual(struct[pent.ParserField.Head], head_expect)