def test_parse_file_with_api(self):
    """
    Check that LGInprocParser and LGApiParser agree on the poc-english corpus.

    Both parsers are invoked with the same dictionary, corpus, output
    directory, reference file and option bits. Each `parse()` call returns a
    pair of values; the pairs are compared element by element.
    """
    options = (BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP | BIT_RM_DIR
               | BIT_LOC_LANG | BIT_PARSE_QUALITY)

    inproc_parser = LGInprocParser()
    api_parser = LGApiParser()

    # Named `dict_name` rather than `dict` to avoid shadowing the builtin.
    dict_name = "en"
    corpus_path = "test-data/corpora/poc-english/poc_english.txt"
    reference_path = "test-data/corpora/poc-english/poc_english_parses_lg.txt"
    output_path = "test-data/temp"

    # Alternative configuration (poc-turtle dictionary/corpus, no reference):
    # dict_name = "test-data/dict/poc-turtle"
    # corpus_path = "test-data/corpora/poc-turtle/poc-turtle.txt"
    # output_path = "test-data/temp"
    # reference_path = None

    metrics_inproc, quality_inproc = inproc_parser.parse(
        dict_name, corpus_path, output_path, reference_path, options)
    metrics_api, quality_api = api_parser.parse(
        dict_name, corpus_path, output_path, reference_path, options)

    # assertEqual reports both operands on failure, which a bare
    # assertTrue(a == b) does not.
    self.assertEqual(metrics_inproc, metrics_api)

    print(quality_inproc.text(quality_inproc))
    print(quality_api.text(quality_api))

    self.assertEqual(quality_inproc, quality_api)
def test_parse_batch_ps_output(self):
    """
    Verify that `_parse_batch_ps_output()` returns 12 items for the
    `lg_post_output` sample.
    """
    expected_count = 12

    parser = LGInprocParser()
    parsed = parser._parse_batch_ps_output(lg_post_output)
    actual_count = len(parsed)

    failure_message = "'parse_batch_ps_output()' returns '{}' instead of '{}'".format(
        actual_count, expected_count)
    self.assertEqual(actual_count, expected_count, failure_message)
def test_parseability_coinsedence(self):
    """
    Test that the single-file and the multi-file poc-english corpora give
    equal results with the "en" dictionary.

    The same GrammarTester instance is run on both corpora, each against its
    own reference; the returned pairs must be equal, and the first result
    must report 100.00% parseability, 0.00% completely unparsed and 100.00%
    completely parsed.
    """
    dict_name = "en"  # "poc-turtle"
    # dict_name = handle_path_string("test-data/dict/poc-turtle")

    single_corpus = handle_path_string(
        "test-data/corpora/poc-english/poc_english.txt")
    multi_corpus = handle_path_string("test-data/corpora/poc-english-multi")
    output_dir = handle_path_string("test-data/temp")
    single_ref = handle_path_string(
        "test-data/parses/poc-english-ref/poc_english.txt.ull")
    multi_ref = handle_path_string("test-data/parses/poc-english-multi-ref")

    parser = LGInprocParser()
    # parser = LGApiParser()
    tester = GrammarTester(grmr, tmpl, limit, parser)

    single_metrics, single_quality = tester.test(
        dict_name, single_corpus, output_dir, single_ref, opts)
    multi_metrics, multi_quality = tester.test(
        dict_name, multi_corpus, output_dir, multi_ref, opts)

    # print(single_metrics.text(single_metrics))
    # print(single_quality.text(single_quality))

    self.assertEqual(single_metrics, multi_metrics)
    self.assertEqual(single_quality, multi_quality)

    # self.assertEqual(88, single_metrics.sentences)
    expectations = (
        ("100.00%", "parseability_str"),
        ("0.00%", "completely_unparsed_str"),
        ("100.00%", "completely_parsed_str"),
    )
    for expected, formatter_name in expectations:
        formatter = getattr(single_metrics, formatter_name)
        self.assertEqual(expected, formatter(single_metrics).strip())
def test_test(self):
    """
    Run `GrammarTester.test()` with settings taken from outside this method
    and check that 88 sentences are reported.
    """
    # NOTE(review): `dict`, `corp`, `dest` and `ref` are not assigned in this
    # method; presumably they are module-level settings - confirm, otherwise
    # `dict` resolves to the builtin type.
    parser = LGInprocParser()
    # parser = LGApiParser()

    print(dict, corp, dest, ref, sep="\n")

    tester = GrammarTester(grmr, tmpl, limit, parser)
    metrics, _ = tester.test(dict, corp, dest, ref, opts)

    print(metrics.text(metrics))
    # print(quality.text(quality))

    # self.assertEqual(25, tester._total_dicts)
    self.assertEqual(88, metrics.sentences)
def test_parse_file_with_lgp_cmp(self):
    """
    Make sure 'parse_file_with_lgp0' and 'LGInprocParser.parse' produce the
    same results on the poc-turtle dictionary and corpus.

    The three leading elements of the tuple returned by
    'parse_file_with_lgp0' are compared with the `completely_parsed_ratio`,
    `completely_unparsed_ratio` and `average_parsed_ratio` attributes of the
    metrics object returned by the parser.
    """
    parser = LGInprocParser()

    dict_path = "test-data/dict/poc-turtle"
    corpus_path = "test-data/corpora/poc-turtle/poc-turtle.txt"
    options = BIT_STRIP

    tup_lgp = parse_file_with_lgp0(dict_path, corpus_path, None, 1, options)

    # LGInprocParser.parse() is unpacked into two values elsewhere in this
    # file (see test_parse_file_with_api); only the first one carries the
    # ratios compared below.
    metrics, _ = parser.parse(dict_path, corpus_path, None, None, options)

    # `file=` is required to actually write to stderr; passing the stream
    # positionally would merely print its repr to stdout.
    print(tup_lgp, file=sys.stderr)
    print(metrics.text(metrics), file=sys.stderr)

    self.assertEqual(tup_lgp[0], metrics.completely_parsed_ratio)
    self.assertEqual(tup_lgp[1], metrics.completely_unparsed_ratio)
    self.assertEqual(tup_lgp[2], metrics.average_parsed_ratio)
def test_parseability_multi_file(self):
    """
    Test the multi-file poc-english corpus with the poc-turtle dictionary.

    Expects 9 files, 88 sentences, 2.46% parseability and 90.91% completely
    unparsed sentences.
    """
    dict_name = "poc-turtle"
    # dict_name = handle_path_string("test-data/dict/poc-turtle")
    corpus_dir = handle_path_string("test-data/corpora/poc-english-multi")
    output_dir = handle_path_string("test-data/temp")
    reference = None  # handle_path_string("test-data/parses/poc-english-multi-ref")

    parser = LGInprocParser()
    # parser = LGApiParser()

    # print(dict_name, corpus_dir, output_dir, reference, sep="\n")

    tester = GrammarTester(grmr, tmpl, limit, parser)
    metrics, _ = tester.test(dict_name, corpus_dir, output_dir, reference, opts)

    print(metrics.text(metrics))
    # print(quality.text(quality))

    self.assertEqual(9, tester._total_files)
    self.assertEqual(88, metrics.sentences)

    expectations = (
        ("2.46%", "parseability_str"),
        ("90.91%", "completely_unparsed_str"),
    )
    for expected, formatter_name in expectations:
        formatter = getattr(metrics, formatter_name)
        self.assertEqual(expected, formatter(metrics).strip())
def test_parseability(self):
    """
    Test the single-file poc-english corpus with the poc-turtle dictionary.

    Expects 88 sentences, 2.46% parseability and 90.91% completely unparsed
    sentences.
    """
    dict_name = "poc-turtle"
    # dict_name = handle_path_string("test-data/dict/poc-turtle")
    corpus_file = handle_path_string(
        "test-data/corpora/poc-english/poc_english.txt")
    output_dir = handle_path_string("test-data/temp")
    reference = None  # "/home/alex/data/poc-english/poc_english_noamb_parse_ideal.txt"

    parser = LGInprocParser()
    # parser = LGApiParser()

    # print(dict_name, corpus_file, output_dir, reference, sep="\n")

    tester = GrammarTester(grmr, tmpl, limit, parser)
    metrics, _ = tester.test(dict_name, corpus_file, output_dir, reference, opts)

    print(metrics.text(metrics))
    # print(quality.text(quality))

    # self.assertEqual(25, tester._total_dicts)
    self.assertEqual(88, metrics.sentences)

    expectations = (
        ("2.46%", "parseability_str"),
        ("90.91%", "completely_unparsed_str"),
    )
    for expected, formatter_name in expectations:
        formatter = getattr(metrics, formatter_name)
        self.assertEqual(expected, formatter(metrics).strip())