Esempio n. 1
0
    def test_parse_file_with_api(self):
        """Check that LGInprocParser and LGApiParser agree on the poc-english corpus.

        Both parsers are invoked with the same dictionary, corpus, output
        path, reference parses and option bits. The two values returned by
        each ``parse()`` call must compare equal between the parsers.
        """
        options = (BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP | BIT_RM_DIR
                   | BIT_LOC_LANG | BIT_PARSE_QUALITY)

        lgp = LGInprocParser()
        api = LGApiParser()

        # Named 'lang_dict' rather than 'dict' so the builtin is not shadowed.
        lang_dict = "en"
        corp = "test-data/corpora/poc-english/poc_english.txt"
        reff = "test-data/corpora/poc-english/poc_english_parses_lg.txt"
        outp = "test-data/temp"

        m1, q1 = lgp.parse(lang_dict, corp, outp, reff, options)
        m2, q2 = api.parse(lang_dict, corp, outp, reff, options)

        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(m1, m2)

        print(q1.text(q1))
        print(q2.text(q2))

        self.assertEqual(q1, q2)
 def test_parse_batch_ps_output(self):
     """Check that '_parse_batch_ps_output()' yields 12 items for 'lg_post_output'."""
     expected = 12
     parser = LGInprocParser()
     parsed = parser._parse_batch_ps_output(lg_post_output)
     actual = len(parsed)
     failure_msg = "'parse_batch_ps_output()' returns '{}' instead of '{}'".format(
         actual, expected)
     self.assertEqual(actual, expected, failure_msg)
    def test_parseability_coinsedence(self):
        """Check that two poc-english runs with the "en" dictionary coincide.

        GrammarTester.test() is run once on 'poc_english.txt' and once on the
        'poc-english-multi' corpus path, each with its own reference parses.
        Both runs must return equal metrics and quality objects, and the
        first run must report 100% parseability with nothing left unparsed.

        'grmr', 'tmpl', 'limit' and 'opts' are not defined in this method;
        they are resolved from the enclosing scope.
        """
        # Named 'lang_dict' rather than 'dict' so the builtin is not shadowed.
        lang_dict = "en"
        corp1 = handle_path_string(
            "test-data/corpora/poc-english/poc_english.txt")
        corp2 = handle_path_string("test-data/corpora/poc-english-multi")
        dest = handle_path_string("test-data/temp")
        ref1 = handle_path_string(
            "test-data/parses/poc-english-ref/poc_english.txt.ull")
        ref2 = handle_path_string("test-data/parses/poc-english-multi-ref")

        pr = LGInprocParser()

        gt = GrammarTester(grmr, tmpl, limit, pr)
        pm1, pq1 = gt.test(lang_dict, corp1, dest, ref1, opts)
        pm2, pq2 = gt.test(lang_dict, corp2, dest, ref2, opts)

        # The two corpus layouts must produce the same results.
        self.assertEqual(pm1, pm2)
        self.assertEqual(pq1, pq2)

        self.assertEqual("100.00%", pm1.parseability_str(pm1).strip())
        self.assertEqual("0.00%", pm1.completely_unparsed_str(pm1).strip())
        self.assertEqual("100.00%", pm1.completely_parsed_str(pm1).strip())
    def test_test(self):
        """Run GrammarTester.test() and check that 88 sentences are reported.

        'dict', 'corp', 'dest', 'ref', 'opts', 'grmr', 'tmpl' and 'limit' are
        not defined in this method; presumably they are module-level fixtures.
        """
        parser = LGInprocParser()

        # Echo the inputs, one per line.
        inputs = (dict, corp, dest, ref)
        print(*inputs, sep="\n")

        tester = GrammarTester(grmr, tmpl, limit, parser)
        metrics, _quality = tester.test(dict, corp, dest, ref, opts)

        print(metrics.text(metrics))

        expected_sentences = 88
        self.assertEqual(expected_sentences, metrics.sentences)
    def test_parse_file_with_lgp_cmp(self):
        """Make sure 'parse_file_with_lgp0' and 'LGInprocParser.parse' produce the same results.

        Both are run over the poc-turtle corpus with the poc-turtle dictionary
        and the BIT_STRIP option. The first three items of the tuple returned
        by 'parse_file_with_lgp0' are compared with the parser's completely
        parsed, completely unparsed and average parsed ratios.
        """
        pr = LGInprocParser()

        dict_path = "test-data/dict/poc-turtle"
        corpus_path = "test-data/corpora/poc-turtle/poc-turtle.txt"
        options = BIT_STRIP

        tup_lgp = parse_file_with_lgp0(dict_path, corpus_path, None, 1, options)

        # NOTE(review): 'test_parse_file_with_api' unpacks parse() into two
        # values, while here the result is used as a single metrics object --
        # confirm which return shape is current.
        metrics = pr.parse(dict_path, corpus_path, None, None, options)

        # 'file=' is required: a positional sys.stderr would merely be printed
        # to stdout as the stream object's repr.
        print(tup_lgp, file=sys.stderr)
        print(metrics.text(metrics), file=sys.stderr)

        self.assertEqual(tup_lgp[0], metrics.completely_parsed_ratio)
        self.assertEqual(tup_lgp[1], metrics.completely_unparsed_ratio)
        self.assertEqual(tup_lgp[2], metrics.average_parsed_ratio)
    def test_parseability_multi_file(self):
        """ Run the poc-english-multi corpus against the poc-turtle dictionary """
        dict_name = "poc-turtle"
        corpus_path = handle_path_string("test-data/corpora/poc-english-multi")
        output_path = handle_path_string("test-data/temp")
        reference_path = None  # no reference parses are supplied for this run

        parser = LGInprocParser()
        tester = GrammarTester(grmr, tmpl, limit, parser)
        metrics, _quality = tester.test(
            dict_name, corpus_path, output_path, reference_path, opts)

        print(metrics.text(metrics))

        # File and sentence totals first, then the formatted percentages.
        self.assertEqual(9, tester._total_files)
        self.assertEqual(88, metrics.sentences)
        self.assertEqual(
            "2.46%", metrics.parseability_str(metrics).strip())
        self.assertEqual(
            "90.91%", metrics.completely_unparsed_str(metrics).strip())
    def test_parseability(self):
        """ Run the single-file poc-english corpus against the poc-turtle dictionary """
        grammar_dict = "poc-turtle"
        corpus_file = handle_path_string(
            "test-data/corpora/poc-english/poc_english.txt")
        out_dir = handle_path_string("test-data/temp")
        reference = None  # no reference parses are supplied for this run

        inproc_parser = LGInprocParser()
        grammar_tester = GrammarTester(grmr, tmpl, limit, inproc_parser)
        result = grammar_tester.test(
            grammar_dict, corpus_file, out_dir, reference, opts)
        parse_metrics, _parse_quality = result

        print(parse_metrics.text(parse_metrics))

        self.assertEqual(88, parse_metrics.sentences)
        self.assertEqual(
            "2.46%", parse_metrics.parseability_str(parse_metrics).strip())
        self.assertEqual(
            "90.91%",
            parse_metrics.completely_unparsed_str(parse_metrics).strip())