Example #1
0
    def test_parse_postscript_no_links(self):
        """Parsing the ``post_no_links`` fixture must produce no links."""
        # BIT_RWALL and BIT_CAPS are set, BIT_STRIP is masked out.
        flags = (BIT_RWALL | BIT_CAPS) & ~BIT_STRIP

        # Only the link half of the result matters for this check.
        _, found_links = parse_postscript(self.post_no_links, flags)

        self.assertEqual(0, len(found_links))
Example #2
0
    def test_parse_postscript_alice_bug_002(self):
        """Regression check on the token count for the ``alice_bug_002`` fixture."""
        # Empty mask with BIT_STRIP cleared; BIT_RWALL | BIT_CAPS are not set
        # in this test.
        flags = 0 & ~BIT_STRIP

        parsed_tokens, _ = parse_postscript(alice_bug_002, flags)

        # The tokens double as the failure message, so a count mismatch
        # shows what was actually parsed.
        self.assertEqual(29, len(parsed_tokens), parsed_tokens)
Example #3
0
    def test_parse_postscript_gutenchildren_bug(self):
        """Token-count regression for a bug found in the Gutenberg Children corpus."""
        no_flags = 0  # every option bit is left cleared

        parsed_tokens, _ = parse_postscript(gutenberg_children_bug, no_flags)

        self.assertEqual(18, len(parsed_tokens))
Example #4
0
    def test_parse_postscript_gutenchildren_bug_002(self):
        """Check token and link counts for ``gutenberg_children_bug_002``.

        The fixture is parsed with BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP and
        must yield 12 tokens and 6 links.
        """
        options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP

        tokens, links = parse_postscript(gutenberg_children_bug_002, options)

        # The parsed values are attached as failure messages instead of being
        # printed unconditionally, so they only show up when a check fails.
        self.assertEqual(12, len(tokens), tokens)
        self.assertEqual(6, len(links), links)
Example #5
0
 def test_parse_postscript_all_walls(self):
     """Parse metrics for the ``post_all_walls`` fixture must be 1.0 / 0.0 / 1.0."""
     # BIT_RWALL and BIT_CAPS are set, BIT_STRIP is masked out.
     flags = (BIT_RWALL | BIT_CAPS) & ~BIT_STRIP
     # The links are not needed here; the metrics are computed from tokens only.
     parsed_tokens, _ = parse_postscript(self.post_all_walls, flags)
     metrics = parse_metrics(parsed_tokens)
     self.assertEqual(1.0, metrics.completely_parsed_ratio)
     self.assertEqual(0.0, metrics.completely_unparsed_ratio)
     self.assertEqual(1.0, metrics.average_parsed_ratio)
Example #6
0
    def test_get_link_set(self):
        """Test for link extraction according to set options.

        The ``post_all_walls`` fixture is parsed and passed through
        ``get_link_set`` with BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP |
        BIT_PARSE_QUALITY; the result must equal the expected set of index pairs.
        """
        # Reference copy of the fixture (NOTE(review): confirm it still matches
        # self.post_all_walls):
        # post_all_walls = "[(LEFT-WALL)(Dad[!])(was.v-d)(not.e)(a)(parent.n)(before)(.)(RIGHT-WALL)]" \
        #                  "[[0 7 2 (Xp)][0 1 0 (Wd)][1 2 0 (Ss*s)][2 5 1 (Osm)][2 3 0 (EBm)]" \
        #                  "[4 5 0 (Ds**c)][5 6 0 (Mp)][7 8 0 (RW)]][0]"
        # Relative to that reference, the pairs touching index 0 (LEFT-WALL) and
        # index 7 (the period) are absent from the expected set.
        expected_set = {(1, 2), (2, 5), (2, 3), (4, 5), (5, 6)}
        options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP | BIT_PARSE_QUALITY
        tokens, links = parse_postscript(self.post_all_walls, options)
        result_set = get_link_set(tokens, links, options)

        # assertEqual reports the differing elements on failure, whereas
        # assertTrue(a == b) would only say "False is not true".
        self.assertEqual(expected_set, result_set)
Example #7
0
    def test_parse_tokens_sharp(self):
        """Sharp-sign prefixed input must parse into the expected tokens and links."""
        # Only BIT_STRIP is set; BIT_NO_LWALL and BIT_NO_PERIOD stay off.
        flags = BIT_STRIP

        parsed_tokens, parsed_links = parse_postscript(sharp_sign_ps_linkages, flags)

        # Check the count first, then the content.
        self.assertEqual(len(sharp_sign_tokens), len(parsed_tokens))
        self.assertEqual(sharp_sign_tokens, parsed_tokens)
        # Links are converted to a set before comparison, so their order and
        # any duplicates are ignored.
        self.assertEqual(sharp_sign_links, set(parsed_links))
0
    def test_parse_postscript_explosion_no_linkages(self):
        """Check token and link counts for ``explosion_no_linkages``.

        Newlines are removed from the fixture before parsing with
        BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP; the result must contain
        27 tokens and no links.
        """
        options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP

        tokens, links = parse_postscript(explosion_no_linkages.replace("\n", ""), options)

        # The parsed values are attached as failure messages instead of being
        # printed unconditionally, so they only show up when a check fails.
        self.assertEqual(27, len(tokens), tokens)
        self.assertEqual(0, len(links), links)
Example #9
0
    def test_parse_postscript_alice_bug_001(self):
        """Token count and link index bounds for the ``alice_bug_001`` fixture."""
        expected_count = 15

        # Empty mask with BIT_STRIP cleared; BIT_RWALL | BIT_CAPS are not set
        # in this test.
        flags = 0 & ~BIT_STRIP

        parsed_tokens, parsed_links = parse_postscript(alice_bug_001, flags)

        self.assertEqual(expected_count, len(parsed_tokens))

        # Both endpoints of every link must be below the expected token count;
        # the offending link is reported as the failure message.
        for link in parsed_links:
            self.assertTrue(link[0] < expected_count and link[1] < expected_count, str(link))