Example #1
0
    def test_not_opened(self):
        """Closers that were never opened must raise "Un-paired close".

        Each input contains a lone ``''``, ``)`` or ``]`` token. Both
        ``processing.as_sentence`` and ``processing.sentences`` are expected
        to raise ``ValueError`` with that message.
        """
        pattern = "Un-paired close"
        for text in ("i '' eat .", "i ) eat .", "i ] eat ."):
            words = text.split()
            with self.assertRaisesRegex(ValueError, pattern):
                processing.as_sentence(words)
            with self.assertRaisesRegex(ValueError, pattern):
                # Drain the result so any lazily raised error surfaces here.
                tokens = [processing.Token(word) for word in words]
                list(processing.sentences(tokens))
Example #2
0
 def test_multiple(self):
     """A stream holding two terminated sentences yields both, in order."""
     tokens = [
         processing.Token(word)
         for word in "i eat food . you ate pie .".split()
     ]
     expected = [
         "i eat food .".split(" "),
         "you ate pie .".split(" "),
     ]
     self.assertEqual(list(processing.sentences(tokens)), expected)
Example #3
0
    def test_terminals(self):
        """Word lists ending in ``.``, ``?`` or ``!`` are accepted as-is.

        ``processing.as_sentence`` must return the words unchanged and
        ``processing.sentences`` must yield exactly that one sentence.
        """
        for text in ("i eat food .", "i eat food ?", "i eat food !"):
            words = text.split()
            self.assertEqual(processing.as_sentence(words), words)
            tokens = [processing.Token(word) for word in words]
            self.assertEqual(list(processing.sentences(tokens)), [words])
Example #4
0
    def test_quotation(self):
        """Terminals may sit just outside or just inside a `` ... '' quote.

        For each of ``.``, ``?`` and ``!`` both placements are checked:
        ``processing.as_sentence`` must return the words unchanged and
        ``processing.sentences`` must yield exactly that one sentence.
        """
        cases = (
            # Terminal .
            "i eat `` food '' .",
            "i eat `` food . ''",
            # Terminal ?
            "i eat `` food '' ?",
            "i eat `` food ? ''",
            # Terminal !
            "i eat `` food '' !",
            "i eat `` food ! ''",
        )
        for text in cases:
            words = text.split()
            self.assertEqual(processing.as_sentence(words), words)
            tokens = [processing.Token(word) for word in words]
            self.assertEqual(list(processing.sentences(tokens)), [words])
Example #5
0
    def test_not_terminated(self):
        """Inputs without any terminal token must raise "Non-terminated".

        Covers a bare word list plus lists that end in an open quote, a
        closed `` ... '' quote, and a trailing ``"``. Both
        ``processing.as_sentence`` and ``processing.sentences`` are checked.
        """
        pattern = "Non-terminated"
        cases = (
            "i eat",
            "i eat `` food",
            "i eat `` food ''",
            "i eat `` food \"",
        )
        for text in cases:
            words = text.split()
            with self.assertRaisesRegex(ValueError, pattern):
                processing.as_sentence(words)
            with self.assertRaisesRegex(ValueError, pattern):
                # Drain the result so any lazily raised error surfaces here.
                tokens = [processing.Token(word) for word in words]
                list(processing.sentences(tokens))
Example #6
0
    def test_symbols(self):
        """Check that assorted symbol tokens inside a sentence are accepted.

        Every input is a ``.``-terminated word list with symbol tokens mixed
        in. ``processing.as_sentence`` must return the words unchanged and
        ``processing.sentences`` must yield that word list as its only
        sentence.
        """
        cases = (
            "i - eat { food < .",
            "i + eat } food > .",
            # The backslash is escaped explicitly: a bare "\ " is an invalid
            # escape sequence that Python warns about (and plans to reject).
            # The resulting token is still a single backslash.
            "i / eat | food \\ .",
            "i ` eat ~ food @ .",
            "i # eat $ food % .",
            "i ^ eat & food * .",
            "i 't eat : food ; .",
            "i , eat , food * .",
        )
        for text in cases:
            words = text.split()
            self.assertEqual(processing.as_sentence(words), words)
            tokens = [processing.Token(word) for word in words]
            self.assertEqual(list(processing.sentences(tokens)), [words])
Example #7
0
    def test_quoted(self):
        """Exercise ``'`` and ``"`` tokens around and after a terminal.

        Accepted inputs must round-trip through ``processing.as_sentence``
        and come back from ``processing.sentences`` as a single sentence.
        Rejected inputs must raise ``ValueError``; the expected message can
        differ between the two entry points.
        """

        def check_accepted(text):
            words = text.split()
            self.assertEqual(processing.as_sentence(words), words)
            tokens = [processing.Token(word) for word in words]
            self.assertEqual(list(processing.sentences(tokens)), [words])

        def check_rejected(text, direct_pattern, streamed_pattern):
            words = text.split()
            with self.assertRaisesRegex(ValueError, direct_pattern):
                processing.as_sentence(words)
            with self.assertRaisesRegex(ValueError, streamed_pattern):
                # Drain the result so any lazily raised error surfaces here.
                tokens = [processing.Token(word) for word in words]
                list(processing.sentences(tokens))

        check_accepted("i eat ' food ' .")
        check_rejected(
            "i eat ' food . '", "Early-terminated", "Non-terminated")
        check_rejected("'", "Non-terminated", "Non-terminated")
        check_accepted("i eat ' food .")
        check_accepted("i eat \" food \" .")
        check_accepted("i eat \" food . \"")
Example #8
0
 def test_eventually_terminal(self):
     """A ``.`` nested deep in quotes and brackets is one whole sentence.

     The terminal is followed only by the matching closers; the full word
     list must be returned unchanged and yielded as a single sentence.
     """
     text = "i eat `` ( [ ( `` food . '' ) ] ) ''"
     words = text.split()
     self.assertEqual(processing.as_sentence(words), words)
     tokens = [processing.Token(word) for word in words]
     self.assertEqual(list(processing.sentences(tokens)), [words])
Example #9
0
    def test_not_closed(self):
        """Openers that are never properly closed must raise ``ValueError``.

        Each case is ``(text, as_sentence pattern, sentences pattern)``; the
        two entry points may be expected to report different messages for
        the same input.
        """
        non_terminated = "Non-terminated"
        early_terminated = "Early-terminated"
        mismatched = "Un-paired open/close"
        cases = (
            # Determined by abrupt non-termination
            ("i \" eat .", non_terminated, non_terminated),
            ("i `` eat .", non_terminated, non_terminated),
            ("i ( eat .", non_terminated, non_terminated),
            ("i [ eat .", non_terminated, non_terminated),
            # Determined by continuing non-termination
            ("i ' eat . nothing", early_terminated, non_terminated),
            ("i \" eat . nothing", non_terminated, non_terminated),
            ("i `` eat . nothing", non_terminated, non_terminated),
            ("i ( eat . nothing", non_terminated, non_terminated),
            ("i [ eat . nothing", non_terminated, non_terminated),
            # Determined by other closing.
            ("i \" eat '' .", mismatched, mismatched),
            ("i `` eat ] .", mismatched, mismatched),
            ("i ( eat '' .", mismatched, mismatched),
            ("i [ eat ) .", mismatched, mismatched),
        )
        for text, direct_pattern, streamed_pattern in cases:
            words = text.split()
            with self.assertRaisesRegex(ValueError, direct_pattern):
                processing.as_sentence(words)
            with self.assertRaisesRegex(ValueError, streamed_pattern):
                # Drain the result so any lazily raised error surfaces here.
                tokens = [processing.Token(word) for word in words]
                list(processing.sentences(tokens))
Example #10
0
 def test_apostrophe(self):
     """A lone ``'`` token before a ``"``-quoted span is still accepted.

     The word list must be returned unchanged by ``as_sentence`` and be
     yielded as the only sentence by ``sentences``.
     """
     text = "the teachers ' students are \" junior congress members \" ."
     words = text.split()
     self.assertEqual(processing.as_sentence(words), words)
     tokens = [processing.Token(word) for word in words]
     self.assertEqual(list(processing.sentences(tokens)), [words])