Esempio n. 1
0
    def test_three_dots_process(self):
        """Run ``process`` over sentences using patterns written with "...".

        For every sentence, the second element of the value returned by
        ``PhrasalRecognizer.process`` is compared with the list of patterns
        expected for that sentence (presumably the patterns that matched --
        confirm against PhrasalRecognizer).
        """
        patterns = [
            u"tie...to...",
            u"used to...",
            u"wash...face",
            u"...weeks old",
            u"I/He/She... was(not) going to…",
            u"or...or...a...a",
        ]

        matcher = PhrasalRecognizer(patterns)
        matcher.inspect = True

        # Expectations are sliced out of ``patterns`` only after the
        # recognizer has been constructed from that same list.
        tie_and_or = patterns[0:1] + patterns[5:6]
        tie_and_or.sort()
        expected_by_sentence = {
            u"To fasten or secure with or as if with a cord, rope, or strap: tied the kite to a post; tie up a bundle.":
            tie_and_or,
            u"I am used to hitchhiking":
            patterns[1:2],
            u"There are specific things to keep in mind when washing your face":
            patterns[2:3],
            u"The Best Foods for 6 Week Old Puppies | Dog Care - The Daily":
            patterns[3:4],
            u"He was(not) going to say hello.":
            patterns[4:5],
            u"To fasten or secure with or as if with a cord a cake":
            patterns[5:6],
        }

        # Sentences are visited in descending sorted order.
        for sentence in sorted(expected_by_sentence, reverse=True):
            found = matcher.process(sentence)[1]
            self.assertEqual(found, expected_by_sentence[sentence])
Esempio n. 2
0
    def test_basic_process(self):
        """Check the full return value of ``process`` for plain phrases.

        ``process`` is called with ``inspect=True`` and ``replace=True``; the
        returned value is compared with a two-item list holding the expected
        text and the expected list of phrases for each input sentence.
        """
        vocabulary = [
            u"ruby python",
            u"have lunch",
            u"a lot of",
            u"Don't",
            u"Don't have to",
        ]
        matcher = PhrasalRecognizer(vocabulary)
        matcher.inspect = True

        expected_by_sentence = {
            u"ruby python which one": [u"which one", [u"ruby python"]],
            u"It’s 12:00 now. Let’s have lunch together.":
            [u"It’s 12:00 now. Let’s together.", [u"have lunch"]],
            u"There are a lot of signs                 the grass.":
            [u"There are signs                 the grass.", [u"a lot of"]],
            u"Don't    ": [u"", [u"Don't"]],
            # A phrase that occurs twice must not be replaced twice.
            u"Don't have to        Don't have to    ":
            [u"Don't have to", [u"Don't", u"Don't have to"]],
            # TODO: disabled case, possibly an extraction bug -- investigate:
            #   u"Don't talk in class, Don't read in bed, Don't spill the sugar on the table"
            #   -> [u"Don't talk in class, Don't read in bed,", [u"Don't"]]
        }

        # Sentences are visited in descending sorted order.
        for sentence in sorted(expected_by_sentence, reverse=True):
            outcome = matcher.process(sentence, inspect=True, replace=True)
            self.assertEqual(outcome, expected_by_sentence[sentence])