Esempio n. 1
0
    def test_three_dots_process(self):
        phrases = [
            u"tie...to...",
            u"used to...",
            u"wash...face",
            u"...weeks old",
            u"I/He/She... was(not) going to…",
            u"or...or...a...a",
        ]

        recognizer = PhrasalRecognizer(phrases)
        recognizer.inspect = True

        data = {
            u"To fasten or secure with or as if with a cord, rope, or strap: tied the kite to a post; tie up a bundle.":
            sorted(phrases[0:1] + phrases[5:6]),
            u"I am used to hitchhiking":
            phrases[1:2],
            u"There are specific things to keep in mind when washing your face":
            phrases[2:3],
            u"The Best Foods for 6 Week Old Puppies | Dog Care - The Daily":
            phrases[3:4],
            u"He was(not) going to say hello.":
            phrases[4:5],
            u"To fasten or secure with or as if with a cord a cake":
            phrases[5:6],
        }

        for content in reversed(sorted(data.keys())):
            result = recognizer.process(content)
            self.assertEqual(result[1], data[content])
Esempio n. 2
0
    def test_basic_process(self):
        recognizer = PhrasalRecognizer([
            u"ruby python",
            u"have lunch",
            u"a lot of",
            u"Don't",
            u"Don't have to",
        ])
        recognizer.inspect = True

        data = {
            u"ruby python which one": [u"which one", [u"ruby python"]],
            u"It’s 12:00 now. Let’s have lunch together.":
            [u"It’s 12:00 now. Let’s together.", [u"have lunch"]],
            u"There are a lot of signs                 the grass.":
            [u"There are signs                 the grass.", [u"a lot of"]],
            u"Don't    ": [u"", [u"Don't"]],
            # dont replace twice
            u"Don't have to        Don't have to    ":
            [u"Don't have to", [u"Don't", u"Don't have to"]],
            # u"Don't talk in class, Don't read in bed, Don't spill the sugar on the table" : [u"Don't talk in class, Don't read in bed,", [u"Don't"]],
        }

        for content in reversed(sorted(data.keys())):
            result = recognizer.process(content, inspect=True, replace=True)
            # if 'talk' in result[0]: import pdb; pdb.set_trace() # TODO some extract bugs?
            self.assertEqual(result, data[content])
Esempio n. 3
0
    def test_three_dots_process(self):
        phrases = [
            u"tie...to...",
            u"used to...",
            u"wash...face",
            u"...weeks old",
            u"I/He/She... was(not) going to…",
            u"or...or...a...a",
        ]

        recognizer = PhrasalRecognizer(phrases)
        recognizer.inspect = True

        data = {
            u"To fasten or secure with or as if with a cord, rope, or strap: tied the kite to a post; tie up a bundle.": sorted(phrases[0:1] + phrases[5:6]),
            u"I am used to hitchhiking": phrases[1:2],
            u"There are specific things to keep in mind when washing your face": phrases[2:3],
            u"The Best Foods for 6 Week Old Puppies | Dog Care - The Daily": phrases[3:4],
            u"He was(not) going to say hello.": phrases[4:5],
            u"To fasten or secure with or as if with a cord a cake": phrases[5:6],
        }

        for content in reversed(sorted(data.keys())):
            result = recognizer.process(content)
            self.assertEqual(result[1], data[content])
Esempio n. 4
0
    def test_basic_process(self):
        recognizer = PhrasalRecognizer([
            u"ruby python",
            u"have lunch",
            u"a lot of",

            u"Don't",
            u"Don't have to",
        ])
        recognizer.inspect = True

        data = {
            u"ruby python which one": [u"which one", [u"ruby python"]],
            u"It’s 12:00 now. Let’s have lunch together.": [u"It’s 12:00 now. Let’s together.", [u"have lunch"]],
            u"There are a lot of signs                 the grass.": [u"There are signs                 the grass.", [u"a lot of"]],
            u"Don't    ": [u"", [u"Don't"]],
            # dont replace twice
            u"Don't have to        Don't have to    ": [u"Don't have to", [u"Don't", u"Don't have to"]],
            # u"Don't talk in class, Don't read in bed, Don't spill the sugar on the table" : [u"Don't talk in class, Don't read in bed,", [u"Don't"]],
        }

        for content in reversed(sorted(data.keys())):
            result = recognizer.process(content, inspect=True, replace=True)
            # if 'talk' in result[0]: import pdb; pdb.set_trace() # TODO some extract bugs?
            self.assertEqual(result, data[content])
Esempio n. 5
0
    def test_split(self):
        result1 = PhrasalRecognizer.split(u"I/We/They… always/usually/often/sometimes…")
        self.assertEqual(result1[0], "I ... always ...")
        self.assertEqual(result1[7], "We ... sometimes ...")

        result2 = PhrasalRecognizer.split(u"My name's...=My name is...")
        self.assertEqual(result2[1], u"My name be ...")

        result3 = PhrasalRecognizer.split(u"Usually /Sometimes…I go…")
        self.assertEqual(result3[0], u"Usually ... I go ...")
Esempio n. 6
0
    def test_split(self):
        result1 = PhrasalRecognizer.split(
            u"I/We/They… always/usually/often/sometimes…")
        self.assertEqual(result1[0], "I ... always ...")
        self.assertEqual(result1[7], "We ... sometimes ...")

        result2 = PhrasalRecognizer.split(u"My name's...=My name is...")
        self.assertEqual(result2[1], u"My name be ...")

        result3 = PhrasalRecognizer.split(u"Usually /Sometimes…I go…")
        self.assertEqual(result3[0], u"Usually ... I go ...")