예제 #1
0
class TestViterbi(unittest.TestCase):
    """Viterbi decoding check against a tagger trained on a tiny corpus."""

    def setUp(self):
        """Create a POSTagger from the in-memory corpus and train it."""
        self.training = 'I/PRO want/V to/TO race/V ./. I/PRO like/V cats/N ./.'
        self.sentence = 'I want to race.'
        # The tagger receives a list of file-like objects; here a single
        # in-memory stream wrapping the training text.
        corpus = StringIO(self.training)
        self.pos_tagger = POSTagger([corpus])
        self.pos_tagger.train()

    def test(self):
        """viterbi() returns the best tag sequence for 'I want to race.'"""
        decoded = self.pos_tagger.viterbi(self.sentence)
        self.assertListEqual(['START', 'PRO', 'V', 'TO', 'V', '.'], decoded)
예제 #2
0
class TestViterbi(unittest.TestCase):
  """Checks POSTagger.viterbi on one sentence after training on a small corpus."""

  def setUp(self):
    """Prepare the corpus text, the sentence to decode, and a trained tagger."""
    self.sentence = 'I want to race.'
    self.training = u'I/PRO want/V to/TO race/V ./. I/PRO like/V cats/N ./.'
    # POSTagger is handed a one-element list holding an in-memory stream.
    streams = [StringIO(self.training)]
    self.pos_tagger = POSTagger(streams)
    self.pos_tagger.train()

  def test(self):
    """the decoded tag sequence for 'I want to race.' matches the expectation"""
    actual_tags = self.pos_tagger.viterbi(self.sentence)
    wanted_tags = ['START', 'PRO', 'V', 'TO', 'V', '.']
    self.assertListEqual(wanted_tags, actual_tags)
예제 #3
0
class TestPOSTagger(unittest.TestCase):
    """Exercise POSTagger probability estimates and Viterbi decoding.

    Every test method carries the ``test_`` prefix so that the default
    unittest loader discovers and runs it.
    """

    def setUp(self):
        """Train a POSTagger on a small in-memory tagged stream."""
        self.stream = io.StringIO("A/B C/D C/D A/D A/B ./.")
        # Pass the stream itself: io.StringIO() only accepts str or None as
        # its initial value, so re-wrapping the stream raises TypeError.
        self.pos_tagger = POSTagger([self.stream])
        self.pos_tagger.train()

    def test_calculates_probability_of_word_and_tag(self):
        """word_tag_probability matches the counts in the training stream."""
        tagger = self.pos_tagger

        # The pair Z/Z never appears in the training stream.
        self.assertAlmostEqual(tagger.word_tag_probability('Z', 'Z'), 0)

        # A/B happens 2 times and tag B happens 2 times, therefore 100%.
        self.assertAlmostEqual(tagger.word_tag_probability('A', 'B'), 1)

        # A/D happens 1 time and tag D happens 3 times, so 1/3.
        self.assertAlmostEqual(tagger.word_tag_probability("A", "D"),
                               1.0 / 3.0)

        # START/START is expected to be counted once against one START tag;
        # presumably train() injects the START token -- confirm in POSTagger.
        self.assertAlmostEqual(
            tagger.word_tag_probability("START", "START"), 1)

        # ./. happens 1 time and tag . happens 1 time, so 1.
        self.assertAlmostEqual(tagger.word_tag_probability(".", "."), 1)

    def test_calculates_probability_of_words_and_tags(self):
        """probability_of_word_tag equals the product of its factors.

        The expected value is the product of the four tag_probability
        values and the four word_tag_probability values listed below.
        """
        # Imported locally because reduce is not a builtin on Python 3.
        from functools import reduce
        from operator import mul

        words = ['START', 'A', 'C', 'A', 'A', '.']
        tags = ['START', 'B', 'D', 'D', 'B', '.']
        tagger = self.pos_tagger

        tag_probabilities = reduce(mul, [
            tagger.tag_probability('B', 'D'),
            tagger.tag_probability('D', 'D'),
            tagger.tag_probability('D', 'B'),
            tagger.tag_probability('B', '.'),
        ])

        word_probabilities = reduce(mul, [
            tagger.word_tag_probability("A", "B"),  # 1
            tagger.word_tag_probability("C", "D"),
            tagger.word_tag_probability("A", "D"),
            tagger.word_tag_probability("A", "B"),  # 1
        ])

        expected = word_probabilities * tag_probabilities

        self.assertAlmostEqual(
            tagger.probability_of_word_tag(words, tags), expected)

    def test_viterbi(self):
        """viterbi() returns the best tag sequence for 'I want to race.'"""
        training = "I/PRO want/V to/TO race/V ./. I/PRO like/V cats/N ./."
        sentence = 'I want to race.'
        # The tagger built in setUp is trained on the A/B/C/D stream; this
        # expectation needs a tagger trained on the sentence corpus instead.
        tagger = POSTagger([io.StringIO(training)])
        tagger.train()
        expected = ['START', 'PRO', 'V', 'TO', 'V', '.']
        self.assertEqual(tagger.viterbi(sentence), expected)
예제 #4
0
class TestProbabilityCalculation(unittest.TestCase):
    """Probability checks for a POSTagger trained on a short tagged stream."""

    def setUp(self):
        """Build the tagger from an in-memory stream and train it."""
        self.stream = u'A/B C/D C/D A/D A/B ./.'
        source = StringIO(self.stream)
        self.pos_tagger = POSTagger([source])
        self.pos_tagger.train()

    def test1(self):
        """tag_probability yields the expected tag transition values"""
        # Each row: (expected value, first argument, second argument).
        cases = (
            (0, 'Z', 'Z'),
            (2.0 / 3, 'D', 'D'),
            (1, 'START', 'B'),
            (0.5, 'B', 'D'),
            (0, '.', 'D'),
        )
        for wanted, first_tag, second_tag in cases:
            actual = self.pos_tagger.tag_probability(first_tag, second_tag)
            self.assertAlmostEqual(wanted, actual)

    def test2(self):
        """probability_of_word_tag equals the product of its factors"""
        words = ['START', 'A', 'C', 'A', 'A', '.']
        tags = ['START', 'B', 'D', 'D', 'B', '.']
        tagger = self.pos_tagger

        tag_pairs = [('B', 'D'), ('D', 'D'), ('D', 'B'), ('B', '.')]
        word_pairs = [('A', 'B'), ('C', 'D'), ('A', 'D'), ('A', 'B')]

        t1, t2, t3, t4 = [tagger.tag_probability(a, b) for a, b in tag_pairs]
        w1, w2, w3, w4 = [
            tagger.word_tag_probability(word, tag) for word, tag in word_pairs
        ]

        # Multiply left to right, word factors first, then tag factors.
        expected = (w1 * w2 * w3 * w4) * (t1 * t2 * t3 * t4)
        result = tagger.probability_of_word_tag(words, tags)
        self.assertAlmostEqual(expected, result)

    def test3(self):
        """word_tag_probability yields the expected word-given-tag values"""
        # Each row: (expected value, word, tag).
        cases = (
            (0, 'Z', 'Z'),
            (1, 'A', 'B'),
            (1.0 / 3, 'A', 'D'),
            (1, '.', '.'),
        )
        for wanted, word, tag in cases:
            actual = self.pos_tagger.word_tag_probability(word, tag)
            self.assertAlmostEqual(wanted, actual)
예제 #5
0
class TestProbabilityCalculation(unittest.TestCase):
  """Verifies POSTagger probability methods after training on a short stream."""

  def setUp(self):
    """Wrap the tagged text in a stream, hand it to POSTagger, and train."""
    self.stream = u'A/B C/D C/D A/D A/B ./.'
    streams = [StringIO(self.stream)]
    self.pos_tagger = POSTagger(streams)
    self.pos_tagger.train()

  def test1(self):
    """tag transition probabilities match the expected values"""
    tagger = self.pos_tagger

    never_seen = tagger.tag_probability('Z', 'Z')
    self.assertAlmostEqual(0, never_seen)

    d_to_d = tagger.tag_probability('D', 'D')
    self.assertAlmostEqual(2.0 / 3, d_to_d)

    start_to_b = tagger.tag_probability('START', 'B')
    self.assertAlmostEqual(1, start_to_b)

    b_to_d = tagger.tag_probability('B', 'D')
    self.assertAlmostEqual(0.5, b_to_d)

    dot_to_d = tagger.tag_probability('.', 'D')
    self.assertAlmostEqual(0, dot_to_d)

  def test2(self):
    """the probability of a word/tag sequence is the product of its factors"""
    tagger = self.pos_tagger
    words = ['START', 'A', 'C', 'A', 'A', '.']
    tags = ['START', 'B', 'D', 'D', 'B', '.']

    tag_probabilities = (
        tagger.tag_probability('B', 'D')
        * tagger.tag_probability('D', 'D')
        * tagger.tag_probability('D', 'B')
        * tagger.tag_probability('B', '.')
    )
    word_probabilities = (
        tagger.word_tag_probability('A', 'B')
        * tagger.word_tag_probability('C', 'D')
        * tagger.word_tag_probability('A', 'D')
        * tagger.word_tag_probability('A', 'B')
    )

    expected = word_probabilities * tag_probabilities
    result = tagger.probability_of_word_tag(words, tags)
    self.assertAlmostEqual(expected, result)

  def test3(self):
    """the probability of a word given a tag matches the expected values"""
    tagger = self.pos_tagger

    unseen_pair = tagger.word_tag_probability('Z', 'Z')
    self.assertAlmostEqual(0, unseen_pair)

    a_given_b = tagger.word_tag_probability('A', 'B')
    self.assertAlmostEqual(1, a_given_b)

    a_given_d = tagger.word_tag_probability('A', 'D')
    self.assertAlmostEqual(1.0 / 3, a_given_d)

    dot_given_dot = tagger.word_tag_probability('.', '.')
    self.assertAlmostEqual(1, dot_given_dot)