def setUp(self):
    """Attach a vocabulary, an HMM segmenter and a MaxProbSegmenter to *self*.

    The vocabulary and the HMM model are loaded from the relative paths
    passed to their ``load`` methods below.
    """
    # NOTE(review): this module-level function has the same body as
    # MaxProbSegmenterTest.setUp and looks like a stray copy -- confirm
    # whether anything still calls it.
    vocabulary = self.vocabulary = Vocabulary()
    vocabulary.load('../data/vocabulary.dat')
    hmm_model = self.hmm_segmenter = HMMSegmenter()
    hmm_model.load('../data/hmm_segment_model')
    self.max_prob_segmenter = MaxProbSegmenter(vocabulary, hmm_model)
class MaxProbSegmenterTest(unittest.TestCase):
    """Smoke test that runs MaxProbSegmenter over the lines of a document.

    The model, vocabulary and test-data paths are all relative, so the
    tests have to be started from a directory where they resolve.
    """

    def setUp(self):
        """Load the vocabulary and HMM model and build the segmenter."""
        self.vocabulary = Vocabulary()
        self.vocabulary.load('../data/vocabulary.dat')
        self.hmm_segmenter = HMMSegmenter()
        self.hmm_segmenter.load('../data/hmm_segment_model')
        self.max_prob_segmenter = MaxProbSegmenter(
                self.vocabulary, self.hmm_segmenter)

    def call_segment(self, text):
        """Segment *text* and write the resulting words to stdout.

        Every word is followed by '/' and a tab, and the line is ended
        with a single newline.
        """
        # Imported here so the helper does not rely on the module's
        # top-level import block.
        import sys

        # Plain writes instead of the ``print`` statement: the emitted
        # characters are the same, and the statement form is a syntax
        # error on Python 3.
        for word in self.max_prob_segmenter.segment(text):
            sys.stdout.write(word + '/\t')
        sys.stdout.write('\n')

    def test_segment(self):
        """Segment every line of testdata/document.dat.

        No assertions are made; the test passes as long as segmenting and
        printing each stripped line raises no exception.
        """
        # ``with`` closes the file even when segmentation raises part-way
        # through; the old explicit close() was skipped in that case.
        with open('testdata/document.dat', 'rb') as fp:
            for text in fp:
                self.call_segment(text.strip())