def test_cleanevalformat(self):
    # Feeds CleanEvalFormat a document made of a URL header followed by
    # <p>/<h>/<l> segments (lower- and upper-case markers) and checks that
    # both the word sequence and the bag-of-words contain only the words.
    #
    # NOTE(review): the fixture is a single line in this view; if the
    # original literal spanned several lines, restore its line breaks.
    document = ''' URL: http://childparenting.about.com/b/archives.htm <p> this is <h> cleaneval <l> format <P> this is <H> cleaneval <L> format '''
    parsed = CleanEvalFormat(document)

    # The same four words appear twice: once per marker case.
    expected_words = [
        'this', 'is', 'cleaneval', 'format',
        'this', 'is', 'cleaneval', 'format',
    ]
    expected_counts = {'this': 2, 'is': 2, 'cleaneval': 2, 'format': 2}

    self.assertEqual(parsed.get_word_seq(), expected_words)
    self.assertEqual(parsed.get_bow(), expected_counts)
def test_cleanevalformat(self):
    # Parses a tagged document (URL header plus <p>/<h>/<l> segments in both
    # letter cases) and verifies the extracted word list, then the word
    # frequencies.
    #
    # NOTE(review): the fixture is a single line in this view; if the
    # original literal spanned several lines, restore its line breaks.
    # NOTE(review): another definition with this exact name appears earlier
    # in this view; within one class the later one wins -- confirm intent.
    tagged_text = ''' URL: http://childparenting.about.com/b/archives.htm <p> this is <h> cleaneval <l> format <P> this is <H> cleaneval <L> format '''
    fmt = CleanEvalFormat(tagged_text)

    observed_words = fmt.get_word_seq()
    self.assertEqual(
        observed_words,
        [
            'this',
            'is',
            'cleaneval',
            'format',
            'this',
            'is',
            'cleaneval',
            'format',
        ],
    )

    observed_bow = fmt.get_bow()
    self.assertEqual(
        observed_bow,
        {
            'this': 2,
            'is': 2,
            'cleaneval': 2,
            'format': 2,
        },
    )
def test_cleanevalformat_empty(self):
    # A document consisting of nothing but the URL header must produce an
    # empty word sequence and an empty bag-of-words.
    #
    # NOTE(review): the fixture is a single line in this view; if the
    # original literal ended with a newline rather than a space, restore it.
    header_only = '''URL: http://childparenting.about.com/b/archives.htm '''
    parsed = CleanEvalFormat(header_only)

    no_words = []
    no_counts = {}

    self.assertEqual(parsed.get_word_seq(), no_words)
    self.assertEqual(parsed.get_bow(), no_counts)