def test_googlenewsformat_empty1(self):
    # Every span in this fixture is either class "x-nc-sel5" or "bodysmall";
    # the test expects GoogleNewsFormat to extract no words from it at all.
    # The markup is kept as one literal so each character (including the
    # euro sign) reaches the parser unchanged.
    markup = ''' <p> <span class="x-nc-sel5"> Headline here (not content) </span> <span class="bodysmall"> <span class="x-nc-sel5"> not content <span class="x-nc-sel5"> no content here€ </span> not content </span> </span> Not content </p> '''
    parsed = GoogleNewsFormat(markup, 'utf8')

    # Both views of the document must be empty: the ordered word sequence
    # and the bag-of-words mapping.
    no_words = []
    no_counts = {}
    self.assertEqual(parsed.get_word_seq(), no_words)
    self.assertEqual(parsed.get_bow(), no_counts)
def test_googlenewsformat(self):
    # Fixture with a "x-nc-sel1" headline span and nested "x-nc-sel2" spans
    # inside a "bodysmall" wrapper, plus trailing text outside any span.
    # The markup is kept as one literal so each character (including the
    # euro sign) reaches the parser unchanged.
    markup = ''' <p> <span class="x-nc-sel1"> Headline here </span> <span class="bodysmall"> <span class="x-nc-sel2"> Double content <span class="x-nc-sel2"> Text content here€ </span> content </span> </span> Not content </p> '''
    parsed = GoogleNewsFormat(markup, 'utf8')

    # Expected tokens are lower-cased and in document order; the trailing
    # "Not content" text and the euro sign do not appear in the result.
    expected_words = [
        'headline',
        'here',
        'double',
        'content',
        'text',
        'content',
        'here',
        'content',
    ]
    self.assertEqual(parsed.get_word_seq(), expected_words)

    # The bag of words holds the per-token counts of the sequence above.
    expected_counts = {
        'headline': 1,
        'here': 2,
        'double': 1,
        'content': 3,
        'text': 1,
    }
    self.assertEqual(parsed.get_bow(), expected_counts)
def test_googlenewsformat(self):
    # Parse a snippet that mixes a "x-nc-sel1" headline span, nested
    # "x-nc-sel2" spans and bare trailing text, then check both the ordered
    # word sequence and the word counts that GoogleNewsFormat reports.
    # The markup stays a single literal so its exact bytes are preserved.
    html = ''' <p> <span class="x-nc-sel1"> Headline here </span> <span class="bodysmall"> <span class="x-nc-sel2"> Double content <span class="x-nc-sel2"> Text content here€ </span> content </span> </span> Not content </p> '''
    doc = GoogleNewsFormat(html, 'utf8')

    # Ordered, lower-cased tokens expected from the headline and body spans.
    wanted_seq = 'headline here double content text content here content'.split()
    self.assertEqual(doc.get_word_seq(), wanted_seq)

    # Occurrence count of each distinct token in the sequence above.
    wanted_bow = dict(headline=1, here=2, double=1, content=3, text=1)
    self.assertEqual(doc.get_bow(), wanted_bow)
def test_googlenewsformat_empty2(self):
    # An empty document (declared as ascii) must yield an empty word
    # sequence and an empty bag of words.
    parsed = GoogleNewsFormat('', 'ascii')

    no_words = []
    no_counts = {}
    self.assertEqual(parsed.get_word_seq(), no_words)
    self.assertEqual(parsed.get_bow(), no_counts)
def test_googlenewsformat_empty2(self):
    # Feeding GoogleNewsFormat an empty string with the 'ascii' encoding
    # is expected to produce no words and no counts.
    empty_doc = GoogleNewsFormat('', 'ascii')

    word_seq = empty_doc.get_word_seq()
    self.assertEqual(word_seq, [])

    bag = empty_doc.get_bow()
    self.assertEqual(bag, {})