def test_googlenewsformat_empty1(self):
     """Markup whose only ``x-nc-sel*`` spans are ``x-nc-sel5`` yields nothing.

     The fixture wraps every piece of text either in an ``x-nc-sel5`` span
     (including a nested one) or in no ``x-nc-sel*`` span at all, and the
     test expects both the word sequence and the bag of words to be empty.
     """
     # NOTE: the literal is part of the fixture -- trailing spaces and the
     # non-ASCII euro sign are intentional and must be kept as they are.
     # NOTE(review): presumably ``x-nc-sel5`` is not treated as a content
     # class by GoogleNewsFormat -- confirm against its implementation.
     markup = '''
     <p>
     <span class="x-nc-sel5"> 
         Headline here (not content)
     </span>
     <span class="bodysmall">
         <span class="x-nc-sel5"> 
             not content 
             <span class="x-nc-sel5"> 
                 no content here€
             </span>
             not content
         </span>
     </span> 
     Not content
     </p>
     '''
     gn = GoogleNewsFormat(markup, 'utf8')
     self.assertEqual(gn.get_word_seq(), [])
     self.assertEqual(gn.get_bow(), {})
 def test_googlenewsformat(self):
     """Text inside ``x-nc-sel1`` / ``x-nc-sel2`` spans is extracted in order.

     The fixture has a headline in an ``x-nc-sel1`` span, body text in an
     ``x-nc-sel2`` span that itself contains a nested ``x-nc-sel2`` span,
     and a trailing "Not content" outside any ``x-nc-sel*`` span.  The
     expected output is the lower-cased words of the span text in document
     order (nested text counted once), without the trailing text.
     """
     # NOTE: the literal is part of the fixture -- trailing spaces and the
     # non-ASCII euro sign are intentional and must be kept as they are.
     markup = '''
     <p>
     <span class="x-nc-sel1"> 
         Headline here
     </span>
     <span class="bodysmall">
         <span class="x-nc-sel2"> 
             Double content 
             <span class="x-nc-sel2"> 
                 Text content here€
             </span>
             content
         </span>
     </span> 
     Not content
     </p>
     '''
     expected_words = [
         'headline',
         'here',
         'double',
         'content',
         'text',
         'content',
         'here',
         'content',
     ]
     # Per-word occurrence counts of ``expected_words``.
     expected_bow = {
         'headline': 1,
         'here': 2,
         'double': 1,
         'content': 3,
         'text': 1,
     }
     gn = GoogleNewsFormat(markup, 'utf8')
     self.assertEqual(gn.get_word_seq(), expected_words)
     self.assertEqual(gn.get_bow(), expected_bow)
 def test_googlenewsformat_empty2(self):
     """An empty input string yields no words and an empty bag of words."""
     gn = GoogleNewsFormat('', 'ascii')
     self.assertEqual(gn.get_word_seq(), [])
     self.assertEqual(gn.get_bow(), {})