Example #1
0
 def get_entries(self, soup):
   """
   Yield one entry per article link found in `soup`.

   Every <a> tag carrying the 'link-article' class produces a dict with
   'url' (self.url followed by the tag's href), 'headline' (the tag's
   stripped text) and 'created_at' (utils.now()).

   Anchors without an href attribute are skipped: concatenating self.url
   with None would raise a TypeError and abort the whole iteration.
   """
   for a_tag in soup.find_all('a', {'class': 'link-article'}):
     href = a_tag.attrs.get('href')
     if href is None:
       continue
     yield {
       'url': self.url + href,
       'headline': a_tag.text.strip(),
       'created_at': utils.now()
     }
Example #2
0
 def get_created(self, obj):
     """
     Return the earliest candidate time, or the current time if none exist.

     Candidates come from self.get_candidates(obj, DATE_CANDIDATE_JSONPATH).
     The smallest one is converted with utils.struct_time_to_ts; when there
     are no candidates, utils.now() is returned instead.
     """
     candidates = self.get_candidates(obj, DATE_CANDIDATE_JSONPATH)
     if not candidates:
       return utils.now()
     # min() picks the earliest candidate in a single pass; sorting the
     # whole list only to read its first element is unnecessary work.
     return utils.struct_time_to_ts(min(candidates))
Example #3
0
 def get_entries(self, soup=None):
   """
   Yield one entry per captioned link found in self.soup.

   The `soup` argument is accepted but ignored: self.soup is always the
   document that gets searched.

   Every <div> with the 'caption' or 'caption-dark' class contributes a
   dict with 'url' (self.url followed by the href of the div's first <a>),
   'headline' (the first line of that link's stripped text) and
   'created_at' (utils.now()).

   Divs with no <a> tag, or whose <a> has no href, are skipped so that a
   single malformed caption does not abort the iteration with an
   AttributeError or TypeError.
   """
   for div in self.soup.find_all('div', {'class': ['caption','caption-dark']}):
     a_tag = div.find('a')
     if a_tag is None:
       continue
     href = a_tag.attrs.get('href')
     if href is None:
       continue
     # Only the first line of the link text is the headline.
     first_line = a_tag.text.strip().split('\n')[0]
     yield {
       'url': self.url + href,
       'headline': first_line.strip(),
       'created_at': utils.now()
     }
Example #4
0
  def get_entries(self, soup):
    """
    Yield one entry per 'post-box' <div> found in `soup`.

    Each entry is a dict with 'url' (the href of the div's first <a>, or
    None when that tag has no href), 'headline' (the stripped text of the
    div's first <h4>) and 'created_at' (utils.now()).

    Post boxes lacking either the <a> or the <h4> tag are skipped instead
    of raising AttributeError on the missing tag.
    """
    for div in soup.find_all('div', {'class': 'post-box'}):
      a_tag = div.find('a')
      hed_tag = div.find('h4')
      if a_tag is None or hed_tag is None:
        continue
      yield {
        'url': a_tag.attrs.get('href'),
        'headline': hed_tag.text.strip(),
        'created_at': utils.now()
      }
Example #5
0
 def parse_entry(self, entry):
     """
     Turn a single RSS feed entry into a record dict.

     The record holds the entry's 'url' (from self.get_url), its
     'headline' (from self.get_title) and a 'created_at' value taken
     from utils.now().
     """
     record = {'url': self.get_url(entry)}
     record['headline'] = self.get_title(entry)
     record['created_at'] = utils.now()
     return record
Example #6
0
 def parse_entries(self, soup):
   """
   Parse entries from soup.

   Every <a> tag with the 'box-link' class yields a dict with 'url' (the
   tag's href, or None when absent), 'headline' (the stripped text of the
   first nested <strong> with class 'post-title' or 'title-news') and
   'created_at' (utils.now()).

   Links that contain no such <strong> tag are skipped rather than
   raising AttributeError on the missing headline.
   """
   for a_tag in soup.find_all('a', {'class':'box-link'}):
     hed_tag = a_tag.find('strong', {'class':['post-title', 'title-news']})
     if hed_tag is None:
       continue
     yield {
       'url': a_tag.attrs.get('href'),
       'headline': hed_tag.text.strip(),
       'created_at': utils.now()
     }