Example #1
0
 def get_title(self, news_row):
     """Return the title text of a news row as one concatenated string.

     The string value of the link inside the row's "title" div is read,
     stripped, and passed through ``utils.filter_spaces``; the resulting
     fragments are stripped individually and joined without a separator.

     :param news_row: selector for a single news entry (must support
         ``xpath(...).extract_first()``).
     :return: the concatenated title string (empty if there are no fragments).
     """
     raw_title = news_row.xpath(
         'string(./div[contains(@class, "title")]/a)'
     ).extract_first().strip()
     # NOTE(review): utils.filter_spaces presumably yields string fragments;
     # confirm against the helper's implementation.
     # Each fragment's stripped value is what gets joined: str.strip()
     # returns a new string, so calling it without using the result (as the
     # previous loop did) leaves the fragment unchanged.
     return ''.join(
         fragment.strip() for fragment in utils.filter_spaces(raw_title)
     )
 def get_date_time(self, news_row):
     """Build a date-time from the text of the row's ``./a/span`` node.

     The node text is passed through ``utils.filter_spaces``; every
     resulting fragment is stripped and followed by a single space, and
     the combined string is handed to ``utils.create_date_time_tzinfo``
     together with ``self.tzinfo``.

     :param news_row: selector for a single news entry.
     :return: whatever ``utils.create_date_time_tzinfo`` returns.
     :raises Exception: when no date text could be assembled.
     """
     span_text = news_row.xpath('string(./a/span)').extract_first()
     fragments = utils.filter_spaces(span_text)
     # Every fragment keeps a trailing space, including the last one.
     assembled = ''.join(fragment.strip() + ' ' for fragment in fragments)
     if not assembled:
         raise Exception('Error: Date parsing error')
     return utils.create_date_time_tzinfo(assembled, self.tzinfo)
 def get_date_time(self, news_row):
     """Build a date-time from the row's "article-date" div.

     The stripped string value of the first descendant div whose class
     contains "article-date" is passed through ``utils.filter_spaces``;
     only the first resulting element is given to
     ``utils.create_date_time_tzinfo`` along with ``self.tzinfo``.

     :param news_row: selector for a single news entry.
     :return: whatever ``utils.create_date_time_tzinfo`` returns.
     :raises Exception: when the date text is empty after stripping.
     """
     date_node = news_row.xpath(
         'string(.//div[contains(@class, "article-date")])')
     raw_date = date_node.extract_first().strip()
     if not raw_date:
         raise Exception('Error: Date parsing error')
     first_fragment = utils.filter_spaces(raw_date)[0]
     return utils.create_date_time_tzinfo(first_fragment, self.tzinfo)
Example #4
0
 def get_date_time(self, news_row):
     """Build a date-time from the row's "date" div, ignoring its label.

     The div text is passed through ``utils.filter_spaces``; each fragment
     is stripped, fragments equal to "date" (case-insensitive) are dropped,
     and the rest are concatenated without a separator before being passed
     to ``utils.create_date_time_tzinfo`` with ``self.tzinfo``.

     :param news_row: selector for a single news entry.
     :return: whatever ``utils.create_date_time_tzinfo`` returns.
     :raises Exception: when nothing remains after filtering.
     """
     div_text = news_row.xpath(
         'string(./div[contains(@class, "date")])'
     ).extract_first()
     stripped = (piece.strip() for piece in utils.filter_spaces(div_text))
     # Skip the literal "date" label that accompanies the value.
     assembled = ''.join(
         piece for piece in stripped if piece.lower() != 'date'
     )
     if not assembled:
         raise Exception('Error: Date parsing error')
     return utils.create_date_time_tzinfo(assembled, self.tzinfo)
Example #5
0
 def get_title(self, news_row):
     """Return the first cleaned fragment of the row's ``h4/a`` link text.

     :param news_row: selector for a single news entry.
     :return: element 0 of ``utils.filter_spaces`` applied to the stripped
         link text.
     """
     link_text = news_row.xpath('string(.//h4/a)').extract_first()
     fragments = utils.filter_spaces(link_text.strip())
     return fragments[0]
Example #6
0
 def parse(self, response):
     """Log the filtered text of the first list-group entry in the response."""
     # Resolve the callables first so lookups happen in the same order as
     # in a single nested call expression.
     log_info = self.logger.info
     clean = util.filter_spaces
     first_entry_text = response.xpath(
         'string(//div[@class="list-group"]/div[1])'
     ).extract_first()
     log_info(clean(first_entry_text))