def get_title(self, news_row):
    """Return the title text of a news row as one whitespace-cleaned string.

    The text is read from the link inside the row's child ``div`` whose
    class contains ``title``. It is passed through ``utils.filter_spaces``
    and the fragments that come back are stripped and concatenated with no
    separator.

    Args:
        news_row: Selector-like object for a single news entry; it must
            support ``xpath(...).extract_first()``.

    Returns:
        The concatenated, stripped fragments (``''`` when
        ``utils.filter_spaces`` yields nothing).
    """
    raw_title = news_row.xpath(
        'string(./div[contains(@class, "title")]/a)'
    ).extract_first().strip()
    # Strip each fragment before joining. The earlier loop called
    # ``t.strip()`` and threw the result away, so any whitespace around a
    # fragment leaked into the returned title.
    return ''.join(
        fragment.strip() for fragment in utils.filter_spaces(raw_title)
    )
def get_date_time(self, news_row):
    """Build a date-time value from the ``./a/span`` text of a news row.

    Args:
        news_row: Selector-like object for a single news entry; it must
            support ``xpath(...).extract_first()``.

    Returns:
        The result of ``utils.create_date_time_tzinfo`` called with the
        cleaned date text and ``self.tzinfo``.

    Raises:
        Exception: If no date text is left after cleaning.
    """
    raw_text = news_row.xpath('string(./a/span)').extract_first()
    # Each fragment is stripped and followed by a single space, so a
    # non-empty result always ends with one trailing space.
    date_str = ''.join(
        fragment.strip() + ' '
        for fragment in utils.filter_spaces(raw_text)
    )
    if not date_str:
        raise Exception('Error: Date parsing error')
    return utils.create_date_time_tzinfo(date_str, self.tzinfo)
def get_date_time(self, news_row):
    """Build a date-time value from a row's ``article-date`` element.

    The text of the first descendant ``div`` whose class contains
    ``article-date`` is stripped; only the first fragment returned by
    ``utils.filter_spaces`` is used as the date string.

    Args:
        news_row: Selector-like object for a single news entry; it must
            support ``xpath(...).extract_first()``.

    Returns:
        The result of ``utils.create_date_time_tzinfo`` called with the
        first fragment and ``self.tzinfo``.

    Raises:
        Exception: If the stripped date text is empty.
    """
    raw_date = news_row.xpath(
        'string(.//div[contains(@class, "article-date")])'
    ).extract_first()
    date_str = raw_date.strip()
    if not date_str:
        raise Exception('Error: Date parsing error')
    first_fragment = utils.filter_spaces(date_str)[0]
    return utils.create_date_time_tzinfo(first_fragment, self.tzinfo)
def get_date_time(self, news_row):
    """Build a date-time value from a row's ``date`` element.

    The text of the child ``div`` whose class contains ``date`` is passed
    through ``utils.filter_spaces``. Each fragment is stripped, fragments
    equal to ``date`` (case-insensitive) are dropped, and the rest are
    concatenated with no separator.

    Args:
        news_row: Selector-like object for a single news entry; it must
            support ``xpath(...).extract_first()``.

    Returns:
        The result of ``utils.create_date_time_tzinfo`` called with the
        cleaned date text and ``self.tzinfo``.

    Raises:
        Exception: If no date text is left after cleaning.
    """
    raw_text = news_row.xpath(
        'string(./div[contains(@class, "date")])').extract_first()
    stripped_parts = (
        fragment.strip() for fragment in utils.filter_spaces(raw_text)
    )
    # Skip the literal "date" label so that only the value is kept.
    date_str = ''.join(
        part for part in stripped_parts if part.lower() != 'date'
    )
    if not date_str:
        raise Exception('Error: Date parsing error')
    return utils.create_date_time_tzinfo(date_str, self.tzinfo)
def get_title(self, news_row):
    """Return the first text fragment of the row's ``h4`` link.

    Args:
        news_row: Selector-like object for a single news entry; it must
            support ``xpath(...).extract_first()``.

    Returns:
        The first element of ``utils.filter_spaces`` applied to the
        stripped link text.
    """
    heading_text = news_row.xpath('string(.//h4/a)').extract_first()
    fragments = utils.filter_spaces(heading_text.strip())
    return fragments[0]
def parse(self, response):
    """Log the cleaned text of the first ``list-group`` entry.

    Args:
        response: Response-like object supporting
            ``xpath(...).extract_first()``.
    """
    first_entry_xpath = 'string(//div[@class="list-group"]/div[1])'
    first_entry_text = response.xpath(first_entry_xpath).extract_first()
    # NOTE(review): this calls ``util.filter_spaces``; confirm that ``util``
    # (rather than ``utils``) is the module this file actually imports.
    self.logger.info(util.filter_spaces(first_entry_text))