def get_url(self, news_row):
     """Build the link for a news row from the anchor in its second cell.

     Reads the first ``(./td)[2]/a/@href`` value of ``news_row`` and strips
     surrounding whitespace. If the value contains ``javascript`` (compared
     case-insensitively), the first ``('...')`` argument found in it is
     appended to ``self.root_url``; otherwise the value is used unchanged.
     The result is passed through ``utils.quote_url`` and returned.

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     href = news_row.xpath('(./td)[2]/a/@href').extract_first().strip()

     # Plain links need no rewriting.
     if 'javascript' not in href.lower():
         return utils.quote_url(href)

     import re
     # The real path is the single-quoted argument of the javascript call;
     # an IndexError propagates when no such argument is present.
     target = self.root_url + re.findall(r"\('(.*?)'\)", href)[0]
     # todo: every url yielded should be validated & quoted
     return utils.quote_url(target)
 def get_url(self, news_row):
     """Return the row's link prefixed with ``self.root_url``.

     Takes the first ``./a/@href`` value of ``news_row``, strips surrounding
     whitespace, prepends ``self.root_url`` and passes the result through
     ``utils.quote_url``.

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     relative_href = news_row.xpath('./a/@href').extract_first().strip()
     return utils.quote_url(self.root_url + relative_href)
 def get_url(self, news_row):
     """Return the title-cell link of a row prefixed with ``self.root_url``.

     The href is read from the anchor inside the ``td`` whose class contains
     ``views-field-field-productnews-display-title-value``; it is stripped,
     appended to ``self.root_url`` and passed through ``utils.quote_url``.

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     title_href_xpath = './/td[contains(@class, "views-field-field-productnews-display-title-value")]/a/@href'
     relative_href = news_row.xpath(title_href_xpath).extract_first().strip()
     return utils.quote_url(self.root_url + relative_href)
Beispiel #4
0
 def get_url(self, news_row):
     """Return the row's ``h4`` anchor link passed through ``utils.quote_url``.

     The first ``.//h4/a/@href`` value of ``news_row`` is stripped of
     surrounding whitespace and used as-is (no root URL is prepended).

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     heading_href = news_row.xpath('.//h4/a/@href').extract_first()
     return utils.quote_url(heading_href.strip())
Beispiel #5
0
 def get_url(self, news_row):
     """Return the title-div link of a row prefixed with ``self.root_url``.

     The href comes from the anchor inside the direct child ``div`` whose
     class contains ``title``; it is stripped, appended to ``self.root_url``
     and passed through ``utils.quote_url``.

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     title_href_xpath = './div[contains(@class, "title")]/a/@href'
     relative_href = news_row.xpath(title_href_xpath).extract_first().strip()
     return utils.quote_url(self.root_url + relative_href)
 def get_url(self, news_row):
     """Return the first-table anchor link passed through ``utils.quote_url``.

     Reads the first ``(.//table)[1]//td/a/@href`` value of ``news_row`` and
     strips surrounding whitespace; no root URL is prepended.

     NOTE(review): presumably ``extract_first()`` yields None when nothing
     matches, which would make ``.strip()`` raise -- confirm against callers.
     """
     first_href = news_row.xpath('(.//table)[1]//td/a/@href').extract_first()
     cleaned_href = first_href.strip()
     # todo: asx hkex quote url
     return utils.quote_url(cleaned_href)