def parse_date(self, response):
    """Return the date found in the page's ``<title>`` text.

    The text selected by ``//title/text()`` is passed through ``tostr`` and
    searched with ``date_re``. The ``date`` named group is returned with
    every ``.`` replaced by ``-``.

    Raises:
        DateParseError: if ``date_re`` finds no match; the title text is
            passed as the exception argument.
    """
    page_title = tostr(Selector(response).xpath("//title/text()"))
    match = date_re.search(page_title)
    if match:
        return match.group("date").replace(".", "-")
    raise DateParseError(page_title)
def request_from_link(self, response, link):
    """Build the follow-up request for a single link node.

    An ``href`` starting with ``javascript`` is delegated to
    ``request_minutes``. Any other ``href`` is resolved against
    ``response.url`` and requested with ``parse_list`` as the callback.
    """
    href = tostr(link.xpath("./@href"))
    if not href.startswith("javascript"):
        return Request(
            url=urljoin(response.url, href),
            headers=self.headers,
            callback=self.parse_list,
        )
    # javascript: links carry no usable href; request_minutes handles them.
    return self.request_minutes(response, link)
def parse_date(self, response):
    """Pull the date out of the response's ``<title>`` text.

    Searches the ``tostr``-converted ``//title/text()`` selection with
    ``date_re`` and returns the ``date`` group, with ``.`` swapped for ``-``.

    Raises:
        DateParseError: when the title does not match ``date_re``; the
            title text is the exception argument.
    """
    title_text = tostr(Selector(response).xpath('//title/text()'))
    found = date_re.search(title_text)
    if found:
        return found.group('date').replace('.', '-')
    raise DateParseError(title_text)
def request_from_link(self, response, link):
    """Turn one link node into the request that should follow it.

    ``javascript`` hrefs are handed to ``request_minutes``. Every other
    href is joined onto ``response.url`` and fetched with ``parse_list`` as
    the callback.
    """
    target = tostr(link.xpath('./@href'))
    if not target.startswith('javascript'):
        return Request(
            url=urljoin(response.url, target),
            headers=self.headers,
            callback=self.parse_list,
        )
    # The href is a javascript: pseudo-URL, so defer to request_minutes.
    return self.request_minutes(response, link)
def request_minutes(self, response, link):
    """Build the request for the target named in a link's ``onclick``.

    The ``onclick`` attribute is matched against ``minutes_re``. In the
    captured ``url`` group, ``frame.php`` is replaced by
    ``viewer.total.php`` before the result is resolved against
    ``response.url``. The request uses ``parse_minutes`` as its callback.

    Raises:
        UrlParseError: if ``onclick`` does not match ``minutes_re``; the
            attribute text is passed as the exception argument.
    """
    onclick_attr = tostr(link.xpath("./@onclick"))
    match = minutes_re.match(onclick_attr)
    if not match:
        raise UrlParseError(onclick_attr)

    viewer_path = match.group("url").replace("frame.php", "viewer.total.php")
    return Request(
        url=urljoin(response.url, viewer_path),
        headers=self.headers,
        callback=self.parse_minutes,
    )
def request_minutes(self, response, link):
    """Create the request for the URL embedded in a link's ``onclick``.

    Matches the ``onclick`` attribute with ``minutes_re`` and takes the
    ``url`` group. ``frame.php`` is rewritten to ``viewer.total.php``, the
    result is joined onto ``response.url``, and a request with
    ``parse_minutes`` as the callback is returned.

    Raises:
        UrlParseError: when ``minutes_re`` does not match the ``onclick``
            text; that text is the exception argument.
    """
    handler_js = tostr(link.xpath('./@onclick'))
    found = minutes_re.match(handler_js)
    if not found:
        raise UrlParseError(handler_js)

    rewritten = found.group('url').replace('frame.php', 'viewer.total.php')
    return Request(
        url=urljoin(response.url, rewritten),
        headers=self.headers,
        callback=self.parse_minutes,
    )