def parse(self, response: TypeResponse):
    """Schedule a topic request for every entry of the topic listing.

    Iterates the ``<li>`` elements under the ``eden_tpc_list`` element,
    rewrites each link's ``/rakuen/topic/group/`` path segment to
    ``/group/topic/``, resolves it against the response URL and yields a
    ``Request`` handled by ``self.parse_topic``.

    Args:
        response: The downloaded listing page.

    Yields:
        One ``Request`` per list item that carries a link.
    """
    for item in response.xpath('//*[@id="eden_tpc_list"]/ul/li'):
        href = item.xpath('./a/@href').extract_first()
        if not href:
            # extract_first() returns None when the item has no <a href>;
            # skip it instead of crashing the whole callback.
            continue
        url = href.replace('/rakuen/topic/group/', '/group/topic/')
        yield Request(
            response.urljoin(url),
            callback=self.parse_topic,
        )
def parse(self, response: TypeResponse):
    """Follow the subject links found in the two entry tabs of the page.

    Hrefs are gathered from the ``wikiEntryMainTab`` and
    ``latestEntryMainTab`` elements and de-duplicated. Every href that
    contains ``/subject/`` is requested with ``self.parse_page`` as the
    callback and ``dont_cache`` set in the request meta.

    Args:
        response: The downloaded page containing both tabs.

    Yields:
        One ``Request`` per distinct subject link.
    """
    tab_queries = (
        '//*[@id="wikiEntryMainTab"]//li/a/@href',
        '//*[@id="latestEntryMainTab"]//li/a/@href',
    )

    # A set drops links that appear in both tabs.
    unique_hrefs = set()
    for query in tab_queries:
        unique_hrefs.update(response.xpath(query).extract())

    for href in unique_hrefs:
        if '/subject/' not in href:
            continue
        yield Request(
            response.urljoin(href),
            callback=self.parse_page,
            meta={'dont_cache': True},
        )