def parse_products(self, response):
    """Yield a request for the Algolia multi-index query endpoint.

    Args:
        response: The response that triggered this callback. It is not
            inspected here.

    Yields:
        A single ``Request`` built from a curl command that targets the
        query URL; its response is handled by ``self.parse``.
    """
    # The application id and API key are embedded in the query string.
    url = (
        "https://hwo0nunfkk-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20"
        "JavaScript%20(3.35.1)%3B%20Browser%3B%20instantsearch.js%20(4.15.0)%3B%20Magento2%20integ"
        "ration%20(3.1.0)%3B%20JS%20Helper%20(3.4.4)&"
        "x-algolia-application-id=HWO0NUNFKK&x-algolia-api-key=MjEwNzc3YjI4Yjk5OWY1"
        "NTY4NzI3NjY0MjUyZjVmNWI1YzZjMWNkOWIzNDA0NDc2YTZlYzY4NDEyMmViMzk1N3RhZ0ZpbHRlcnM9"
    )
    # ``from_curl`` takes the curl command as its first positional argument;
    # the previous ``url=self.url`` keyword call omitted it and never used
    # the URL assembled above. The URL is single-quoted so that ``&``, ``*``
    # and parentheses survive shell-style tokenisation.
    yield Request.from_curl(f"curl '{url}'", callback=self.parse)
def start_requests(self):
    """Yield the single seed request for the tingchina JSONP endpoint.

    The request is described by a hard-coded curl command (URL, headers and
    cookies included) and its response is handled by
    ``self.parse_key_string``.

    Yields:
        One ``Request`` built by ``Request.from_curl``.
    """
    # Adjacent literals replace the former backslash continuations, which
    # were embedded in the string as invalid ``\ `` escapes and ended up as
    # stray tokens in the command line.
    # NOTE(review): the Cookie header carries fixed session values; they
    # presumably expire - confirm whether the endpoint needs them at all.
    curl_command = (
        "curl 'https://img.tingchina.com/play/h5_jsonp.asp?0.11683375963617659'"
        " -H 'Connection: keep-alive'"
        " -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'"
        " -H 'Accept: */*'"
        " -H 'Sec-Fetch-Site: same-site'"
        " -H 'Sec-Fetch-Mode: no-cors'"
        " -H 'Sec-Fetch-Dest: script'"
        " -H 'Referer: https://www.tingchina.com/yousheng/31046/play_31046_0.htm'"
        " -H 'Accept-Language: en-US,en;q=0.9'"
        " -H 'Cookie: UM_distinctid=177461d45ea7fc-09f21ff66496a9-1e2a1f04-25a3ac-177461d45eb6a8; ting_0_31046_1=0; ting_0_31046_2=490.593481; tingNewJieshaoren=0; ASPSESSIONIDSGTTAATQ=OKMJNKKBJGNBGGHLGNPFKAHM; tingNewIP%2D0%2D31046=over; tNew_play_url=https%3A//www.tingchina.com/yousheng/31046/play_31046_0.htm; ting_0_31046_0=430.00254; cscpvrich2729_p=1'"
        " --compressed"
    )
    yield Request.from_curl(curl_command, callback=self.parse_key_string)
def start_requests(self):
    """Stamp ``self.results`` with the current time, then request every page.

    Side effects, performed before the first request is yielded:

    * ``self.url`` is set to the paginated listing URL template.
    * ``self.results["creation_date"]`` records the current wall-clock time
      as whole seconds since the Unix epoch plus the sub-second remainder
      in nanoseconds.
    * ``self.results["crates"]`` is reset to an empty list.

    Yields:
        One ``Request`` per page, numbered 1 through ``self.total_page``,
        each requesting ``self.per_page`` entries and handled by
        ``self.parse``.
    """
    # Imported locally so the block does not depend on the file's import list.
    import time

    # NOTE(review): unlike the alternatives listed below, this path has no
    # ``crates`` segment after ``/api/v1/`` - confirm the endpoint is right.
    self.url = 'https://crates.io/api/v1/?category=no-std&page={page}&per_page={per_page}&sort=downloads'
    # Alternative templates kept for reference:
    #   https://crates.io/api/v1/crates/bencher/reverse_dependencies?page={page}&per_page={per_page}
    #   https://crates.io/api/v1/crates?page={page}&per_page={per_page}&sort=downloads

    # A single clock reading keeps the two fields consistent with each other.
    # The previous version spawned two separate ``date`` processes, so the
    # seconds and nanoseconds came from different instants, and it relied on
    # the GNU-only ``%N`` format.
    secs, nanos = divmod(time.time_ns(), 1_000_000_000)
    self.results["creation_date"] = {
        "secs_since_epoch": secs,
        "nanos_since_epoch": nanos,
    }
    self.results["crates"] = []

    for page in range(self.total_page):
        yield Request.from_curl(
            "curl " + self.url.format(page=page + 1, per_page=self.per_page),
            callback=self.parse,
        )
def start_requests(self):
    """Yield one search request for every (date range, keyword) pair.

    Date ranges come from ``self.create_date_range(1990)``; elements 0 and 1
    of each are sent as the ``inicio`` and ``fin`` form fields (presumably
    the start and end of the range - confirm against ``create_date_range``).
    Each entry of ``self.keywords`` is sent as ``txtBusquedaLibre``.

    Yields:
        ``Request`` objects built by ``Request.from_curl`` and handled by
        ``self.parse``.
    """
    # Everything up to the form body is identical for every request.
    command_prefix = (
        "curl 'https://www.jurisprudencia.gob.sv/busqueda/result.php'"
        " -H 'Connection: keep-alive'"
        " -H 'Accept: */*'"
        " -H 'X-Requested-With: XMLHttpRequest'"
        " -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'"
        " -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8'"
        " -H 'Origin: https://www.jurisprudencia.gob.sv'"
        " -H 'Sec-Fetch-Site: same-origin'"
        " -H 'Sec-Fetch-Mode: cors'"
        " -H 'Sec-Fetch-Dest: empty'"
        " -H 'Referer: https://www.jurisprudencia.gob.sv/busqueda/busquedaLeg.php?id=2'"
        " -H 'Accept-Language: ca,en;q=0.9'"
        " -H 'Cookie: _ga=GA1.3.499250194.1605023569; _gid=GA1.3.1562076103.1605176978; wplc_chat_status=5; _icl_current_language=es; nc_status=browsing; PHPSESSID=emkambpjvphadn3r7lracuqvg6'"
    )
    for date_range in self.create_date_range(1990):
        for keyword in self.keywords:
            # The keyword and range bounds are interpolated verbatim.
            form_body = f"libre=true&txtBusquedaLibre={keyword}&baseDatos=2&nivel1=0&nivel2=0&nivel3=0&nivel4=0&maximo=300&inicio={date_range[0]}&fin={date_range[1]}&tipoBusquedaFrasePalabra=1"
            yield Request.from_curl(
                f"{command_prefix} --data-raw '{form_body}' --compressed",
                callback=self.parse,
            )
def start_requests(self):
    """Yield one request per page of the ``self.url`` template.

    ``self.url`` is read once up front and formatted with a 1-based ``page``
    number and ``self.per_page`` for each of the ``self.total_page`` pages.

    Yields:
        ``Request`` objects built by ``Request.from_curl`` and handled by
        ``self.parse``.
    """
    template = self.url
    for index in range(self.total_page):
        # Page numbers sent to the server start at 1, not 0.
        page_url = template.format(page=index + 1, per_page=self.per_page)
        yield Request.from_curl(f"curl {page_url}", callback=self.parse)