def get_contents_list(self, response: HtmlResponse):
    """Yield one solution-page request per (language, document) pair.

    The documents come from the ``list`` key of the JSON response, the
    product model from ``response.meta["model"]`` and the language codes
    from ``Khala/spider_params/lenovo/language.txt`` (one code per line).
    Every resulting URL is followed with ``self.out_item`` as callback.

    Args:
        response: Response whose body is JSON and whose ``meta`` carries
            a ``model`` entry.

    Yields:
        The object returned by ``response.follow`` for each built URL,
        ordered language-major (all documents for the first language,
        then all documents for the next one, ...).
    """
    contents_list = response.json().get('list') or []
    if not contents_list:
        # Nothing to follow; do not bother touching the language file.
        return

    # Read the file completely before yielding anything: a generator may stay
    # suspended for a long time and must not keep the handle open meanwhile.
    # The file is only read, so it is opened read-only.
    with open('Khala/spider_params/lenovo/language.txt', encoding='utf-8') as language_file:
        stripped_lines = (line.strip() for line in language_file)
        # Blank lines would otherwise produce '/us//products/...' URLs.
        languages = [code for code in stripped_lines if code]

    meta = response.meta
    for language in languages:
        for contents in contents_list:
            url = f'https://pcsupport.lenovo.com/us/{language}/products/{meta["model"]}/solutions/{contents["docid"]}'
            yield response.follow(url=url, callback=self.out_item)
def parse(self, response: HtmlResponse, **kwargs):
    """Handle one page of the JSON video listing.

    Logs ``meta.total``, schedules the next page while ``meta.count`` is
    non-zero (the ``offset`` query parameter of the current URL is advanced
    by 96), and yields one ``BrazzersItem`` per entry of ``result`` that has
    a download URL. Entries without one are only logged.

    Args:
        response: Response whose body is JSON with ``meta`` and ``result``
            keys and whose URL contains an ``offset=<int>`` parameter.
        **kwargs: Accepted for callback compatibility; unused.

    Yields:
        A ``scrapy.Request`` for the next page (at most one), then the
        ``BrazzersItem`` objects for the downloadable entries.
    """
    offset_step = 96  # how far the original code advances ``offset`` per page

    payload = response.json()
    meta = payload.get('meta')
    # Logger.warn is a deprecated alias; use warning().
    self.logger.warning('videos count: %s', meta.get('total'))

    if meta.get('count') != 0:
        old_offset = int(response.url.split('offset=')[1].split('&')[0])
        # Substitute the new offset directly instead of round-tripping the
        # URL through str.format, which fails on any literal brace in it.
        next_url = response.url.replace(
            f'offset={old_offset}', f'offset={old_offset + offset_step}'
        )
        yield scrapy.Request(url=next_url)

    # The wanted records all live under 'result', which is a list; tolerate
    # a missing or null value instead of raising TypeError.
    for result in payload.get('result') or []:  # type: dict
        title = result.get('title')
        release_date = self.parse_date(result.get('dateReleased'))
        desc = result.get('description')
        download_url = self.extract_download_url(result)
        if download_url is None:
            self.logger.warning('no download,the video name: %s', title)
            continue
        yield BrazzersItem(
            title=title,
            release_date=release_date,
            desc=desc,
            download_url=download_url,
        )
def parse(self, response: HtmlResponse, **kwargs):
    """Request every configured category page when the response reports success.

    Nothing is yielded unless the ``status`` field of the JSON body equals
    ``'success'``. Otherwise one ``scrapy.Request`` is produced for each
    entry of the ``CATEGORY`` setting, handled by ``self.categories_parse``
    and given the category name through ``cb_kwargs``.
    """
    status = response.json().get('status')
    if status != 'success':
        return

    url_template = 'https://theartporn.com/categories/{0}/'
    for name in self.settings.getlist('CATEGORY'):
        yield scrapy.Request(
            url=url_template.format(name),
            callback=self.categories_parse,
            cb_kwargs={'category': name},
        )
def get_pdf_list(self, response: HtmlResponse):
    """Yield a ``{'url': ...}`` dict for every document that has a download URL.

    Args:
        response: Response whose JSON body may hold a ``docList`` list of
            dicts, each optionally carrying a ``downloadUrl``.

    Yields:
        ``{'url': <downloadUrl>}`` for each entry whose ``downloadUrl`` is
        present and truthy, in the order they appear in ``docList``.
    """
    # A missing or null 'docList' means "no documents", not a TypeError.
    for doc in response.json().get('docList') or []:
        download_url = doc.get('downloadUrl')
        if download_url:
            yield {'url': download_url}