Example #1
0
 def get_contents_list(self, response: HtmlResponse):
     """Yield a follow-up request for every (language, document) combination.

     The document entries come from the ``list`` key of the JSON response body
     and the product model from ``response.meta['model']``.  Each non-blank
     line of ``Khala/spider_params/lenovo/language.txt`` is used as the
     language path segment of the generated URL.

     :param response: JSON response whose body holds a ``list`` of entries,
         each providing a ``docid`` key.
     :return: generator of requests built with ``response.follow`` and routed
         to ``self.out_item``.
     :raises KeyError: if ``model`` is missing from ``response.meta`` or an
         entry has no ``docid``.
     """
     # A missing or null 'list' means there is nothing to follow.
     contents_list = response.json().get('list') or []
     if not contents_list:
         return

     model = response.meta['model']
     # The file is only read, so open it read-only instead of 'r+', which
     # would needlessly require write permission on the parameter file.
     with open('Khala/spider_params/lenovo/language.txt',
               'r', encoding='utf-8') as languages:
         for line in languages:
             # strip() also removes '\r' and stray spaces, not just '\n'.
             language = line.strip()
             if not language:
                 # Skip blank lines; they would produce a malformed URL.
                 continue
             for contents in contents_list:
                 url = f'https://pcsupport.lenovo.com/us/{language}/products/{model}/solutions/{contents["docid"]}'
                 yield response.follow(url=url, callback=self.out_item)
Example #2
0
    def parse(self, response: HtmlResponse, **kwargs):
        """Request the next result page and yield an item per downloadable video.

        The JSON body is expected to carry a ``meta`` object (``total``,
        ``count``) and a ``result`` list.  When ``count`` is 0 nothing is
        yielded, which ends the pagination.  Otherwise the ``offset`` query
        parameter of the current URL is advanced by one page and requested,
        and every entry of ``result`` that has a download URL is emitted as a
        ``BrazzersItem``.

        :param response: JSON response of one listing page; its URL must
            contain an ``offset=<int>`` query parameter.
        :param kwargs: unused; accepted for callback-signature compatibility.
        :return: generator of one ``scrapy.Request`` followed by items.
        """
        payload = response.json()
        meta = payload.get('meta')
        total = meta.get('total')
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning('videos count: %s', total)
        if meta.get('count') == 0:
            return

        page_size = 96  # step by which the offset is advanced per page
        old_offset = int(response.url.split('offset=')[1].split('&')[0])
        # Substitute the offset directly instead of going through
        # str.format, which would choke on a URL containing literal braces.
        next_url = response.url.replace(
            'offset={0}'.format(old_offset),
            'offset={0}'.format(old_offset + page_size),
        )
        yield scrapy.Request(url=next_url)

        # The wanted data all lives in `result`, which is a list; treat a
        # missing or null value as an empty page.
        for result in payload.get('result') or []:  # each entry is expected to be a dict
            title = result.get('title')
            release_date = self.parse_date(result.get('dateReleased'))
            desc = result.get('description')
            download_url = self.extract_download_url(result)
            if download_url is None:
                self.logger.warning('no download,the video name: %s', title)
                continue
            yield BrazzersItem(title=title, release_date=release_date, desc=desc, download_url=download_url)
Example #3
0
 def parse(self, response: HtmlResponse, **kwargs):
     """Schedule one category-page request per configured category.

     Nothing is yielded unless the JSON body reports ``status == 'success'``.
     The category names are read from the ``CATEGORY`` setting, and each
     request is handled by ``self.categories_parse``, which receives the name
     as the ``category`` keyword argument.
     """
     if response.json().get('status') != 'success':
         return

     url_template = 'https://theartporn.com/categories/{0}/'
     for category_name in self.settings.getlist('CATEGORY'):
         yield scrapy.Request(
             url=url_template.format(category_name),
             callback=self.categories_parse,
             cb_kwargs={'category': category_name},
         )
Example #4
0
 def get_pdf_list(self, response: HtmlResponse):
     """Yield a ``{'url': ...}`` dict for each document that has a download URL.

     Entries are read from the ``docList`` key of the JSON response body.
     Entries whose ``downloadUrl`` is missing or empty are skipped.

     :param response: JSON response whose body may contain a ``docList`` list
         of dict entries.
     :return: generator of dicts with a single ``url`` key.
     """
     # A missing or null 'docList' is treated as an empty list rather than
     # raising TypeError when iterated.
     for doc in response.json().get('docList') or []:
         download_url = doc.get('downloadUrl')
         if download_url:
             yield {'url': download_url}