def parse2(self, response):
    """Scrape one app detail page and yield a populated ``qihustoryItem``.

    Values are cut out of the serialized HTML with fixed slice offsets, so
    the extraction is tied to the exact markup of the page.

    Args:
        response: Response of the detail page; must support ``css`` and
            ``xpath`` selection.

    Yields:
        One ``qihustoryItem`` with appname, author, version, fileSize,
        dataTime, downCount, description, downUrl and channel filled in.
    """
    item = qihustoryItem()

    info = response.css('.info_con')
    # NOTE(review): the '//' XPaths below are absolute, so they match across
    # the whole document rather than only inside the selected container.
    # The positional indexes depend on that; confirm before making them
    # relative ('.//').
    title_html = info.xpath('//h1').extract_first()
    # Drop the 4 leading and 5 trailing characters, i.e. '<h1>' and '</h1>'
    # when the tag carries no attributes.
    item['appname'] = title_html[4:-5]

    em_nodes = info.xpath('//em')
    # Same trimming idea for '<em>' ... '</em>'.
    item['author'] = em_nodes[4].extract()[4:-5]

    detail_html = response.css('.version_con').xpath('//dl/dd/p')[2].extract()
    # Skip the two-character label plus one separator character.
    version_tail = detail_html[detail_html.find('版本') + 3:]
    item['version'] = version_tail[:version_tail.find('<br')]
    size_tail = detail_html[detail_html.find('大小') + 3:]
    item['fileSize'] = size_tail[:size_tail.find('</')]

    item['dataTime'] = em_nodes[1].extract()[4:-5]
    item['downCount'] = em_nodes[2].extract()[4:-5]

    description_html = response.css('.game_txt').extract_first()
    item['description'] = description_html[28:-10]

    # Download URL format, e.g.
    # https://count.liqucn.com/d.php?id=706758&urlos=android&from_type=web
    href = response.css('.version_btn a::attr(href)').extract()[0]
    # The id sits between 'rj/' and the last six characters of the href
    # (presumably a fixed file suffix; verify against a live page).
    app_id = href[href.find('rj/') + 3:-6]
    url_template = 'https://count.liqucn.com/d.php?id=ID&urlos=android&from_type=web'
    # str.replace returns a new string; the result must be stored, otherwise
    # the item keeps the literal 'ID' placeholder.
    item['downUrl'] = url_template.replace('ID', app_id)

    item['channel'] = response.xpath('//title/text()').extract_first()
    yield item
    print(item['appname'])
def parse2(self, response):
    """Build a ``qihustoryItem`` from an app detail page and yield it.

    All fields are obtained by slicing the serialized HTML of the selected
    nodes at fixed offsets.

    Args:
        response: Detail-page response supporting ``css``, ``xpath`` and
            ``urljoin``.

    Yields:
        One ``qihustoryItem`` with appname, author, version, fileSize,
        dataTime, description, downUrl and channel set.
    """
    item = qihustoryItem()
    item['appname'] = response.css('.intro-titles h3').extract_first()[4:-5]
    item['author'] = response.css('.intro-titles p').extract_first()[3:-4]

    # All <li> fragments concatenated into one searchable string.
    li_nodes = response.css('.look-detail').css('.weight-font').xpath('//li')
    details_html = ''.join(li_nodes.extract())

    def _value_after(label, skip, stop):
        """Return the text starting ``skip`` chars past ``label`` up to ``stop``."""
        tail = details_html[details_html.find(label) + skip:]
        return tail[:tail.find(stop)]

    item['version'] = '应用版本:' + _value_after('版本号', 13, '</li>')
    item['fileSize'] = _value_after('软件大小', 14, '</li>')
    item['dataTime'] = _value_after('更新时间', 14, '</li')

    slide_nodes = response.css('.app-text').css('.pslide')
    description_html = slide_nodes.css('.pslide').extract_first()
    # Keep everything after the end of the first opening tag.
    body_start = description_html.find('">') + 2
    item['description'] = description_html[body_start:]

    relative_url = response.css('.download::attr(href)').extract_first()
    item['downUrl'] = response.urljoin(relative_url)
    item['channel'] = response.xpath('//title/text()').extract_first()

    print(item['appname'])
    yield item
def parse2(self, response):
    """Scrape one app detail page and yield a populated ``qihustoryItem``.

    Values are cut out of the serialized HTML of ``<td>``, ``<title>``,
    ``.s-3`` and ``.breif`` nodes with fixed slice offsets.

    Args:
        response: Detail-page response supporting ``css`` and ``xpath``.

    Yields:
        One ``qihustoryItem`` with author, appname, version, fileSize,
        dataTime, downCount, description, downUrl and channel set.
    """
    item = qihustoryItem()

    # Serialize every <td> once; cells 0, 1 and 2 are all needed below.
    cells = response.xpath('//td').extract()

    author_cell = cells[0]
    # Text after the closing '</strong>' label, minus the trailing '</td>'.
    item['author'] = author_cell[author_cell.find('/strong>') + 8:-5]

    title_html = response.css('title').extract()[0]
    # Drops '<title>' (7 chars) and the last 16 chars (the closing tag plus,
    # presumably, a fixed site suffix; verify against a live page).
    item['appname'] = title_html[7:-16]

    version_cell = cells[2]
    item['version'] = '版本号:' + version_cell[24:version_cell.find('<!')]

    counters = response.css('.s-3').extract()
    size_html = counters[1]
    # The size was computed but never stored on the item; store it so this
    # callback fills the same 'fileSize' field as the sibling callbacks.
    item['fileSize'] = size_html[size_html.find('">') + 2:size_html.find('</')]

    date_cell = cells[1]
    label_start = date_cell.find('strong>') + 7
    # Five characters of label followed by the value after '</strong>'.
    date_label = date_cell[label_start:label_start + 5]
    item['dataTime'] = date_label + date_cell[date_cell.find('/strong>') + 8:-5]

    count_html = counters[0]
    item['downCount'] = count_html[count_html.find('">') + 2:-7]

    description_html = response.css('.breif').extract()[0]
    item['description'] = description_html[
        description_html.find('breif">') + 20:
        description_html.find('<div class="base-info')
    ]

    redirect_href = response.css('.js-downLog::attr(href)').extract()[0]
    # The real download address is carried in the 'url=' part of the href.
    item['downUrl'] = redirect_href[redirect_href.find('url=') + 4:]

    item['channel'] = response.xpath('//title/text()').extract_first()
    print(item['appname'])
    yield item
def parse2(self, response):
    """Build a ``qihustoryItem`` from an app detail page and yield it.

    Fields are sliced out of the serialized HTML of the selected nodes.

    Args:
        response: Detail-page response supporting ``css`` and ``xpath``.

    Yields:
        One ``qihustoryItem`` with appname, author, version, fileSize,
        dataTime, downCount, description, downUrl and channel set.
    """
    item = qihustoryItem()
    item['appname'] = response.css('.detail_line h3').extract_first()[4:-5]

    detail_html = response.css('.detail_description').extract_first()

    def _field(label, skip, stop):
        """Return the text from ``skip`` chars past ``label`` up to ``stop``."""
        tail = detail_html[detail_html.find(label) + skip:]
        return tail[:tail.find(stop)]

    # The author value keeps its label, hence a skip of zero.
    item['author'] = _field('作者:', 0, '</')

    version_html = response.css('.app_detail_version').extract_first()
    version_from = version_html.find('">(') + 3
    version_to = version_html.find(')</span')
    item['version'] = '版本号:' + version_html[version_from:version_to]

    item['fileSize'] = _field('大小', 3, '</span')
    item['dataTime'] = _field('时间', 3, '</li')
    item['downCount'] = _field('下载', 3, '</span>')

    intro_html = response.css('.app_detail_infor').extract_first()
    item['description'] = intro_html[intro_html.find('<p>') + 11:-16]

    # Download link format:
    # http://www.anzhi.com/dl_app.php?s=3091483&n=5
    button_html = response.css('.detail_down').extract_first()
    # The numeric id is the argument between 'opendown(' and the first ')'.
    app_id = button_html[button_html.find('opendown') + 9:button_html.find(')')]
    item['downUrl'] = 'http://www.anzhi.com/dl_app.php?s=ID&n=5'.replace('ID', str(app_id))

    item['channel'] = response.xpath('//title/text()').extract_first()
    yield item
def parse(self, response):
    """Yield one ``qihustoryItem`` per ``.app`` entry of a listing page.

    Most fields come from ``data_*`` attributes of the anchor inside each
    ``.little-install`` node; the download count and description come from
    the surrounding ``.app`` node.

    Args:
        response: Listing-page response supporting ``css`` and ``xpath``.

    Yields:
        A separate ``qihustoryItem`` for every ``.app`` node.
    """
    # The page title is the same for every entry, so look it up once.
    channel = response.xpath('//title/text()').extract_first()

    for app in response.css('.app'):
        # A fresh item per app: reusing one mutable item for every yield
        # makes all yielded objects aliases of each other, and lets values
        # from a previous app survive when this app has no
        # '.little-install' node.
        item = qihustoryItem()

        for info in app.css('.little-install'):
            item['appname'] = info.css('a::attr(data_name)').extract_first()
            print(item['appname'])
            item['version'] = info.css(
                'a::attr(data_versionname)').extract_first()
            item['fileSize'] = info.css('a::attr(data_size)').extract_first()
            item['author'] = info.css('a::attr(data_from)').extract_first()
            item['downUrl'] = info.css('a::attr(data_url)').extract_first()

        count_html = app.css('.size').extract_first()
        # Text after the end of the opening tag, minus 7 trailing characters.
        item['downCount'] = count_html[count_html.find('">') + 2:-7]
        item['description'] = app.css('.brief::text').extract_first()
        item['channel'] = channel
        yield item
def parse(self, response):
    """Yield one ``qihustoryItem`` per app in a JSON search response.

    The body is decoded as JSON and the app records are read from
    ``['obj']['appDetails']``.

    Args:
        response: Response whose ``body`` is the JSON search result.

    Yields:
        A separate ``qihustoryItem`` for every entry in ``appDetails``.

    Raises:
        KeyError: If the payload lacks ``obj``/``appDetails`` or an entry
            lacks one of the expected keys.
    """
    # JSON request URLs (the samples differ only in the 'pns' parameter):
    # https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MTA=&sid=0
    # https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MjA=&sid=0
    # https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MzA=&sid=0
    # https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=NDA=&sid=0
    payload = json.loads(response.body)

    for app in payload['obj']['appDetails']:
        # A fresh item per app: yielding one shared mutable item would make
        # every yielded object an alias that later iterations overwrite.
        item = qihustoryItem()
        item['appname'] = app['appName']
        item['author'] = app['authorName']
        item['version'] = app['versionName']
        item['fileSize'] = app['fileSize']
        item['downCount'] = app['appDownCount']
        item['description'] = app['description']
        item['downUrl'] = app['apkUrl']
        # NOTE(review): the body is JSON, so a '//title' lookup may well
        # return None here; confirm that 'channel' is meaningful.
        item['channel'] = response.xpath('//title/text()').extract_first()
        yield item
def parse2(self, response):
    """Build a ``qihustoryItem`` from an app detail page and yield it.

    Version, size and date are read from the third, second and first
    ``.ellipsis strong`` nodes inside ``.app-detail-info``.

    Args:
        response: Detail-page response supporting ``css`` and ``xpath``.

    Yields:
        One ``qihustoryItem`` with appname, version, fileSize, dataTime,
        downCount, description, downUrl and channel set.
    """

    def _strong_text(fragment):
        """Return what lies between the first 'strong>' and the first '</'."""
        return fragment[fragment.find('strong>') + 7:fragment.find('</')]

    item = qihustoryItem()
    item['appname'] = response.css('.app-title::text').extract_first()

    strong_html = response.css('.app-detail-info').css('.ellipsis strong').extract()
    item['version'] = _strong_text(strong_html[2])
    item['fileSize'] = _strong_text(strong_html[1])
    item['dataTime'] = _strong_text(strong_html[0])

    downs_text = response.css('.app-downs::text').extract_first()
    # Keep the part that ends three characters before the '|' separator.
    cut_at = downs_text.find('|') - 3
    item['downCount'] = downs_text[:cut_at]

    intro_html = response.css('.app-detail-intro').extract_first()
    item['description'] = intro_html[intro_html.find('">') + 2:]

    # The second link inside '.app-install' is used as the download URL.
    install_links = response.css('.app-install').css('a::attr(href)').extract()
    item['downUrl'] = install_links[1]

    item['channel'] = response.xpath('//title/text()').extract_first()
    print(item['appname'])
    yield item