Example #1
0
	def parse2(self,response):
		"""Build one qihustoryItem from ``response`` and yield it.

		Every field is cut out of extracted HTML with fixed slice offsets, so
		the result depends on the exact markup of the page (presumably a
		liqucn.com app page, judging by the download URL template -- confirm).

		Yields:
			The populated qihustoryItem; the app name is printed afterwards.
		"""
		item = qihustoryItem()
		tempD = response.css('.info_con')
		# NOTE(review): '//h1' and '//em' are absolute XPaths, so they match
		# anywhere in the document, not only inside '.info_con'.
		tempN = tempD.xpath('//h1').extract_first()
		# [4:-5] drops a leading '<h1>' and trailing '</h1>' (assumes the tag
		# carries no attributes); the same offsets are reused for '<em>' below.
		item['appname'] = tempN[4:-5]
		author = tempD.xpath('//em')[4].extract()
		item['author'] = author[4:-5]
		tempData = response.css('.version_con')
		tempInfo = tempData.xpath('//dl/dd/p')[2].extract()
		# Skip the two-character label plus one more character (presumably a
		# colon) and read up to the next tag.
		version = tempInfo[tempInfo.find('版本')+3:]
		item['version'] = version[:version.find('<br')]
		filesize = tempInfo[tempInfo.find('大小')+3:]
		item['fileSize'] = filesize[:filesize.find('</')]
		tempTime = tempD.xpath('//em')[1].extract()
		item['dataTime'] = tempTime[4:-5]
		tmpCount = tempD.xpath('//em')[2].extract()
		item['downCount'] = tmpCount[4:-5]
		description = response.css('.game_txt').extract_first()
		item['description'] = description[28:-10]
		# Download URL, e.g.
		# https://count.liqucn.com/d.php?id=706758&urlos=android&from_type=web
		tempId = response.css('.version_btn a::attr(href)').extract()[0]
		# The id sits between 'rj/' and the last six characters of the href.
		appId = tempId[tempId.find('rj/')+3:-6]
		downUrl = 'https://count.liqucn.com/d.php?id=ID&urlos=android&from_type=web'
		# str.replace returns a new string; the original code discarded it and
		# stored the unfilled template.
		item['downUrl'] = downUrl.replace('ID',appId)
		item['channel'] = response.xpath('//title/text()').extract_first()
		yield item
		print(item['appname'])
Example #2
0
	def parse2(self,response):
		"""Populate a qihustoryItem from ``response`` and yield it.

		All values are sliced out of extracted HTML using fixed offsets after
		a marker string, so they depend on the page's exact markup.

		Yields:
			The populated qihustoryItem (its app name is printed first).
		"""
		def _after(text, marker, skip, terminator):
			# Text that starts ``skip`` characters past the start of ``marker``
			# and runs up to the next ``terminator``.
			tail = text[text.find(marker)+skip:]
			return tail[:tail.find(terminator)]

		item = qihustoryItem()
		title_html = response.css('.intro-titles h3').extract_first()
		# Offsets strip a bare '<h3>'/'</h3>' pair and a bare '<p>'/'</p>' pair.
		item['appname'] = title_html[4:-5]
		author_html = response.css('.intro-titles p').extract_first()
		item['author'] = author_html[3:-4]

		# NOTE(review): '//li' is an absolute XPath, so it matches every <li>
		# in the document rather than only those under the selected nodes.
		li_nodes = response.css('.look-detail').css('.weight-font').xpath('//li')
		details = ''.join(li_nodes.extract())
		item['version'] = '应用版本:'+_after(details, '版本号', 13, '</li>')
		item['fileSize'] = _after(details, '软件大小', 14, '</li>')
		item['dataTime'] = _after(details, '更新时间', 14, '</li')

		slide_html = response.css('.app-text').css('.pslide').css('.pslide').extract_first()
		# Keep everything after the first '">' (end of the opening tag).
		item['description'] = slide_html[slide_html.find('">')+2:]
		href = response.css('.download::attr(href)').extract_first()
		item['downUrl'] = response.urljoin(href)
		item['channel'] = response.xpath('//title/text()').extract_first()

		print(item['appname'])
		yield item
Example #3
0
 def parse2(self, response):
     """Build one qihustoryItem from ``response`` and yield it.

     Values are sliced out of extracted HTML with fixed offsets, so they
     depend on the page's exact markup. The first three ``<td>`` cells are
     read as author, date and version respectively.

     Yields:
         The populated qihustoryItem (its app name is printed first).
     """
     item = qihustoryItem()
     # Extract the <td> cells once; the original re-ran the query three times.
     cells = response.xpath('//td').extract()

     authorData = cells[0]
     # Text after the closing '</strong>' up to the trailing '</td>'.
     item['author'] = authorData[authorData.find('/strong>') + 8:-5]

     appname = response.css('title').extract()[0]
     # Drops '<title>' (7 chars) and a fixed 16-character tail
     # (presumably a site suffix plus '</title>' -- confirm).
     item['appname'] = appname[7:-16]

     version = cells[2]
     item['version'] = '版本号:' + version[24:version.find('<!')]

     numdata = response.css('.s-3').extract()
     filesize = numdata[1]
     # The size was computed but never stored in the original code; the
     # sibling parsers all populate item['fileSize'].
     item['fileSize'] = filesize[filesize.find('">') + 2:filesize.find('</')]

     dataTime = cells[1]
     labelStart = dataTime.find('strong>') + 7
     # Five-character label inside <strong>, followed by the text after it.
     temp = dataTime[labelStart:labelStart + 5]
     item['dataTime'] = temp + dataTime[dataTime.find('/strong>') + 8:-5]

     downCount = numdata[0]
     item['downCount'] = downCount[downCount.find('">') + 2:-7]

     description = response.css('.breif').extract()[0]
     item['description'] = description[
         description.find('breif">') + 20:
         description.find('<div class="base-info')]

     downUrl = response.css('.js-downLog::attr(href)').extract()[0]
     # The real download link is carried in the 'url=' query parameter.
     item['downUrl'] = downUrl[downUrl.find('url=') + 4:]
     item['channel'] = response.xpath('//title/text()').extract_first()
     print(item['appname'])
     yield item
Example #4
0
	def parse2(self,response):
		"""Populate a qihustoryItem from ``response`` and yield it.

		Values are sliced out of extracted HTML using marker strings and fixed
		offsets, so they depend on the page's exact markup.

		Yields:
			The populated qihustoryItem.
		"""
		def _after(text, marker, skip, terminator):
			# Text that starts ``skip`` characters past the start of ``marker``
			# and runs up to the next ``terminator``.
			tail = text[text.find(marker)+skip:]
			return tail[:tail.find(terminator)]

		item = qihustoryItem()
		title_html = response.css('.detail_line h3').extract_first()
		# Strip a bare '<h3>' / '</h3>' pair.
		item['appname'] = title_html[4:-5]

		detail_html = response.css('.detail_description').extract_first()
		# skip=0: the '作者:' label itself is kept in the author value.
		item['author'] = _after(detail_html, '作者:', 0, '</')

		version_html = response.css('.app_detail_version').extract_first()
		version_start = version_html.find('">(')+3
		version_end = version_html.find(')</span')
		item['version'] = '版本号:' + version_html[version_start:version_end]

		item['fileSize'] = _after(detail_html, '大小', 3, '</span')
		item['dataTime'] = _after(detail_html, '时间', 3, '</li')
		item['downCount'] = _after(detail_html, '下载', 3, '</span>')

		info_html = response.css('.app_detail_infor').extract_first()
		item['description'] = info_html[info_html.find('<p>')+11:-16]

		# Download link format:
		# http://www.anzhi.com/dl_app.php?s=3091483&n=5
		down_html = response.css('.detail_down').extract_first()
		# The numeric id is the argument of the 'opendown(...)' call in the markup.
		soft_id = down_html[down_html.find('opendown')+9:down_html.find(')')]
		url_template = 'http://www.anzhi.com/dl_app.php?s=ID&n=5'
		item['downUrl'] = url_template.replace('ID',str(soft_id))
		item['channel'] = response.xpath('//title/text()').extract_first()
		yield item
Example #5
0
 def parse(self, response):
     """Yield one qihustoryItem per ``.app`` node found in ``response``.

     A fresh item is created for every app. The original reused a single
     instance for the whole page, so fields missing on one app silently
     kept the previous app's values and already-yielded items were
     mutated by later iterations.

     Yields:
         A populated qihustoryItem for each ``.app`` node.
     """
     # Same for every item on the page, so look it up once.
     channel = response.xpath('//title/text()').extract_first()
     for app in response.css('.app'):
         item = qihustoryItem()
         # If several '.little-install' nodes exist, the last one wins.
         for info in app.css('.little-install'):
             item['appname'] = info.css(
                 'a::attr(data_name)').extract_first()
             print(item['appname'])
             item['version'] = info.css(
                 'a::attr(data_versionname)').extract_first()
             item['fileSize'] = info.css(
                 'a::attr(data_size)').extract_first()
             item['author'] = info.css('a::attr(data_from)').extract_first()
             item['downUrl'] = info.css('a::attr(data_url)').extract_first()
         downCount = app.css('.size').extract_first()
         # Text after the opening tag, minus a fixed seven-character tail
         # (presumably the closing tag -- confirm).
         item['downCount'] = downCount[downCount.find('">') + 2:-7]
         item['description'] = app.css('.brief::text').extract_first()
         item['channel'] = channel
         yield item
Example #6
0
 def parse(self, response):
     """Yield one qihustoryItem per entry of the JSON search response.

     The body is decoded as JSON and ``obj.appDetails`` is iterated. A
     fresh item is created per entry; the original reused one instance,
     so every yielded item was the same object and was overwritten by the
     next iteration.

     Example JSON request URLs (``pns`` changes per page):
       https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MTA=&sid=0
       https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MjA=&sid=0
       https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=MzA=&sid=0
       https://sj.qq.com/myapp/searchAjax.htm?kw=%E5%8F%91%E7%A5%A8%E3%80%81&pns=NDA=&sid=0

     Yields:
         A populated qihustoryItem for each entry in ``appDetails``.
     """
     js = json.loads(response.body)
     # NOTE(review): the response is JSON, so a <title> lookup most likely
     # yields None -- confirm what 'channel' is meant to hold here.
     channel = response.xpath('//title/text()').extract_first()
     for ite in js['obj']['appDetails']:
         item = qihustoryItem()
         item['appname'] = ite['appName']
         item['author'] = ite['authorName']
         item['version'] = ite['versionName']
         item['fileSize'] = ite['fileSize']
         item['downCount'] = ite['appDownCount']
         item['description'] = ite['description']
         item['downUrl'] = ite['apkUrl']
         item['channel'] = channel
         yield item
Example #7
0
    def parse2(self, response):
        """Populate a qihustoryItem from ``response`` and yield it.

        Version, size and date come from the third, second and first
        ``.ellipsis strong`` elements under ``.app-detail-info``; the other
        fields are sliced out of extracted text or HTML with fixed offsets.

        Yields:
            The populated qihustoryItem (its app name is printed first).
        """
        def _strong_text(fragment):
            # Content between the first 'strong>' and the first '</'.
            begin = fragment.find('strong>') + 7
            return fragment[begin:fragment.find('</')]

        item = qihustoryItem()
        item['appname'] = response.css('.app-title::text').extract_first()

        strong_tags = response.css('.app-detail-info').css(
            '.ellipsis strong').extract()
        item['version'] = _strong_text(strong_tags[2])
        item['fileSize'] = _strong_text(strong_tags[1])
        item['dataTime'] = _strong_text(strong_tags[0])

        downs_text = response.css('.app-downs::text').extract_first()
        # Keep the text up to three characters before the '|' separator.
        item['downCount'] = downs_text[:downs_text.find('|') - 3]

        intro_html = response.css('.app-detail-intro').extract_first()
        # Everything after the first '">' (end of the opening tag).
        item['description'] = intro_html[intro_html.find('">') + 2:]

        install_links = response.css('.app-install').css('a::attr(href)')
        # The second href is used as the download link.
        item['downUrl'] = install_links.extract()[1]
        item['channel'] = response.xpath('//title/text()').extract_first()
        print(item['appname'])

        yield item