def parse_item(self, response):
    """Write one 0x01-delimited record for a wandoujia page to ``self.fileout``.

    Fields are read from ``response`` by XPath; the first-level category
    comes from ``response.meta['first']``.  Column order: source, name,
    version, category, time, size, system, text, download count, rating
    (always empty here), tags.  Nothing is written when the name is empty.
    """
    def grab(xpath, *extra):
        # Every scalar field goes through the same project helper.
        return util.get_text(response, xpath, *extra)

    app_name = grab('//p[@class="app-name"]/span/text()')
    if not app_name:
        return
    app_version = grab('//dl[@class="infos-list"]/dd[4]/text()')
    top_level = response.meta['first']
    sub_level = grab('//div[@class="crumb"]/div[2]/a/span/text()')
    category = top_level + '-' + sub_level
    updated = grab('//time[@id="baidu_time"]/text()')
    size = grab('//dl[@class="infos-list"]/dd[1]/text()')
    system = grab('//dl[@class="infos-list"]/dd[5]/text()')
    description = grab('//div[@itemprop="description"]', 0)
    downloads = grab('//i[@itemprop="interactionCount"]/@content')
    tag_texts = response.xpath('//dd[@class="tag-box"]//a/text()').extract()
    tags = ','.join(t.strip() for t in tag_texts)

    record = '\001'.join([
        'wandoujia',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        system,
        description,
        util.unify_download_count(downloads),
        '',  # no rating is scraped for this source
        tags,
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for an anzow page to ``self.fileout``.

    Time, system and download count are taken from ``response.meta``; the
    remaining fields are XPath lookups on ``response``.  The rating column is
    the number of "★" characters in the rating text multiplied by 20.
    Nothing is written when the name is empty.
    """
    delimiter = "\001"
    title = util.get_text(response, "//dl[@class='down_info clear']/dd/div[1]/h1/text()")
    if not title:
        return

    # Only the last two characters of the second breadcrumb link are kept.
    parent_cat = util.get_text(response, "//div[@class='crumbs fl']/a[2]/text()")[-2:]
    child_cat = util.get_text(response, "//div[@class='crumbs fl']/a[3]/text()")
    category = parent_cat + "-" + child_cat

    published = response.meta["time"]
    size = util.get_text(response, '//div[@class="xiazai1"][1]/../dl/dt/ul/li[3]/text()')
    platform = response.meta["system"]
    intro = util.get_text(response, '//div[@class="down_intro"]', 0)
    download_count = response.meta["download"]

    stars = util.get_text(response, '//dl[@class="down_info clear"]/dd/dl/dt/ul/li[7]/strong/text()')
    try:
        rating = str(stars.count("★") * 20)
    except Exception:
        rating = ""

    keywords = response.xpath('//p[@class="keywords"]//a/text()').extract()

    columns = (
        "anzow",
        title,
        "",  # version is not available for this source
        category,
        util.unify_data(published),
        size,
        platform,
        intro,
        util.unify_download_count(download_count),
        rating,
        ",".join(keywords),
    )
    self.fileout.write(delimiter.join(columns))
    self.fileout.write("\n")
def parse_item(self, response):
    """Write one 0x01-delimited record for an applestore page to ``self.fileout``.

    The first-level category is the fixed string '软件'.  Size and tags are
    left empty.  The rating is the number before '星, ' in the aria-label,
    multiplied by 20.  Nothing is written when the name is empty.
    """
    left_stack = '//div[@id="left-stack"]//span'
    ratings = '//div[@class="extra-list customer-ratings"]/div[4]'

    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab(
        '//div[@id="desktopContentBlockId"]//div[@id="title"]//h1/text()')
    if not app_name:
        return
    app_version = grab(left_stack + '[@itemprop="softwareVersion"]/text()')
    sub_category = grab(left_stack + '[@itemprop="applicationCategory"]/text()')
    category = '软件' + '-' + sub_category
    published = grab(left_stack + '[@itemprop="datePublished"]/text()')
    platform = grab(left_stack + '[@itemprop="operatingSystem"]/text()')
    review = grab(
        '//div[@class="center-stack"]/div[@class="product-review"]/p', 0)
    rating_count = grab(ratings + '/span/text()')
    label = grab(ratings + '/@aria-label')
    try:
        rating = str(float(label.split('星, ')[0]) * 20)
    except Exception:
        rating = ''

    record = '\001'.join([
        'applestore',
        app_name,
        app_version,
        category,
        util.unify_data(published),
        '',  # size is not scraped
        platform,
        review,
        util.unify_download_count(rating_count),
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def get_upload_time(self, detail_page_driver):
    """Return the upload time found on a detail page, or ``None``.

    Looks up the element addressed by ``self.UPLOAD_TIME`` (an XPath), cuts
    the text between the "上架时间: " label and the "相关推荐" marker, and
    passes it through ``util.unify_data``.

    Returns ``None`` when no XPath is configured, when the element is
    missing, or when the text cannot be cut/converted.
    """
    # Without a configured XPath there is nothing to look up; bail out
    # instead of handing ``None`` to the driver.
    if self.UPLOAD_TIME is None:
        return None
    try:
        ele = detail_page_driver.find_element_by_xpath(self.UPLOAD_TIME)
    except selenium_exception.NoSuchElementException:
        return None
    try:
        raw = ele.text.split("上架时间: ")[1].split('相关推荐')[0].strip()
        return util.unify_data(raw)
    except Exception:
        # Best effort: any unexpected text layout means "no upload time".
        return None
def get_upload_time(self, detail_page_driver):
    """Extract the upload time from a detail page.

    The element is located with the XPath stored in ``self.UPLOAD_TIME``.
    Its text is expected to contain "上架时间: <value>" optionally followed
    by "相关推荐"; the value in between is handed to ``util.unify_data``.

    Returns:
        The result of ``util.unify_data``, or ``None`` if the XPath is not
        configured, the element does not exist, or extraction fails.
    """
    xpath = self.UPLOAD_TIME
    if xpath is None:
        # Nothing configured for this site; treat as "no upload time".
        return None

    try:
        element = detail_page_driver.find_element_by_xpath(xpath)
    except selenium_exception.NoSuchElementException:
        return None

    try:
        after_label = element.text.split("上架时间: ")[1]
        value = after_label.split('相关推荐')[0].strip()
        upload_time = util.unify_data(value)
    except Exception:
        # Deliberately broad: a missing label or a conversion failure both
        # mean the time is unavailable.
        return None
    return upload_time
def get_upload_time(self, detail_page_driver):
    """Return the upload time read from the element's ``textContent``.

    The element is located via the XPath in ``self.UPLOAD_TIME``; its
    stripped ``textContent`` attribute is passed through
    ``util.unify_data``.  ``None`` is returned when no XPath is configured,
    the element is absent, or reading/converting the text fails.
    """
    xpath = self.UPLOAD_TIME
    if xpath is None:
        return None

    try:
        node = detail_page_driver.find_element_by_xpath(xpath)
    except selenium_exception.NoSuchElementException:
        return None

    try:
        raw_text = node.get_attribute("textContent").strip()
        return util.unify_data(raw_text)
    except Exception:
        return None
def get_upload_time(self, detail_page_driver):
    """Return the upload time taken from the fifth ':'-separated text piece.

    The element is located via the XPath in ``self.UPLOAD_TIME``; its text
    is split on ':' and the piece at index 4 is passed through
    ``util.unify_data``.  ``None`` is returned when no XPath is configured,
    the element is absent, or the text has fewer pieces / cannot be
    converted.
    """
    xpath = self.UPLOAD_TIME
    if xpath is None:
        return None

    try:
        node = detail_page_driver.find_element_by_xpath(xpath)
    except selenium_exception.NoSuchElementException:
        return None

    try:
        pieces = node.text.split(":")
        return util.unify_data(pieces[4])
    except Exception:
        return None
def parse_item(self, response):
    """Write one 0x01-delimited record for a hiapk page to ``self.fileout``.

    The page title holds "name(version)"; when it cannot be split that way
    the whole title is used as the name and the version is left empty.
    The rating is parsed out of a CSS class name and multiplied by 2.
    Tags are left empty.  Nothing is written when the name is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    title = grab("//div[@id='appSoftName']/text()")
    try:
        pieces = title.split('(')
        app_version = pieces[1].split(')')[0]
        app_name = pieces[0]
    except Exception:
        app_version = ''
        app_name = title
    if not app_name:
        return

    info_row = '//div[@class="code_box_border"]/div[@class="line_content"]'
    parent_cat = grab("//a[@id='categoryParent']/text()")
    child_cat = grab("//a[@id='categoryLink']/text()")
    category = parent_cat + '-' + child_cat
    updated = grab(info_row + '[7]/span[2]/text()')
    size = grab('//span[@id="appSize"]/text()')
    min_sdk = grab(
        '//span[@class="font14 detailMiniSdk d_gj_line left"]/text()')
    intro = grab('//pre[@id="softIntroduce"]', 0)
    downloads = grab(info_row + '[2]/span[2]/text()')
    star_class = grab('//div[@id="appIconTips"]/div[1]/@class')
    try:
        rating = str(float(star_class.split(" ")[2].split("_")[2]) * 2)
    except Exception:
        rating = ''

    record = '\001'.join([
        'hiapk',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        min_sdk,
        intro,
        util.unify_download_count(downloads),
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a yingyonghui page to ``self.fileout``.

    Several fields carry a fixed-length label that is sliced off
    (``[3:]``).  Size and download count are cut out of the statistic text
    around the '大小:' / ' 更新' / '下载' markers.  Rating and tags are left
    empty.  Nothing is written when the name is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//h1[@class="app-name"]/text()')
    if not app_name:
        return
    app_version = grab('//div[@class="intro"]/p[1]/text()[2]')[3:]
    crumbs = '//div[@class="breadcrumb centre-content"]'
    parent_cat = grab(crumbs + '/a[2]/text()')
    child_cat = grab(crumbs + '/a[3]/text()')
    category = parent_cat + '-' + child_cat
    updated = grab('//div[@class="intro"]/p[1]/text()')[3:]

    size_text = grab('//span[@class="app-statistic"]/text()[2]')
    try:
        size = size_text.split('大小:')[1].split(' 更新')[0]
    except Exception:
        size = ''

    platform = grab('//p[@class="art-content"][3]/text()[4]')[3:]
    intro = grab('//div[@class="main-info"]/p[1]', 0)

    download_text = grab('//span[@class="app-statistic"]/text()')
    try:
        downloads = download_text.split('下载')[0]
    except Exception:
        downloads = ''

    record = '\001'.join([
        'yingyonghui',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        platform,
        intro,
        util.unify_download_count(downloads),
        '',  # rating is not scraped
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for an anzhi page to ``self.fileout``.

    The first-level category comes from ``response.meta['cate']``.  The
    detail list (``detail_line_ul``) supplies sub-category, download count,
    time, size and system; it is read positionally and is handled in its
    7-entry form and in a 6-entry form that has no download count.  Each
    entry has a 3-character label that is sliced off.

    Any other list length leaves those fields empty instead of failing on
    unbound locals, so the record is still written with the fields that
    could be scraped.  Nothing is written when the name is empty.
    """
    source = 'anzhi'
    name = util.get_text(response, '//div[@class="detail_line"]/h3//text()')
    if not name:
        return
    # Drop the first and last character around the version text.
    version = util.get_text(
        response, '//div[@class="detail_line"]/span//text()')[1:-1]
    first = response.meta['cate']

    data = response.xpath('//ul[@id="detail_line_ul"]/li//text()').extract()
    # Defaults for layouts that match neither known shape.
    second = download = time = size = system = ''
    if len(data) == 7:
        second = data[0][3:]
        download = data[1][3:]
        time = data[2][3:]
        size = data[3][3:]
        system = data[4][3:]
    elif len(data) == 6:
        # Same layout without the download-count entry.
        second = data[0][3:]
        time = data[1][3:]
        size = data[2][3:]
        system = data[3][3:]
    category = first + '-' + second

    text = util.get_text(response, '//div[@class="app_detail_infor"]', 0)

    # The rating is encoded in the inline style; splitting on '-' gives two
    # parts for a zero rating and three when an offset is present.
    pingfen = util.get_text(response, '//div[@id="stars_detail"]/@style')
    p = pingfen.split('-')
    if len(p) == 2:
        pingfen = '0.0'
    elif len(p) == 3:
        pingfen = p[2][:-3]
    try:
        pingfen = str(float(pingfen) / 15 * 10)
    except Exception:
        pingfen = ''
    tags = ''

    self.fileout.write('\001'.join([
        source,
        name,
        version,
        category,
        util.unify_data(time),
        size,
        system,
        text,
        util.unify_download_count(download),
        pingfen,
        tags,
    ]))
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for an apk91 page to ``self.fileout``.

    Most fields live in the ``s_info`` list and carry a label prefix that
    is sliced off.  The second-level category comes from
    ``response.meta['cat']``.  The rating is parsed from a CSS class name
    and multiplied by 20.  Nothing is written when the name is empty.
    """
    info = '//ul[@class="s_info"]'

    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//h1[@class="ff f20 fb fl"]/text()')
    if not app_name:
        return
    app_version = grab(info + '/li[1]/text()')[3:]
    parent_cat = grab('//div[@class="crumb clearfix"]/a[2]/text()')
    child_cat = response.meta['cat']
    category = parent_cat + '-' + child_cat
    updated = grab(info + '/li[5]/text()')[5:15]
    size = grab(info + '/li[3]/text()')[5:]
    platform = grab(info + '/li[4]/text()')[5:]
    intro = grab('//div[@class="o-content"]', 0)
    downloads = grab(info + '/li[2]/text()')
    star_class = grab(
        '//div[@class="s_intro_pic fl"]/span[@class="spr star"]/a/@class')
    try:
        rating = str(float(star_class.split('w')[1].split(' ')[0]) * 20)
    except Exception:
        rating = ''
    tag_texts = response.xpath(info + '/li[10]/a/text()').extract()

    record = '\001'.join([
        'apk91',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        platform,
        intro,
        util.unify_download_count(downloads),
        rating,
        ','.join(tag_texts),
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for an anzow page to ``self.fileout``.

    ``response.meta`` supplies time, system and download count; everything
    else is an XPath lookup.  The version column is always empty, and the
    rating is the count of '★' in the rating text multiplied by 20.
    Nothing is written when the name is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab("//dl[@class='down_info clear']/dd/div[1]/h1/text()")
    if not app_name:
        return
    crumbs = "//div[@class='crumbs fl']"
    parent_cat = grab(crumbs + "/a[2]/text()")[-2:]
    child_cat = grab(crumbs + "/a[3]/text()")
    category = parent_cat + '-' + child_cat
    published = response.meta['time']
    size = grab('//div[@class="xiazai1"][1]/../dl/dt/ul/li[3]/text()')
    platform = response.meta['system']
    intro = grab('//div[@class="down_intro"]', 0)
    downloads = response.meta['download']
    star_text = grab(
        '//dl[@class="down_info clear"]/dd/dl/dt/ul/li[7]/strong/text()')
    try:
        rating = str(star_text.count('★') * 20)
    except Exception:
        rating = ''
    keyword_texts = response.xpath('//p[@class="keywords"]//a/text()').extract()

    record = '\001'.join([
        'anzow',
        app_name,
        '',  # version is not scraped
        category,
        util.unify_data(published),
        size,
        platform,
        intro,
        util.unify_download_count(downloads),
        rating,
        ','.join(keyword_texts),
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a 163 page to ``self.fileout``.

    Time, system and tags are left empty.  The download-count column is the
    vote text with its first and last character removed, and the rating is
    the percentage found in the vote bar's inline style.  Nothing is
    written when the name is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//span[@class="f-h1"]/text()')
    if not app_name:
        return
    table = '//table[@class="table-appinfo"]'
    crumbs = "//div[@class='sect']/div[@class='crumb']"
    app_version = grab(table + '/tr[3]/td/text()')
    parent_cat = grab(crumbs + "/a[2]/text()")[-2:]
    child_cat = grab(crumbs + "/a[3]/text()")
    category = parent_cat + '-' + child_cat
    size = grab(table + '/tr[2]/td[1]/text()')
    intro = grab('//div[@id="app-desc"]', 0)
    votes = grab('//span[@class="vote-text-s"]/text()')[1:-1]
    bar_style = grab('//span[@class="vote-column-s"]/i/@style')
    try:
        rating = bar_style.split(':')[1].split('%')[0]
    except Exception:
        rating = ''

    record = '\001'.join([
        '163',
        app_name,
        app_version,
        category,
        util.unify_data(''),  # no time on this source
        size,
        '',  # system is not scraped
        intro,
        util.unify_download_count(votes),
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a 3310 page to ``self.fileout``.

    The page heading holds "name version"; the text after the last space is
    the version and everything before it is the name.  A heading without
    any space is treated as a bare name with an empty version (previously
    it produced an empty name and put the heading in the version column).
    The rating is the scraped score multiplied by 20.  Tags are left empty.
    Nothing is written when the heading is empty.
    """
    source = '3310'
    name_version = util.get_text(response, '//div[@class="cont"]/h2/text()')
    if not name_version:
        return
    name, sep, version = name_version.rpartition(' ')
    if not sep:
        # No version suffix present: keep the whole heading as the name.
        name, version = name_version, ''

    first = util.get_text(response, '//div[@class="guide"]/a[3]/text()')
    second = util.get_text(response, '//div[@class="guide"]/a[4]/text()')
    category = first + '-' + second
    # The slices below drop the fixed-length label in front of each value.
    time = util.get_text(response, '//div[@class="cont"]/p[2]/text()')[5:]
    size = util.get_text(response, '//div[@class="cont"]/p[1]/span/text()')[3:]
    system = util.get_text(response, '//div[@class="cont"]/p[3]/span/text()')[5:]
    text = util.get_text(response, '//div[@class="pictxt item"][not(@style)]', 0)
    download = util.get_text(response, '//span[@id="downnum"]/text()')
    pingfen = util.get_text(response, '//div[@class="score"]/span/text()')
    try:
        pingfen = str(float(pingfen) * 20)
    except Exception:
        pingfen = ''
    tags = ''

    self.fileout.write('\001'.join([
        source,
        name,
        version,
        category,
        util.unify_data(time),
        size,
        system,
        text,
        util.unify_download_count(download),
        pingfen,
        tags,
    ]))
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a mumayi page to ``self.fileout``.

    The heading is split at its last 'V': the part after it is the version,
    the part before it the name; a heading without 'V' is used whole as the
    name.  Time comes from ``response.meta['time']``.  Download count and
    tags are left empty; the rating is parsed from a CSS class name and
    multiplied by 2.  Nothing is written when the heading is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    heading = grab('//h1[@class="iappname hidden fl"]/text()')
    if not heading:
        return
    before, marker, after = heading.rpartition('V')
    if marker:
        app_name, app_version = before, after
    else:
        app_name, app_version = heading, ''

    parent_cat = grab('//div[@id="classlists"]/a[2]/text()')[:2]
    child_cat = grab('//div[@id="classlists"]/a[3]/text()')
    category = parent_cat + '-' + child_cat
    published = response.meta['time']
    size = grab('//span[text()="程序大小:"]/../text()')
    platform = grab('//div[@class="sel_text fl"]/text()')
    intro = grab('//ul[@class="author"]/..//p[position()<last()]', 0)
    star_class = grab('//div[@id="starlist"]/@class')
    try:
        rating = str(float(star_class.split('now')[1]) * 2)
    except Exception:
        rating = ''

    record = '\001'.join([
        'mumayi',
        app_name,
        app_version,
        category,
        util.unify_data(published),
        size,
        platform,
        intro,
        util.unify_download_count(''),  # no download count on this source
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse(self, response):
    """Write one 0x01-delimited record for a baidu page to ``self.fileout``.

    Version, size and download count carry a label prefix that is sliced
    off.  Time, system and tags are left empty.  The rating is the
    percentage found in the star element's inline style.  Nothing is
    written when the name is empty.
    """
    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//span[@class="gray"]/text()')
    if not app_name:
        return
    app_version = grab('//span[@class="version"]/text()')[3:]
    parent_cat = grab('//div[@class="nav"]//a/text()')
    child_cat = grab('//div[@class="nav"]/span[3]/a/text()')
    category = parent_cat + '-' + child_cat
    size = grab('//span[@class="size"]/text()')[3:]
    intro = grab('//div[@class="brief-long"]/p', 0)
    downloads = grab('//span[@class="download-num"]/text()')[5:]
    star_style = grab('//span[@class="star-percent"]/@style')
    try:
        rating = star_style.split(':')[1].split('%')[0]
    except Exception:
        rating = ''

    record = '\001'.join([
        'baidu',
        app_name,
        app_version,
        category,
        util.unify_data(''),  # no time on this source
        size,
        '',  # system is not scraped
        intro,
        util.unify_download_count(downloads),
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape a hiapk page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size, system, text, download count, rating, tags
    (empty).  The title is expected as "name(version)"; if that split fails
    the whole title becomes the name.  Returns early without writing when
    the name is empty.
    """
    sep = '\001'
    raw_title = util.get_text(response, "//div[@id='appSoftName']/text()")
    try:
        head_and_rest = raw_title.split('(')
        release = head_and_rest[1].split(')')[0]
        label = head_and_rest[0]
    except Exception:
        release = ''
        label = raw_title
    if not label:
        return

    cat_parent = util.get_text(response, "//a[@id='categoryParent']/text()")
    cat_child = util.get_text(response, "//a[@id='categoryLink']/text()")
    cat = cat_parent + '-' + cat_child
    when = util.get_text(
        response,
        '//div[@class="code_box_border"]/div[@class="line_content"][7]/span[2]/text()')
    app_size = util.get_text(response, '//span[@id="appSize"]/text()')
    sdk = util.get_text(
        response,
        '//span[@class="font14 detailMiniSdk d_gj_line left"]/text()')
    body = util.get_text(response, '//pre[@id="softIntroduce"]', 0)
    dl_count = util.get_text(
        response,
        '//div[@class="code_box_border"]/div[@class="line_content"][2]/span[2]/text()')
    score = util.get_text(response, '//div[@id="appIconTips"]/div[1]/@class')
    try:
        # The numeric score is embedded in the third class token.
        score = str(float(score.split(" ")[2].split("_")[2]) * 2)
    except Exception:
        score = ''

    fields = (
        'hiapk', label, release, cat, util.unify_data(when), app_size, sdk,
        body, util.unify_download_count(dl_count), score, '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape a yingyonghui page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size, system, text, download count, rating (empty),
    tags (empty).  Returns early without writing when the name is empty.
    """
    sep = '\001'
    label = util.get_text(response, '//h1[@class="app-name"]/text()')
    if not label:
        return

    # [3:] strips the label that precedes each value in the page text.
    release = util.get_text(
        response, '//div[@class="intro"]/p[1]/text()[2]')[3:]
    cat_parent = util.get_text(
        response, '//div[@class="breadcrumb centre-content"]/a[2]/text()')
    cat_child = util.get_text(
        response, '//div[@class="breadcrumb centre-content"]/a[3]/text()')
    cat = cat_parent + '-' + cat_child
    when = util.get_text(response, '//div[@class="intro"]/p[1]/text()')[3:]

    app_size = util.get_text(
        response, '//span[@class="app-statistic"]/text()[2]')
    try:
        app_size = app_size.split('大小:')[1].split(' 更新')[0]
    except Exception:
        app_size = ''

    os_req = util.get_text(
        response, '//p[@class="art-content"][3]/text()[4]')[3:]
    body = util.get_text(response, '//div[@class="main-info"]/p[1]', 0)

    dl_count = util.get_text(response, '//span[@class="app-statistic"]/text()')
    try:
        dl_count = dl_count.split('下载')[0]
    except Exception:
        dl_count = ''

    fields = (
        'yingyonghui', label, release, cat, util.unify_data(when), app_size,
        os_req, body, util.unify_download_count(dl_count), '', '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape a 3310 page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size, system, text, download count, rating, tags
    (empty).

    The heading is "name version": it is split at the last space.  When the
    heading contains no space it is kept whole as the name with an empty
    version, rather than being written as a version with an empty name.
    Returns early without writing when the heading is empty.
    """
    source = '3310'
    name_version = util.get_text(response, '//div[@class="cont"]/h2/text()')
    if not name_version:
        return
    parts = name_version.rsplit(' ', 1)
    if len(parts) == 2:
        name, version = parts
    else:
        # Single token: there is no version suffix to peel off.
        name, version = name_version, ''

    first = util.get_text(response, '//div[@class="guide"]/a[3]/text()')
    second = util.get_text(response, '//div[@class="guide"]/a[4]/text()')
    category = first + '-' + second
    # Slices drop the fixed-length label in front of each value.
    time = util.get_text(response, '//div[@class="cont"]/p[2]/text()')[5:]
    size = util.get_text(response, '//div[@class="cont"]/p[1]/span/text()')[3:]
    system = util.get_text(response, '//div[@class="cont"]/p[3]/span/text()')[5:]
    text = util.get_text(response, '//div[@class="pictxt item"][not(@style)]', 0)
    download = util.get_text(response, '//span[@id="downnum"]/text()')
    pingfen = util.get_text(response, '//div[@class="score"]/span/text()')
    try:
        pingfen = str(float(pingfen) * 20)
    except Exception:
        pingfen = ''
    tags = ''

    fields = (
        source, name, version, category, util.unify_data(time), size, system,
        text, util.unify_download_count(download), pingfen, tags,
    )
    self.fileout.write('\001'.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a 360 page to ``self.fileout``.

    Version, time and system are read from the ``base-info`` table; size
    and download count from the ``pf`` block.  The second-level category
    comes from ``response.meta['categroy']``.  The rating is the scraped
    score multiplied by 10.  Nothing is written when the name is empty.
    """
    base_info = '//div[@class="breif"]/div[@class="base-info"]/table/tbody'
    stats = '//div[@class="pf"]'

    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//h2[@id="app-name"]/span/text()')
    if not app_name:
        return
    app_version = grab(base_info + '/tr[2]/td[1]/text()')
    parent_cat = grab('//div[@class="nav"]/ul/li[@class="cur"]/a/text()')[1:]
    # NOTE(review): the key is spelled 'categroy'; it has to match whatever
    # code populates response.meta, so it is left untouched.
    child_cat = response.meta['categroy']
    category = parent_cat + '-' + child_cat
    updated = grab(base_info + '/tr[1]/td[2]/text()')
    size = grab(stats + '/span[@class="s-3"][2]/text()')
    platform = grab(base_info + '/tr[2]/td[2]/text()')
    intro = grab('//div[@class="breif"]', 0)
    downloads = grab(stats + '/span[@class="s-3"][1]/text()')
    score = grab(stats + '/span[@class="s-1 js-votepanel"]/text()')
    try:
        rating = str(float(score) * 10)
    except Exception:
        rating = ''
    tag_texts = response.xpath('//div[@class="app-tags"]//a/text()').extract()
    tags = ','.join(t.strip() for t in tag_texts)

    record = '\001'.join([
        '360',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        platform,
        intro,
        util.unify_download_count(downloads),
        rating,
        tags,
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a 25pp page to ``self.fileout``.

    Name, version, sub-category, size and system are read from the
    ``title-stat`` block (label prefixes sliced off where present).  Time
    is left empty.  The rating is the scraped score multiplied by 20.
    Nothing is written when the name is empty.
    """
    header = '//div[@class="title-stat"]/div[@class="txt"]'

    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab(header + '/h1/text()')
    if not app_name:
        return
    app_version = grab(header + '/ul/li[1]/text()')[3:]
    parent_cat = grab('//div[@class="location"]/a[2]/text()')
    child_cat = grab(header + '/ul/li[2]/text()')
    category = parent_cat + '-' + child_cat
    size = grab(header + '/ul/li[3]/text()')[3:]
    platform = grab(header + '/ul/li[5]/text()')[5:]
    intro = grab('//div[@class="conTxt"][1]', 0)
    downloads = grab('//li[@class="borderR"]/span/text()')
    score = grab('//div[@class="downMunber"]/ul/li[3]/span/text()')
    try:
        rating = str(float(score) * 20)
    except Exception:
        rating = ''
    tag_texts = response.xpath('//li[@class="w-450"]//a/text()').extract()

    record = '\001'.join([
        '25pp',
        app_name,
        app_version,
        category,
        util.unify_data(''),  # no time on this source
        size,
        platform,
        intro,
        util.unify_download_count(downloads),
        rating,
        ','.join(tag_texts),
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape an apk91 page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size, system, text, download count, rating, tags.
    ``response.meta['cat']`` supplies the second-level category.  Returns
    early without writing when the name is empty.
    """
    sep = '\001'
    label = util.get_text(response, '//h1[@class="ff f20 fb fl"]/text()')
    if not label:
        return

    # Slices strip the label text that precedes each value.
    release = util.get_text(response, '//ul[@class="s_info"]/li[1]/text()')[3:]
    cat_parent = util.get_text(
        response, '//div[@class="crumb clearfix"]/a[2]/text()')
    cat_child = response.meta['cat']
    cat = cat_parent + '-' + cat_child
    when = util.get_text(response, '//ul[@class="s_info"]/li[5]/text()')[5:15]
    app_size = util.get_text(response, '//ul[@class="s_info"]/li[3]/text()')[5:]
    os_req = util.get_text(response, '//ul[@class="s_info"]/li[4]/text()')[5:]
    body = util.get_text(response, '//div[@class="o-content"]', 0)
    dl_count = util.get_text(response, '//ul[@class="s_info"]/li[2]/text()')
    score = util.get_text(
        response,
        '//div[@class="s_intro_pic fl"]/span[@class="spr star"]/a/@class')
    try:
        score = str(float(score.split('w')[1].split(' ')[0]) * 20)
    except Exception:
        score = ''
    links = response.xpath('//ul[@class="s_info"]/li[10]/a/text()').extract()
    joined_tags = ','.join(links)

    fields = (
        'apk91', label, release, cat, util.unify_data(when), app_size, os_req,
        body, util.unify_download_count(dl_count), score, joined_tags,
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape a mumayi page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size, system, text, download count (empty), rating,
    tags (empty).  The heading is cut at its last 'V' into name and
    version; without a 'V' the whole heading is the name.  Returns early
    without writing when the heading is empty.
    """
    sep = '\001'
    heading = util.get_text(
        response, '//h1[@class="iappname hidden fl"]/text()')
    if not heading:
        return
    halves = heading.rsplit('V', 1)
    if len(halves) == 2:
        label, release = halves
    else:
        label, release = heading, ''

    cat_parent = util.get_text(
        response, '//div[@id="classlists"]/a[2]/text()')[:2]
    cat_child = util.get_text(response, '//div[@id="classlists"]/a[3]/text()')
    cat = cat_parent + '-' + cat_child
    when = response.meta['time']
    app_size = util.get_text(response, '//span[text()="程序大小:"]/../text()')
    os_req = util.get_text(response, '//div[@class="sel_text fl"]/text()')
    body = util.get_text(
        response, '//ul[@class="author"]/..//p[position()<last()]', 0)
    score = util.get_text(response, '//div[@id="starlist"]/@class')
    try:
        score = str(float(score.split('now')[1]) * 2)
    except Exception:
        score = ''

    fields = (
        'mumayi', label, release, cat, util.unify_data(when), app_size,
        os_req, body, util.unify_download_count(''), score, '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Write one 0x01-delimited record for a xiaomi page to ``self.fileout``.

    The first-level category comes from ``response.meta['first']``.
    Version, time and size are read from the `` cf`` list.  System,
    download count and tags are left empty.  The rating is parsed from a
    CSS class name and multiplied by 10.  Nothing is written when the name
    is empty.
    """
    details = '//ul[@class=" cf"]'

    def grab(xpath, *extra):
        return util.get_text(response, xpath, *extra)

    app_name = grab('//div[@class="intro-titles"]/h3/text()')
    if not app_name:
        return
    app_version = grab(details + '/li[4]/text()')
    parent_cat = response.meta['first']
    child_cat = grab('//div[@class="bread-crumb"]/ul/li[2]/a/text()')
    category = parent_cat + '-' + child_cat
    updated = grab(details + '/li[6]/text()')
    size = grab(details + '/li[2]/text()')
    intro = grab('//p[@class="pslide"]', 0)
    star_class = grab('//div[@class="star1-empty"]/div/@class')
    try:
        rating = str(float(star_class.split('star1-hover star1-')[1]) * 10)
    except Exception:
        rating = ''

    record = '\001'.join([
        'xiaomi',
        app_name,
        app_version,
        category,
        util.unify_data(updated),
        size,
        '',  # system is not scraped
        intro,
        util.unify_download_count(''),  # no download count on this source
        rating,
        '',  # tags are not scraped
    ])
    self.fileout.write(record)
    self.fileout.write('\n')
def parse(self, response):
    """Scrape a baidu page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time (empty), size, system (empty), text, download count,
    rating, tags (empty).  Returns early without writing when the name is
    empty.
    """
    sep = '\001'
    label = util.get_text(response, '//span[@class="gray"]/text()')
    if not label:
        return

    # Slices strip the label text that precedes each value.
    release = util.get_text(response, '//span[@class="version"]/text()')[3:]
    cat_parent = util.get_text(response, '//div[@class="nav"]//a/text()')
    cat_child = util.get_text(response, '//div[@class="nav"]/span[3]/a/text()')
    cat = cat_parent + '-' + cat_child
    app_size = util.get_text(response, '//span[@class="size"]/text()')[3:]
    body = util.get_text(response, '//div[@class="brief-long"]/p', 0)
    dl_count = util.get_text(
        response, '//span[@class="download-num"]/text()')[5:]
    score = util.get_text(response, '//span[@class="star-percent"]/@style')
    try:
        # Keep the number between ':' and '%' in the inline style.
        score = score.split(':')[1].split('%')[0]
    except Exception:
        score = ''

    fields = (
        'baidu', label, release, cat, util.unify_data(''), app_size, '',
        body, util.unify_download_count(dl_count), score, '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape an applestore page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time, size (empty), system, text, download count, rating,
    tags (empty).  The first-level category is the constant '软件'.
    Returns early without writing when the name is empty.
    """
    sep = '\001'
    label = util.get_text(
        response,
        '//div[@id="desktopContentBlockId"]//div[@id="title"]//h1/text()')
    if not label:
        return

    release = util.get_text(
        response,
        '//div[@id="left-stack"]//span[@itemprop="softwareVersion"]/text()')
    cat_child = util.get_text(
        response,
        '//div[@id="left-stack"]//span[@itemprop="applicationCategory"]/text()')
    cat = '软件' + '-' + cat_child
    when = util.get_text(
        response,
        '//div[@id="left-stack"]//span[@itemprop="datePublished"]/text()')
    os_req = util.get_text(
        response,
        '//div[@id="left-stack"]//span[@itemprop="operatingSystem"]/text()')
    body = util.get_text(
        response,
        '//div[@class="center-stack"]/div[@class="product-review"]/p', 0)
    dl_count = util.get_text(
        response,
        '//div[@class="extra-list customer-ratings"]/div[4]/span/text()')
    score = util.get_text(
        response,
        '//div[@class="extra-list customer-ratings"]/div[4]/@aria-label')
    try:
        # The label starts with the star value, followed by '星, '.
        score = str(float(score.split('星, ')[0]) * 20)
    except Exception:
        score = ''

    fields = (
        'applestore', label, release, cat, util.unify_data(when), '', os_req,
        body, util.unify_download_count(dl_count), score, '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')
def parse_item(self, response):
    """Scrape a 163 page and append its record to ``self.fileout``.

    Output is a single 0x01-delimited line: source, name, version,
    category, time (empty), size, system (empty), text, download count,
    rating, tags (empty).  Returns early without writing when the name is
    empty.
    """
    sep = '\001'
    label = util.get_text(response, '//span[@class="f-h1"]/text()')
    if not label:
        return

    release = util.get_text(
        response, '//table[@class="table-appinfo"]/tr[3]/td/text()')
    # Only the last two characters of the second breadcrumb link are kept.
    cat_parent = util.get_text(
        response, "//div[@class='sect']/div[@class='crumb']/a[2]/text()")[-2:]
    cat_child = util.get_text(
        response, "//div[@class='sect']/div[@class='crumb']/a[3]/text()")
    cat = cat_parent + '-' + cat_child
    app_size = util.get_text(
        response, '//table[@class="table-appinfo"]/tr[2]/td[1]/text()')
    body = util.get_text(response, '//div[@id="app-desc"]', 0)
    # Drop the first and last character around the vote count.
    dl_count = util.get_text(
        response, '//span[@class="vote-text-s"]/text()')[1:-1]
    score = util.get_text(response, '//span[@class="vote-column-s"]/i/@style')
    try:
        score = score.split(':')[1].split('%')[0]
    except Exception:
        score = ''

    fields = (
        '163', label, release, cat, util.unify_data(''), app_size, '', body,
        util.unify_download_count(dl_count), score, '',
    )
    self.fileout.write(sep.join(fields))
    self.fileout.write('\n')