def parse_item(self, response):
        """Write the 'wandoujia' item found in ``response`` as one line of ``self.fileout``.

        The line holds eleven fields separated by the 0x01 character, in the
        order: source, name, version, category, time, size, system, text,
        download count, rating, tags. The rating field is always empty for
        this source. Returns early, writing nothing, when no name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(response, '//p[@class="app-name"]/span/text()')
        if not app_name:
            return

        app_version = util.get_text(response, '//dl[@class="infos-list"]/dd[4]/text()')

        # First half of the category comes from the request meta, second half from the page.
        parent_category = response.meta['first']
        child_category = util.get_text(response, '//div[@class="crumb"]/div[2]/a/span/text()')
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, '//time[@id="baidu_time"]/text()')
        size = util.get_text(response, '//dl[@class="infos-list"]/dd[1]/text()')
        system = util.get_text(response, '//dl[@class="infos-list"]/dd[5]/text()')
        description = util.get_text(response, '//div[@itemprop="description"]', 0)
        raw_download = util.get_text(response, '//i[@itemprop="interactionCount"]/@content')

        tag_texts = response.xpath('//dd[@class="tag-box"]//a/text()').extract()
        tags = ','.join(item.strip() for item in tag_texts)

        record = delimiter.join([
            'wandoujia',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            description,
            util.unify_download_count(raw_download),
            '',  # no rating is scraped for this source
            tags,
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #2
0
    def parse_item(self, response):
        """Write the 'anzow' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version (always empty), category, time, size, system, text,
        download count, rating, tags. Time, system and download count are taken
        from ``response.meta``. Nothing is written when no name is extracted.
        """
        delimiter = "\001"

        title = util.get_text(response, "//dl[@class='down_info clear']/dd/div[1]/h1/text()")
        if not title:
            return

        # Only the last two characters of the second breadcrumb link are kept.
        parent_category = util.get_text(response, "//div[@class='crumbs fl']/a[2]/text()")[-2:]
        child_category = util.get_text(response, "//div[@class='crumbs fl']/a[3]/text()")
        category = parent_category + "-" + child_category

        raw_time = response.meta["time"]
        size = util.get_text(response, '//div[@class="xiazai1"][1]/../dl/dt/ul/li[3]/text()')
        system = response.meta["system"]
        intro = util.get_text(response, '//div[@class="down_intro"]', 0)
        raw_download = response.meta["download"]

        stars = util.get_text(response, '//dl[@class="down_info clear"]/dd/dl/dt/ul/li[7]/strong/text()')
        try:
            # Each star character counts for 20 points.
            rating = str(stars.count("★") * 20)
        except Exception:
            rating = ""

        keywords = response.xpath('//p[@class="keywords"]//a/text()').extract()
        tags = ",".join(keywords)

        record = delimiter.join(
            [
                "anzow",
                title,
                "",  # version is not scraped for this source
                category,
                util.unify_data(raw_time),
                size,
                system,
                intro,
                util.unify_download_count(raw_download),
                rating,
                tags,
            ]
        )
        self.fileout.write(record)
        self.fileout.write("\n")
    def parse_item(self, response):
        """Write the 'applestore' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size (always empty), system, text,
        download count, rating, tags (always empty). Nothing is written when
        no name is extracted.
        """
        delimiter = '\001'
        left_stack = '//div[@id="left-stack"]'
        ratings_row = '//div[@class="extra-list customer-ratings"]/div[4]'

        app_name = util.get_text(
            response,
            '//div[@id="desktopContentBlockId"]//div[@id="title"]//h1/text()')
        if not app_name:
            return

        app_version = util.get_text(
            response, left_stack + '//span[@itemprop="softwareVersion"]/text()')

        # The first category level is a fixed label for this source.
        sub_category = util.get_text(
            response, left_stack + '//span[@itemprop="applicationCategory"]/text()')
        category = '软件' + '-' + sub_category

        raw_time = util.get_text(
            response, left_stack + '//span[@itemprop="datePublished"]/text()')
        system = util.get_text(
            response, left_stack + '//span[@itemprop="operatingSystem"]/text()')
        review = util.get_text(
            response,
            '//div[@class="center-stack"]/div[@class="product-review"]/p', 0)
        raw_download = util.get_text(response, ratings_row + '/span/text()')

        rating_label = util.get_text(response, ratings_row + '/@aria-label')
        try:
            # The number before '星, ' in the aria-label is multiplied by 20.
            rating = str(float(rating_label.split('星, ')[0]) * 20)
        except Exception:
            rating = ''

        record = delimiter.join([
            'applestore',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            '',  # size is not scraped for this source
            system,
            review,
            util.unify_download_count(raw_download),
            rating,
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
 def get_upload_time(self, detail_page_driver):
     """Return the upload time shown on the detail page, or None.

     The element located by the ``self.UPLOAD_TIME`` XPath is read, the text
     between the "上架时间: " label and the "相关推荐" marker is stripped and
     passed through ``util.unify_data``. None is returned when the element
     does not exist or when extracting/normalizing the text fails.
     """
     try:
         node = detail_page_driver.find_element_by_xpath(self.UPLOAD_TIME)
     except selenium_exception.NoSuchElementException:
         return None

     try:
         after_label = node.text.split("上架时间: ")[1]
         raw_time = after_label.split('相关推荐')[0].strip()
         return util.unify_data(raw_time)
     except Exception:
         # Best effort: any parsing problem means "no upload time".
         return None
Exemple #5
0
 def get_upload_time(self, detail_page_driver):
     """Return the upload time found via ``self.UPLOAD_TIME``, or None.

     Takes the element text that follows "上架时间: " and precedes "相关推荐",
     strips it and hands it to ``util.unify_data``. A missing element or any
     failure while slicing/normalizing the text yields None.
     """
     try:
         element = detail_page_driver.find_element_by_xpath(self.UPLOAD_TIME)
     except selenium_exception.NoSuchElementException:
         return None

     result = None
     try:
         tail = element.text.split("上架时间: ")[1]
         result = util.unify_data(tail.split('相关推荐')[0].strip())
     except Exception:
         # Deliberately swallowed: the caller only distinguishes value vs. None.
         return None
     return result
Exemple #6
0
 def get_upload_time(self, detail_page_driver):
     """Return the upload time from the element's ``textContent``, or None.

     None is returned when ``self.UPLOAD_TIME`` is not configured, when the
     element cannot be found, or when reading/normalizing the text fails.
     Otherwise the stripped ``textContent`` is passed through
     ``util.unify_data``.
     """
     xpath = self.UPLOAD_TIME
     if xpath is None:
         return None

     try:
         node = detail_page_driver.find_element_by_xpath(xpath)
     except selenium_exception.NoSuchElementException:
         return None

     try:
         content = node.get_attribute("textContent")
         return util.unify_data(content.strip())
     except Exception:
         # Best effort: treat any failure as "no upload time".
         return None
 def get_upload_time(self, detail_page_driver):
     """Look up the upload-time element and return its normalized text, or None.

     Guard clauses return None when no XPath is configured in
     ``self.UPLOAD_TIME`` or the element is absent. The element's stripped
     ``textContent`` attribute is run through ``util.unify_data``; any error
     in that step also results in None.
     """
     if self.UPLOAD_TIME is None:
         return None

     try:
         element = detail_page_driver.find_element_by_xpath(self.UPLOAD_TIME)
     except selenium_exception.NoSuchElementException:
         return None

     normalized = None
     try:
         normalized = util.unify_data(element.get_attribute("textContent").strip())
     except Exception:
         # Swallowed on purpose; callers only see a value or None.
         return None
     return normalized
 def get_upload_time(self, detail_page_driver):
     """Return the upload time taken from the element text, or None.

     The element text is split on ":" and the fifth piece (index 4) is passed
     through ``util.unify_data``. None is returned when ``self.UPLOAD_TIME``
     is unset, the element is missing, or the text has too few pieces or
     cannot be normalized.
     """
     xpath = self.UPLOAD_TIME
     if xpath is None:
         return None

     try:
         node = detail_page_driver.find_element_by_xpath(xpath)
     except selenium_exception.NoSuchElementException:
         return None

     try:
         pieces = node.text.split(":")
         return util.unify_data(pieces[4])
     except Exception:
         # Best effort: any parsing problem means "no upload time".
         return None
Exemple #9
0
    def parse_item(self, response):
        """Write the 'hiapk' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags (always empty). Name and version come from one heading of
        the form ``name(version)``. Nothing is written when the name is empty.
        """
        delimiter = '\001'

        heading = util.get_text(response, "//div[@id='appSoftName']/text()")
        try:
            pieces = heading.split('(')
            app_version = pieces[1].split(')')[0]
            app_name = pieces[0]
        except Exception:
            # No parenthesised version: the whole heading is the name.
            app_version, app_name = '', heading
        if not app_name:
            return

        parent_category = util.get_text(response, "//a[@id='categoryParent']/text()")
        child_category = util.get_text(response, "//a[@id='categoryLink']/text()")
        category = parent_category + '-' + child_category

        raw_time = util.get_text(
            response,
            '//div[@class="code_box_border"]/div[@class="line_content"][7]/span[2]/text()')
        size = util.get_text(response, '//span[@id="appSize"]/text()')
        system = util.get_text(
            response,
            '//span[@class="font14 detailMiniSdk d_gj_line left"]/text()')
        intro = util.get_text(response, '//pre[@id="softIntroduce"]', 0)
        raw_download = util.get_text(
            response,
            '//div[@class="code_box_border"]/div[@class="line_content"][2]/span[2]/text()')

        star_class = util.get_text(response, '//div[@id="appIconTips"]/div[1]/@class')
        try:
            # Third class token, third "_"-separated part, doubled.
            rating = str(float(star_class.split(" ")[2].split("_")[2]) * 2)
        except Exception:
            rating = ''

        record = delimiter.join([
            'hiapk',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            intro,
            util.unify_download_count(raw_download),
            rating,
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #10
0
    def parse_item(self, response):
        """Write the 'yingyonghui' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating (always empty), tags (always empty). Nothing is written when no
        name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(response, '//h1[@class="app-name"]/text()')
        if not app_name:
            return

        # The first three characters of several fields are skipped
        # (presumably a label prefix -- confirm against the page).
        app_version = util.get_text(
            response, '//div[@class="intro"]/p[1]/text()[2]')[3:]

        parent_category = util.get_text(
            response, '//div[@class="breadcrumb centre-content"]/a[2]/text()')
        child_category = util.get_text(
            response, '//div[@class="breadcrumb centre-content"]/a[3]/text()')
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, '//div[@class="intro"]/p[1]/text()')[3:]

        statistic_tail = util.get_text(
            response, '//span[@class="app-statistic"]/text()[2]')
        try:
            size = statistic_tail.split('大小:')[1].split(' 更新')[0]
        except Exception:
            size = ''

        system = util.get_text(
            response, '//p[@class="art-content"][3]/text()[4]')[3:]
        intro = util.get_text(response, '//div[@class="main-info"]/p[1]', 0)

        statistic_head = util.get_text(
            response, '//span[@class="app-statistic"]/text()')
        try:
            raw_download = statistic_head.split('下载')[0]
        except Exception:
            raw_download = ''

        record = delimiter.join([
            'yingyonghui',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            intro,
            util.unify_download_count(raw_download),
            '',  # rating is not scraped for this source
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #11
0
    def parse_item(self, response):
        """Write the 'anzhi' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags (always empty). Nothing is written when no name is
        extracted.

        The detail list is handled for 7 text entries (with a download count)
        and 6 entries (without one). For any other length the list-derived
        fields stay empty instead of raising on unbound local variables.
        """
        source = 'anzhi'

        name = util.get_text(response, '//div[@class="detail_line"]/h3//text()')
        if not name:
            return

        # First and last character are dropped (presumably surrounding brackets -- confirm).
        version = util.get_text(response, '//div[@class="detail_line"]/span//text()')[1:-1]

        first = response.meta['cate']

        # Defaults guarantee every field is bound even for an unexpected list length.
        second = download = time = size = system = ''

        data = response.xpath('//ul[@id="detail_line_ul"]/li//text()').extract()
        # The first three characters of each entry are skipped
        # (presumably a label prefix -- confirm against the page).
        if len(data) == 7:
            second = data[0][3:]
            download = data[1][3:]
            time = data[2][3:]
            size = data[3][3:]
            system = data[4][3:]
        elif len(data) == 6:
            # Same layout without the download-count entry.
            second = data[0][3:]
            time = data[1][3:]
            size = data[2][3:]
            system = data[3][3:]

        category = first + '-' + second

        text = util.get_text(response, '//div[@class="app_detail_infor"]', 0)

        # The rating is encoded in the style attribute; it is split on '-'.
        pingfen = util.get_text(response, '//div[@id="stars_detail"]/@style')
        p = pingfen.split('-')
        if len(p) == 2:
            pingfen = '0.0'
        elif len(p) == 3:
            pingfen = p[2][:-3]
        try:
            pingfen = str(float(pingfen) / 15 * 10)
        except Exception:
            pingfen = ''

        tags = ''

        self.fileout.write('\001'.join([
            source,
            name,
            version,
            category,
            util.unify_data(time),
            size,
            system,
            text,
            util.unify_download_count(download),
            pingfen,
            tags,
        ]))
        self.fileout.write('\n')
Exemple #12
0
    def parse_item(self, response):
        """Write the 'apk91' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags. The second category level comes from
        ``response.meta['cat']``. Nothing is written when no name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(response, '//h1[@class="ff f20 fb fl"]/text()')
        if not app_name:
            return

        # Fixed-width slices skip leading characters of the info entries
        # (presumably label prefixes -- confirm against the page).
        app_version = util.get_text(
            response, '//ul[@class="s_info"]/li[1]/text()')[3:]

        parent_category = util.get_text(
            response, '//div[@class="crumb clearfix"]/a[2]/text()')
        child_category = response.meta['cat']
        category = parent_category + '-' + child_category

        raw_time = util.get_text(
            response, '//ul[@class="s_info"]/li[5]/text()')[5:15]
        size = util.get_text(
            response, '//ul[@class="s_info"]/li[3]/text()')[5:]
        system = util.get_text(
            response, '//ul[@class="s_info"]/li[4]/text()')[5:]
        content = util.get_text(response, '//div[@class="o-content"]', 0)
        raw_download = util.get_text(
            response, '//ul[@class="s_info"]/li[2]/text()')

        star_class = util.get_text(
            response,
            '//div[@class="s_intro_pic fl"]/span[@class="spr star"]/a/@class')
        try:
            # Number following the first 'w' in the class value, times 20.
            rating = str(float(star_class.split('w')[1].split(' ')[0]) * 20)
        except Exception:
            rating = ''

        tag_texts = response.xpath(
            '//ul[@class="s_info"]/li[10]/a/text()').extract()
        tags = ','.join(tag_texts)

        record = delimiter.join([
            'apk91',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            content,
            util.unify_download_count(raw_download),
            rating,
            tags,
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #13
0
    def parse_item(self, response):
        """Write the 'anzow' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version (always empty), category, time, size, system, text,
        download count, rating, tags. Time, system and download count are read
        from ``response.meta``. Nothing is written when no name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(
            response, "//dl[@class='down_info clear']/dd/div[1]/h1/text()")
        if not app_name:
            return

        # Only the last two characters of the second breadcrumb link are used.
        parent_category = util.get_text(
            response, "//div[@class='crumbs fl']/a[2]/text()")[-2:]
        child_category = util.get_text(
            response, "//div[@class='crumbs fl']/a[3]/text()")
        category = parent_category + '-' + child_category

        raw_time = response.meta['time']
        size = util.get_text(
            response, '//div[@class="xiazai1"][1]/../dl/dt/ul/li[3]/text()')
        system = response.meta['system']
        intro = util.get_text(response, '//div[@class="down_intro"]', 0)
        raw_download = response.meta['download']

        stars = util.get_text(
            response,
            '//dl[@class="down_info clear"]/dd/dl/dt/ul/li[7]/strong/text()')
        try:
            # Each star character is worth 20 points.
            rating = str(stars.count('★') * 20)
        except Exception:
            rating = ''

        keyword_texts = response.xpath('//p[@class="keywords"]//a/text()').extract()
        tags = ','.join(keyword_texts)

        record = delimiter.join([
            'anzow',
            app_name,
            '',  # version is not scraped for this source
            category,
            util.unify_data(raw_time),
            size,
            system,
            intro,
            util.unify_download_count(raw_download),
            rating,
            tags,
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #14
0
    def parse_item(self, response):
        """Write the '163' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time (empty input), size, system (always
        empty), text, download count, rating, tags (always empty). Nothing is
        written when no name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(response, '//span[@class="f-h1"]/text()')
        if not app_name:
            return

        app_version = util.get_text(
            response, '//table[@class="table-appinfo"]/tr[3]/td/text()')

        # Only the last two characters of the second breadcrumb link are used.
        parent_category = util.get_text(
            response,
            "//div[@class='sect']/div[@class='crumb']/a[2]/text()")[-2:]
        child_category = util.get_text(
            response, "//div[@class='sect']/div[@class='crumb']/a[3]/text()")
        category = parent_category + '-' + child_category

        size = util.get_text(
            response, '//table[@class="table-appinfo"]/tr[2]/td[1]/text()')
        description = util.get_text(response, '//div[@id="app-desc"]', 0)

        # First and last character are dropped (presumably brackets -- confirm).
        raw_download = util.get_text(
            response, '//span[@class="vote-text-s"]/text()')[1:-1]

        bar_style = util.get_text(
            response, '//span[@class="vote-column-s"]/i/@style')
        try:
            # The rating is the text between the first ':' and the following '%'.
            rating = bar_style.split(':')[1].split('%')[0]
        except Exception:
            rating = ''

        record = delimiter.join([
            '163',
            app_name,
            app_version,
            category,
            util.unify_data(''),  # no time is scraped for this source
            size,
            '',  # system is not scraped for this source
            description,
            util.unify_download_count(raw_download),
            rating,
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #15
0
    def parse_item(self, response):
        """Write the '3310' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags (always empty). Nothing is written when the heading is
        empty.

        The heading is split at its last space into name and version. A
        heading without any space is kept as the name with an empty version,
        so a record is never written with an empty name.
        """
        source = '3310'

        name_version = util.get_text(response,
                                     '//div[@class="cont"]/h2/text()')
        if not name_version:
            return

        name, separator, version = name_version.rpartition(' ')
        if not separator:
            # Single-token heading: treat it as the name, not as the version.
            name, version = name_version, ''

        first = util.get_text(response, '//div[@class="guide"]/a[3]/text()')
        second = util.get_text(response, '//div[@class="guide"]/a[4]/text()')
        category = first + '-' + second

        # Fixed-width slices skip leading characters of each entry
        # (presumably label prefixes -- confirm against the page).
        time = util.get_text(response, '//div[@class="cont"]/p[2]/text()')[5:]

        size = util.get_text(response,
                             '//div[@class="cont"]/p[1]/span/text()')[3:]

        system = util.get_text(response,
                               '//div[@class="cont"]/p[3]/span/text()')[5:]

        text = util.get_text(response,
                             '//div[@class="pictxt item"][not(@style)]', 0)

        download = util.get_text(response, '//span[@id="downnum"]/text()')

        pingfen = util.get_text(response, '//div[@class="score"]/span/text()')
        try:
            # The numeric score is multiplied by 20; non-numeric text gives an empty rating.
            pingfen = str(float(pingfen) * 20)
        except Exception:
            pingfen = ''

        tags = ''

        self.fileout.write('\001'.join([
            source,
            name,
            version,
            category,
            util.unify_data(time),
            size,
            system,
            text,
            util.unify_download_count(download),
            pingfen,
            tags,
        ]))
        self.fileout.write('\n')
Exemple #16
0
    def parse_item(self, response):
        """Write the 'mumayi' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count
        (empty input), rating, tags (always empty). The time comes from
        ``response.meta['time']``. Nothing is written when the heading is empty.
        """
        delimiter = '\001'

        heading = util.get_text(
            response, '//h1[@class="iappname hidden fl"]/text()')
        if not heading:
            return

        # Everything after the last 'V' is the version; without a 'V' the
        # whole heading is the name and the version is empty.
        before, marker, after = heading.rpartition('V')
        if marker:
            app_name, app_version = before, after
        else:
            app_name, app_version = heading, ''

        # Only the first two characters of the second link are used.
        parent_category = util.get_text(
            response, '//div[@id="classlists"]/a[2]/text()')[:2]
        child_category = util.get_text(
            response, '//div[@id="classlists"]/a[3]/text()')
        category = parent_category + '-' + child_category

        raw_time = response.meta['time']
        size = util.get_text(response, '//span[text()="程序大小:"]/../text()')
        system = util.get_text(response, '//div[@class="sel_text fl"]/text()')
        description = util.get_text(
            response, '//ul[@class="author"]/..//p[position()<last()]', 0)

        star_class = util.get_text(response, '//div[@id="starlist"]/@class')
        try:
            # Number following 'now' in the class value, doubled.
            rating = str(float(star_class.split('now')[1]) * 2)
        except Exception:
            rating = ''

        record = delimiter.join([
            'mumayi',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            description,
            util.unify_download_count(''),  # no download count is scraped
            rating,
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #17
0
    def parse(self, response):
        """Write the 'baidu' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time (empty input), size, system (always
        empty), text, download count, rating, tags (always empty). Nothing is
        written when no name is extracted.
        """
        delimiter = '\001'

        app_name = util.get_text(response, '//span[@class="gray"]/text()')
        if not app_name:
            return

        # Fixed-width slices skip leading characters of several fields
        # (presumably label prefixes -- confirm against the page).
        app_version = util.get_text(
            response, '//span[@class="version"]/text()')[3:]

        parent_category = util.get_text(response, '//div[@class="nav"]//a/text()')
        child_category = util.get_text(
            response, '//div[@class="nav"]/span[3]/a/text()')
        category = parent_category + '-' + child_category

        size = util.get_text(response, '//span[@class="size"]/text()')[3:]
        brief = util.get_text(response, '//div[@class="brief-long"]/p', 0)
        raw_download = util.get_text(
            response, '//span[@class="download-num"]/text()')[5:]

        star_style = util.get_text(
            response, '//span[@class="star-percent"]/@style')
        try:
            # The rating is the text between the first ':' and the following '%'.
            rating = star_style.split(':')[1].split('%')[0]
        except Exception:
            rating = ''

        record = delimiter.join([
            'baidu',
            app_name,
            app_version,
            category,
            util.unify_data(''),  # no time is scraped for this source
            size,
            '',  # system is not scraped for this source
            brief,
            util.unify_download_count(raw_download),
            rating,
            '',  # tags are not scraped for this source
        ])
        self.fileout.write(record)
        self.fileout.write('\n')
Exemple #18
0
    def parse_item(self, response):
        """Write the 'hiapk' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags (always empty). The heading has the form
        ``name(version)``; without parentheses the whole heading is the name.
        Nothing is written when the name is empty.
        """
        delimiter = '\001'
        info_row = '//div[@class="code_box_border"]/div[@class="line_content"]'

        heading = util.get_text(response, "//div[@id='appSoftName']/text()")
        try:
            segments = heading.split('(')
            app_version = segments[1].split(')')[0]
            app_name = segments[0]
        except Exception:
            app_version = ''
            app_name = heading
        if not app_name:
            return

        parent_category = util.get_text(response, "//a[@id='categoryParent']/text()")
        child_category = util.get_text(response, "//a[@id='categoryLink']/text()")
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, info_row + '[7]/span[2]/text()')
        size = util.get_text(response, '//span[@id="appSize"]/text()')
        system = util.get_text(response, '//span[@class="font14 detailMiniSdk d_gj_line left"]/text()')
        intro = util.get_text(response, '//pre[@id="softIntroduce"]', 0)
        raw_download = util.get_text(response, info_row + '[2]/span[2]/text()')

        star_class = util.get_text(response, '//div[@id="appIconTips"]/div[1]/@class')
        try:
            # Third class token, third "_"-separated part, doubled.
            rating = str(float(star_class.split(" ")[2].split("_")[2]) * 2)
        except Exception:
            rating = ''

        fields = [
            'hiapk',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            intro,
            util.unify_download_count(raw_download),
            rating,
            '',  # tags are not scraped for this source
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
    def parse_item(self, response):
        """Write the 'yingyonghui' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating (always empty), tags (always empty). Nothing is written when no
        name is extracted.
        """
        delimiter = '\001'
        crumb = '//div[@class="breadcrumb centre-content"]'

        app_name = util.get_text(response, '//h1[@class="app-name"]/text()')
        if not app_name:
            return

        # Leading three characters are skipped on several fields
        # (presumably a label prefix -- confirm against the page).
        app_version = util.get_text(response, '//div[@class="intro"]/p[1]/text()[2]')[3:]

        parent_category = util.get_text(response, crumb + '/a[2]/text()')
        child_category = util.get_text(response, crumb + '/a[3]/text()')
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, '//div[@class="intro"]/p[1]/text()')[3:]

        size_text = util.get_text(response, '//span[@class="app-statistic"]/text()[2]')
        try:
            size = size_text.split('大小:')[1].split(' 更新')[0]
        except Exception:
            size = ''

        system = util.get_text(response, '//p[@class="art-content"][3]/text()[4]')[3:]
        intro = util.get_text(response, '//div[@class="main-info"]/p[1]', 0)

        download_text = util.get_text(response, '//span[@class="app-statistic"]/text()')
        try:
            raw_download = download_text.split('下载')[0]
        except Exception:
            raw_download = ''

        fields = [
            'yingyonghui',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            intro,
            util.unify_download_count(raw_download),
            '',  # rating is not scraped for this source
            '',  # tags are not scraped for this source
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
Exemple #20
0
    def parse_item(self, response):
        """Write the '3310' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags (always empty). Nothing is written when the heading is
        empty.

        The heading is split at its last space into name and version. A
        heading without any space is kept as the name with an empty version,
        so a record is never written with an empty name.
        """
        source = '3310'

        name_version = util.get_text(response, '//div[@class="cont"]/h2/text()')
        if not name_version:
            return

        name, separator, version = name_version.rpartition(' ')
        if not separator:
            # Single-token heading: treat it as the name, not as the version.
            name, version = name_version, ''

        first = util.get_text(response, '//div[@class="guide"]/a[3]/text()')
        second = util.get_text(response, '//div[@class="guide"]/a[4]/text()')
        category = first + '-' + second

        # Fixed-width slices skip leading characters of each entry
        # (presumably label prefixes -- confirm against the page).
        time = util.get_text(response, '//div[@class="cont"]/p[2]/text()')[5:]

        size = util.get_text(response, '//div[@class="cont"]/p[1]/span/text()')[3:]

        system = util.get_text(response, '//div[@class="cont"]/p[3]/span/text()')[5:]

        text = util.get_text(response, '//div[@class="pictxt item"][not(@style)]', 0)

        download = util.get_text(response, '//span[@id="downnum"]/text()')

        pingfen = util.get_text(response, '//div[@class="score"]/span/text()')
        try:
            # The numeric score is multiplied by 20; non-numeric text gives an empty rating.
            pingfen = str(float(pingfen) * 20)
        except Exception:
            pingfen = ''

        tags = ''

        self.fileout.write('\001'.join([
            source,
            name,
            version,
            category,
            util.unify_data(time),
            size,
            system,
            text,
            util.unify_download_count(download),
            pingfen,
            tags,
        ]))
        self.fileout.write('\n')
Exemple #21
0
    def parse_item(self, response):
        """Write the '360' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags. The second category level comes from
        ``response.meta['categroy']`` (key spelled as the producer sets it).
        Nothing is written when no name is extracted.
        """
        delimiter = '\001'
        base_info = '//div[@class="breif"]/div[@class="base-info"]/table/tbody'

        app_name = util.get_text(response, '//h2[@id="app-name"]/span/text()')
        if not app_name:
            return

        app_version = util.get_text(response, base_info + '/tr[2]/td[1]/text()')

        # The first character of the current nav entry is dropped.
        parent_category = util.get_text(response, '//div[@class="nav"]/ul/li[@class="cur"]/a/text()')[1:]
        child_category = response.meta['categroy']
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, base_info + '/tr[1]/td[2]/text()')
        size = util.get_text(response, '//div[@class="pf"]/span[@class="s-3"][2]/text()')
        system = util.get_text(response, base_info + '/tr[2]/td[2]/text()')
        brief = util.get_text(response, '//div[@class="breif"]', 0)
        raw_download = util.get_text(response, '//div[@class="pf"]/span[@class="s-3"][1]/text()')

        score_text = util.get_text(response, '//div[@class="pf"]/span[@class="s-1 js-votepanel"]/text()')
        try:
            # The numeric score is multiplied by 10.
            rating = str(float(score_text) * 10)
        except Exception:
            rating = ''

        tag_texts = response.xpath('//div[@class="app-tags"]//a/text()').extract()
        tags = ','.join(item.strip() for item in tag_texts)

        fields = [
            '360',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            brief,
            util.unify_download_count(raw_download),
            rating,
            tags,
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
Exemple #22
0
    def parse_item(self, response):
        """Write the '25pp' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time (empty input), size, system, text,
        download count, rating, tags. Nothing is written when no name is
        extracted.
        """
        delimiter = '\001'
        title_box = '//div[@class="title-stat"]/div[@class="txt"]'

        app_name = util.get_text(response, title_box + '/h1/text()')
        if not app_name:
            return

        # Fixed-width slices skip leading characters of the list entries
        # (presumably label prefixes -- confirm against the page).
        app_version = util.get_text(response, title_box + '/ul/li[1]/text()')[3:]

        parent_category = util.get_text(response, '//div[@class="location"]/a[2]/text()')
        child_category = util.get_text(response, title_box + '/ul/li[2]/text()')
        category = parent_category + '-' + child_category

        size = util.get_text(response, title_box + '/ul/li[3]/text()')[3:]
        system = util.get_text(response, title_box + '/ul/li[5]/text()')[5:]
        content = util.get_text(response, '//div[@class="conTxt"][1]', 0)
        raw_download = util.get_text(response, '//li[@class="borderR"]/span/text()')

        score_text = util.get_text(response, '//div[@class="downMunber"]/ul/li[3]/span/text()')
        try:
            # The numeric score is multiplied by 20.
            rating = str(float(score_text) * 20)
        except Exception:
            rating = ''

        tag_texts = response.xpath('//li[@class="w-450"]//a/text()').extract()
        tags = ','.join(tag_texts)

        fields = [
            '25pp',
            app_name,
            app_version,
            category,
            util.unify_data(''),  # no time is scraped for this source
            size,
            system,
            content,
            util.unify_download_count(raw_download),
            rating,
            tags,
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
Exemple #23
0
    def parse_item(self, response):
        """Write the 'apk91' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count,
        rating, tags. The second category level is read from
        ``response.meta['cat']``. Nothing is written when no name is extracted.
        """
        delimiter = '\001'
        info_list = '//ul[@class="s_info"]'

        app_name = util.get_text(response, '//h1[@class="ff f20 fb fl"]/text()')
        if not app_name:
            return

        # Fixed-width slices skip leading characters of the info entries
        # (presumably label prefixes -- confirm against the page).
        app_version = util.get_text(response, info_list + '/li[1]/text()')[3:]

        parent_category = util.get_text(response, '//div[@class="crumb clearfix"]/a[2]/text()')
        child_category = response.meta['cat']
        category = parent_category + '-' + child_category

        raw_time = util.get_text(response, info_list + '/li[5]/text()')[5:15]
        size = util.get_text(response, info_list + '/li[3]/text()')[5:]
        system = util.get_text(response, info_list + '/li[4]/text()')[5:]
        content = util.get_text(response, '//div[@class="o-content"]', 0)
        raw_download = util.get_text(response, info_list + '/li[2]/text()')

        star_class = util.get_text(response, '//div[@class="s_intro_pic fl"]/span[@class="spr star"]/a/@class')
        try:
            # Number following the first 'w' in the class value, times 20.
            rating = str(float(star_class.split('w')[1].split(' ')[0]) * 20)
        except Exception:
            rating = ''

        tag_texts = response.xpath(info_list + '/li[10]/a/text()').extract()
        tags = ','.join(tag_texts)

        fields = [
            'apk91',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            content,
            util.unify_download_count(raw_download),
            rating,
            tags,
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
Exemple #24
0
    def parse_item(self, response):
        """Write the 'mumayi' item found in ``response`` as one line of ``self.fileout``.

        Eleven fields are emitted, separated by the 0x01 character: source,
        name, version, category, time, size, system, text, download count
        (empty input), rating, tags (always empty). The time is read from
        ``response.meta['time']``. Nothing is written when the heading is empty.
        """
        delimiter = '\001'

        heading = util.get_text(response, '//h1[@class="iappname hidden fl"]/text()')
        if not heading:
            return

        # The part after the last 'V' is the version; the rest is the name.
        chunks = heading.split('V')
        if len(chunks) > 1:
            app_version = chunks.pop(-1)
        else:
            app_version = ''
        if chunks:
            app_name = 'V'.join(chunks)
        else:
            app_name = heading

        # Only the first two characters of the second link are used.
        parent_category = util.get_text(response, '//div[@id="classlists"]/a[2]/text()')[:2]
        child_category = util.get_text(response, '//div[@id="classlists"]/a[3]/text()')
        category = parent_category + '-' + child_category

        raw_time = response.meta['time']
        size = util.get_text(response, '//span[text()="程序大小:"]/../text()')
        system = util.get_text(response, '//div[@class="sel_text fl"]/text()')
        description = util.get_text(response, '//ul[@class="author"]/..//p[position()<last()]', 0)

        star_class = util.get_text(response, '//div[@id="starlist"]/@class')
        try:
            # Number following 'now' in the class value, doubled.
            rating = str(float(star_class.split('now')[1]) * 2)
        except Exception:
            rating = ''

        fields = [
            'mumayi',
            app_name,
            app_version,
            category,
            util.unify_data(raw_time),
            size,
            system,
            description,
            util.unify_download_count(''),  # no download count is scraped
            rating,
            '',  # tags are not scraped for this source
        ]
        self.fileout.write(delimiter.join(fields))
        self.fileout.write('\n')
Exemple #25
0
    def parse_item(self, response):
        """Write one record for a 'xiaomi' response to ``self.fileout``.

        Fields are emitted in the order source, name, version, category,
        time, size, system, text, download, rating, tags, separated by the
        SOH (0x01) character; a newline is written afterwards. Returns
        without writing when the title lookup yields an empty value.

        The ``util`` helpers used here are defined outside this block.
        """
        source = 'xiaomi'

        name = util.get_text(response, '//div[@class="intro-titles"]/h3/text()')
        if not name:
            return

        version = util.get_text(response, '//ul[@class=" cf"]/li[4]/text()')

        # Top-level category is passed in through the request meta.
        top_level = response.meta['first']
        sub_level = util.get_text(response, '//div[@class="bread-crumb"]/ul/li[2]/a/text()')
        category = '-'.join([top_level, sub_level])

        updated = util.get_text(response, '//ul[@class=" cf"]/li[6]/text()')
        size = util.get_text(response, '//ul[@class=" cf"]/li[2]/text()')
        text = util.get_text(response, '//p[@class="pslide"]', 0)

        # Not extracted for this source; kept as empty placeholders so the
        # record keeps the same field count.
        system = ''
        download = ''
        tags = ''

        # The class attribute looks like 'star1-hover star1-<n>'; <n> is
        # multiplied by 10 (presumably to reach a 0-100 scale -- confirm).
        star_class = util.get_text(response, '//div[@class="star1-empty"]/div/@class')
        try:
            rating = str(float(star_class.split('star1-hover star1-')[1]) * 10)
        except Exception:
            rating = ''

        fields = [
            source,
            name,
            version,
            category,
            util.unify_data(updated),
            size,
            system,
            text,
            util.unify_download_count(download),
            rating,
            tags,
        ]
        self.fileout.write('\001'.join(fields))
        self.fileout.write('\n')
Exemple #26
0
    def parse(self, response):
        """Write one record for a 'baidu' response to ``self.fileout``.

        Eleven fields (source, name, version, category, time, size, system,
        text, download, rating, tags) are joined with the SOH (0x01)
        character and written, followed by a newline. Returns without
        writing when the name lookup yields an empty value.

        The ``util`` helpers used here are defined outside this block.
        """
        source = 'baidu'

        name = util.get_text(response, '//span[@class="gray"]/text()')
        if not name:
            return

        # The first three characters of the version and size texts are
        # dropped (presumably a fixed label prefix -- confirm against the page).
        version = util.get_text(response, '//span[@class="version"]/text()')[3:]

        top_level = util.get_text(response, '//div[@class="nav"]//a/text()')
        sub_level = util.get_text(response, '//div[@class="nav"]/span[3]/a/text()')
        category = '-'.join([top_level, sub_level])

        size = util.get_text(response, '//span[@class="size"]/text()')[3:]
        text = util.get_text(response, '//div[@class="brief-long"]/p', 0)

        # Likewise, the first five characters of the download text are skipped.
        download = util.get_text(response, '//span[@class="download-num"]/text()')[5:]

        # No publish time, system requirement or tags are extracted here.
        updated = ''
        system = ''
        tags = ''

        # The rating is read from an inline style: the text between the
        # first ':' and the following '%'.
        star_style = util.get_text(response, '//span[@class="star-percent"]/@style')
        try:
            after_colon = star_style.split(':')[1]
            rating = after_colon.split('%')[0]
        except Exception:
            rating = ''

        fields = [
            source,
            name,
            version,
            category,
            util.unify_data(updated),
            size,
            system,
            text,
            util.unify_download_count(download),
            rating,
            tags,
        ]
        self.fileout.write('\001'.join(fields))
        self.fileout.write('\n')
    def parse_item(self, response):
        """Write one record for an 'applestore' response to ``self.fileout``.

        Eleven fields (source, name, version, category, time, size, system,
        text, download, rating, tags) are joined with the SOH (0x01)
        character and written, followed by a newline. Returns without
        writing when the title lookup yields an empty value.

        The ``util`` helpers used here are defined outside this block.
        """
        source = 'applestore'

        name = util.get_text(response, '//div[@id="desktopContentBlockId"]//div[@id="title"]//h1/text()')
        if not name:
            return

        version = util.get_text(response, '//div[@id="left-stack"]//span[@itemprop="softwareVersion"]/text()')

        # The top-level category is a fixed literal for this source.
        top_level = '软件'
        sub_level = util.get_text(response, '//div[@id="left-stack"]//span[@itemprop="applicationCategory"]/text()')
        category = '-'.join([top_level, sub_level])

        updated = util.get_text(response, '//div[@id="left-stack"]//span[@itemprop="datePublished"]/text()')

        # Size and tags are not extracted for this source.
        size = ''
        tags = ''

        system = util.get_text(response, '//div[@id="left-stack"]//span[@itemprop="operatingSystem"]/text()')
        text = util.get_text(response, '//div[@class="center-stack"]/div[@class="product-review"]/p', 0)

        # The "download" field is filled from the text of the customer
        # ratings row; the rating itself comes from that row's aria-label.
        download = util.get_text(response, '//div[@class="extra-list customer-ratings"]/div[4]/span/text()')

        # The label text before '星, ' is parsed as a float and multiplied
        # by 20 (presumably stars to a 0-100 scale -- confirm).
        star_label = util.get_text(response, '//div[@class="extra-list customer-ratings"]/div[4]/@aria-label')
        try:
            stars = star_label.split('星, ')[0]
            rating = str(float(stars) * 20)
        except Exception:
            rating = ''

        fields = [
            source,
            name,
            version,
            category,
            util.unify_data(updated),
            size,
            system,
            text,
            util.unify_download_count(download),
            rating,
            tags,
        ]
        self.fileout.write('\001'.join(fields))
        self.fileout.write('\n')
Exemple #28
0
    def parse_item(self, response):
        """Write one record for a '163' response to ``self.fileout``.

        Eleven fields (source, name, version, category, time, size, system,
        text, download, rating, tags) are joined with the SOH (0x01)
        character and written, followed by a newline. Returns without
        writing when the title lookup yields an empty value.

        The ``util`` helpers used here are defined outside this block.
        """
        source = '163'

        name = util.get_text(response, '//span[@class="f-h1"]/text()')
        if not name:
            return

        version = util.get_text(response, '//table[@class="table-appinfo"]/tr[3]/td/text()')

        # Only the last two characters of the top-level crumb are kept.
        top_level = util.get_text(response, "//div[@class='sect']/div[@class='crumb']/a[2]/text()")[-2:]
        sub_level = util.get_text(response, "//div[@class='sect']/div[@class='crumb']/a[3]/text()")
        category = '-'.join([top_level, sub_level])

        size = util.get_text(response, '//table[@class="table-appinfo"]/tr[2]/td[1]/text()')
        text = util.get_text(response, '//div[@id="app-desc"]', 0)

        # The first and last characters of the vote text are stripped
        # (presumably surrounding brackets -- confirm against the page).
        download = util.get_text(response, '//span[@class="vote-text-s"]/text()')[1:-1]

        # No publish time, system requirement or tags are extracted here.
        updated = ''
        system = ''
        tags = ''

        # The rating is read from an inline style: the text between the
        # first ':' and the following '%'.
        vote_style = util.get_text(response, '//span[@class="vote-column-s"]/i/@style')
        try:
            after_colon = vote_style.split(':')[1]
            rating = after_colon.split('%')[0]
        except Exception:
            rating = ''

        fields = [
            source,
            name,
            version,
            category,
            util.unify_data(updated),
            size,
            system,
            text,
            util.unify_download_count(download),
            rating,
            tags,
        ]
        self.fileout.write('\001'.join(fields))
        self.fileout.write('\n')