Exemple #1
0
    def handle(self, datas):
        """Build a flat dict of review fields from one parsed node.

        Selects ``div[@class='item']`` under ``datas`` and hands the result
        to ``self.mackining`` once per field name; the values it returns are
        then lightly normalised.

        Args:
            datas: object exposing an ``xpath`` method (presumably an lxml
                element -- confirm against the caller).

        Returns:
            dict with the keys ``address``, ``name``, ``url``, ``score``,
            ``title``, ``comment``, ``commentid``, ``buytime``,
            ``commenttime``, ``province``, ``city``, ``useful`` and
            ``reply``.  ``score`` is the first digit 0-5 found in the raw
            score text as an int, or ``''`` when there is none.  ``useful``
            and ``reply`` are ints.  ``buytime`` and ``commenttime`` are
            whatever ``ProcessData.str_datetime`` returns.  ``province``
            repeats ``address`` and ``city`` is always ``''``.

        Raises:
            ValueError, TypeError: propagated from ``int()`` when the
                ``useful`` or ``reply`` value is not convertible.
        """
        data = datas.xpath("div[@class='item']")
        address = self.mackining('address', data)
        name = self.mackining('name', data)
        url = self.mackining('url', data)

        # Raw string so that ``\s`` is a regex escape, not a string escape.
        raw_score = self.mackining('score', data)
        score_match = re.search(r'\s*([0-5])\s*', raw_score)
        score = int(score_match.group(1)) if score_match else ''

        title = self.mackining('title', data)
        comment = self.mackining('comment', data)
        commentid = self.mackining('commentid', data)
        buytime = self.mackining('buytime', data)
        useful = int(self.mackining('useful', data))
        reply = int(self.mackining('reply', data))
        buytime = ProcessData.str_datetime(buytime)
        commenttime = ProcessData.str_datetime(
            self.mackining('commenttime', data))

        return {
            'address': address,
            'name': name,
            'url': url,
            'score': score,
            'title': title,
            'comment': comment,
            'commentid': commentid,
            'buytime': buytime,
            'commenttime': commenttime,
            # NOTE(review): province mirrors address and city is left blank;
            # no finer-grained location is extracted here.
            'province': address,
            'city': '',
            'useful': useful,
            'reply': reply,
        }
Exemple #2
0
    def crawl(self):
        """Fetch the review pages for this item and export each review.

        Reads ``self.data['uuid']`` and ``self.key``, asks ``self.get_page``
        for the page count, then for every page index loads the JSON at
        ``self.get_url(goods_no, i)`` and exports one ``EcCommentModel`` per
        entry of its ``'appraiseArray'`` list.

        Returns:
            None.  Returns early, exporting nothing, when the page count
            is 0.

        Raises:
            KeyError: if a page lacks ``'appraiseArray'``, a review lacks one
                of the expected keys, or ``self.data`` lacks ``brand``,
                ``version``, ``series``, ``is_Bbc`` or ``skuID``.
        """
        ecid = self.data['uuid']
        goods_no = str(self.key)
        category_data = extract_category(self)
        totalpage = int(self.get_page(goods_no))
        if totalpage == 0:
            return

        # NOTE(review): this requests totalpage + 1 page indices (0 through
        # totalpage).  Whether get_url counts pages from 0 or 1 is not
        # visible here -- confirm that no page is fetched needlessly.
        for i in range(totalpage + 1):
            url = self.get_url(goods_no, i)
            # Named ``payload`` so the stdlib ``json`` module is not shadowed.
            payload = ProcessData.get_json_data(url)

            for item in payload['appraiseArray']:
                comment_data = {
                    'eid': ecid,  # commodity table foreign key
                    'source_id': goods_no,
                    'source': self.data.get('source'),
                    'comment_id': item['id'],  # review id
                    'score': item['appraiseGrade'],  # commodity score
                    'pubtime': ProcessData.str_datetime(item['appraiseTime']),
                    'user_name': item['appraiseName'],
                    'content': item['summary'],
                    'brand': self.data['brand'],
                    'version': self.data['version'],
                    'series': self.data['series'],
                    'comment': {
                        'is_Bbc': self.data['is_Bbc'],
                        'skuID': self.data['skuID'],
                    },
                }
                # Applied in this order, so get_ctime() keys override
                # same-named category keys, and both override the literals.
                comment_data.update(category_data)
                comment_data.update(get_ctime())
                export(EcCommentModel(comment_data))
Exemple #3
0
 def crawl(self):
     """Fetch the review pages for this item and export each review.

     Reads ``self.data['uuid']`` and ``self.key``, asks ``self.get_page``
     for the page count, then for each page index in ``range(totalpage)``
     loads the JSON at ``self.get_url(goods_no, i)`` and exports one
     ``EcCommentModel`` per entry of its ``'appraiseArray'`` list.

     A page whose payload has no usable ``'appraiseArray'`` is logged and
     skipped; the remaining pages are still processed.

     Returns:
         ``{}`` when the page count is 0, otherwise None.
     """
     ecid = self.data['uuid']
     goods_no = str(self.key)
     category_data = extract_category(self)
     totalpage = int(self.get_page(goods_no))
     if totalpage == 0:
         print("get_page fail")
         return {}

     for i in range(totalpage):
         url = self.get_url(goods_no, i)
         # Named ``payload`` so the stdlib ``json`` module is not shadowed.
         payload = ProcessData.get_json_data(url)
         try:
             appraise = payload['appraiseArray']
         except Exception as e:
             # Kept broad on purpose: the payload may be missing the key or
             # may not be a mapping at all; either way the page is unusable.
             self.logger.error(url)
             self.logger.error(e)
             print("get appraise fail")
             # Without this, the loop below would hit an unbound name on
             # the first page or re-export the previous page's reviews.
             continue

         for item in appraise:
             comment_data = {
                 'ecid': ecid,  # commodity table foreign key
                 'source_id': goods_no,
                 'source': self.data.get('source'),
                 'comment_id': item['id'],  # review id
                 'score': item['appraiseGrade'],  # commodity score
                 'pubtime': ProcessData.str_datetime(item['appraiseTime']),
                 'user_id': item['appraiseName'],
                 'content': item['summary'],
             }
             comment_data.update(category_data)
             export(EcCommentModel(comment_data))