Ejemplo n.º 1
0
 def parse_data(self, data):
     """Queue one shop's delivery-mode row and flush the batch when it is large.

     ``data['param']`` is converted to a string and used as the shop id.
     Records whose id is longer than 11 characters are reported and skipped.
     Otherwise the ``id`` and ``text`` of ``data['data']['delivery_mode']``
     are read through ``getMapValue`` and appended to ``self.batch``; once
     the batch holds more than 100000 rows it is written with
     ``self.db.update`` and then cleared.

     Any exception is caught here: the traceback is printed, followed by
     the complete input record. Nothing is returned.
     """
     try:
         rest_id = str(data['param'])
         if len(rest_id) > 11:
             print("错误数据", data)
             return

         # Use a separate name so ``data`` still refers to the whole record
         # when the error report below is printed.
         delivery_mode = data['data'].get('delivery_mode')
         delever_id = getMapValue(delivery_mode, "id")
         delever_text = getMapValue(delivery_mode, "text")
         self.batch.addBatch(
             [self.city, self.date, rest_id, delever_text, delever_id])

         # Flush to the database once the pending batch is large enough.
         if self.batch.getSize() > 100000:
             self.db.update(self.batch)
             self.batch.cleanBatch()
     except Exception:
         # print_exc() writes the traceback itself and returns None, so it
         # must not be wrapped in print().
         traceback.print_exc()
         print('解析数据报错\n错误数据:{}'.format(data))
Ejemplo n.º 2
0
 def parse_data(self, data):
     """Queue one shop's delivery-mode row into ``self.batch1``.

     ``data['param']`` is converted to a string and used as the shop id.
     Records whose id is longer than 11 characters are reported and skipped.
     Otherwise the ``id`` and ``text`` of ``data['data']['delivery_mode']``
     are read through ``getMapValue`` and handed to ``self.insert`` together
     with the city, date and area stored on the instance.

     Any exception is caught here: the traceback is printed, followed by
     the complete input record. Nothing is returned.
     """
     try:
         rest_id = str(data['param'])
         if len(rest_id) > 11:
             print("错误数据", data)
             return

         # Use a separate name so ``data`` still refers to the whole record
         # when the error report below is printed.
         delivery_mode = data['data'].get('delivery_mode')
         delever_id = getMapValue(delivery_mode, "id")
         delever_text = getMapValue(delivery_mode, "text")
         param1 = [
             self.city, self.date, self.rest_area, rest_id,
             delever_text, delever_id
         ]
         self.insert(self.batch1, param1)
     except Exception:
         # print_exc() writes the traceback itself and returns None, so it
         # must not be wrapped in print().
         traceback.print_exc()
         print('解析数据报错\n错误数据:{}'.format(data))
    def parse_data(self, data):
        """Queue one ``batch1`` row per entry of ``data["data"]``.

        ``data['param']`` is the shop id. Shops listed in
        ``self.err_rest_ids`` are ignored silently; ids longer than 11
        characters are reported and skipped. Each remaining entry contributes
        its ``name`` and ``count`` (read through ``getMapValue``).

        Any exception is caught and printed; nothing is returned.
        """
        try:
            shop_id = data['param']
            if shop_id in self.err_rest_ids:
                return
            if len(shop_id) > 11:
                print("错误数据", data)
                return

            for entry in data["data"]:
                row = [
                    self.city, self.date, self.rest_area, shop_id,
                    getMapValue(entry, "name"),
                    getMapValue(entry, "count"),
                ]
                self.insert(self.batch1, row)
        except Exception as err:
            print("解析数据报错", err)
Ejemplo n.º 4
0
 def parse_data(self, data):
     """Queue a single ``batch1`` row holding a shop's score fields.

     ``data['param']`` is the shop id. Shops listed in
     ``self.err_rest_ids`` are ignored silently; ids longer than 11
     characters are reported and skipped. The row consists of the city,
     date, area and shop id followed by five values read from
     ``data['data']`` through ``getMapValue``.

     Any exception is caught and the input record is printed; nothing is
     returned.
     """
     score_fields = (
         "compare_rating",
         "deliver_time",
         "food_score",
         "overall_score",
         "service_score",
     )
     try:
         shop_id = data['param']
         if shop_id in self.err_rest_ids:
             return
         if len(shop_id) > 11:
             print("错误数据", data)
             return

         row = [self.city, self.date, self.rest_area, shop_id]
         # Append the score values in the fixed column order above.
         row.extend(getMapValue(data['data'], field) for field in score_fields)
         self.insert(self.batch1, row)
     except Exception:
         print('解析数据报错\n错误数据:{}'.format(data))
Ejemplo n.º 5
0
    def parse_data(self, data):
        """Queue one ``batch1`` row per usable search word of a shop.

        ``data["param"]`` is indexed positionally: element 0 is converted to
        a string and used as the shop id, elements 1 and 2 are copied into
        every row unchanged (presumably latitude and longitude, judging by
        the original names ``la``/``lo`` -- confirm against the caller).
        Shops listed in ``self.err_rest_ids`` are ignored silently; ids
        longer than 11 characters are reported and skipped. Entries whose
        ``search_word`` comes back as ``'-999'`` are not stored.

        Any exception is caught and printed; nothing is returned.
        """
        try:
            params = data["param"]
            shop_id = str(params[0])
            if shop_id in self.err_rest_ids:
                return
            if len(shop_id) > 11:
                print("错误数据", data)
                return

            lat, lon = params[1], params[2]
            for entry in data["data"]:
                keyword = getMapValue(entry, "search_word")
                if keyword == '-999':
                    continue
                row = [
                    self.city, self.date, self.rest_area,
                    shop_id, lat, lon, keyword,
                ]
                self.insert(self.batch1, row)
        except Exception as err:
            print("解析数据报错", err)
Ejemplo n.º 6
0
    def parse_data(self, data):
        """Queue one rating row plus one row per rated food item.

        ``data["param"]`` is converted to a string and used as the shop id;
        ids longer than 11 characters are reported and skipped. A fresh
        ``uuid.uuid1()`` string is generated as the rating id and shared by
        the rating row (``self.batch1``) and by every row built from
        ``data["data"]["item_ratings"]`` (``self.batch2``).

        The input record, the rating payload and the generated rating id are
        printed as the method runs. Any exception is caught here: the
        traceback is printed, followed by the input record. Nothing is
        returned.
        """
        print("data", data)
        try:
            rest_id = str(data["param"])
            if len(rest_id) > 11:
                print("错误数据", data)
                return

            rating = data["data"]
            print('item1', rating)
            rating_id = str(uuid.uuid1())
            print("rating_id", rating_id)

            rate_time = getMapValue(rating, "rated_at")
            rating_text = getMapValue(rating, "rating_text")
            reply_text = getMapValue(rating, "reply_text")
            rating_star = getMapValue(rating, "rating_star")
            username = getMapValue(rating, "username")

            param1 = [
                self.city, self.date, self.rest_area, rest_id, rating_id,
                rate_time, rating_text, reply_text, rating_star, username,
            ]
            self.insert(self.batch1, param1)

            # The rating row is queued before the per-food rows, so it is
            # kept even if "item_ratings" is missing or malformed.
            for food_rating in rating["item_ratings"]:
                food_id = getMapValue(food_rating, "food_id")
                food_name = getMapValue(food_rating, "food_name")
                image_hash = getMapValue(food_rating, "image_hash")

                param2 = [
                    self.city, self.date, self.rest_area, rest_id, rating_id,
                    rate_time, food_id, image_hash, food_name, rating_star,
                    rating_text, reply_text,
                ]
                self.insert(self.batch2, param2)
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print().
            traceback.print_exc()
            print('解析数据报错\n错误数据:{}'.format(data))
    def parse_data(self, data):
        """Parse one shop record and queue rows for ``batch1`` .. ``batch6``.

        Reads ``data['param'][2]`` as the category descriptor (keys ``id1``,
        ``name``, ``id2``, ``name2``) and ``data["data"]`` as the shop payload.
        A shop id longer than 11 characters is reported and nothing is queued.

        Otherwise, when ``self.rest_category[rest_id]`` is still empty, the
        method queues:

        * ``batch1`` -- one row of basic shop fields;
        * ``batch4`` -- one row per opening-hours entry;
        * ``batch2`` -- one row per activity;
        * ``batch5`` -- one row per threshold/reduction pair of activities
          whose ``icon_name`` is ``'减'``;
        * ``batch6`` -- the mean of the discount rates collected for
          ``batch5``, if there were any.

        Independently of that, a ``batch3`` row is queued the first time a
        given level-2 category id is seen for the shop.

        Every exception is caught; only a message containing ``data`` is
        printed (no traceback). Rows queued before the failure stay queued.
        Nothing is returned.

        NOTE(review): ``self.rest_category`` is indexed with ids that may not
        be present yet and ``.add`` is called on the result, so it is
        presumably a ``defaultdict(set)`` -- confirm. ``getMapValue``
        presumably yields ``'-999'`` for a missing key, since that value is
        compared against below -- confirm.
        """
        try:
            category_item = data['param'][2]
            # str() turns a missing "id2" (None) into the string 'None'.
            category_level2_id = str(category_item.get("id2"))
            item = data["data"]
            rest_id = getMapValue(item, "id")
            lat = float(getMapValue(item, "latitude"))
            lng = float(getMapValue(item, "longitude"))
            if len(rest_id) > 11:
                print('错误店铺id\nrest_id: {}'.format(rest_id))
            # Disabled branch: rejected shops whose (lng, lat) fell outside a
            # polygon and recorded them in self.err_rest_ids.
            # elif not (self.polygonHellp.is_location_in_polygon(lng, lat)):
            #     self.err_rest_ids.add(rest_id)
            #     print('bad address\nrest_id: {}, lat: {}, lng: {}, name: {}\n'
            #               'address: {}'.format(rest_id, lat, lng, getMapValue(item, "name"),
            #                                    getMapValue(item, "address")))

            else:
                # De-duplicate shops: the shop-level rows are only built while
                # no category has been recorded for this shop id yet.
                if not self.rest_category[rest_id]:
                    param1 = [
                        self.city, self.date, self.rest_area, rest_id,
                        getMapValue(item, "name"),
                        # NOTE(review): the third argument '/' is presumably a
                        # default for a missing phone -- confirm in getMapValue.
                        getMapValue(item, "phone", '/'),
                        getMapValue(item, "address"),
                        getMapValue(item, "average_cost"),
                        # A list indexed by a bool: 0 when delivery_mode is
                        # '-999', otherwise 1.
                        [1, 0][getMapValue(item, "delivery_mode") == '-999'],
                        getMapValue(item, "float_delivery_fee"),
                        getMapValue(item, "float_minimum_order_amount"),
                        # 1 only when the value equals the string 'True'.
                        [0, 1][getMapValue(item, "is_new") == 'True'],
                        [0, 1][getMapValue(item, "is_premium") == 'True'], lat,
                        lng,
                        # The last column is always the constant '-999'.
                        getMapValue(item, "recent_order_num"), '-999'
                    ]
                    self.insert(self.batch1, param1)

                    opening_hours = getMapValue(item, "opening_hours")
                    # Single quotes are replaced by double quotes before the
                    # text is decoded as JSON.
                    arr = json.loads(opening_hours.replace("\'", "\""))
                    if not isinstance(arr, list):
                        print('错误数据:', rest_id, opening_hours)
                    else:
                        for s in arr:
                            # Each entry is split on "/" and the pieces become
                            # the trailing columns of the row.
                            times = s.split("/")
                            param4 = [
                                self.city, self.date, self.rest_area, rest_id,
                                *times
                            ]
                            self.insert(self.batch4, param4)

                    # TODO: the average full-reduction discount is known to be
                    # problematic.
                    discount_rate_list = []
                    # NOTE(review): item.get('activities') returning None would
                    # raise here and abort the rest of the record, including
                    # the category row below -- confirm this is intended.
                    for active in item.get('activities'):
                        attribute = getMapValue(active, "attribute")
                        active_type = getMapValue(active, "icon_name")
                        param2 = [
                            self.city, self.date, self.rest_area, rest_id,
                            getMapValue(active, "description"), active_type
                        ]

                        self.insert(self.batch2, param2)

                        if active_type == '减' and attribute != '-999':
                            # The attribute is decoded as JSON and iterated as a
                            # mapping of threshold -> reduction.
                            arr = json.loads(attribute)
                            for key, value in arr.items():
                                if isinstance(value, dict):
                                    # Prefer entry "1"; use entry "0" when "1"
                                    # equals 0.
                                    sub_price = value.get("1")
                                    if sub_price == 0:
                                        sub_price = value.get("0")
                                else:
                                    sub_price = value
                                key = round(float(key), 2)
                                sub_price = round(float(sub_price), 2)
                                # NOTE(review): a threshold of 0 raises
                                # ZeroDivisionError, handled only by the outer
                                # except.
                                discount_rate = round(sub_price / key, 4)
                                discount_rate_list.append(discount_rate)
                                param5 = [
                                    self.city, self.date, self.rest_area,
                                    rest_id, key, sub_price, discount_rate
                                ]
                                self.insert(self.batch5, param5)

                    # TODO: the average full-reduction discount is known to be
                    # problematic.
                    if len(discount_rate_list):
                        avg_discount_rate = sum(discount_rate_list) / len(
                            discount_rate_list)
                        avg_discount_rate = round(avg_discount_rate, 4)
                        param6 = [
                            self.city, self.date, self.rest_area, rest_id,
                            avg_discount_rate
                        ]
                        self.insert(self.batch6, param6)

                # De-duplicate (shop, level-2 category) pairs. Recording the
                # category here is also what makes the shop-level branch above
                # skip this shop on later records.
                if category_level2_id not in self.rest_category[rest_id]:
                    self.rest_category[rest_id].add(category_level2_id)

                    category_level1_id = category_item.get('id1')
                    category_level1_name = category_item.get('name')
                    category_level2_name = category_item.get('name2')
                    param3 = [
                        self.city, self.date, self.rest_area,
                        category_level1_id, category_level1_name,
                        category_level2_id, category_level2_name, rest_id,
                        getMapValue(item, "name")
                    ]
                    self.insert(self.batch3, param3)

        except Exception:
            # Traceback output is currently disabled; only the record is shown.
            # print("parse error", traceback.format_exc())
            print('解析数据报错\n错误数据:{}'.format(data))
Ejemplo n.º 8
0
    def parse_data(self, data):
        """Queue menu, food and discount rows for one shop.

        The shop id is read with ``getMapValue(data, 'param')``; shops listed
        in ``self.err_rest_ids`` are ignored silently. For every menu entry
        of ``data["data"]``:

        * an entry without a ``foods`` key is reported and skipped;
        * a menu row goes to ``self.batch1``;
        * each food (visited in reverse order) yields a row in
          ``self.batch2``; prices come from the first element of the food's
          ``specfoods``;
        * the first time a food id is seen for the shop (tracked in
          ``self.rest_food[rest_id]``) the same row is also queued in
          ``self.batch3`` and, when the original price is neither ``'-999'``
          nor ``'0'``, a discount row is queued in ``self.batch4``.

        Any exception is caught here: the traceback is printed, followed by
        the input record. Rows queued before the failure stay queued.
        Nothing is returned.
        """
        try:
            rest_id = getMapValue(data, 'param')
            # TODO: err_rest_ids still has to be passed in from outside.
            if rest_id in self.err_rest_ids:
                return

            for item in data["data"]:
                # An entry without "foods" is treated as bad data.
                try:
                    foods = item['foods'][::-1]
                except KeyError:
                    print('错误数据\n{}'.format(item))
                    continue

                menu_name = getMapValue(item, 'name')
                param1 = [
                    self.city, self.date, self.rest_area, rest_id, menu_name
                ]
                self.insert(self.batch1, param1)

                for food in foods:
                    food_id = getMapValue(food, 'virtual_food_id')
                    first_spec = food["specfoods"][0]
                    food_price_current = getMapValue(first_spec, 'price')
                    food_price_primary = getMapValue(first_spec,
                                                     'original_price')

                    # Evaluate the "has an original price" test once; it
                    # drives both the flag column and the discount row.
                    discounted = food_price_primary not in ('-999', '0')
                    has_activity = '1' if discounted else '0'

                    food_row = [
                        self.city, self.date, self.rest_area, rest_id, food_id,
                        getMapValue(food, 'name'),
                        getMapValue(food, 'month_sales'),
                        getMapValue(food, 'rating'),
                        getMapValue(food, 'satisfy_count'),
                        food_price_current, has_activity, menu_name
                    ]
                    self.insert(self.batch2, food_row)

                    if food_id in self.rest_food[rest_id]:
                        continue
                    self.rest_food[rest_id].add(food_id)

                    # Same columns as the batch2 row; pass a copy so the two
                    # batches never share one list object.
                    self.insert(self.batch3, list(food_row))

                    if discounted:
                        discount = '%.6f' % (float(food_price_current) /
                                             float(food_price_primary))
                        param4 = [
                            self.city, self.date, self.rest_area, rest_id,
                            food_id, food_price_primary,
                            food_price_current, discount
                        ]
                        self.insert(self.batch4, param4)

        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be passed to print().
            traceback.print_exc()
            print('解析数据报错\n错误数据:{}'.format(data))