def parse_data(self, data):
    """Parse one delivery-mode record and queue it on ``self.batch``.

    ``data['param']`` is the shop id and ``data['data']['delivery_mode']``
    is the object whose ``id`` and ``text`` fields are read through
    ``getMapValue``.  The queued row is
    ``[city, date, rest_id, delivery_text, delivery_id]``.

    Shop ids longer than 11 characters are reported as bad data and
    skipped.  Once ``self.batch.getSize()`` exceeds 100000 the batch is
    handed to ``self.db.update`` and then cleared.

    Any exception is reported (traceback on stderr, offending record on
    stdout) and swallowed so that one bad record does not stop the run.
    """
    try:
        rest_id = str(data['param'])
        if len(rest_id) > 11:
            print("错误数据", data)
            return

        # Use a separate name so ``data`` still refers to the whole record
        # if the error handler below has to print it.
        delivery_mode = data['data'].get('delivery_mode')
        delever_id = getMapValue(delivery_mode, "id")
        delever_text = getMapValue(delivery_mode, "text")
        param = [self.city, self.date, rest_id, delever_text, delever_id]
        self.batch.addBatch(param)

        if self.batch.getSize() > 100000:
            self.db.update(self.batch)
            self.batch.cleanBatch()
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        print('解析数据报错\n错误数据:{}'.format(data))
def parse_data(self, data):
    """Parse one delivery-mode record and queue it on ``self.batch1``.

    ``data['param']`` is the shop id and ``data['data']['delivery_mode']``
    is the object whose ``id`` and ``text`` fields are read through
    ``getMapValue``.  The row passed to ``self.insert`` is
    ``[city, date, rest_area, rest_id, delivery_text, delivery_id]``.

    Shop ids longer than 11 characters are reported as bad data and
    skipped.  Any exception is reported (traceback on stderr, offending
    record on stdout) and swallowed so that one bad record does not stop
    the run.
    """
    try:
        rest_id = str(data['param'])
        if len(rest_id) > 11:
            print("错误数据", data)
            return

        # Use a separate name so ``data`` still refers to the whole record
        # if the error handler below has to print it.
        delivery_mode = data['data'].get('delivery_mode')
        delever_id = getMapValue(delivery_mode, "id")
        delever_text = getMapValue(delivery_mode, "text")
        param1 = [
            self.city, self.date, self.rest_area, rest_id, delever_text,
            delever_id
        ]
        self.insert(self.batch1, param1)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        print('解析数据报错\n错误数据:{}'.format(data))
def parse_data(self, data):
    """Queue one ``name``/``count`` row per entry of ``data['data']``.

    ``data['param']`` is the shop id.  Records whose id is listed in
    ``self.err_rest_ids`` are skipped silently; ids longer than 11
    characters are reported as bad data.  Each remaining entry yields the
    row ``[city, date, rest_area, rest_id, name, count]`` on
    ``self.batch1``.  Any exception is printed and swallowed.
    """
    try:
        shop_id = data['param']
        if shop_id in self.err_rest_ids:
            return
        if len(shop_id) > 11:
            print("错误数据", data)
            return
        for entry in data["data"]:
            row = [
                self.city,
                self.date,
                self.rest_area,
                shop_id,
                getMapValue(entry, "name"),
                getMapValue(entry, "count"),
            ]
            self.insert(self.batch1, row)
    except Exception as err:
        print("解析数据报错", err)
def parse_data(self, data):
    """Queue the rating-score row for one shop on ``self.batch1``.

    ``data['param']`` is the shop id and ``data['data']`` holds the score
    fields.  Records whose id is listed in ``self.err_rest_ids`` are
    skipped silently; ids longer than 11 characters are reported as bad
    data.  The row is ``[city, date, rest_area, rest_id]`` followed by
    compare_rating, deliver_time, food_score, overall_score and
    service_score.  Any exception is swallowed after printing the record.
    """
    score_fields = (
        "compare_rating",
        "deliver_time",
        "food_score",
        "overall_score",
        "service_score",
    )
    try:
        shop_id = data['param']
        if shop_id in self.err_rest_ids:
            return
        if len(shop_id) > 11:
            print("错误数据", data)
            return
        row = [self.city, self.date, self.rest_area, shop_id]
        row.extend(
            getMapValue(data['data'], field) for field in score_fields)
        self.insert(self.batch1, row)
    except Exception:
        print('解析数据报错\n错误数据:{}'.format(data))
def parse_data(self, data):
    """Queue one search-word row per usable entry of ``data['data']``.

    ``data['param']`` is indexed as ``[rest_id, la, lo]``.  Records whose
    id is listed in ``self.err_rest_ids`` are skipped silently; ids longer
    than 11 characters are reported as bad data.  Entries whose
    ``search_word`` comes back as ``'-999'`` are ignored; every other
    entry yields ``[city, date, rest_area, rest_id, la, lo, search_word]``
    on ``self.batch1``.  Any exception is printed and swallowed.
    """
    try:
        shop_id = str(data["param"][0])
        if shop_id in self.err_rest_ids:
            return
        if len(shop_id) > 11:
            print("错误数据", data)
            return
        la = data["param"][1]
        lo = data["param"][2]
        for entry in data["data"]:
            word = getMapValue(entry, "search_word")
            if word == '-999':
                continue
            row = [
                self.city, self.date, self.rest_area, shop_id, la, lo, word
            ]
            self.insert(self.batch1, row)
    except Exception as err:
        print("解析数据报错", err)
def parse_data(self, data):
    """Parse one rating record into a rating row and per-item rows.

    ``data['param']`` is the shop id and ``data['data']`` is the rating
    object.  A fresh ``uuid.uuid1()`` string is generated as ``rating_id``
    to link the rating row (queued on ``self.batch1``) with one row per
    entry of ``item_ratings`` (queued on ``self.batch2``).

    Shop ids longer than 11 characters are reported as bad data and
    skipped.  Any exception is reported (traceback on stderr, offending
    record on stdout) and swallowed; rows already queued before the
    failure stay queued.
    """
    try:
        rest_id = str(data["param"])
        if len(rest_id) > 11:
            print("错误数据", data)
            return

        rating = data["data"]
        rating_id = str(uuid.uuid1())
        rate_time = getMapValue(rating, "rated_at")
        rating_text = getMapValue(rating, "rating_text")
        reply_text = getMapValue(rating, "reply_text")
        rating_star = getMapValue(rating, "rating_star")
        username = getMapValue(rating, "username")
        param1 = [
            self.city, self.date, self.rest_area, rest_id, rating_id,
            rate_time, rating_text, reply_text, rating_star, username
        ]
        self.insert(self.batch1, param1)

        for food_rating in rating["item_ratings"]:
            food_id = getMapValue(food_rating, "food_id")
            food_name = getMapValue(food_rating, "food_name")
            image_hash = getMapValue(food_rating, "image_hash")
            param2 = [
                self.city, self.date, self.rest_area, rest_id, rating_id,
                rate_time, food_id, image_hash, food_name, rating_star,
                rating_text, reply_text
            ]
            self.insert(self.batch2, param2)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        print('解析数据报错\n错误数据:{}'.format(data))
def parse_data(self, data):
    """Parse one shop record into shop, hours, activity, discount and category rows.

    Reads the category descriptor from ``data['param'][2]`` and the shop
    object from ``data['data']``.  Rows are queued through ``self.insert``:

    * ``batch1`` - shop master row (first sighting of the shop only)
    * ``batch4`` - one row per opening-hours entry, split on ``/``
    * ``batch2`` - one row per activity
    * ``batch5`` - one row per threshold/reduction pair of a '减' activity
    * ``batch6`` - average discount rate over those pairs
    * ``batch3`` - shop-to-category link, once per (shop, level-2 category)

    Any exception is swallowed after printing the offending record.
    """
    try:
        category_item = data['param'][2]
        category_level2_id = str(category_item.get("id2"))
        item = data["data"]
        rest_id = getMapValue(item, "id")
        lat = float(getMapValue(item, "latitude"))
        lng = float(getMapValue(item, "longitude"))
        if len(rest_id) > 11:
            print('错误店铺id\nrest_id: {}'.format(rest_id))
        # Disabled branch: a polygon containment check
        # (self.polygonHellp.is_location_in_polygon(lng, lat)) used to reject
        # shops outside the area, add them to self.err_rest_ids and print
        # their id, coordinates, name and address.
        else:
            # Shop de-duplication: emit shop-level rows only while no
            # category has been recorded for this shop yet.
            # NOTE(review): rest_category is indexed without a prior insert -
            # presumably a defaultdict(set); verify against the constructor.
            if not self.rest_category[rest_id]:
                param1 = [
                    self.city, self.date, self.rest_area, rest_id,
                    getMapValue(item, "name"),
                    getMapValue(item, "phone", '/'),
                    getMapValue(item, "address"),
                    getMapValue(item, "average_cost"),
                    # Indexing by a bool: 0 when delivery_mode is '-999', else 1.
                    [1, 0][getMapValue(item, "delivery_mode") == '-999'],
                    getMapValue(item, "float_delivery_fee"),
                    getMapValue(item, "float_minimum_order_amount"),
                    # 1 when the field equals the string 'True', else 0.
                    [0, 1][getMapValue(item, "is_new") == 'True'],
                    [0, 1][getMapValue(item, "is_premium") == 'True'],
                    lat, lng,
                    getMapValue(item, "recent_order_num"), '-999'
                ]
                self.insert(self.batch1, param1)
                opening_hours = getMapValue(item, "opening_hours")
                # Single quotes are swapped for double quotes so the value
                # can be parsed as JSON.
                arr = json.loads(opening_hours.replace("\'", "\""))
                if not isinstance(arr, list):
                    print('错误数据:', rest_id, opening_hours)
                else:
                    for s in arr:
                        times = s.split("/")
                        param4 = [
                            self.city, self.date, self.rest_area, rest_id,
                            *times
                        ]
                        self.insert(self.batch4, param4)
                # TODO: the average "spend X, save Y" discount is known to be wrong.
                discount_rate_list = []
                # NOTE(review): a missing 'activities' key makes .get() return
                # None and this loop raise TypeError; the outer handler then
                # drops the rest of the record.
                for active in item.get('activities'):
                    attribute = getMapValue(active, "attribute")
                    active_type = getMapValue(active, "icon_name")
                    param2 = [
                        self.city, self.date, self.rest_area, rest_id,
                        getMapValue(active, "description"), active_type
                    ]
                    self.insert(self.batch2, param2)
                    if active_type == '减' and attribute != '-999':
                        arr = json.loads(attribute)
                        for key, value in arr.items():
                            # A dict value supplies the reduction under "1",
                            # falling back to "0" when that is 0; any other
                            # value is used as the reduction itself.
                            if isinstance(value, dict):
                                sub_price = value.get("1")
                                if sub_price == 0:
                                    sub_price = value.get("0")
                            else:
                                sub_price = value
                            key = round(float(key), 2)
                            sub_price = round(float(sub_price), 2)
                            # NOTE(review): a key of 0 raises ZeroDivisionError
                            # here, which the outer handler swallows.
                            discount_rate = round(sub_price / key, 4)
                            discount_rate_list.append(discount_rate)
                            param5 = [
                                self.city, self.date, self.rest_area, rest_id,
                                key, sub_price, discount_rate
                            ]
                            self.insert(self.batch5, param5)
                # TODO: the average "spend X, save Y" discount is known to be wrong.
                if len(discount_rate_list):
                    avg_discount_rate = sum(discount_rate_list) / len(
                        discount_rate_list)
                    avg_discount_rate = round(avg_discount_rate, 4)
                    param6 = [
                        self.city, self.date, self.rest_area, rest_id,
                        avg_discount_rate
                    ]
                    self.insert(self.batch6, param6)
            # De-duplicate the shop-to-category link.
            # NOTE(review): this is the only place rest_category is filled,
            # so an exception raised above leaves the shop looking unseen and
            # its shop-level rows may be queued again - confirm intended.
            if category_level2_id not in self.rest_category[rest_id]:
                self.rest_category[rest_id].add(category_level2_id)
                category_level1_id = category_item.get('id1')
                category_level1_name = category_item.get('name')
                category_level2_name = category_item.get('name2')
                param3 = [
                    self.city, self.date, self.rest_area, category_level1_id,
                    category_level1_name, category_level2_id,
                    category_level2_name, rest_id,
                    getMapValue(item, "name")
                ]
                self.insert(self.batch3, param3)
    except Exception:
        # The traceback print is disabled; only the raw record is reported.
        print('解析数据报错\n错误数据:{}'.format(data))
def parse_data(self, data):
    """Parse one shop's menu payload into menu, food and discount rows.

    ``data['param']`` is the shop id and ``data['data']`` is a list of
    menu sections, each with a ``name`` and a ``foods`` list.  Rows are
    queued through ``self.insert``:

    * ``batch1`` - one row per menu section
    * ``batch2`` - one row per food per section
    * ``batch3`` - the same food row, but only the first time the food id
      is seen for this shop (tracked in ``self.rest_food``)
    * ``batch4`` - original price, current price and their ratio, for
      first-seen foods whose original price is neither '-999' nor '0'

    Records whose shop id is in ``self.err_rest_ids`` are skipped.  A
    section without a ``foods`` key is reported and skipped.  Any other
    exception is reported (traceback on stderr, offending record on
    stdout) and swallowed; rows already queued stay queued.
    """
    try:
        rest_id = getMapValue(data, 'param')
        # TODO: err_rest_ids needs to be passed in.
        if rest_id in self.err_rest_ids:
            return
        for item in data["data"]:
            # A section without 'foods' is treated as malformed.
            try:
                foods = item['foods'][::-1]
            except KeyError:
                print('错误数据\n{}'.format(item))
                continue
            menu_name = getMapValue(item, 'name')
            param1 = [
                self.city, self.date, self.rest_area, rest_id, menu_name
            ]
            self.insert(self.batch1, param1)
            for item2 in foods:
                food_id = getMapValue(item2, 'virtual_food_id')
                # Prices are taken from the first spec only.
                first_spec = item2["specfoods"][0]
                food_price_current = getMapValue(first_spec, 'price')
                food_price_primary = getMapValue(first_spec, 'original_price')
                # A usable original price marks the food as discounted.
                discounted = food_price_primary not in ['-999', '0']
                has_activity = '1' if discounted else '0'
                food_row = [
                    self.city, self.date, self.rest_area, rest_id, food_id,
                    getMapValue(item2, 'name'),
                    getMapValue(item2, 'month_sales'),
                    getMapValue(item2, 'rating'),
                    getMapValue(item2, 'satisfy_count'),
                    food_price_current, has_activity, menu_name
                ]
                self.insert(self.batch2, food_row)
                if food_id not in self.rest_food[rest_id]:
                    self.rest_food[rest_id].add(food_id)
                    # Copy so the two batches never share one list object.
                    self.insert(self.batch3, list(food_row))
                    if discounted:
                        discount = '%.6f' % (float(food_price_current) /
                                             float(food_price_primary))
                        param4 = [
                            self.city, self.date, self.rest_area, rest_id,
                            food_id, food_price_primary, food_price_current,
                            discount
                        ]
                        self.insert(self.batch4, param4)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be passed to print().
        traceback.print_exc()
        print('解析数据报错\n错误数据:{}'.format(data))