async def get_shops(city, params, latlng_id, latlng):
    """Fetch the shops for one request and upsert them into the database.

    Sends a GET request to ``SHOP_URL`` with ``params``, parses the response
    body as JSON and merges one ``Shop`` row for every entry of its
    ``items`` list, then commits.

    Any exception (network, JSON parsing, missing key, database) is caught:
    the pending database work is rolled back so the shared session stays
    usable, the error is printed together with ``params``, and nothing is
    re-raised.

    Args:
        city: Value stored in the ``city`` field of every merged shop.
        params: Query parameters forwarded to the HTTP request.
        latlng_id: Value stored in the ``latlng_id`` field of every shop.
        latlng: Value stored in the ``latlng`` field of every shop.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(SHOP_URL, params=params) as response:
                data = await response.text()

        for item in json.loads(data)['items']:
            restaurant = item['restaurant']
            # Flavor ids are stored as one comma-separated string.
            flavor_ids = ','.join(
                str(flavor['id']) for flavor in restaurant['flavors'])
            dbsession.merge(
                Shop(id=restaurant['id'],
                     name=restaurant['name'],
                     address=restaurant['address'],
                     city=city,
                     latlng_id=latlng_id,
                     latlng=latlng,
                     flavors=flavor_ids,
                     openning_hours=''.join(restaurant['opening_hours']),
                     phone=restaurant['phone']))
        dbsession.commit()
    except Exception as e:
        # Discard half-applied work; without this a failed flush/commit
        # would leave the shared session unusable for later calls.
        dbsession.rollback()
        print('{},{}'.format(params, e))
def main():
    """Load the food concepts listed in ``CONCEPT_PATH`` into the database.

    Every line of the file is a comma-separated keyword list. The first
    keyword (stripped) becomes the concept name and the whole stripped line
    is stored as ``key_words``. Lines whose first keyword is empty (blank
    lines included) are skipped instead of creating a nameless concept.

    The session is closed when the function exits, whether or not the load
    succeeded.
    """
    try:
        with open(CONCEPT_PATH, 'r') as f:
            # Iterate the file lazily rather than reading every line up front.
            for raw_line in f:
                line = raw_line.strip()
                concept_name = line.split(',')[0].strip()
                if not concept_name:
                    continue
                dbsession.merge(FoodConcept(
                    name=concept_name,
                    key_words=line,
                ))
        dbsession.commit()
    finally:
        dbsession.close()
def main():
    """Load the flavor tree from ``FLAVOR_FILE_PATH`` into the database.

    The file is parsed as JSON. Every top-level category except the one
    named '全部商家' is expanded with ``get_flavors(category, None)``; the
    collected entries are ordered by ``id``, converted to ``Flavor`` rows
    and merged into the session, which is then committed.
    """
    with open(FLAVOR_FILE_PATH, 'r') as f:
        categories = json.load(f)

    collected = []
    for category in categories:
        if category['name'] != '全部商家':
            collected.extend(get_flavors(category, None))
    collected.sort(key=lambda entry: entry.id)

    # Build every row first, then merge, so construction and persistence
    # stay two separate passes.
    rows = [
        Flavor(id=entry.id,
               name=entry.name,
               level=entry.level,
               parent_id=entry.parent_id)
        for entry in collected
    ]
    for row in rows:
        dbsession.merge(row)
    dbsession.commit()
def _grid_steps(distance):
    """Return the smallest step count n >= 1 with distance / n <= ELE_DISTANCE.

    Returning at least 1 keeps an extent that already fits within
    ``ELE_DISTANCE`` from producing an empty grid.
    """
    steps = 1
    while distance / steps > ELE_DISTANCE:
        steps += 1
    return steps


def get_latlngs(city):
    """Cover the bounding box of ``city`` with a grid of points and store them.

    The city is geocoded with ArcGIS, and its bounding box is sampled on a
    regular latitude/longitude grid whose point counts are derived from
    the box extents and ``ELE_DISTANCE``. Every grid point is
    reverse-geocoded, its address is printed, and a ``Latlng`` row is
    added for each point whose ``lat_lng`` string is not in the database
    yet.

    Args:
        city: Name of the city to geocode; also stored on each new row.
    """
    g = geocoder.arcgis(city)
    northeast = g.bbox['northeast']
    southwest = g.bbox['southwest']

    # East-west extent of the bounding box (per the original comment).
    lat_dis = geodistance(northeast[0], southwest[1],
                          northeast[0], northeast[1])
    # North-south extent of the bounding box (per the original comment).
    lng_dis = geodistance(northeast[0], southwest[1],
                          southwest[0], southwest[1])

    # NOTE(review): the count derived from the east-west extent drives the
    # latitude axis and the north-south count drives the longitude axis.
    # Kept as in the original; confirm against geodistance's argument order.
    lat_step = _grid_steps(lat_dis)
    lng_step = _grid_steps(lng_dis)

    lats = np.linspace(
        start=southwest[0], stop=northeast[0], num=lat_step, endpoint=True)
    lngs = np.linspace(
        start=southwest[1], stop=northeast[1], num=lng_step, endpoint=True)

    for lat in lats:
        for lng in lngs:
            lat_lng = '{},{}'.format(lat, lng)
            address = geocoder.arcgis(lat_lng.split(','),
                                      method='reverse').address
            print(address)
            existing = dbsession.query(Latlng).filter(
                Latlng.lat_lng == lat_lng).first()
            if existing is None:
                # Store the city that was actually geocoded, not the
                # module-level default.
                dbsession.add(
                    Latlng(address=address, city=city, lat_lng=lat_lng))
                dbsession.commit()
async def get_foods(session, shop_id, ip):
    """Fetch the menu of one shop and store its foods in the database.

    While holding ``semaphore``, requests ``FOOD_URL`` for ``shop_id``
    through the proxy ``ip`` and parses the JSON response. For every item
    of every ``foods`` list, the first ``specfoods`` entry is merged as a
    ``Food`` row. When a stored food exists with a different
    ``recent_popularity``, a ``Record`` row capturing the old and new
    values is added as well.

    If the response contained at least one entry, the shop is removed from
    ``shop_ids`` and its id is appended to ``shops.txt``; otherwise a
    failure marker is printed. Any exception is printed with the shop id
    and not re-raised.

    Args:
        session: HTTP client session used for the request.
        shop_id: Identifier of the shop whose foods are fetched.
        ip: Proxy passed to the request.
    """
    # ``with (await semaphore)`` was removed from asyncio in Python 3.9;
    # ``async with`` is the supported spelling.
    async with semaphore:
        params = {'restaurant_id': shop_id}
        print('剩余店铺量: {}'.format(len(shop_ids)))
        headers = {
            'Host': 'h5.ele.me',
            'Connection': 'keep-alive',
            'User-Agent': (
                'Mozilla/5.0 (Linux; U; Android 5.1; zh-CN; MZ-m2 note '
                'Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) '
                'Version/4.0 Chrome/40.0.2214.89 MZBrowser/6.10.2 '
                'UWS/2.11.0.33 Mobile Safari/537.36'),
            'x-shard': 'shopid={};loc=114.273573,30.590624'.format(shop_id),
            'Accept': '*/*',
            'Referer': 'https://h5.ele.me/shop/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,en-US;q=0.8',
            'Cookie': (
                'ubt_ssid=nbouvov5sdvl4nbniquai795jrvi0vub_2018-02-27; '
                'perf_ssid=rn3toaudzil6ti5ru0y7hzq2dvbaipv5_2018-02-27; '
                '_utrace=a1d39d357cd6f361e1d3c461f7cfc236_2018-02-27'),
        }
        try:
            async with session.get(FOOD_URL,
                                   headers=headers,
                                   params=params,
                                   proxy=ip,
                                   timeout=10) as response:
                data = await response.text()

            src_foods = json.loads(data)
            for src_food in src_foods:
                for src_item in src_food['foods']:
                    # Only the first specification of each item is stored.
                    spec = src_item['specfoods'][0]
                    food_id = spec['food_id']
                    food_name = spec['name']
                    original_price = spec['original_price']
                    price = spec['price']
                    # A price of exactly 1 with an original price present
                    # is replaced by the original price.
                    if original_price is not None and price == 1:
                        price = original_price
                    recent_popularity = spec['recent_popularity']
                    class_id = food_classifer.classify_food(food_name)

                    old_food = dbsession.query(Food).filter(
                        Food.id == food_id).first()
                    if (old_food and
                            old_food.recent_popularity != recent_popularity):
                        dbsession.add(
                            Record(
                                food_id=food_id,
                                food=food_name,
                                price=price,
                                concept_ids=str(class_id),
                                old_popularity=old_food.recent_popularity,
                                new_popularity=recent_popularity,
                            ))
                    dbsession.merge(
                        Food(id=food_id,
                             name=food_name,
                             shop_id=shop_id,
                             price=price,
                             concept_ids=str(class_id),
                             recent_popularity=recent_popularity))

            try:
                dbsession.commit()
            except Exception as e:
                # Keep the shared session usable and make the failure
                # visible instead of swallowing it silently.
                dbsession.rollback()
                print('{},commit failed: {}'.format(shop_id, e))

            if src_foods:
                shop_ids.remove(shop_id)
                with open('shops.txt', 'at') as f:
                    f.write(str(shop_id) + '\n')
            else:
                print('********fail*********')
        except Exception as e:
            print('{},{}'.format(shop_id, e))