def get_area_page(key, lat, lng):
    """Query the Meituan waimai geohash endpoint for an address.

    Issues a GET to ``/geo/geohash`` with the coordinates and the address
    keyword. Redirects are not followed, so a 302 can be reported to the
    caller.

    :param key: address keyword, sent as the ``addr`` query parameter.
    :param lat: latitude, sent as ``lat``.
    :param lng: longitude, sent as ``lng``.
    :return: the decoded JSON body on HTTP 200, the value of the
        ``location`` response header on HTTP 302, and ``None`` for any
        other status or when the request raised an ``Exception``.
    """
    url = 'http://waimai.meituan.com/geo/geohash'
    query = {'lat': lat, 'lng': lng, 'addr': key, 'from': 'm'}
    headers = {
        'host': 'waimai.meituan.com',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    cookies = {'_lxsdk_s': _LXSDK_S}
    location = None
    try:
        resp = requests.get(url, params=query, timeout=5, headers=headers,
                            allow_redirects=False, cookies=cookies)
        logger.info('get home page resp: {} {} {}'.format(
            resp.status_code, resp.content, resp.headers))
        if resp.status_code == 200:
            resp.encoding = 'utf-8'
            location = resp.json()
        elif resp.status_code == 302:
            location = resp.headers.get('location')
        else:
            logger.error(resp.content)
    except Exception as e:
        logger.error(e, exc_info=True)
    # Return after the try statement rather than from a `finally` clause:
    # a `return` inside `finally` would also discard KeyboardInterrupt and
    # SystemExit, which must be allowed to propagate.
    return location
def get_ele_restaurants(geohash, latitude, longitude, cookies, offset=0, limit=24):
    """Fetch one page of ele.me restaurants and enqueue each one for saving.

    :param geohash: geohash of the search position.
    :param latitude: latitude of the search position.
    :param longitude: longitude of the search position.
    :param cookies: cookies forwarded with the request.
    :param offset: pagination offset sent to the API.
    :param limit: page size sent to the API.
    :return: the decoded JSON response on HTTP 200; ``None`` on any other
        status or when an exception was raised (the exception is logged).
        An exception while handling one entry stops the whole page.
    """
    query = {
        'geohash': geohash,
        'latitude': latitude,
        'longitude': longitude,
        'offset': offset,
        'limit': limit,
        'extras[]': 'activities'
    }
    try:
        resp = requests.get('https://www.ele.me/restapi/shopping/restaurants',
                            timeout=5, params=query, cookies=cookies)
        logger.info(resp.headers)
        if resp.status_code != 200:
            return None
        restaurants = resp.json()
        for shop in restaurants:
            path = shop['image_path']
            save_ele_restaurants.put(
                source=SOURCE.ELE,
                restaurant_id=shop['id'],
                name=shop['name'],
                sales=shop['recent_order_num'],
                arrive_time=shop['order_lead_time'],
                send_fee=shop['float_delivery_fee'],
                score=shop['rating'],
                latitude=shop['latitude'],
                longitude=shop['longitude'],
                # CDN URL is assembled from slices of image_path: first
                # character, next two, the remainder, and everything from
                # index 32 on as the suffix after the dot (presumably a
                # 32-char hash followed by the image type -- confirm).
                image='https://fuss10.elemecdn.com/{}/{}/{}.{}'.format(
                    path[0:1], path[1:3], path[3:], path[32:])
            )
        return restaurants
    except Exception as err:
        logger.error(err, exc_info=True)
def load_data():
    """Decode ``item.data`` as JSON.

    ``item`` is not a parameter or local of this function; it is resolved
    from the surrounding scope.

    :return: the decoded object, or ``None`` if decoding raised (the
        exception is logged).
    """
    try:
        return json.loads(item.data)
    except Exception as err:
        logger.error(err)
        return None
def subscribe(self):
    """Watch every tube from ``self.tubes`` that is registered in FUN_MAP.

    ``self.tubes`` may be a list of tube names or a single tube name. A
    tube that has no entry in ``Subscriber.FUN_MAP`` is reported through
    the logger and skipped instead of being watched.
    """
    is_single = not isinstance(self.tubes, list)
    candidates = [self.tubes] if is_single else self.tubes
    for name in candidates:
        if name in Subscriber.FUN_MAP.keys():
            self.beanstalk.watch(name)
        else:
            logger.error('tube:{} not register!'.format(name))
def get_ele_city_list():
    """Fetch the ele.me city list and keep only the cities in HOT_CITIES.

    The response is treated as a mapping whose values are iterables of
    city records; a record is kept when its ``'name'`` is in
    ``HOT_CITIES``.

    :return: a list of the matching city records on HTTP 200; ``None`` on
        any other status or when an exception was raised (the exception
        is logged).
    """
    url = 'https://www.ele.me/restapi/shopping/v1/cities'
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            data = resp.json()
            cities = []
            # values() exists on both Python 2 and 3 (iteritems() is
            # Python 2 only) and the keys were never used.
            for group in data.values():
                cities.extend(
                    city for city in group if city['name'] in HOT_CITIES)
            return cities
    except Exception as e:
        logger.error(e, exc_info=True)
    return None
def on_job(cls, job):
    """Dispatch one queued job to its registered handler and time it.

    The job body is JSON with the keys ``tube``, ``func_name`` and
    ``kwargs``. The handler is looked up as
    ``Subscriber.FUN_MAP[tube][func_name]`` and called with ``kwargs``
    as keyword arguments.

    Failures of the lookup or of the handler are logged with a traceback
    and not re-raised. Decoding the body happens outside that guard, so
    a malformed body propagates to the caller.

    :param job: object exposing the raw message as ``job.body``.
    """
    start = time.time()
    msg = json.loads(job.body)
    tube = msg.get('tube')
    func_name = msg.get('func_name')
    try:
        func = Subscriber.FUN_MAP[tube][func_name]
        # A message without 'kwargs' is treated as a call with no
        # arguments; `func(**None)` could only ever raise TypeError.
        kwargs = msg.get('kwargs') or {}
        logger.info(u'run {} args:{}'.format(func_name, kwargs))
        func(**kwargs)
    except Exception as e:
        # Log the exception object itself: `e.message` is deprecated and
        # does not exist on Python 3, where reading it would raise from
        # inside this handler.
        logger.error(e, exc_info=True)
    cost = time.time() - start
    logger.info('{} cost {} s'.format(func_name, cost))
def commit_task():
    """Queue one crawler task per location in the JSON request body.

    The body must be a list of objects with an ``address`` and a
    ``lng_lat`` string of the form ``"<lng>,<lat>"``. Each entry is
    handed to ``commit_mt_crawler_task.put`` together with the request
    cookies.

    :return: ``jsonify(success=True)`` when every entry was queued;
        otherwise a ``(response, 400)`` pair. Entries queued before a
        failing one stay queued.
    """
    locations = request.json
    if not isinstance(locations, list):
        # Message text: "data error".
        return jsonify(message=u'数据错误', success=False), 400

    for entry in locations:
        try:
            address = entry['address']
            parts = entry['lng_lat'].split(',')
            commit_mt_crawler_task.put(address=address,
                                       lng=parts[0],
                                       lat=parts[1],
                                       cookies=request.cookies)
        except IndexError:
            # No comma in lng_lat. Message text: "bad coordinate pair".
            return jsonify(message=u'坐标组合错误', success=False), 400
        except Exception as err:
            logger.error(err, exc_info=True)
            return jsonify(message=err.__str__(), success=False), 400

    return jsonify(success=True)
def search_ele_address(key, latitude, longitude):
    """Search ele.me points of interest near a coordinate.

    The coordinate is converted to a geohash (logged at info level) and
    sent with the keyword to the ``/restapi/v2/pois`` endpoint.

    :param key: search keyword.
    :param latitude: latitude; converted with ``float()``.
    :param longitude: longitude; converted with ``float()``.
    :return: the decoded JSON response on HTTP 200; ``None`` on any other
        status or when the request raised (the exception is logged).
        Errors from the float conversion or the geohash encoding are not
        caught here.
    """
    lat_value = float(latitude)
    lng_value = float(longitude)
    _geohash = geohash.encode(latitude=lat_value, longitude=lng_value)
    logger.info('geohash: {}'.format(_geohash))

    query = {
        'extras[]': 'count',
        'geohash': _geohash,
        'keyword': key,
        'limit': 20,
        'type': 'nearby'
    }
    try:
        resp = requests.get('https://www.ele.me/restapi/v2/pois',
                            timeout=5, params=query)
        if resp.status_code != 200:
            return None
        return resp.json()
    except Exception as err:
        logger.error(err, exc_info=True)
def login_ele_by_mobile(mobile_phone, sms_code, sms_token):
    """Log in to ele.me with a mobile number and an SMS validation code.

    :param mobile_phone: sent as ``mobile``.
    :param sms_code: sent as ``validate_code``.
    :param sms_token: sent as ``validate_token``.
    :return: ``(ok, cookies, text)`` where ``ok`` is ``True`` only for
        HTTP 200 and the other two come from the response. NOTE: when
        the request raises, the exception is logged and the function
        returns ``None`` rather than a 3-tuple.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36',
        'origin': 'https://h5.ele.me',
        'referer': 'https://h5.ele.me/login/'
    }
    payload = {
        "mobile": mobile_phone,
        "validate_code": sms_code,
        "validate_token": sms_token
    }
    try:
        resp = requests.post(
            'https://h5.ele.me/restapi/eus/login/login_by_mobile',
            json=payload, headers=headers, timeout=5)
        succeeded = resp.status_code == 200
        return succeeded, resp.cookies, resp.text
    except Exception as err:
        logger.error(err, exc_info=True)
def get_ele_captchas(mobile_phone):
    """Request a captcha for a mobile number from ele.me.

    :param mobile_phone: sent as ``captcha_str``.
    :return: ``(True, captcha_image, captcha_hash)`` on HTTP 200, with
        both values read from the JSON response; ``(False, None, None)``
        on any other status (the body is logged) or when the request
        raised (the exception is logged).
    """
    url = 'https://www.ele.me/restapi/eus/v3/captchas'
    payload = {
        'captcha_str': mobile_phone
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36',
        'origin': 'https://h5.ele.me',
        'referer': 'https://h5.ele.me/login/'
    }
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=5)
        if resp.status_code == 200:
            data = resp.json()
            return True, data.get('captcha_image'), data.get('captcha_hash')
        logger.error(u'get_ele_pic_failed: {}'.format(resp.content))
    except Exception as e:
        logger.error(e, exc_info=True)
    # The failure result must come after the try statement. Returning it
    # from a `finally` clause replaces the success tuple returned above,
    # which made this function report failure on every call.
    return False, None, None
def get_restaurant_data(self, page_source):
    """Parse restaurant entries out of a listing page and enqueue them.

    Every ``li.fl.rest-li`` element that contains a ``div.restaurant`` is
    parsed and handed to ``save_ele_restaurants.put`` with
    ``source=SOURCE.MEI_TUAN``. ``self.restaurant_id`` is overwritten
    with the id of each restaurant as it is parsed. An entry that fails
    to parse is logged and skipped.

    :param page_source: page markup, passed to ``get_soup``.
    """
    def span_number(element, css_class):
        # Numeric value extracted from the text of the first matching span.
        return get_number_from_str(
            element.find('span', class_=css_class).get_text())

    soup = get_soup(page_source)
    for entry in soup.find_all('li', class_='fl rest-li'):
        card = entry.find('div', class_='restaurant')
        if not card:
            continue
        try:
            name = card['data-title']
            restaurant_id = int(card['data-poiid'])
            self.restaurant_id = restaurant_id
            preview = card.find('div', class_='preview')
            img_src = preview.find('img', class_='scroll-loading')['src']
            # Rating
            score = span_number(card, 'score-num')
            # Minimum order amount required for delivery. Parsed but not
            # forwarded below; a parse failure here still skips the entry.
            start_send_fee = span_number(card, 'start-price')
            # Delivery fee
            send_fee = span_number(card, 'send-price')
            # Delivery time
            arrive_time = span_number(card, 'send-time')
            save_ele_restaurants.put(restaurant_id=restaurant_id,
                                     name=name,
                                     source=SOURCE.MEI_TUAN,
                                     arrive_time=arrive_time,
                                     send_fee=send_fee,
                                     score=score,
                                     image=img_src)
        except Exception as err:
            logger.error(err)
def run(self):
    """Subscribe to the configured tubes and process jobs until shutdown.

    Loop body:

    * stop when ``self.signal_shutdown`` is set (checked once per
      iteration, before reserving);
    * reserve a job, waiting at most ``self.reserve_timeout``;
    * run ``self.on_job`` and delete the job on success;
    * on ``beanstalkc.CommandFailed`` only log;
    * on any other exception log it, then bury the job while its
      ``kicks`` stat is below 3, otherwise log and delete it.
    """
    self.subscribe()
    while True:
        # Single shutdown check. The original had a bare duplicate of
        # this test in front of it, which made the log line unreachable.
        if self.signal_shutdown:
            logger.info("graceful shutdown")
            break
        # Block waiting for a job, for at most `reserve_timeout`.
        job = self.beanstalk.reserve(timeout=self.reserve_timeout)
        if not job:
            continue
        try:
            self.on_job(job)
            self.delete_job(job)
        except beanstalkc.CommandFailed as e:
            logger.error(e, exc_info=True)
        except Exception as e:
            logger.error(e, exc_info=True)
            kicks = job.stats()['kicks']
            if kicks < 3:
                self.bury_job(job)
            else:
                message = json.loads(job.body)
                logger.error("Kicks reach max. Delete the job",
                             extra={'body': message})
                self.delete_job(job)