Beispiel #1
0
def get_area_page(key, lat, lng):
    """Query the Meituan waimai geohash endpoint for an address.

    Sends a GET request with redirects disabled, passing the coordinates and
    the address keyword as query parameters.

    Args:
        key: Address keyword, sent as the ``addr`` query parameter.
        lat: Latitude, sent as the ``lat`` query parameter.
        lng: Longitude, sent as the ``lng`` query parameter.

    Returns:
        The decoded JSON body on HTTP 200, the value of the ``location``
        response header on HTTP 302, or ``None`` for any other status or
        when the request/decoding raises an ``Exception``.
    """
    url = 'http://waimai.meituan.com/geo/geohash'
    query = {'lat': lat, 'lng': lng, 'addr': key, 'from': 'm'}
    headers = {
        'host':
        'waimai.meituan.com',
        'user-agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    cookies = {'_lxsdk_s': _LXSDK_S}
    location = None
    try:
        resp = requests.get(url,
                            params=query,
                            timeout=5,
                            headers=headers,
                            allow_redirects=False,
                            cookies=cookies)
        logger.info('get home page resp: {} {} {}'.format(
            resp.status_code, resp.content, resp.headers))
        if resp.status_code == 200:
            resp.encoding = 'utf-8'
            location = resp.json()
        elif resp.status_code == 302:
            # Redirects are not followed; hand the target back to the caller.
            location = resp.headers.get('location')
        else:
            logger.error(resp.content)
    except Exception as e:
        logger.error(e, exc_info=True)
    # Return after the try block instead of from ``finally``: a ``return`` in
    # ``finally`` would also discard KeyboardInterrupt/SystemExit.
    return location
Beispiel #2
0
def get_ele_restaurants(geohash, latitude, longitude, cookies, offset=0, limit=24):
    """Fetch one page of ele.me restaurants and queue each one for saving.

    Args:
        geohash: Geohash string sent as the ``geohash`` query parameter.
        latitude: Latitude sent as the ``latitude`` query parameter.
        longitude: Longitude sent as the ``longitude`` query parameter.
        cookies: Cookies forwarded with the request.
        offset: Paging offset sent to the API.
        limit: Page size sent to the API.

    Returns:
        The decoded JSON response on HTTP 200 after every entry has been
        handed to ``save_ele_restaurants.put``; ``None`` on any other status
        or when an exception is raised (the exception is logged).
    """
    query = {
        'geohash': geohash,
        'latitude': latitude,
        'longitude': longitude,
        'offset': offset,
        'limit': limit,
        'extras[]': 'activities'
    }
    try:
        resp = requests.get('https://www.ele.me/restapi/shopping/restaurants',
                            timeout=5, params=query, cookies=cookies)
        logger.info(resp.headers)
        if resp.status_code != 200:
            return None
        shops = resp.json()
        for shop in shops:
            path = shop['image_path']
            # The image URL is assembled from slices of ``image_path``:
            # first char / next two chars / remainder, then everything from
            # index 32 onwards as the suffix.
            record = {
                'source': SOURCE.ELE,
                'restaurant_id': shop['id'],
                'name': shop['name'],
                'sales': shop['recent_order_num'],
                'arrive_time': shop['order_lead_time'],
                'send_fee': shop['float_delivery_fee'],
                'score': shop['rating'],
                'latitude': shop['latitude'],
                'longitude': shop['longitude'],
                'image': 'https://fuss10.elemecdn.com/{}/{}/{}.{}'.format(
                    path[:1], path[1:3], path[3:], path[32:]),
            }
            save_ele_restaurants.put(**record)
        return shops
    except Exception as exc:
        logger.error(exc, exc_info=True)
 def load_data():
     """Decode ``item.data`` as JSON.

     ``item`` is not a parameter of this function; it is looked up in an
     outer scope.

     Returns:
         The decoded value, or ``None`` after logging the error when
         decoding raises.
     """
     try:
         return json.loads(item.data)
     except Exception as exc:
         logger.error(exc)
     return None
Beispiel #4
0
 def subscribe(self):
     """Watch every registered tube named in ``self.tubes``.

     ``self.tubes`` may be a list of tube names or a single tube name. A
     tube that is not a key of ``Subscriber.FUN_MAP`` is reported with an
     error log and skipped; every other tube is passed to
     ``self.beanstalk.watch``.
     """
     # Normalise the single-tube case so both shapes share one code path.
     tubes = self.tubes if isinstance(self.tubes, list) else [self.tubes]
     for tube in tubes:
         # Test membership on the mapping itself rather than on ``.keys()``,
         # which materialises a list on Python 2.
         if tube not in Subscriber.FUN_MAP:
             logger.error('tube:{} not register!'.format(tube))
             continue
         self.beanstalk.watch(tube)
Beispiel #5
0
def get_ele_city_list():
    """Fetch the ele.me city index and keep only the hot cities.

    The response is treated as a mapping whose values are lists of city
    entries; entries whose ``name`` is in ``HOT_CITIES`` are collected.

    Returns:
        A list of the matching city entries on HTTP 200; ``None`` on any
        other status or when an exception is raised (the exception is
        logged).
    """
    url = 'https://www.ele.me/restapi/shopping/v1/cities'
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            data = resp.json()
            cities = []
            # ``values()`` works on both Python 2 and 3; ``iteritems()`` is
            # Python 2-only and would make this always fail on Python 3.
            for group in data.values():
                cities.extend(
                    city for city in group if city['name'] in HOT_CITIES)
            return cities
    except Exception as e:
        logger.error(e, exc_info=True)
    return None
Beispiel #6
0
 def on_job(cls, job):
     """Dispatch a reserved job to its registered handler.

     The job body is decoded as JSON and must provide ``tube``,
     ``func_name`` and ``kwargs``. The handler is looked up as
     ``Subscriber.FUN_MAP[tube][func_name]`` and called with ``**kwargs``.

     Errors raised by the lookup or by the handler are logged and not
     re-raised; a JSON decoding error propagates to the caller. The elapsed
     time is logged in both cases.

     Args:
         job: Object whose ``body`` attribute holds the JSON message.
     """
     start = time.time()
     msg = json.loads(job.body)
     tube = msg.get('tube')
     func_name = msg.get('func_name')
     try:
         func = Subscriber.FUN_MAP[tube][func_name]
         kwargs = msg.get('kwargs')
         logger.info(u'run {} args:{}'.format(func_name, kwargs))
         func(**kwargs)
     except Exception as e:
         # Log the exception object itself: ``e.message`` does not exist on
         # Python 3 and would raise AttributeError inside this handler.
         logger.error(e, exc_info=True)
     cost = time.time() - start
     logger.info('{} cost {} s'.format(func_name, cost))
Beispiel #7
0
def commit_task():
    """Queue one Meituan crawler task per location in the JSON request body.

    The body must be a JSON list; each entry needs an ``address`` and a
    comma-separated ``lng_lat`` string (longitude first, latitude second).
    Entries are queued in order and processing stops at the first failure.

    Returns:
        ``jsonify(success=True)`` when every entry was queued; otherwise a
        ``(response, 400)`` pair carrying an error message.
    """
    payload = request.json
    if not isinstance(payload, list):
        return jsonify(message=u'数据错误', success=False), 400
    for entry in payload:
        try:
            address = entry['address']
            parts = entry['lng_lat'].split(',')
            # Index explicitly so a missing part raises IndexError.
            lng, lat = parts[0], parts[1]
            commit_mt_crawler_task.put(address=address, lng=lng, lat=lat, cookies=request.cookies)
        except IndexError:
            return jsonify(message=u'坐标组合错误', success=False), 400
        except Exception as e:
            logger.error(e, exc_info=True)
            return jsonify(message=e.__str__(), success=False), 400
    return jsonify(success=True)
Beispiel #8
0
def search_ele_address(key, latitude, longitude):
    """Search ele.me points of interest near a coordinate.

    Args:
        key: Search keyword, sent as the ``keyword`` query parameter.
        latitude: Latitude; converted with ``float`` before geohashing.
        longitude: Longitude; converted with ``float`` before geohashing.

    Returns:
        The decoded JSON response on HTTP 200; ``None`` on any other status
        or when the request raises (the exception is logged). Errors from
        the float conversion or geohash encoding are not caught.
    """
    lat = float(latitude)
    lon = float(longitude)
    area_hash = geohash.encode(latitude=lat, longitude=lon)
    logger.info('geohash: {}'.format(area_hash))
    query = {
        'extras[]': 'count',
        'geohash': area_hash,
        'keyword': key,
        'limit': 20,
        'type': 'nearby'
    }
    try:
        resp = requests.get('https://www.ele.me/restapi/v2/pois',
                            timeout=5, params=query)
        if resp.status_code != 200:
            return None
        return resp.json()
    except Exception as exc:
        logger.error(exc, exc_info=True)
Beispiel #9
0
def login_ele_by_mobile(mobile_phone, sms_code, sms_token):
    """Log in to ele.me with a mobile number and an SMS validation code.

    Args:
        mobile_phone: Mobile number, sent as ``mobile``.
        sms_code: Validation code, sent as ``validate_code``.
        sms_token: Validation token, sent as ``validate_token``.

    Returns:
        A ``(success, cookies, text)`` tuple. ``success`` is ``True`` only
        for HTTP 200; ``cookies`` and ``text`` come from the response. When
        the request raises, the exception is logged and
        ``(False, None, None)`` is returned so callers can always unpack
        three values.
    """
    url = 'https://h5.ele.me/restapi/eus/login/login_by_mobile'
    payload = {
        "mobile": mobile_phone,
        "validate_code": sms_code,
        "validate_token": sms_token
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36',
        'origin': 'https://h5.ele.me',
        'referer': 'https://h5.ele.me/login/'
    }
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=5)
        return resp.status_code == 200, resp.cookies, resp.text
    except Exception as e:
        logger.error(e, exc_info=True)
    # Keep the return shape consistent on failure instead of falling through
    # to an implicit ``None``.
    return False, None, None
Beispiel #10
0
def get_ele_captchas(mobile_phone):
    """Request a captcha image from ele.me for a mobile number.

    Args:
        mobile_phone: Mobile number, sent as ``captcha_str``.

    Returns:
        ``(True, captcha_image, captcha_hash)`` on HTTP 200, with the two
        values read from the JSON response; ``(False, None, None)`` on any
        other status or when an exception is raised (both cases are
        logged).
    """
    url = 'https://www.ele.me/restapi/eus/v3/captchas'
    payload = {
        'captcha_str': mobile_phone
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36',
        'origin': 'https://h5.ele.me',
        'referer': 'https://h5.ele.me/login/'
    }
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=5)
        if resp.status_code == 200:
            data = resp.json()
            return True, data.get('captcha_image'), data.get('captcha_hash')
        logger.error(u'get_ele_pic_failed: {}'.format(resp.content))
    except Exception as e:
        logger.error(e, exc_info=True)
    # The failure result is returned here rather than from ``finally``: a
    # ``return`` in ``finally`` replaces the success tuple returned above.
    return False, None, None
Beispiel #11
0
 def get_restaurant_data(self, page_source):
     """Parse restaurant entries from a listing page and queue them for saving.

     Every ``<li class="fl rest-li">`` containing a ``div.restaurant`` is
     parsed; the extracted fields are handed to ``save_ele_restaurants.put``
     with ``SOURCE.MEI_TUAN`` as the source. ``self.restaurant_id`` is
     overwritten with the id of each entry as it is parsed. An entry that
     fails to parse is logged and skipped.

     Args:
         page_source: Page markup passed to ``get_soup``.
     """
     def span_number(node, css_class):
         # Numeric value extracted from the text of the first matching span.
         return get_number_from_str(
             node.find('span', class_=css_class).get_text())

     soup = get_soup(page_source)
     for entry in soup.find_all('li', class_='fl rest-li'):
         card = entry.find('div', class_='restaurant')
         if not card:
             continue
         try:
             title = card['data-title']
             poi_id = int(card['data-poiid'])
             self.restaurant_id = poi_id
             preview = card.find('div', class_='preview')
             picture = preview.find('img', class_='scroll-loading')['src']
             # Rating
             rating = span_number(card, 'score-num')
             # Minimum spend required for delivery. Parsed but not passed on
             # below; a failure here still skips the entry.
             start_send_fee = span_number(card, 'start-price')
             # Delivery fee
             delivery_fee = span_number(card, 'send-price')
             # Delivery time
             eta = span_number(card, 'send-time')
             save_ele_restaurants.put(restaurant_id=poi_id,
                                      name=title,
                                      source=SOURCE.MEI_TUAN,
                                      arrive_time=eta,
                                      send_fee=delivery_fee,
                                      score=rating,
                                      image=picture)
         except Exception as e:
             logger.error(e)
Beispiel #12
0
 def run(self):
     """Subscribe to the configured tubes and process jobs until shutdown.

     Each iteration checks ``self.signal_shutdown`` and then reserves a job,
     waiting at most ``self.reserve_timeout``. A reserved job is passed to
     ``self.on_job`` and deleted afterwards.

     Failure handling:
         * ``beanstalkc.CommandFailed`` is logged and the loop continues.
         * Any other exception is logged; the job is buried while its
           ``kicks`` stat is below 3, otherwise it is deleted.
     """
     self.subscribe()
     while True:
         # Single shutdown check; an earlier duplicate check broke out of
         # the loop first and made this log line unreachable.
         if self.signal_shutdown:
             logger.info("graceful shutdown")
             break
         # Block waiting for a job, for at most ``reserve_timeout``.
         job = self.beanstalk.reserve(
             timeout=self.reserve_timeout)
         if not job:
             continue
         try:
             self.on_job(job)
             self.delete_job(job)
         except beanstalkc.CommandFailed as e:
             logger.error(e, exc_info=True)
         except Exception as e:
             logger.error(e, exc_info=True)
             kicks = job.stats()['kicks']
             if kicks < 3:
                 self.bury_job(job)
             else:
                 message = json.loads(job.body)
                 logger.error("Kicks reach max. Delete the job",
                              extra={'body': message})
                 self.delete_job(job)