def error(self, info):
    CrawlerDao.update_by_id(self.id,
                            self.u_id,
                            status=Crawler_Status.Error,
                            info=info,
                            finished=datetime.now())
    logger.error(info)
def get_user_activity():
    u_id = request.cookies.get('u_id')
    crawlers, crawler_total = CrawlerDao.batch_get_by_status(u_id, page=-1)
    _, done_total = CrawlerDao.batch_get_by_status(u_id, page=-1, status=Crawler_Status.DONE)
    logs, logs_total = UserLogDao.get_by_user(u_id)
    return jsonify(
        crawlers={
            'data': crawlers,
            'crawler_total': crawler_total,
            'done_total': done_total
        },
        actions={
            'data': logs,
            'action_total': logs_total
        }
    )
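# A hedged sketch of how get_user_activity might be mounted and exercised;
# the '/user/activity' route and the wiring below are assumptions, not taken
# from these snippets (get_application() is the factory used in setUp below).
app = get_application()
app.add_url_rule('/user/activity', view_func=get_user_activity)

client = app.test_client()
client.set_cookie('localhost', 'u_id', '1')  # old-style test-client call, as in setUp
print(client.get('/user/activity').get_json())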
def setUp(self):
    self.app = get_application().test_client()
    self.user_id = UserDao.create('t', 't', '*****@*****.**', '1')
    self.c_id = [
        CrawlerDao.create(self.user_id, 1, '', '', ''),
        CrawlerDao.create(self.user_id, 2, '', '', '')
    ]
    for i in self.c_id:
        CrawlerDao.update_by_id(i,
                                self.user_id,
                                status=1,
                                data_count=10,
                                total=10,
                                finished=datetime.now())
    # Old (pre-Flask-2.3) test-client signature: set_cookie(server_name, key, value).
    self.app.set_cookie('localhost', 'u_id', str(self.user_id))
    with self.app.session_transaction() as session:
        session[str(self.user_id)] = 't'
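# A minimal test that could sit alongside the setUp above; the '/tasks'
# route is an assumption, chosen to line up with get_tasks below.
def test_done_crawlers_counted(self):
    resp = self.app.get('/tasks?status=1')
    self.assertEqual(resp.status_code, 200)
    # setUp marked both crawlers with status=1, so both should be counted.
    self.assertEqual(resp.get_json()['total'], 2)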
def __init__(self, source, c_type, restaurant_id, cookies, args):
    self.cookies = cookies
    self.u_id = cookies.get('u_id')
    self.id = CrawlerDao.create(self.u_id,
                                restaurant_id,
                                source,
                                c_type,
                                args=json.dumps(args))
    # u'创建{}爬虫' == "created a {} crawler"
    UserLogDao.create(self.u_id, u'创建{}爬虫'.format(SOURCES.get(source)))
def sale_distribution(crawler_id):
    u_id = request.cookies.get('u_id')
    data = AnalyseTaskDao.get_by_u_id(u_id,
                                      crawler_one=crawler_id,
                                      _type=AnalyTaskType.SINGLE)
    if data:
        return jsonify(data['data'])
    # u'获取店铺商品数据分布' == "fetched shop dish data distribution"
    UserLogDao.create(u_id, u'获取店铺商品数据分布')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    if not crawler:
        # Guard the missing-crawler case, as compare() does below.
        return jsonify({'message': u'爬虫不存在'}), 401
    dishes = crawler['dishes']

    def render_item(k):
        return sorted([{
            'food_id': item['food_id'],
            'food_name': item['name'],
            'value': item[k]
        } for item in dishes],
                      key=lambda d: d['value'],
                      reverse=True)

    # Sales distribution
    sales = render_item('moth_sales')
    # Rating distribution
    rate_dis = render_item('rating')
    # Sales totals per price point
    price_dis = {}
    for item in dishes:
        price = item['price']
        price_dis.setdefault(price, 0)
        sale = item['moth_sales']
        price_dis[price] += sale
    price_dis = sorted([{
        'name': k,
        'value': v
    } for k, v in price_dis.items()],
                       key=lambda d: d['name'])

    # Distribution of the shop's review counts over time
    rate, _ = RateDao.get_by_crawler_id(crawler_id, page=-1)
    rate_date_dis = {}
    for item in rate:
        rate_at = item['rated_at']
        rate_date_dis.setdefault(rate_at, 0)
        rate_date_dis[rate_at] += 1
    data = {
        'restaurant': crawler['restaurant'],
        'sales_dis': sales,
        'rate_dis': rate_dis,
        'price_dis': price_dis,
        'rate_date_dis': rate_date_dis
    }
    analy_task_controller.save_analyse_data.put(u_id=u_id,
                                                data=json.dumps(data),
                                                crawler_one=crawler_id,
                                                _type=AnalyTaskType.SINGLE)
    return jsonify(data)
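# Standalone sketch of the price aggregation above, redone with
# collections.Counter on made-up dish data (keys mirror the ones used here).
from collections import Counter

demo_dishes = [
    {'food_id': 1, 'name': 'noodles', 'price': 12, 'moth_sales': 30, 'rating': 4.6},
    {'food_id': 2, 'name': 'rice', 'price': 12, 'moth_sales': 10, 'rating': 4.2},
    {'food_id': 3, 'name': 'soup', 'price': 8, 'moth_sales': 5, 'rating': 4.8},
]

demo_price_dis = Counter()
for item in demo_dishes:
    demo_price_dis[item['price']] += item['moth_sales']

# Same shape as the endpoint's price_dis payload, sorted by price:
# [{'name': 8, 'value': 5}, {'name': 12, 'value': 40}]
print(sorted(({'name': k, 'value': v} for k, v in demo_price_dis.items()),
             key=lambda d: d['name']))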
def get_tasks():
    page = int(request.args.get('page', 1))
    per_page = int(request.args.get('per_page', 20))
    status = request.args.get('status')
    u_id = request.cookies.get('u_id')
    if status is not None:
        status = int(status)
    crawlers, total = CrawlerDao.batch_get_by_status(u_id,
                                                     page=page,
                                                     per_page=per_page,
                                                     status=status)
    return jsonify({
        'page': page,
        'per_page': per_page,
        'total': total,
        'data': crawlers
    })
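# Hedged sketch: the same query-string handling as get_tasks, but tolerant
# of non-numeric input (request.args values are strings, and int() raises
# on junk); parse_int is a hypothetical helper, not part of this codebase.
def parse_int(raw, default):
    try:
        return int(raw)
    except (TypeError, ValueError):
        return default

demo_args = {'page': '2', 'per_page': 'abc'}
print(parse_int(demo_args.get('page'), 1))        # 2
print(parse_int(demo_args.get('per_page'), 20))   # 20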
def get_crawler(crawler_id=None):
    u_id = request.cookies.get('u_id')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    if not crawler:
        return jsonify(crawler=None)
    # Dishes
    dishes, dish_total = DishDao.get_by_crawler_id(crawler_id, page=-1)
    # Reviews
    rate, rate_total = RateDao.get_by_crawler_id(crawler_id, page=-1)
    return jsonify(crawler=crawler,
                   dish={
                       'total': dish_total,
                       'data': dishes
                   },
                   rate={
                       'total': rate_total,
                       'data': rate
                   })
def update_count(self, count):
    CrawlerDao.update_by_id(self.id, self.u_id, data_count=count)

def insert_extras(self, extras):
    CrawlerDao.update_by_id(self.id, self.u_id, extras=extras)

def done(self):
    CrawlerDao.update_by_id(self.id,
                            self.u_id,
                            status=Crawler_Status.DONE,
                            finished=datetime.now())
def delete_crawler(crawler_id):
    u_id = request.cookies.get('u_id')
    CrawlerDao.delete(crawler_id, u_id)
    return jsonify(status="ok")
def get_crawler_status(crawler_id):
    u_id = request.cookies.get('u_id')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    if not crawler:
        # Same missing-crawler guard as get_crawler above.
        return jsonify(crawler=None)
    logger.info(crawler['count'])
    return jsonify(crawler)
def compare(crawler_one, crawler_two):
    u_id = request.cookies.get('u_id')
    data = AnalyseTaskDao.get_by_u_id(u_id,
                                      crawler_one=crawler_one,
                                      crawler_two=crawler_two,
                                      _type=AnalyTaskType.MULTI)
    if data:
        return jsonify(data['data'])
    crawler_1 = CrawlerDao.get_by_id(crawler_one, u_id)
    crawler_2 = CrawlerDao.get_by_id(crawler_two, u_id)
    if not crawler_1 or not crawler_2:
        return jsonify({'message': u'爬虫不存在'}), 401  # "crawler does not exist"
    UserLogDao.create(u_id,
                      action_name=u'比较两家店',  # "compared two shops"
                      action_args=u'{} vs {}'.format(
                          crawler_1['restaurant']['name'],
                          crawler_2['restaurant']['name']))
    dish_1 = crawler_1['dishes']
    dish_2 = crawler_2['dishes']
    # Sales comparison at shared price points
    sales_compare_with_same_price = {}
    # Rating comparison at shared price points
    rate_compare_with_same_price = {}
    # Collect every price point seen in either shop.
    price_set = {item['price'] for item in dish_1} | {item['price'] for item in dish_2}
    max_sale = 0

    for item in dish_1:
        price = item['price']
        sale = item['moth_sales']
        rate = item['rating']
        if sale > max_sale:
            max_sale = sale
        sales_compare_with_same_price.setdefault('a', {})
        sales_compare_with_same_price['a'][price] = sale
        rate_compare_with_same_price.setdefault('a', {})
        rate_compare_with_same_price['a'][price] = rate

    for item in dish_2:
        price = item['price']
        sale = item['moth_sales']
        rate = item['rating']
        if sale > max_sale:
            max_sale = sale
        sales_compare_with_same_price.setdefault('b', {})
        sales_compare_with_same_price['b'][price] = sale
        rate_compare_with_same_price.setdefault('b', {})
        rate_compare_with_same_price['b'][price] = rate

    for p in price_set:
        sales_compare_with_same_price['a'].setdefault(p, 0)
        sales_compare_with_same_price['b'].setdefault(p, 0)
        rate_compare_with_same_price['a'].setdefault(p, 0)
        rate_compare_with_same_price['b'].setdefault(p, 0)

    def sort_price(items):
        # dict.items() (rather than Python 2's iteritems) matches the usage
        # in sale_distribution above and works on both Python 2 and 3.
        return sorted([{
            'price': k,
            'value': v
        } for k, v in items.items()],
                      key=lambda d: d['price'])

    a_sales = sort_price(sales_compare_with_same_price['a'])
    b_sales = sort_price(sales_compare_with_same_price['b'])

    a_rate = sort_price(rate_compare_with_same_price['a'])
    b_rate = sort_price(rate_compare_with_same_price['b'])
    data = {
        'crawler_1': crawler_1,
        'crawler_2': crawler_2,
        'sales_compare_with_same_price': {
            'a': a_sales,
            'b': b_sales,
            'max': max_sale
        },
        'rate_compare_with_same_price': {
            'a': a_rate,
            'b': b_rate
        },
        'other': []
    }
    analy_task_controller.save_analyse_data.put(u_id=u_id,
                                                data=json.dumps(data),
                                                crawler_one=crawler_one,
                                                crawler_two=crawler_two,
                                                _type=AnalyTaskType.MULTI)
    return jsonify(data)
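# Standalone sketch of the price-alignment step in compare(): both shops'
# price -> sales maps get padded with 0 over the union of their price points.
demo_a = {10: 5, 12: 8}
demo_b = {12: 3, 15: 1}

for p in set(demo_a) | set(demo_b):
    demo_a.setdefault(p, 0)
    demo_b.setdefault(p, 0)

def demo_sort_price(items):
    return sorted(({'price': k, 'value': v} for k, v in items.items()),
                  key=lambda d: d['price'])

print(demo_sort_price(demo_a))  # [{'price': 10, 'value': 5}, {'price': 12, 'value': 8}, {'price': 15, 'value': 0}]
print(demo_sort_price(demo_b))  # [{'price': 10, 'value': 0}, {'price': 12, 'value': 3}, {'price': 15, 'value': 1}]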
def tearDown(self):
    UserDao.delete(self.user_id)
    # CrawlerDao.delete takes a single crawler id elsewhere (see
    # delete_crawler above), so remove each created crawler in turn
    # instead of passing a nested list.
    for i in self.c_id:
        CrawlerDao.delete(i, self.user_id)