def error(self, info):
    """Record a failed crawl: persist error status, message and finish time, then log it."""
    CrawlerDao.update_by_id(
        self.id,
        self.u_id,
        status=Crawler_Status.Error,
        info=info,
        finished=datetime.now(),
    )
    logger.error(info)
def get_user_activity():
    """Return the current user's crawler overview plus their action log as JSON."""
    u_id = request.cookies.get('u_id')
    # page=-1 fetches every row at once (no pagination)
    crawlers, crawler_total = CrawlerDao.batch_get_by_status(u_id, page=-1)
    _, done_total = CrawlerDao.batch_get_by_status(
        u_id, page=-1, status=Crawler_Status.DONE)
    logs, logs_total = UserLogDao.get_by_user(u_id)
    crawler_payload = {
        'data': crawlers,
        'crawler_total': crawler_total,
        'done_total': done_total,
    }
    action_payload = {
        'data': logs,
        'action_total': logs_total,
    }
    return jsonify(crawlers=crawler_payload, actions=action_payload)
def setUp(self):
    """Create a test client, one user and two finished crawlers, then log the user in."""
    self.app = get_application().test_client()
    self.user_id = UserDao.create('t', 't', '*****@*****.**', '1')
    self.c_id = [
        CrawlerDao.create(self.user_id, 1, '', '', ''),
        CrawlerDao.create(self.user_id, 2, '', '', ''),
    ]
    # Mark both crawlers as completed with some data so list/compare views have rows.
    for crawler_id in self.c_id:
        CrawlerDao.update_by_id(
            crawler_id,
            self.user_id,
            status=1,
            data_count=10,
            total=10,
            finished=datetime.now(),
        )
    # Simulate an authenticated browser: u_id cookie + matching session entry.
    self.app.set_cookie('localhost', 'u_id', str(self.user_id))
    with self.app.session_transaction() as session:
        session[str(self.user_id)] = 't'
def __init__(self, source, c_type, restaurant_id, cookies, args):
    """Register a crawler row for the cookie-identified user and log the creation."""
    self.cookies = cookies
    self.u_id = cookies.get('u_id')
    self.id = CrawlerDao.create(
        self.u_id, restaurant_id, source, c_type, args=json.dumps(args))
    UserLogDao.create(self.u_id, u'创建{}爬虫'.format(SOURCES.get(source)))
def sale_distribution(crawler_id):
    """Build (or serve cached) per-shop sales/rating/price/review-date distributions as JSON."""
    u_id = request.cookies.get('u_id')
    cached = AnalyseTaskDao.get_by_u_id(
        u_id, crawler_one=crawler_id, _type=AnalyTaskType.SINGLE)
    if cached:
        return jsonify(cached['data'])
    UserLogDao.create(u_id, u'获取店铺商品数据分布')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    dishes = crawler['dishes']

    def ranked_by(field):
        # One entry per dish, ordered by the requested metric, highest first.
        entries = [{
            'food_id': dish['food_id'],
            'food_name': dish['name'],
            'value': dish[field],
        } for dish in dishes]
        entries.sort(key=lambda e: e['value'], reverse=True)
        return entries

    # Sales distribution (NOTE: 'moth_sales' is the upstream field name as stored)
    sales = ranked_by('moth_sales')
    # Rating distribution
    rate_dis = ranked_by('rating')

    # Total sales aggregated per price point
    sales_per_price = {}
    for dish in dishes:
        price = dish['price']
        sales_per_price[price] = sales_per_price.get(price, 0) + dish['moth_sales']
    price_dis = sorted(
        [{'name': price, 'value': total}
         for price, total in sales_per_price.items()],
        key=lambda e: e['name'])

    # Review count per review date
    rate, _ = RateDao.get_by_crawler_id(crawler_id, page=-1)
    rate_date_dis = {}
    for review in rate:
        day = review['rated_at']
        rate_date_dis[day] = rate_date_dis.get(day, 0) + 1

    data = {
        'restaurant': crawler['restaurant'],
        'sales_dis': sales,
        'rate_dis': rate_dis,
        'price_dis': price_dis,
        'rate_date_dis': rate_date_dis,
    }
    # Queue the result for persistence so the next request hits the cache branch.
    analy_task_controller.save_analyse_data.put(
        u_id=u_id, data=json.dumps(data), crawler_one=crawler_id,
        _type=AnalyTaskType.SINGLE)
    return jsonify(data)
def get_tasks():
    """List the current user's crawlers, paginated and optionally filtered by status."""
    u_id = request.cookies.get('u_id')
    page = int(request.args.get('page', 1))
    per_page = int(request.args.get('per_page', 20))
    status = request.args.get('status')
    if status is not None:
        # Query-string values arrive as strings; the DAO expects an int status.
        status = int(status)
    crawlers, total = CrawlerDao.batch_get_by_status(
        u_id, page=page, per_page=per_page, status=status)
    return jsonify({
        'page': page,
        'per_page': per_page,
        'total': total,
        'data': crawlers,
    })
def get_crawler(crawler_id=None):
    """Return one crawler with all its dishes and reviews; crawler=None when missing."""
    u_id = request.cookies.get('u_id')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    if not crawler:
        return jsonify(crawler=None)
    # page=-1 fetches every row at once
    dishes, dish_total = DishDao.get_by_crawler_id(crawler_id, page=-1)
    rate, rate_total = RateDao.get_by_crawler_id(crawler_id, page=-1)
    return jsonify(
        crawler=crawler,
        dish={'total': dish_total, 'data': dishes},
        rate={'total': rate_total, 'data': rate},
    )
def update_count(self, count):
    """Persist the number of records fetched so far for this crawler."""
    CrawlerDao.update_by_id(
        self.id,
        self.u_id,
        data_count=count,
    )
def insert_extras(self, extras):
    """Attach extra metadata to this crawler's database row."""
    CrawlerDao.update_by_id(
        self.id,
        self.u_id,
        extras=extras,
    )
def done(self):
    """Mark this crawler as successfully finished and stamp the completion time."""
    CrawlerDao.update_by_id(
        self.id,
        self.u_id,
        status=Crawler_Status.DONE,
        finished=datetime.now(),
    )
def delete_crawler(crawler_id):
    """Delete the given crawler on behalf of the cookie-identified user."""
    u_id = request.cookies.get('u_id')
    CrawlerDao.delete(crawler_id, u_id)
    return jsonify(status="ok")
def get_crawler_status(crawler_id):
    """Return the crawler row as JSON so the client can poll crawl progress.

    Fixes over the original:
    * Guard against a missing crawler — ``get_by_id`` can return a falsy
      value (see ``get_crawler``), and the original then crashed with a
      TypeError on ``crawler['count']``.  Mirror ``get_crawler`` and return
      ``crawler=None`` instead.
    * Use ``crawler.get('count')`` so a row without that key no longer
      raises KeyError.
      NOTE(review): other code in this module writes the field as
      ``data_count`` — confirm which key this log line was meant to read.
    """
    u_id = request.cookies.get('u_id')
    crawler = CrawlerDao.get_by_id(crawler_id, u_id)
    if not crawler:
        return jsonify(crawler=None)
    logger.info(crawler.get('count'))
    return jsonify(crawler)
def compare(crawler_one, crawler_two):
    """Compare two shops dish-by-price: sales and ratings at every price point.

    Serves a cached analysis when one exists; otherwise builds the comparison,
    queues it for persistence and returns it.  Responds 401 with a message when
    either crawler is missing (original behavior, preserved).

    Fixes over the original:
    * Python-2-only ``dict.iteritems()`` replaced with ``items()``, matching
      the rest of this module (see ``sale_distribution``) and working on both
      Python 2 and 3.
    * The 'a'/'b' sub-dicts are initialized up front instead of lazily inside
      the dish loops, so a shop with zero dishes no longer raises KeyError
      while the shared price axis is being zero-filled.
    """
    u_id = request.cookies.get('u_id')
    cached = AnalyseTaskDao.get_by_u_id(
        u_id, crawler_one=crawler_one, crawler_two=crawler_two,
        _type=AnalyTaskType.MULTI)
    if cached:
        return jsonify(cached['data'])

    crawler_1 = CrawlerDao.get_by_id(crawler_one, u_id)
    crawler_2 = CrawlerDao.get_by_id(crawler_two, u_id)
    if not crawler_1 or not crawler_2:
        return jsonify({'message': u'爬虫不存在'}), 401
    UserLogDao.create(
        u_id,
        action_name=u'比较两家店',
        action_args=u'{} vs {}'.format(
            crawler_1['restaurant']['name'], crawler_2['restaurant']['name']))

    dish_1 = crawler_1['dishes']
    dish_2 = crawler_2['dishes']

    # Per-shop {price: value} maps; 'a' is shop 1, 'b' is shop 2.
    sales_compare_with_same_price = {'a': {}, 'b': {}}
    rate_compare_with_same_price = {'a': {}, 'b': {}}

    # Union of every price either shop sells at, so both series share an axis.
    price_set = set()
    max_sale = 0
    for side, dishes in (('a', dish_1), ('b', dish_2)):
        for item in dishes:
            price = item['price']
            sale = item['moth_sales']
            price_set.add(price)
            if sale > max_sale:
                max_sale = sale
            # NOTE(review): several dishes at the same price overwrite each
            # other (original behavior) — confirm whether they should sum.
            sales_compare_with_same_price[side][price] = sale
            rate_compare_with_same_price[side][price] = item['rating']

    # Zero-fill missing prices so both sides have a point at every price.
    for p in price_set:
        sales_compare_with_same_price['a'].setdefault(p, 0)
        sales_compare_with_same_price['b'].setdefault(p, 0)
        rate_compare_with_same_price['a'].setdefault(p, 0)
        rate_compare_with_same_price['b'].setdefault(p, 0)

    def sort_price(items):
        # {price: value} -> [{'price': p, 'value': v}, ...] ordered by price.
        return sorted(
            [{'price': k, 'value': v} for k, v in items.items()],
            key=lambda d: d['price'])

    a_sales = sort_price(sales_compare_with_same_price['a'])
    b_sales = sort_price(sales_compare_with_same_price['b'])
    a_rate = sort_price(rate_compare_with_same_price['a'])
    b_rate = sort_price(rate_compare_with_same_price['b'])

    data = {
        'crawler_1': crawler_1,
        'crawler_2': crawler_2,
        'sales_compare_with_same_price': {
            'a': a_sales,
            'b': b_sales,
            'max': max_sale,
        },
        'rate_compare_with_same_price': {
            'a': a_rate,
            'b': b_rate,
        },
        'other': [],
    }
    # Queue the result for persistence so the next request hits the cache branch.
    analy_task_controller.save_analyse_data.put(
        u_id=u_id, data=json.dumps(data), crawler_one=crawler_one,
        crawler_two=crawler_two, _type=AnalyTaskType.MULTI)
    return jsonify(data)
def tearDown(self):
    """Remove the user and the crawlers created in setUp.

    Fix: the original called ``CrawlerDao.delete([self.c_id], self.user_id)``,
    wrapping the id list in *another* list, while ``delete`` is called
    elsewhere in this codebase (see ``delete_crawler``) with a single crawler
    id.  Delete each crawler individually instead so the rows are actually
    removed.
    """
    UserDao.delete(self.user_id)
    for crawler_id in self.c_id:
        CrawlerDao.delete(crawler_id, self.user_id)