def get_results(self, id: str) -> (dict, tuple):
    """
    Get one page of the results crawled in a given task.

    Pagination is read from the parsed request arguments: ``page_size``
    falls back to 10 and ``page_num`` falls back to 1 when missing or falsy.

    :param id: task_id
    :return: ``[]`` when the task's spider has no ``col`` value; otherwise a
        dict with the keys ``status``, ``fields``, ``total_count``,
        ``page_num``, ``page_size`` and ``items``.
    """
    parsed = self.parser.parse_args()

    page_size = parsed.get('page_size')
    if not page_size:
        page_size = 10
    page_num = parsed.get('page_num')
    if not page_num:
        page_num = 1

    # Resolve the task's spider, then the collection that holds its results.
    task_doc = db_manager.get('tasks', id=id)
    spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])
    col_name = spider_doc.get('col')
    if not col_name:
        return []

    query = {'task_id': id}
    col_fields = get_spider_col_fields(col_name)

    # Skip every row that belongs to the pages before the requested one.
    offset = page_size * (page_num - 1)
    page_items = db_manager.list(col_name, query, skip=offset, limit=page_size)

    # Build the payload piece by piece, in the same order the values are used.
    fields_payload = jsonify(col_fields)
    matched_total = db_manager.count(col_name, {'task_id': id})
    items_payload = jsonify(page_items)

    return {
        'status': 'ok',
        'fields': fields_payload,
        'total_count': matched_total,
        'page_num': page_num,
        'page_size': page_size,
        'items': items_payload,
    }
def get_results(self, id):
    """
    Get the results crawled in a given task.

    :param id: task_id
    :return: ``[]`` when the task's spider has no ``col`` value; otherwise
        the ``jsonify``-ed payload holding ``status``, ``fields`` and
        ``items``.
    """
    task_doc = db_manager.get('tasks', id=id)
    spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])

    # The spider's 'col' entry names the collection its results are stored in.
    col_name = spider_doc.get('col')
    if not col_name:
        return []

    col_fields = get_spider_col_fields(col_name)
    task_items = db_manager.list(col_name, {'task_id': id})

    payload = {
        'status': 'ok',
        'fields': col_fields,
        'items': task_items,
    }
    return jsonify(payload)
def download_results(self, id: str):
    """
    Send the results crawled in a given task as a CSV file.

    The file name is built from the spider's collection name and the
    current time rounded to whole seconds.

    :param id: task_id
    :return: whatever ``send_csv`` returns; an empty CSV is sent when the
        task's spider has no ``col`` value.
    """
    # Very large limit so that listing is effectively unbounded.
    fetch_limit = 999999999

    task_doc = db_manager.get('tasks', id=id)
    spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])
    col_name = spider_doc.get('col')

    if col_name:
        rows = db_manager.list(col_name, {'task_id': id}, limit=fetch_limit)
        columns = get_spider_col_fields(col_name, task_id=id, limit=fetch_limit)
        csv_name = f'results_{col_name}_{round(time())}.csv'
        return send_csv(rows, filename=csv_name, fields=columns, encoding='utf-8')

    # No result collection configured for this spider: send an empty file.
    return send_csv([], f'results_{col_name}_{round(time())}.csv')
def get_results(self, id: str) -> (dict, tuple):
    """
    Get the results crawled in a given task, trimmed for display.

    Only string-valued fields are kept in each item. Fields listed in
    ``IGNORE_FIELD`` are dropped, and values longer than 500 characters are
    cut to 500 characters followed by ``'...'``.

    :param id: task_id
    :return: ``[]`` when the task's spider has no ``col`` value; otherwise a
        dict with the keys ``status``, ``fields``, ``total_count`` (number
        of items returned), ``page_num`` (number of pages needed for those
        items at ``page_size`` per page, never less than 1), ``page_size``
        and ``items``.
    """
    args = self.parser.parse_args()
    page_size = args.get('page_size') or 10

    task = db_manager.get('tasks', id=id)
    spider = db_manager.get('spiders', id=task['spider_id'])
    col_name = spider.get('col')
    if not col_name:
        return []

    fields = get_spider_col_fields(col_name)
    fields = list(set(fields) - set(IGNORE_FIELD))
    items = db_manager.list(col_name, {'task_id': id})

    # Limit overly long content and leave out fields that should not be shown.
    adjust_items = []
    for item in items:
        adjust_item = {}
        for key, value in item.items():
            if not isinstance(value, str) or key in IGNORE_FIELD:
                continue
            if len(value) > 500:
                value = value[:500] + '...'
            adjust_item[key] = value
        adjust_items.append(adjust_item)

    item_count = len(adjust_items)

    # Ceiling division. True division always yields a float in Python 3, so
    # the previous float check added an extra page whenever the item count
    # was an exact multiple of page_size. An empty result still reports one
    # page, as before.
    page_num = max(1, (item_count + page_size - 1) // page_size)

    return {
        'status': 'ok',
        'fields': jsonify(fields),
        'total_count': item_count,
        'page_num': page_num,
        'page_size': page_size,
        'items': jsonify(adjust_items),
    }
def get_results(self, id):
    """
    Get one page of the results crawled in a given task.

    Pagination is read from the parsed request arguments: ``page_size``
    falls back to 10 and ``page_num`` falls back to 1 when missing or falsy.

    :param id: task_id
    :return: ``[]`` when the task's spider has no ``col`` value; otherwise a
        dict with the keys ``status``, ``fields``, ``total_count``,
        ``page_num``, ``page_size`` and ``items``.
    """
    args = self.parser.parse_args()
    page_size = args.get('page_size') or 10
    page_num = args.get('page_num') or 1

    task = db_manager.get('tasks', id=id)
    spider = db_manager.get('spiders', id=task['spider_id'])
    col_name = spider.get('col')
    if not col_name:
        return []

    fields = get_spider_col_fields(col_name)

    # Apply the requested page to the query. Previously page_num and
    # page_size were only echoed back in the response, so every page request
    # returned the same rows.
    items = db_manager.list(
        col_name,
        {'task_id': id},
        skip=page_size * (page_num - 1),
        limit=page_size,
    )

    return {
        'status': 'ok',
        'fields': jsonify(fields),
        'total_count': db_manager.count(col_name, {'task_id': id}),
        'page_num': page_num,
        'page_size': page_size,
        'items': jsonify(items),
    }