Ejemplo n.º 1
0
    def get_results(self, id: str) -> (dict, tuple):
        """
        Return one page of the results crawled by a given task.

        The page size and page number are read from the parsed request
        arguments and default to 10 and 1 when missing or falsy.

        :param id: task_id
        :return: an empty list when the task's spider has no 'col' value;
            otherwise a dict holding the status, the collection fields, the
            total number of matching items, the paging values and the items
            of the requested page.
        """
        params = self.parser.parse_args()
        page_size = params.get('page_size') or 10
        page_num = params.get('page_num') or 1

        task_doc = db_manager.get('tasks', id=id)
        spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])
        collection = spider_doc.get('col')
        if not collection:
            return []

        columns = get_spider_col_fields(collection)
        # Skip every item that belongs to the pages before the requested one.
        offset = page_size * (page_num - 1)
        rows = db_manager.list(collection, {'task_id': id},
                               skip=offset, limit=page_size)

        response = {'status': 'ok'}
        response['fields'] = jsonify(columns)
        response['total_count'] = db_manager.count(collection,
                                                   {'task_id': id})
        response['page_num'] = page_num
        response['page_size'] = page_size
        response['items'] = jsonify(rows)
        return response
Ejemplo n.º 2
0
 def get_results(self, id):
     """
     Return every result stored for a given task.

     :param id: task_id
     :return: an empty list when the task's spider has no 'col' value;
         otherwise the jsonify() of a dict with 'status', 'fields' and
         'items'.
     """
     task_doc = db_manager.get('tasks', id=id)
     spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])
     collection = spider_doc.get('col')
     if not collection:
         return []

     payload = {'status': 'ok'}
     payload['fields'] = get_spider_col_fields(collection)
     payload['items'] = db_manager.list(collection, {'task_id': id})
     return jsonify(payload)
Ejemplo n.º 3
0
 def download_results(self, id: str):
     """
     Send the results of a given task as a CSV file.

     The file name is built from the spider's 'col' value and the current
     time rounded to a whole number.

     :param id: task_id
     :return: whatever send_csv() returns; an empty item list is sent when
         the task's spider has no 'col' value.
     """
     task_doc = db_manager.get('tasks', id=id)
     spider_doc = db_manager.get('spiders', id=task_doc['spider_id'])
     col_name = spider_doc.get('col')

     if col_name:
         # The very large limit is used to fetch all items and fields.
         rows = db_manager.list(col_name, {'task_id': id},
                                limit=999999999)
         columns = get_spider_col_fields(col_name, task_id=id,
                                         limit=999999999)
         csv_name = f'results_{col_name}_{round(time())}.csv'
         return send_csv(rows, filename=csv_name, fields=columns,
                         encoding='utf-8')

     return send_csv([], f'results_{col_name}_{round(time())}.csv')
Ejemplo n.º 4
0
    def get_results(self, id: str) -> (dict, tuple):
        """
        Get a list of results crawled in a given task.

        Only string-valued entries whose key is not in IGNORE_FIELD are kept,
        and every kept value longer than 500 characters is cut to its first
        500 characters followed by '...'.

        :param id: task_id
        :return: an empty list when the task's spider has no 'col' value;
            otherwise a dict with 'status', 'fields', 'total_count' (number
            of items returned), 'page_num' (number of pages those items
            occupy at the requested page size, never less than 1),
            'page_size' and 'items'.
        """
        max_value_len = 500

        args = self.parser.parse_args()
        page_size = args.get('page_size') or 10

        task = db_manager.get('tasks', id=id)
        spider = db_manager.get('spiders', id=task['spider_id'])
        col_name = spider.get('col')
        if not col_name:
            return []

        fields = get_spider_col_fields(col_name)
        ignored = set(IGNORE_FIELD)
        fields = list(set(fields) - ignored)
        items = db_manager.list(col_name, {'task_id': id})

        # Limit overly long content, and drop fields that are not worth
        # displaying.
        adjust_items = [
            {
                key: (value[:max_value_len] + '...'
                      if len(value) > max_value_len else value)
                for key, value in item.items()
                if isinstance(value, str) and key not in ignored
            }
            for item in items
        ]

        # NOTE(review): a db_manager.count() result used to be computed here
        # but was never used; the response has always reported the number of
        # returned items instead - confirm that this is the intended total.
        item_count = len(adjust_items)

        # Ceiling division. The previous code used true division, which
        # always yields a float, so one page too many was reported whenever
        # the item count was an exact multiple of the page size. An empty
        # result is still reported as a single page.
        page_num = max(1, (item_count + page_size - 1) // page_size)

        return {
            'status': 'ok',
            'fields': jsonify(fields),
            'total_count': item_count,
            'page_num': page_num,
            'page_size': page_size,
            'items': jsonify(adjust_items)
        }
Ejemplo n.º 5
0
    def get_results(self, id):
        """
        Get one page of the results crawled in a given task.

        The page size and page number are read from the parsed request
        arguments and default to 10 and 1 when missing or falsy.

        :param id: task_id
        :return: an empty list when the task's spider has no 'col' value;
            otherwise a dict with 'status', 'fields', 'total_count',
            'page_num', 'page_size' and the 'items' of the requested page.
        """
        args = self.parser.parse_args()
        page_size = args.get('page_size') or 10
        page_num = args.get('page_num') or 1

        task = db_manager.get('tasks', id=id)
        spider = db_manager.get('spiders', id=task['spider_id'])
        col_name = spider.get('col')
        if not col_name:
            return []
        fields = get_spider_col_fields(col_name)
        # Apply the requested paging to the query itself; previously the
        # paging values were only echoed back in the response, so the items
        # did not correspond to the reported page.
        items = db_manager.list(col_name, {'task_id': id},
                                skip=page_size * (page_num - 1),
                                limit=page_size)
        return {
            'status': 'ok',
            'fields': jsonify(fields),
            'total_count': db_manager.count(col_name, {'task_id': id}),
            'page_num': page_num,
            'page_size': page_size,
            'items': jsonify(items)
        }