コード例 #1
0
    def run_spider(cls, **kwargs):
        """
        Schedule one spider run and record the outcome as a history row.

        :param kwargs:
            project          project name, str (required)
            spider           spider name, str (required)
            schedule_job_id  identifier stored with the history row (required)
            options          extra scheduling arguments as a JSON string that
                             decodes to an object; optional, may be empty
        :return: None
        :raises KeyError: if project, schedule_job_id or spider is missing
        """
        project = kwargs['project']
        schedule_job_id = kwargs['schedule_job_id']
        spider = kwargs['spider']
        options = kwargs.get('options')

        try:
            # Decode inside the try block so that a malformed options string
            # is recorded as a failed run, like every other scheduling error,
            # instead of escaping before any history row is written.
            opts = json.loads(options) if options else {}
            res = client.schedule(project=project, spider=spider, **opts)
            spider_job_id = res['jobid']
            message = ''

        except Exception as e:
            # Deliberately broad: any failure is stored as the row's message,
            # and an empty spider_job_id marks the run as unsuccessful.
            message = str(e)
            spider_job_id = ''

        # The raw options string (not the decoded dict) is what gets stored.
        ScheduleHistoryModel.insert_row(project=project,
                                        spider=spider,
                                        schedule_job_id=schedule_job_id,
                                        spider_job_id=spider_job_id,
                                        options=options,
                                        message=message)
コード例 #2
0
    def get_log_total_count(cls, project=None, spider=None, schedule_job_id=None):
        """
        Count schedule-history rows, optionally narrowed by the given filters.

        A filter whose value is falsy (None, empty string, ...) is ignored.

        :param project: project name to match, optional
        :param spider: spider name to match, optional
        :param schedule_job_id: schedule job id to match, optional
        :return: result of ``count()`` on the filtered query
        """
        rows = ScheduleHistoryModel.select()

        # Pair each filterable column with the value supplied by the caller.
        criteria = (
            (ScheduleHistoryModel.project, project),
            (ScheduleHistoryModel.spider, spider),
            (ScheduleHistoryModel.schedule_job_id, schedule_job_id),
        )
        for column, wanted in criteria:
            if wanted:
                rows = rows.where(column == wanted)

        return rows.count()
コード例 #3
0
    def get_log_success_count(cls, project=None, spider=None, schedule_job_id=None):
        """
        Count schedule-history rows that have a non-empty spider_job_id.

        The optional filters narrow the rows first; a filter whose value is
        falsy (None, empty string, ...) is ignored.

        :param project: project name to match, optional
        :param spider: spider name to match, optional
        :param schedule_job_id: schedule job id to match, optional
        :return: result of ``count()`` on the filtered query
        """
        rows = ScheduleHistoryModel.select()

        # Pair each filterable column with the value supplied by the caller.
        criteria = (
            (ScheduleHistoryModel.project, project),
            (ScheduleHistoryModel.spider, spider),
            (ScheduleHistoryModel.schedule_job_id, schedule_job_id),
        )
        for column, wanted in criteria:
            if wanted:
                rows = rows.where(column == wanted)

        # Only rows with a non-empty spider_job_id are counted as successes.
        succeeded = rows.where(ScheduleHistoryModel.spider_job_id != '')
        return succeeded.count()