Example #1
0
def job_add(project_id):
    """Create a job instance for a project from the submitted form.

    Reads ``spider_name``, ``spider_arguments``, ``run_type`` and ``daemon``
    (all required) plus the optional ``priority`` and ``cron_*`` fields from
    ``request.form``. A one-time job is committed and then handed to
    ``agent.start_spider``; a periodic job is committed together with its
    cron fields.

    :param project_id: id of the project the new job instance is attached to
    :return: a 302 redirect to ``request.referrer``
    :raises ValueError: when ``cron_exp`` is given but does not consist of
        exactly five whitespace-separated fields
    """
    # The result is not used in this function; the call itself is kept
    # because its effects are not visible from here.
    # TODO(review): confirm the lookup can be dropped or should 404 on a miss.
    Project.find_project_by_id(project_id)

    form = request.form
    job_instance = JobInstance()
    job_instance.spider_name = form['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = form['spider_arguments']
    job_instance.priority = form.get('priority', 0)
    job_instance.run_type = form['run_type']

    # A manually chosen daemon is passed along as one more
    # comma-separated spider argument.
    if form['daemon'] != 'auto':
        spider_args = []
        if form['spider_arguments']:
            spider_args = form['spider_arguments'].split(",")
        spider_args.append("daemon={}".format(form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

    if job_instance.run_type == JobRunType.ONETIME:
        job_instance.enabled = -1
        db.session.add(job_instance)
        db.session.commit()
        agent.start_spider(job_instance)
    elif job_instance.run_type == JobRunType.PERIODIC:
        job_instance.cron_minutes = form.get('cron_minutes') or '0'
        job_instance.cron_hour = form.get('cron_hour') or '*'
        job_instance.cron_day_of_month = form.get('cron_day_of_month') or '*'
        job_instance.cron_day_of_week = form.get('cron_day_of_week') or '*'
        job_instance.cron_month = form.get('cron_month') or '*'
        # A full cron expression, when supplied, overrides the single fields.
        cron_exp = form.get('cron_exp')
        if cron_exp:
            # split() without a separator tolerates leading/trailing and
            # repeated whitespace, which split(' ') turned into empty fields
            # and therefore into a failed five-way unpack.
            (job_instance.cron_minutes,
             job_instance.cron_hour,
             job_instance.cron_day_of_month,
             job_instance.cron_month,
             job_instance.cron_day_of_week) = cron_exp.split()
        db.session.add(job_instance)
        db.session.commit()
    return redirect(request.referrer, code=302)
Example #2
0
 def put(self, project_id, job_id):
     """Update a job instance from the submitted form data.

     Fields that are missing or empty in the form keep their current value,
     except ``enabled``, which falls back to ``0`` when it is not submitted.
     When the form carries ``status == 'run'`` the job is also handed to
     ``agent.start_spider`` after the commit.

     :param project_id: id of the project the job instance belongs to
     :param job_id: id of the job instance to update
     :return: ``True`` after an update; ``None`` when the form is empty
     """
     form = request.form
     if not form:
         return None

     job_instance = JobInstance.query.filter_by(
         project_id=project_id, id=job_id).first()
     if not job_instance:
         abort(404)

     def submitted_or_current(field):
         # A falsy submitted value (missing or empty) keeps the stored one.
         return form.get(field) or getattr(job_instance, field)

     job_instance.spider_arguments = submitted_or_current('spider_arguments')
     job_instance.priority = submitted_or_current('priority')
     job_instance.enabled = form.get('enabled', 0)
     for field in ('cron_minutes', 'cron_hour', 'cron_day_of_month',
                   'cron_day_of_week', 'cron_month', 'desc', 'tags'):
         setattr(job_instance, field, submitted_or_current(field))
     db.session.commit()

     if form.get('status') == 'run':
         agent.start_spider(job_instance)
     return True
Example #3
0
def _run_spider(spider_name, project_id):
    """
    Create a one-time job instance for a spider, commit it and start it.

    The throttle value returned by ``_get_throttle_value`` is stored on the
    job instance and also passed to the spider as an
    ``AUTOTHROTTLE_TARGET_CONCURRENCY`` setting argument.

    :param spider_name: name of the spider to run
    :param project_id: id of the project the spider belongs to
    :return: None
    """
    job = JobInstance()
    fixed_fields = (
        ('project_id', project_id),
        ('spider_name', spider_name),
        ('priority', JobPriority.NORMAL),
        ('run_type', JobRunType.ONETIME),
        ('overlapping', True),
        ('enabled', -1),
    )
    for attr, value in fixed_fields:
        setattr(job, attr, value)

    # Throttling: the same value goes into the spider arguments and onto the job.
    concurrency = _get_throttle_value(spider_name, project_id)
    job.spider_arguments = (
        "setting=AUTOTHROTTLE_TARGET_CONCURRENCY={}".format(concurrency))
    job.throttle_concurrency = concurrency

    db.session.add(job)
    try:
        db.session.commit()
    except Exception as err:
        # Undo the failed transaction, then let the caller see the error.
        db.session.rollback()
        raise err
    else:
        agent.start_spider(job)
Example #4
0
def run_spider_job(job_instance_id):
    '''
    Scheduler callback: look up a job instance and start its spider.

    Any exception is caught and written to the application error log, so
    nothing propagates to the caller.

    :param job_instance_id: id of the job instance to start
    :return: None
    '''
    try:
        job = JobInstance.find_job_instance_by_id(job_instance_id)
        agent.start_spider(job)
        message = (
            '[run_spider_job][project:%s][spider_name:%s][job_instance_id:%s]'
            % (job.project_id, job.spider_name, job.id))
        app.logger.info(message)
    except Exception as err:
        app.logger.error('[run_spider_job] ' + str(err))
Example #5
0
 def put(self, project_id, spider_id):
     """Create and start a one-time job for an existing spider.

     Responds with 404 when no spider instance matches the project/spider id
     pair. The optional ``spider_arguments``, ``desc``, ``tags`` and
     ``priority`` values are taken from ``request.form``.

     :param project_id: id of the project the spider belongs to
     :param spider_id: id of the spider instance to run
     :return: ``True`` once the job has been committed and started
     """
     spider = SpiderInstance.query.filter_by(
         project_id=project_id, id=spider_id).first()
     if not spider:
         abort(404)

     job = JobInstance()
     job.spider_name = spider.spider_name
     job.project_id = project_id
     # Free-form fields are copied as submitted (None when absent).
     for field in ('spider_arguments', 'desc', 'tags'):
         setattr(job, field, request.form.get(field))
     job.run_type = JobRunType.ONETIME
     job.priority = request.form.get('priority', 0)
     job.enabled = -1

     db.session.add(job)
     db.session.commit()
     agent.start_spider(job)
     return True
Example #6
0
def run_spider_job(job_instance_id):
    '''
    Scheduler callback: start up to ``start_tasks`` runs of a job instance.

    The number of runs is capped so that pending plus running executions
    never exceed the job instance's ``max_start_tasks``. Any exception is
    caught and written to the application error log.

    :param job_instance_id: id of the job instance to start
    :return: None
    '''
    try:
        job = JobInstance.find_job_instance_by_id(job_instance_id)
        requested = job.start_tasks

        # NOTE: a check that returned early when an execution had been
        # RUNNING for more than 30 minutes used to sit here; it is disabled.

        active_states = [SpiderStatus.PENDING, SpiderStatus.RUNNING]
        active = JobExecution.query.filter_by(
            job_instance_id=job_instance_id).filter(
                JobExecution.running_status.in_(active_states)).count()
        if active >= job.max_start_tasks:
            return

        # Never start more runs than there are free slots left.
        free_slots = job.max_start_tasks - active
        start_tasks = free_slots if requested > free_slots else requested

        log_template = (
            '[run_spider_job][project:%s][spider_name:%s][job_instance_id:%s]'
            '[start_tasks:%s][i:%s]')
        launched = 0
        # The loop body never runs when start_tasks is not positive.
        while launched < start_tasks:
            agent.start_spider(job)
            launched += 1
            app.logger.info(
                log_template % (job.project_id, job.spider_name, job.id,
                                start_tasks, launched))

    except Exception as err:
        app.logger.error('[run_spider_job] ' + str(err))
Example #7
0
def job_run(project_id, job_instance_id):
    """Start the spider of an existing job instance.

    :param project_id: id of the project the job instance belongs to
    :param job_instance_id: id of the job instance to start
    :return: a 302 redirect to ``request.referrer``; a 404 response is
        raised when no job instance matches the given ids
    """
    # Imported locally so this function does not rely on a module-level
    # import of ``abort`` being present.
    from flask import abort

    job_instance = JobInstance.query.filter_by(
        project_id=project_id, id=job_instance_id).first()
    if not job_instance:
        # .first() returns None when nothing matches; answer 404 instead of
        # handing None to the agent.
        abort(404)
    agent.start_spider(job_instance)
    return redirect(request.referrer, code=302)