Ejemplo n.º 1
0
def job_back_in_time(project_id):
    """
    Create and launch one-time "back in time" jobs for the selected spiders.

    Reads the submitted form: the ``spider_name`` list, ``spider_arguments``
    (comma separated), ``callback``, ``daemon`` and the optional ``priority``.
    One ``JobInstance`` is stored per selected spider and then handed to
    ``agent.run_back_in_time``.

    The stored argument string always contains the ``--callback=...`` and
    ``SCRAPY_PROJECT=SCRAPY_PROJECT`` entries; when a daemon other than
    ``'auto'`` is chosen a ``daemon=...`` entry is appended to them.

    :param project_id: id stored on every created job instance
    :return: a 302 redirect to ``request.referrer``
    """
    if not config.BACK_IN_TIME_ENABLED:
        return redirect(request.referrer, code=302)

    spider_names = request.form.getlist('spider_name')
    if not spider_names:
        # Nothing selected: no job is created and, as before, the remaining
        # form fields are not read at all.
        return redirect(request.referrer, code=302)

    # The argument string is the same for every spider, so build it once.
    raw_arguments = request.form['spider_arguments']
    # ''.split(",") is [''], which would leave a leading comma in the joined
    # string; start from an empty list when no arguments were submitted.
    spider_args = raw_arguments.split(",") if raw_arguments else []
    spider_args.append("--callback={}".format(request.form['callback']))
    spider_args.append("SCRAPY_PROJECT=SCRAPY_PROJECT")

    priority = request.form.get('priority', 0)

    # chose daemon manually
    daemon = request.form['daemon']
    if daemon != 'auto':
        # Append to the list built above instead of rebuilding it from the raw
        # form value, so the callback and SCRAPY_PROJECT entries are kept.
        spider_args.append("daemon={}".format(daemon))

    spider_arguments = ','.join(spider_args)

    for spider in spider_names:
        job_instance = JobInstance()
        job_instance.project_id = project_id
        job_instance.spider_name = spider
        job_instance.spider_arguments = spider_arguments
        job_instance.priority = priority
        job_instance.run_type = JobRunType.ONETIME
        job_instance.overlapping = True
        job_instance.enabled = -1

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable, then re-raise the original error
            # with its traceback intact.
            db.session.rollback()
            raise
        # Only hand the job to the agent once it has been committed.
        agent.run_back_in_time(job_instance)
    return redirect(request.referrer, code=302)
Ejemplo n.º 2
0
def _run_spider(spider_name, project_id):
    """
    Create a one-time job for a spider, persist it and start it.

    The concurrency target is obtained from ``_get_throttle_value`` and is
    stored twice on the job: inside ``spider_arguments`` as an
    ``AUTOTHROTTLE_TARGET_CONCURRENCY`` setting and on
    ``throttle_concurrency``.

    :param spider_name: name of the spider stored on the job instance
    :param project_id: id of the project stored on the job instance
    :return: None
    """
    job = JobInstance()
    job.project_id, job.spider_name = project_id, spider_name
    job.priority, job.run_type = JobPriority.NORMAL, JobRunType.ONETIME
    job.overlapping = True
    job.enabled = -1

    # Throttle setting passed to the spider and mirrored on the job itself.
    concurrency = _get_throttle_value(spider_name, project_id)
    argument_template = "setting=AUTOTHROTTLE_TARGET_CONCURRENCY={}"
    job.spider_arguments = argument_template.format(concurrency)
    job.throttle_concurrency = concurrency

    session = db.session
    session.add(job)
    try:
        session.commit()
    except Exception:
        # Undo the failed transaction before propagating the error.
        session.rollback()
        raise
    else:
        # Reached only after a successful commit.
        agent.start_spider(job)
Ejemplo n.º 3
0
def job_addlist(project_id):
    """
    Create one job per selected spider from the submitted form.

    Form fields read: the ``spider_name`` list, ``spider_arguments``,
    ``run_type``, ``daemon`` and the optional ``priority``, ``overlapping``,
    ``cron_exp`` and ``cron_*`` fields.

    * ``JobRunType.ONETIME`` jobs are stored with ``enabled = -1`` and passed
      to ``agent.start_spider`` right after the commit.
    * ``JobRunType.PERIODIC`` jobs are stored with their cron fields. A
      non-empty ``cron_exp`` ("minutes hour day_of_month month day_of_week")
      overrides the individual ``cron_*`` fields.
    * Any other run type stores nothing.

    :param project_id: id stored on every created job instance
    :return: a 302 redirect to ``request.referrer``
    :raises ValueError: if ``cron_exp`` does not contain exactly five fields
    """
    # The result of this lookup is not used anywhere in this function.
    # NOTE(review): kept in case the call matters to the caller - confirm it
    # has no needed side effect and drop it.
    Project.find_project_by_id(project_id)

    spider_names = request.form.getlist('spider_name')
    if not spider_names:
        # Nothing selected: no job is created and, as before, the remaining
        # form fields are not read at all.
        return redirect(request.referrer, code=302)

    # Everything below is identical for every spider, so read it once.
    form = request.form
    spider_arguments = form['spider_arguments']
    priority = form.get('priority', 0)
    run_type = form['run_type']
    overlapping = bool(form.get('overlapping', False))

    # chose daemon manually
    daemon = form['daemon']
    if daemon != 'auto':
        spider_args = spider_arguments.split(",") if spider_arguments else []
        spider_args.append("daemon={}".format(daemon))
        spider_arguments = ','.join(spider_args)

    is_onetime = run_type == JobRunType.ONETIME
    is_periodic = run_type == JobRunType.PERIODIC
    if not (is_onetime or is_periodic):
        # Unknown run type: nothing is stored or started.
        return redirect(request.referrer, code=302)

    cron_fields = None
    if is_periodic:
        cron_exp = form.get('cron_exp')
        if cron_exp:
            # set cron exp manually; split on any whitespace so doubled or
            # surrounding spaces do not break the unpacking below.
            cron_fields = cron_exp.split()
            if len(cron_fields) != 5:
                raise ValueError(
                    "cron_exp must have exactly 5 fields "
                    "(minutes hour day_of_month month day_of_week), "
                    "got {}".format(len(cron_fields)))
        else:
            # Same order as the cron_exp fields above.
            cron_fields = [
                form.get('cron_minutes') or '0',
                form.get('cron_hour') or '*',
                form.get('cron_day_of_month') or '*',
                form.get('cron_month') or '*',
                form.get('cron_day_of_week') or '*',
            ]

    for spider in spider_names:
        job_instance = JobInstance()
        job_instance.project_id = project_id
        job_instance.spider_name = spider
        job_instance.spider_arguments = spider_arguments
        job_instance.priority = priority
        job_instance.run_type = run_type
        job_instance.overlapping = overlapping

        if is_onetime:
            job_instance.enabled = -1
        else:
            (job_instance.cron_minutes,
             job_instance.cron_hour,
             job_instance.cron_day_of_month,
             job_instance.cron_month,
             job_instance.cron_day_of_week) = cron_fields

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable, then re-raise the original error
            # with its traceback intact.
            db.session.rollback()
            raise

        if is_onetime:
            # One-time jobs are started right after they are committed.
            agent.start_spider(job_instance)
    return redirect(request.referrer, code=302)