コード例 #1
0
ファイル: controller.py プロジェクト: bsekiewicz/ScrapyKeeper
def job_add(project_id):
    """Create a job instance from the submitted form and run or schedule it.

    The form fields ``spider_name``, ``spider_arguments``, ``run_type`` and
    ``daemon`` are read with item access and therefore must be present;
    ``priority`` and the cron fields are optional. A one-time job is
    persisted and passed to ``agent.start_spider``; a periodic job is
    persisted together with its cron schedule.

    :param project_id: id of the project the new job belongs to
    :return: a 302 redirect to ``request.referrer``
    :raises ValueError: if ``cron_exp`` is given but does not consist of
        exactly five whitespace-separated fields
    """
    # NOTE(review): the looked-up project is not used below; the call is kept
    # in case the lookup itself matters (e.g. validates the id) -- confirm.
    Project.find_project_by_id(project_id)

    form = request.form
    job_instance = JobInstance()
    job_instance.spider_name = form['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = form['spider_arguments']
    job_instance.priority = form.get('priority', 0)
    job_instance.run_type = form['run_type']

    # A daemon was chosen manually: pass it on as an extra spider argument.
    if form['daemon'] != 'auto':
        spider_args = []
        if form['spider_arguments']:
            spider_args = form['spider_arguments'].split(",")
        spider_args.append("daemon={}".format(form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

    if job_instance.run_type == JobRunType.ONETIME:
        job_instance.enabled = -1
        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for later requests, then propagate.
            db.session.rollback()
            raise
        agent.start_spider(job_instance)

    if job_instance.run_type == JobRunType.PERIODIC:
        job_instance.cron_minutes = form.get('cron_minutes') or '0'
        job_instance.cron_hour = form.get('cron_hour') or '*'
        job_instance.cron_day_of_month = form.get('cron_day_of_month') or '*'
        job_instance.cron_day_of_week = form.get('cron_day_of_week') or '*'
        job_instance.cron_month = form.get('cron_month') or '*'

        # A full cron expression, when given, overrides the individual fields.
        cron_exp = form.get('cron_exp')
        if cron_exp:
            # split() without a separator tolerates repeated, leading and
            # trailing whitespace, which split(' ') turned into empty fields.
            (job_instance.cron_minutes,
             job_instance.cron_hour,
             job_instance.cron_day_of_month,
             job_instance.cron_month,
             job_instance.cron_day_of_week) = cron_exp.split()

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

    return redirect(request.referrer, code=302)
コード例 #2
0
ファイル: controller.py プロジェクト: divtiply/ScrapyKeeper
def job_back_in_time(project_id):
    """Create and launch one-time "back in time" jobs for the selected spiders.

    When ``config.BACK_IN_TIME_ENABLED`` is falsy the request is only
    redirected. Otherwise, for every ``spider_name`` value in the form, a
    one-time job instance is persisted and passed to
    ``agent.run_back_in_time``.

    The spider arguments of each job are the submitted ``spider_arguments``
    (comma separated, may be empty) followed by ``--callback=<callback>``,
    ``SCRAPY_PROJECT=SCRAPY_PROJECT`` and, when a daemon other than ``auto``
    was chosen, ``daemon=<daemon>``.

    :param project_id: id of the project the jobs belong to
    :return: a 302 redirect to ``request.referrer``
    """
    if not config.BACK_IN_TIME_ENABLED:
        return redirect(request.referrer, code=302)

    form = request.form
    for spider in form.getlist('spider_name'):
        job_instance = JobInstance()
        job_instance.project_id = project_id
        job_instance.spider_name = spider

        # Build the argument list once. An empty form value must not produce
        # a leading empty element, and the manually chosen daemon is appended
        # to the same list so the callback and project arguments are kept.
        raw_arguments = form['spider_arguments']
        spider_args = raw_arguments.split(",") if raw_arguments else []
        spider_args.append("--callback={}".format(form['callback']))
        spider_args.append("SCRAPY_PROJECT=SCRAPY_PROJECT")
        if form['daemon'] != 'auto':
            spider_args.append("daemon={}".format(form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

        job_instance.priority = form.get('priority', 0)
        job_instance.run_type = JobRunType.ONETIME
        job_instance.overlapping = True
        job_instance.enabled = -1

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for later requests, then propagate.
            db.session.rollback()
            raise
        agent.run_back_in_time(job_instance)

    return redirect(request.referrer, code=302)
コード例 #3
0
def _run_spider(spider_name, project_id):
    """
    Persist a one-time job for a spider and start it through the agent.

    :param spider_name: name of the spider to run
    :param project_id: id of the project the spider belongs to
    :return: None
    """
    job = JobInstance()
    job.spider_name = spider_name
    job.project_id = project_id
    job.run_type = JobRunType.ONETIME
    job.priority = JobPriority.NORMAL
    job.enabled = -1
    job.overlapping = True

    # Throttle setting used to temper the requests of this run; the same
    # value is stored on the job and passed to the spider as a setting.
    target_concurrency = _get_throttle_value(spider_name, project_id)
    job.throttle_concurrency = target_concurrency
    job.spider_arguments = (
        "setting=AUTOTHROTTLE_TARGET_CONCURRENCY={}".format(target_concurrency)
    )

    db.session.add(job)
    try:
        db.session.commit()
    except Exception:
        # Undo the failed transaction before propagating the error.
        db.session.rollback()
        raise

    agent.start_spider(job)
コード例 #4
0
ファイル: controller.py プロジェクト: bsekiewicz/ScrapyKeeper
 def put(self, project_id, spider_id):
     """Create a one-time job for an existing spider and start it.

     Aborts with 404 when no spider instance matches ``project_id`` and
     ``spider_id``. The optional form fields ``spider_arguments``, ``desc``,
     ``tags`` and ``priority`` (default 0) are copied onto the new job.

     :param project_id: id of the project the spider belongs to
     :param spider_id: id of the spider instance to run
     :return: True once the job was committed and handed to the agent
     """
     spider = SpiderInstance.query.filter_by(
         project_id=project_id, id=spider_id).first()
     if not spider:
         abort(404)

     form = request.form
     job = JobInstance()
     job.project_id = project_id
     job.spider_name = spider.spider_name
     job.run_type = JobRunType.ONETIME
     job.enabled = -1
     job.priority = form.get('priority', 0)
     job.spider_arguments = form.get('spider_arguments')
     job.desc = form.get('desc')
     job.tags = form.get('tags')

     db.session.add(job)
     db.session.commit()
     agent.start_spider(job)
     return True