예제 #1
0
def job_add(project_id):
    """Create a job instance for a project from the submitted form.

    Reads ``spider_name``, ``spider_arguments``, ``run_type`` and ``daemon``
    (all required) plus the optional ``priority`` and ``cron_*`` fields from
    ``request.form``. A one-time job is persisted and then handed to
    ``agent.start_spider``; a periodic job is persisted together with its
    cron fields. Any other run type is not persisted.

    :param project_id: id of the project the job belongs to
    :return: a 302 redirect to the referring page
    :raises ValueError: if ``cron_exp`` is supplied but does not consist of
        exactly five whitespace-separated fields
    """
    # NOTE(review): the lookup result is not used in this function; the call
    # is kept in case it has side effects -- confirm and remove if it doesn't.
    Project.find_project_by_id(project_id)

    form = request.form
    job_instance = JobInstance()
    job_instance.spider_name = form['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = form['spider_arguments']
    job_instance.priority = form.get('priority', 0)
    job_instance.run_type = form['run_type']

    # A manually chosen daemon is passed along as one more spider argument.
    if form['daemon'] != 'auto':
        spider_args = []
        if form['spider_arguments']:
            spider_args = form['spider_arguments'].split(",")
        spider_args.append("daemon={}".format(form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

    if job_instance.run_type == JobRunType.ONETIME:
        job_instance.enabled = -1
        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable before propagating the failure.
            db.session.rollback()
            raise
        # Only start the spider once the job row has been committed.
        agent.start_spider(job_instance)

    if job_instance.run_type == JobRunType.PERIODIC:
        job_instance.cron_minutes = form.get('cron_minutes') or '0'
        job_instance.cron_hour = form.get('cron_hour') or '*'
        job_instance.cron_day_of_month = form.get('cron_day_of_month') or '*'
        job_instance.cron_day_of_week = form.get('cron_day_of_week') or '*'
        job_instance.cron_month = form.get('cron_month') or '*'

        # A full cron expression, when given, overrides the individual
        # fields. split() without an argument tolerates leading, trailing
        # and repeated whitespace; a wrong field count still raises
        # ValueError from the unpacking.
        cron_exp = form.get('cron_exp')
        if cron_exp:
            (job_instance.cron_minutes,
             job_instance.cron_hour,
             job_instance.cron_day_of_month,
             job_instance.cron_month,
             job_instance.cron_day_of_week) = cron_exp.split()

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

    return redirect(request.referrer, code=302)
예제 #2
0
def job_back_in_time(project_id):
    """Schedule a one-time "back in time" run for every selected spider.

    Does nothing except redirect when ``config.BACK_IN_TIME_ENABLED`` is
    falsy. Otherwise, for each ``spider_name`` value in ``request.form`` a
    one-time, overlapping job instance is created, committed, and passed to
    ``agent.run_back_in_time``.

    The spider argument string is built from the submitted
    ``spider_arguments`` (if non-empty), followed by ``--callback=<callback>``
    and ``SCRAPY_PROJECT=SCRAPY_PROJECT``, and -- when a daemon other than
    ``auto`` was chosen -- ``daemon=<daemon>``.

    :param project_id: id of the project the jobs belong to
    :return: a 302 redirect to the referring page
    """
    if not config.BACK_IN_TIME_ENABLED:
        return redirect(request.referrer, code=302)

    spider_names = request.form.getlist('spider_name')
    if not spider_names:
        # Nothing selected: skip reading the remaining form fields.
        return redirect(request.referrer, code=302)

    # The arguments are identical for every spider, so build them once.
    # An empty ``spider_arguments`` must not contribute an empty entry.
    raw_arguments = request.form['spider_arguments']
    spider_args = raw_arguments.split(",") if raw_arguments else []
    spider_args.append("--callback={}".format(request.form['callback']))
    spider_args.append("SCRAPY_PROJECT=SCRAPY_PROJECT")
    # A manually chosen daemon is appended to the arguments; it must not
    # replace the callback / project entries added above.
    if request.form['daemon'] != 'auto':
        spider_args.append("daemon={}".format(request.form['daemon']))
    spider_arguments = ','.join(spider_args)
    priority = request.form.get('priority', 0)

    for spider in spider_names:
        job_instance = JobInstance()
        job_instance.project_id = project_id
        job_instance.spider_name = spider
        job_instance.spider_arguments = spider_arguments
        job_instance.priority = priority
        job_instance.run_type = JobRunType.ONETIME
        job_instance.overlapping = True
        job_instance.enabled = -1

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable before propagating the failure.
            db.session.rollback()
            raise
        agent.run_back_in_time(job_instance)

    return redirect(request.referrer, code=302)
예제 #3
0
 def post(self, project_id):
     """Create and persist a job instance from the posted form data.

     ``spider_name`` and ``run_type`` are required form fields; the other
     fields are optional. For a ``"periodic"`` run type the cron fields and
     the ``start_tasks`` / ``max_start_tasks`` values are stored as well.

     :param project_id: id of the project the job belongs to
     :return: ``True`` once the job is committed; ``None`` when the request
         carries no form data
     """
     post_data = request.form
     if not post_data:
         return None

     job_instance = JobInstance()
     job_instance.spider_name = post_data['spider_name']
     job_instance.project_id = project_id
     job_instance.spider_arguments = post_data.get('spider_arguments')
     job_instance.desc = post_data.get('desc')
     job_instance.tags = post_data.get('tags')
     job_instance.run_type = post_data['run_type']
     job_instance.priority = post_data.get('priority', 0)
     if job_instance.run_type == "periodic":
         # Missing or empty cron fields fall back to "minute 0 of every hour".
         job_instance.cron_minutes = post_data.get('cron_minutes') or '0'
         job_instance.cron_hour = post_data.get('cron_hour') or '*'
         job_instance.cron_day_of_month = post_data.get(
             'cron_day_of_month') or '*'
         job_instance.cron_day_of_week = post_data.get(
             'cron_day_of_week') or '*'
         job_instance.cron_month = post_data.get('cron_month') or '*'
         job_instance.start_tasks = post_data.get('start_tasks', 1)
         job_instance.max_start_tasks = post_data.get('max_start_tasks', 1)

     db.session.add(job_instance)
     try:
         db.session.commit()
     except Exception:
         # Leave the session usable before propagating the failure.
         db.session.rollback()
         raise
     return True
예제 #4
0
def _run_spider(spider_name, project_id):
    """
    Create a one-time job for a spider, persist it and start it.

    The job is given normal priority, allows overlapping, and carries the
    throttle value from ``_get_throttle_value`` both as a spider argument
    and as ``throttle_concurrency``.

    :param spider_name: name of the spider to run
    :param project_id: id of the project the spider belongs to
    :return: None
    """
    job = JobInstance()
    job.project_id = project_id
    job.spider_name = spider_name
    job.priority = JobPriority.NORMAL
    job.run_type = JobRunType.ONETIME
    job.overlapping = True
    job.enabled = -1

    # The throttle value is stored on the job and also forwarded to the
    # spider as a "setting=" argument.
    concurrency = _get_throttle_value(spider_name, project_id)
    argument_template = "setting=AUTOTHROTTLE_TARGET_CONCURRENCY={}"
    job.spider_arguments = argument_template.format(concurrency)
    job.throttle_concurrency = concurrency

    session = db.session
    session.add(job)
    try:
        session.commit()
    except Exception as exc:
        # Roll back so the session stays usable, then propagate the error.
        session.rollback()
        raise exc

    agent.start_spider(job)
예제 #5
0
 def put(self, project_id, spider_id):
     """Start a one-time run of an existing spider.

     Looks up the spider by project and id, aborts with 404 when no match
     is found, then persists a one-time job built from the optional form
     fields and passes it to ``agent.start_spider``.

     :param project_id: id of the project the spider belongs to
     :param spider_id: id of the spider instance to run
     :return: ``True`` after the job has been committed and started
     """
     spider_query = SpiderInstance.query.filter_by(
         project_id=project_id, id=spider_id)
     spider = spider_query.first()
     if not spider:
         abort(404)

     form = request.form
     job = JobInstance()
     job.spider_name = spider.spider_name
     job.project_id = project_id
     job.spider_arguments = form.get('spider_arguments')
     job.desc = form.get('desc')
     job.tags = form.get('tags')
     job.run_type = JobRunType.ONETIME
     job.priority = form.get('priority', 0)
     job.enabled = -1

     session = db.session
     session.add(job)
     session.commit()
     # The spider is started only after the job row has been committed.
     agent.start_spider(job)
     return True
예제 #6
0
 def post(self, project_id):
     """Create and persist a job instance from the posted form data.

     ``spider_name`` and ``run_type`` are required form fields; the rest
     are optional. Cron fields are stored only for the ``"periodic"`` run
     type, each falling back to a default when missing or empty.

     :param project_id: id of the project the job belongs to
     :return: ``True`` once the job is committed; ``None`` when the request
         carries no form data
     """
     form = request.form
     if not form:
         return

     job = JobInstance()
     job.spider_name = form['spider_name']
     job.project_id = project_id
     job.spider_arguments = form.get('spider_arguments')
     job.desc = form.get('desc')
     job.tags = form.get('tags')
     job.run_type = form['run_type']
     job.priority = form.get('priority', 0)

     if job.run_type == "periodic":
         # (form field / attribute name, fallback used when missing or empty)
         cron_fallbacks = (
             ('cron_minutes', '0'),
             ('cron_hour', '*'),
             ('cron_day_of_month', '*'),
             ('cron_day_of_week', '*'),
             ('cron_month', '*'),
         )
         for field_name, fallback in cron_fallbacks:
             setattr(job, field_name, form.get(field_name) or fallback)

     session = db.session
     session.add(job)
     try:
         session.commit()
     except Exception as exc:
         # Roll back so the session stays usable, then propagate the error.
         session.rollback()
         raise exc
     return True