def post(self, project_id): post_data = request.form if post_data: job_instance = JobInstance() job_instance.spider_name = post_data['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = post_data.get('spider_arguments') job_instance.desc = post_data.get('desc') job_instance.tags = post_data.get('tags') job_instance.run_type = post_data['run_type'] job_instance.priority = post_data.get('priority', 0) if job_instance.run_type == 'onetime': print(job_instance.run_type) print('-+' * 100) job_instance.enabled = -1 db.session.add(job_instance) for i in range(3): try: db.session.commit() break except: continue agent.start_spider(job_instance) if job_instance.run_type == "periodic": job_instance.cron_minutes = post_data.get( 'cron_minutes') or '0' job_instance.cron_hour = post_data.get('cron_hour') or '*' job_instance.cron_day_of_month = post_data.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = post_data.get( 'cron_day_of_week') or '*' job_instance.cron_month = post_data.get('cron_month') or '*' db.session.add(job_instance) db.session.commit() return True
def job_add(project_id): project = Project.find_project_by_id(project_id) job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = request.form['spider_arguments'] job_instance.priority = request.form.get('priority', 0) job_instance.run_type = request.form['run_type'] # chose daemon manually if request.form['daemon'] != 'auto': spider_args = [] if request.form['spider_arguments']: spider_args = request.form['spider_arguments'].split(",") spider_args.append("daemon={}".format(request.form['daemon'])) job_instance.spider_arguments = ','.join(spider_args) if job_instance.run_type == JobRunType.ONETIME: job_instance.enabled = -1 db.session.add(job_instance) db.session.commit() agent.start_spider(job_instance) if job_instance.run_type == JobRunType.PERIODIC: job_instance.cron_minutes = request.form.get('cron_minutes') or '0' job_instance.cron_hour = request.form.get('cron_hour') or '*' job_instance.cron_day_of_month = request.form.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = request.form.get( 'cron_day_of_week') or '*' job_instance.cron_month = request.form.get('cron_month') or '*' # set cron exp manually if request.form.get('cron_exp'): job_instance.cron_minutes, job_instance.cron_hour, job_instance.cron_day_of_month, job_instance.cron_day_of_week, job_instance.cron_month = \ request.form['cron_exp'].split(' ') db.session.add(job_instance) db.session.commit() return redirect(request.referrer, code=302)
def post(self, project_id): post_data = request.form if post_data: job_instance = JobInstance() job_instance.spider_name = post_data['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = post_data.get('spider_arguments') job_instance.desc = post_data.get('desc') job_instance.tags = post_data.get('tags') job_instance.run_type = post_data['run_type'] job_instance.priority = post_data.get('priority', 0) if job_instance.run_type == "periodic": job_instance.cron_minutes = post_data.get( 'cron_minutes') or '0' job_instance.cron_hour = post_data.get('cron_hour') or '*' job_instance.cron_day_of_month = post_data.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = post_data.get( 'cron_day_of_week') or '*' job_instance.cron_month = post_data.get('cron_month') or '*' try: db.session.add(job_instance) db.session.commit() except: db.session.rollback() raise return True
def job_add(project_id): # Save the upload file, and save the file path to the # job_instance.spider_arguments dst = '' if 'file' in request.files: file = request.files['file'] # if user does not select file, browser also # submit a empty part without filename if file.filename == '': pass if file and allowed_seed(file.filename): filename = secure_filename(file.filename) dst = os.path.join( app.config['UPLOAD_DIR'], datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-") + filename) file.save(dst) project = Project.find_project_by_id(project_id) job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = request.form['spider_arguments'] if dst: if job_instance.spider_arguments: job_instance.spider_arguments += (",seed={}".format(dst)) else: job_instance.spider_arguments = "seed={}".format(dst) job_instance.priority = request.form.get('priority', 0) job_instance.run_type = request.form['run_type'] # chose daemon manually if request.form['daemon'] != 'auto': spider_args = [] if request.form['spider_arguments']: spider_args = request.form['spider_arguments'].split(",") spider_args.append("daemon={}".format(request.form['daemon'])) job_instance.spider_arguments = ','.join(spider_args) if job_instance.run_type == JobRunType.ONETIME: job_instance.enabled = -1 db.session.add(job_instance) db.session.commit() agent.start_spider(job_instance) if job_instance.run_type == JobRunType.PERIODIC: job_instance.cron_minutes = request.form.get('cron_minutes') or '0' job_instance.cron_hour = request.form.get('cron_hour') or '*' job_instance.cron_day_of_month = request.form.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = request.form.get( 'cron_day_of_week') or '*' job_instance.cron_month = request.form.get('cron_month') or '*' # set cron exp manually if request.form.get('cron_exp'): job_instance.cron_minutes, job_instance.cron_hour, job_instance.cron_day_of_month, job_instance.cron_day_of_week, job_instance.cron_month = \ request.form['cron_exp'].split(' ') db.session.add(job_instance) db.session.commit() return redirect(request.referrer, code=302)
def add_scheduler(): """ 功能: 给爬虫添加周期调度实例, 添加成功后数据库同步 :param: project_id: 工程id :param: spider_name: 爬虫名称 :param: spider_arguments: 爬虫需要传入的参数 :param: priority: 任务的优先级 :param: daemon: 任务线程的类型, 是否为守护线程 :param: cron_minutes: 调度周期参数-分钟 :param: cron_hour: 调度周期参数-小时 :param: cron_day_of_month: 调度周期参数-每月的天 :param: cron_day_of_week: 调度周期参数-每周的星期 :return: json.dumps({"code": 200, "status": "success/e"}), e指具体抛出的异常 """ try: project_id = request.form.get('project_id') job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = request.form['spider_arguments'] job_instance.priority = request.form.get('priority', 0) job_instance.run_type = 'periodic' # chose daemon manually if request.form['daemon'] != 'auto': spider_args = [] if request.form['spider_arguments']: spider_args = request.form['spider_arguments'].split(",") spider_args.append("daemon={}".format(request.form['daemon'])) job_instance.spider_arguments = ','.join(spider_args) job_instance.cron_minutes = request.form.get('cron_minutes') or '0' job_instance.cron_hour = request.form.get('cron_hour') or '*' job_instance.cron_day_of_month = request.form.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = request.form.get( 'cron_day_of_week') or '*' job_instance.cron_month = request.form.get('cron_month') or '*' if request.form.get('cron_exp'): job_instance.cron_minutes, job_instance.cron_hour, job_instance.cron_day_of_month, job_instance.cron_day_of_week, job_instance.cron_month = \ request.form['cron_exp'].split(' ') db.session.add(job_instance) db.session.commit() return json.dumps({"code": 200, "status": "success"}) except Exception as e: return json.dumps({"code": 500, "status": "error", "msg": "运行错误"})
def job_add(project_id): project = Project.find_project_by_id(project_id) job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = request.form['spider_arguments'] job_instance.priority = request.form.get('priority', 0) job_instance.run_type = request.form['run_type'] if job_instance.run_type == JobRunType.ONETIME: job_instance.enabled = -1 db.session.add(job_instance) db.session.commit() agent.start_spider(job_instance) if job_instance.run_type == JobRunType.PERIODIC: job_instance.cron_minutes = request.form.get('cron_minutes') or '0' job_instance.cron_hour = request.form.get('cron_hour') or '*' job_instance.cron_day_of_month = request.form.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = request.form.get( 'cron_day_of_week') or '*' job_instance.cron_month = request.form.get('cron_month') or '*' db.session.add(job_instance) db.session.commit() return redirect(request.referrer, code=302)
def job_add(project_id): project = Project.find_project_by_id(project_id) job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id job_instance.spider_arguments = request.form['spider_arguments'] job_instance.priority = request.form.get('priority', 0) job_instance.run_type = request.form['run_type'] spider_url = request.form['spider_url'] spider_models = request.form['spider_models'] if job_instance.spider_name == "news": allowed_domains = spider_url.split('/')[2] a = "allowed_domains=" + allowed_domains a = a + "," + "model=" + spider_models job_instance.spider_arguments = a r = CRedis() r.lpush(allowed_domains + ':start_urls', spider_url) elif job_instance.spider_name == 'jd': r = CRedis() r.lpush('jd:start_urls', spider_url) if job_instance.run_type == JobRunType.ONETIME: job_instance.enabled = -1 db.session.add(job_instance) db.session.commit() agent.start_spider(job_instance) if job_instance.run_type == JobRunType.PERIODIC: job_instance.cron_minutes = request.form.get('cron_minutes') or '0' job_instance.cron_hour = request.form.get('cron_hour') or '*' job_instance.cron_day_of_month = request.form.get( 'cron_day_of_month') or '*' job_instance.cron_day_of_week = request.form.get( 'cron_day_of_week') or '*' job_instance.cron_month = request.form.get('cron_month') or '*' db.session.add(job_instance) db.session.commit() return redirect(request.referrer, code=302)