def test_run(app, client): set_single_scrapyd(app) # ScrapydWeb-demo.egg: custom_settings = {}, also log settings & arguments upload_file_deploy(app, client, filename='ScrapydWeb-demo.egg', project=PROJECT, redirect_project=PROJECT) with app.test_request_context(): url = url_for('schedule.run', node=1) data = {'filename': '%s_%s_%s.pickle' % (PROJECT, VERSION, SPIDER)} response = client.post(url, data=data) assert url_for('dashboard', node=1) in get_text(response) sleep() url = url_for('log', node=1, opt='utf8', project=PROJECT, spider=SPIDER, job=JOBID) response = client.get(url) text = get_text(response) assert 'JOB: %s' % JOBID in text assert 'USER_AGENT: Mozilla/5.0' in text assert 'COOKIES_ENABLED: False' in text assert 'ROBOTSTXT_OBEY: False' in text assert 'CONCURRENT_REQUESTS: 1' in text assert 'DOWNLOAD_DELAY: 2' in text assert 'CLOSESPIDER_TIMEOUT: 60' in text assert 'CLOSESPIDER_PAGECOUNT: 10' in text assert 'self.arg1: val1' in text client.get(url_for('api', node=1, opt='forcestop', project=PROJECT, version_spider_job=JOBID))
def test_switch_template(app, client):
    """Fire a stored timer task against a switched Scrapyd server and check the rendered pass/fail data."""
    task_id = metadata['task_id']
    task_result_id = metadata['task_result_id']
    # Drop the previous task result so the task page starts from a known state.
    req(app, client, view='tasks.xhr',
        kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", ":total='1'"],
        nos=["status_code: -1,", "status: 'error',", 'label="Fail count"', 'label="Server"'])
    switch_scrapyd(app)
    # Fire the task manually; the new execution is the second run.
    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='fire', task_id=task_id))
    sleep(2)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 0, PASS 0',", "run_times: 2,"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=['label="Fail count"', "pass_count: 0,", "fail_count: 0,", "pass_count: 1,", ":total='2'"],
        nos=['label="Server"', "status_code:", "status:"])
    # Wait for retries against the unreachable server to be recorded as a failure.
    sleep(28)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 0',", "run_times: 'FAIL 1 / 2',"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", "status_code: -1,", "status: 'error',", ":total='2'"],
        nos=['label="Fail count"', 'label="Server"'])
    # Clean up the task.
    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='delete', task_id=task_id))
def test_email(app, client):
    """Simulate poll posts for each job state transition to exercise email-alert triggers."""
    # with app.test_request_context():
    if not app.config.get('ENABLE_EMAIL', False):
        return

    def start_a_job():
        # Start the demo spider via the API and return its jobid.
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        sleep()
        return js['jobid']

    def forcestop_a_job(job):
        # Unconditionally kill a job so it does not leak into later tests.
        req(app, client, view='api',
            kws=dict(node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=job))

    def post_for_poll(job, job_finished=''):
        # POST to the stats page the way the poll subprocess does.
        kws = dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=job,
                   job_finished=job_finished)
        req(app, client, view='log', kws=kws, data={}, ins='Stats collection')

    # Simulate poll post 'Finished'
    app.config['ON_JOB_FINISHED'] = True
    jobid = start_a_job()
    post_for_poll(jobid, job_finished='True')
    forcestop_a_job(jobid)

    # Simulate poll post 'ForceStopped'
    app.config['ON_JOB_FINISHED'] = False
    app.config['LOG_CRITICAL_THRESHOLD'] = 1
    app.config['LOG_CRITICAL_TRIGGER_FORCESTOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Stopped'
    app.config['LOG_CRITICAL_THRESHOLD'] = 0
    app.config['LOG_REDIRECT_THRESHOLD'] = 1
    app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Triggered'
    app.config['LOG_REDIRECT_THRESHOLD'] = 0
    app.config['LOG_IGNORE_THRESHOLD'] = 1
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Running'
    app.config['LOG_IGNORE_THRESHOLD'] = 0
    app.config['ON_JOB_RUNNING_INTERVAL'] = 5
    jobid = start_a_job()
    post_for_poll(jobid)  # Would NOT trigger email
    sleep()
    post_for_poll(jobid)  # Would trigger email
    app.config['ON_JOB_RUNNING_INTERVAL'] = 0
    sleep()
    post_for_poll(jobid)  # Would NOT trigger email
    forcestop_a_job(jobid)
def test_run(app, client):
    """Run a scheduled job from a saved form file and verify the log reflects the expected settings."""
    with app.test_request_context():
        req_single_scrapyd(app, client, view='schedule.run', kws=dict(node=1),
                           data=dict(filename=FILENAME), location=url_for('jobs', node=1))
    sleep()
    # Expected fragments in the UTF-8 log page: custom settings plus the spider argument.
    ins = [
        'JOB: %s' % cst.JOBID,
        'USER_AGENT: Mozilla/5.0 (Windows',
        'ROBOTSTXT_OBEY: False',
        'COOKIES_ENABLED: False',
        'CONCURRENT_REQUESTS: 1',
        'DOWNLOAD_DELAY: 2',
        'CLOSESPIDER_TIMEOUT: 60',
        'CLOSESPIDER_PAGECOUNT: 10',
        'self.arg1: %s' % metadata['value']
    ]
    req_single_scrapyd(app, client, view='log',
                       kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=cst.JOBID),
                       ins=ins)
    # Clean up the spawned job.
    req_single_scrapyd(app, client, view='api',
                       kws=dict(node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=cst.JOBID))
def test_enable_logparser(app, client):
    """Enable LogParser and verify it spawns (pid recorded) and produces stats JSON files."""
    def json_loads_from_file(path):
        # Small helper: parse a UTF-8 JSON file.
        with io.open(path, 'r', encoding='utf-8') as f:
            return json.loads(f.read())

    # In conftest.py: ENABLE_LOGPARSER=False
    assert not os.path.exists(app.config['STATS_JSON_PATH'])
    assert not os.path.exists(app.config['DEMO_JSON_PATH'])
    app.config['ENABLE_LOGPARSER'] = True
    app.config['ENABLE_EMAIL'] = False
    # ['username:[email protected]:6800', ]
    app.config['SCRAPYD_SERVERS'] = app.config['_SCRAPYD_SERVERS']
    check_app_config(app.config)
    logparser_pid = app.config['LOGPARSER_PID']
    # LogParser subprocess must be running; the poll subprocess must not.
    assert isinstance(logparser_pid, int) and logparser_pid > 0
    assert app.config['POLL_PID'] is None
    req(app, client, view='settings', kws=dict(node=1), ins='logparser_pid: %s' % logparser_pid)
    sleep()
    stats_json = json_loads_from_file(app.config['STATS_JSON_PATH'])
    assert stats_json['logparser_version'] == cst.LOGPARSER_VERSION
    assert cst.DEMO_JOBID in stats_json['datas'][cst.PROJECT][cst.SPIDER]
    demo_json = json_loads_from_file(app.config['DEMO_JSON_PATH'])
    # Fixed expectations for the bundled demo log.
    assert demo_json['runtime'] == '0:01:08'
    assert demo_json['finish_reason'] == 'finished'
    assert demo_json['logparser_version'] == cst.LOGPARSER_VERSION
def test_log_utf8_stats(app, client):
    """Deploy the demo egg, start a job, and smoke-test the Stats, Log and Dashboard pages."""
    upload_file_deploy(app, client, filename='demo.egg', project=cst.PROJECT, redirect_project=cst.PROJECT)
    with app.test_request_context():
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        jobid = js['jobid']
        sleep()
        # the Stats page
        req(app, client, view='log',
            kws=dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='Stats collection')
        # the Log page
        req(app, client, view='log',
            kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')
        # For testing request_scrapy_log() of LogView in log.py
        app.config['SCRAPYD_LOGS_DIR'] = 'dir-not-exist'
        req(app, client, view='log',
            kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')
        # the Dashboard page
        url_stop = url_for('api', node=1, opt='stop', project=cst.PROJECT, version_spider_job=jobid)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_stop)
        client.get(url_for('api', node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=jobid))
        # /1/schedule/ScrapydWeb_demo/default:%20the%20latest%20version/test/
        url_start = url_for('schedule.schedule', node=1, project=cst.PROJECT,
                            version=cst.DEFAULT_LATEST_VERSION, spider=cst.SPIDER)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_start)
def start_a_job():
    """Start the demo spider via the API and return its jobid.

    NOTE(review): references ``app``, ``client``, ``req`` and ``sleep`` that are not
    parameters — this appears to be a nested helper relying on an enclosing test's
    scope (or module globals); confirm against the surrounding file.
    """
    kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
    __, js = req(app, client, view='api', kws=kws)
    sleep()
    return js['jobid']
def test_api_stop(app, client):
    """Stop a running job via the API; expect prevstate 'running' and no 'times' key."""
    sleep()
    # 'jobid' is presumably a module-level value set by an earlier test — confirm in the full file.
    req(app, client, view='api',
        kws=dict(node=1, opt='stop', project=PROJECT, version_spider_job=jobid),
        jskws=dict(status=OK, prevstate='running'), nos='times')
def test_api_forcestop(app, client):
    """Force-stop an already-stopped job; expect prevstate None and times == 2 (stop + forcestop)."""
    sleep(5)
    # 'jobid' is presumably a module-level value set by an earlier test — confirm in the full file.
    req(app, client, view='api',
        kws=dict(node=1, opt='forcestop', project=PROJECT, version_spider_job=jobid),
        jskws=dict(status=OK, prevstate=None, times=2))
def test_delete_task_or_task_result_on_the_fly(app, client):
    """Delete a task (or only one of its results) while its first execution is still running."""
    for kind in ['delete_task', 'delete_task_result']:
        check_data_ = dict(check_data)
        req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
            jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
        with app.test_request_context():
            text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
                           location=url_for('tasks', node=NODE))
        m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
        task_id = int(m.group(1))
        print("task_id: %s" % task_id)
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
        assert js['data']['selected_nodes'] == [1, 2]
        sleep(2)  # the first execution has not finished yet
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 1
        task_result_id = js['ids'][0]
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 1
        # Delete either the whole task or just this single task result.
        if kind == 'delete_task':
            req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='delete', task_id=task_id))
        else:
            req(app, client, view='tasks.xhr',
                kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list'))
        if kind == 'delete_task':
            assert task_id not in js['ids']
        else:
            assert task_id in js['ids']
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 0
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 0
        # Let the in-flight execution finish, then clean up whatever it recorded.
        sleep(28)
        req(app, client, view='tasks.xhr',
            kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 0
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 0
        req(app, client, view='tasks.xhr', kws=dict(node=1, action='delete', task_id=task_id))
def test_api_forcestop(app, client):
    """Force-stop via the mobile UI API endpoint; expect prevstate None and times == 2."""
    sleep(5)
    with app.test_request_context():
        # 'jobid' is presumably a module-level value set by an earlier test — confirm in the full file.
        url = url_for('api', node=1, opt='forcestop', project=PROJECT,
                      version_spider_job=jobid, ui='mobile')
        response = client.get(url)
        js = load_json(response)
        assert js['status'] == OK and js['prevstate'] is None and js['times'] == 2
def check_pass(recipients=None, subject='Email from #scrapydweb', text=None):
    """Assert that the last email-send JSON response reports success.

    NOTE(review): references ``js``, ``app`` and ``sleep`` that are not parameters —
    this appears to be a nested helper relying on an enclosing test's scope;
    confirm against the surrounding file.

    :param recipients: if given, expected recipients list.
    :param subject: expected subject; pass None to skip the check.
    :param text: if given, expected body text.
    """
    assert js['status'] == cst.OK
    assert js['result']['reason'] == 'Sent'
    assert js['result']['sender'] == app.config['EMAIL_SENDER']
    if recipients is not None:
        assert js['result']['recipients'] == recipients
    if subject is not None:
        assert js['result']['subject'] == subject
    if text is not None:
        assert js['result']['text'] == text
    assert 'debug' not in js
    assert js['when']
    # Presumably throttles consecutive sends — confirm against the mail provider's limits.
    sleep(10)
def test_api_stop(app, client):
    """Stop a running job via the simple-UI API endpoint; expect prevstate 'running', no 'times'."""
    sleep()
    with app.test_request_context():
        # 'jobid' is presumably a module-level value set by an earlier test — confirm in the full file.
        url = url_for('api', node=1, opt='stop', project=PROJECT,
                      version_spider_job=jobid, ui='simple')
        response = client.get(url)
        js = load_json(response)
        assert js['status'] == OK and js['prevstate'] == 'running' and 'times' not in js
def test_pending_jobs(app, client):
    """Queue several identical runs and check the database-style Jobs page handles pending/seen jobs."""
    node = 1
    for i in range(2):
        req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
            ins="run results - ScrapydWeb")
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins="Vue.extend(Main)", nos='class="table wrap"')
    sleep()
    for i in range(2):
        req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
            ins="run results - ScrapydWeb")
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins="Vue.extend(Main)", nos='class="table wrap"')
    sleep()
    # The duplicate running job should be flashed once as "Ignore seen running job".
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins=["Ignore seen running job: %s, started at" % KEY, "Vue.extend(Main)"],
        nos='class="table wrap"')
    # Clean up: kill all four queued/running copies.
    for i in range(4):
        req(app, client, view='api',
            kws=dict(node=node, opt='forcestop', project=cst.PROJECT, version_spider_job=cst.JOBID))
def test_run(app, client):
    """Deploy the demo egg, run from a pickled form, and verify the log shows the expected settings."""
    # ScrapydWeb_demo.egg: custom_settings = {}, also log settings & arguments
    upload_file_deploy(app, client, filename='ScrapydWeb_demo.egg',
                       project=cst.PROJECT, redirect_project=cst.PROJECT)
    with app.test_request_context():
        req_single_scrapyd(app, client, view='schedule.run', kws=dict(node=1),
                           data=dict(filename='%s_%s_%s.pickle' % (cst.PROJECT, cst.VERSION, cst.SPIDER)),
                           location=url_for('dashboard', node=1))
    sleep()
    # Expected fragments in the UTF-8 log page.
    ins = [
        'JOB: %s' % cst.JOBID,
        'USER_AGENT: Mozilla/5.0',
        'COOKIES_ENABLED: False',
        'ROBOTSTXT_OBEY: False',
        'CONCURRENT_REQUESTS: 1',
        'DOWNLOAD_DELAY: 2',
        'CLOSESPIDER_TIMEOUT: 60',
        'CLOSESPIDER_PAGECOUNT: 10',
        'self.arg1: val1'
    ]
    req_single_scrapyd(app, client, view='log',
                       kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=cst.JOBID),
                       ins=ins)
    # Clean up the spawned job.
    req_single_scrapyd(app, client, view='api',
                       kws=dict(node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=cst.JOBID))
def test_edit_task(app, client):
    """Edit an existing timer task (single node, hour=6) and verify results and next run time."""
    task_id = metadata['task_id']
    # http://127.0.0.1:5000/1/schedule/?task_id=1
    # The edit form initially shows all of the task's selected servers checked.
    req(app, client, view='schedule', kws=dict(node=NODE, task_id=task_id),
        ins=["checked />%s" % app.config['SCRAPYD_SERVERS'][0],
             "checked />%s" % app.config['SCRAPYD_SERVERS'][-1]])
    check_data_ = dict(check_data)
    check_data_.update(task_id=task_id, hour='6')
    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
    with app.test_request_context():
        metadata['location'] = url_for('tasks', node=NODE)
    text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE),
                   data=run_data_single_scrapyd, location=metadata['location'])
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    # Editing keeps the same task id.
    assert int(m.group(1)) == task_id
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['selected_nodes'] == [1]
    sleep()
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["fail_times: 1,", "run_times: 'FAIL 1 / 2',"])
    text, __ = req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
                   ins=["fail_count: 0,", "fail_count: 1,", "pass_count: 1,", ":total='2'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    new_task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("new_task_result_id: %s" % new_task_result_id)
    req(app, client, view='tasks',
        kws=dict(node=NODE, task_id=task_id, task_result_id=new_task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',", ":total='1'"])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    # hour='6' took effect.
    assert '06:00:00' in js['data']['apscheduler_job']['next_run_time']
    # After the edit, only the first server remains checked.
    req(app, client, view='schedule', kws=dict(node=NODE, task_id=task_id),
        ins="checked />%s" % app.config['SCRAPYD_SERVERS'][0],
        nos="checked />%s" % app.config['SCRAPYD_SERVERS'][-1])
def test_check_result(app, client):
    """Follow a timer task through its first execution and verify per-node results appear."""
    task_id = metadata['task_id']
    sleep(2)  # The first execution has not finished yet: self.sleep_seconds_before_retry = 3
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 0, PASS 0',",
             "fail_times: 0,", "run_times: 1,"])
    text, __ = req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
                   ins=["fail_count: 0,", "pass_count: 0,", ":total='1'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("task_result_id: %s" % task_result_id)
    metadata['task_result_id'] = task_result_id
    with app.test_request_context():
        url_delete_task_result = url_for('tasks.xhr', node=NODE, action='delete',
                                         task_id=task_id, task_result_id=task_result_id)
    assert url_delete_task_result in text
    sleep(8)
    # First node has succeeded by now.
    req(app, client, view='tasks',
        kws=dict(node=NODE, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',"])  # , ":total='1'"
    sleep(20)  # The first execution has finished
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 1',",
             "fail_times: 1,", "run_times: 'FAIL 1 / 1',"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["fail_count: 1,", "pass_count: 1,", ":total='1'"])
    # Node 1 passed; node 2 (unreachable) errored.
    req(app, client, view='tasks',
        kws=dict(node=NODE, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',",
             "node: 2,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][-1],
             "status_code: -1,", "status: 'error',", ":total='2'"])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert '03:00:00' in js['data']['apscheduler_job']['next_run_time']
def test_execute_task_exception(app, client):
    """Fire a two-node task with only one Scrapyd configured so node 2 raises 'node index error'."""
    check_data_ = dict(check_data)
    check_data_.update(action='add')
    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
    with app.test_request_context():
        text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
                       location=url_for('tasks', node=NODE))
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    task_id = int(m.group(1))
    print("task_id: %s" % task_id)
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['selected_nodes'] == [1, 2]
    # req_single_scrapyd would set single_scrapyd=True
    req_single_scrapyd(app, client, view='tasks.xhr', kws=dict(node=1, action='fire', task_id=task_id))
    sleep()
    req(app, client, view='tasks', kws=dict(node=1),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 1',",
             "fail_times: 1,", "run_times: 'FAIL 1 / 1',"])
    text, __ = req(app, client, view='tasks', kws=dict(node=1, task_id=task_id),
                   ins=["fail_count: 1,", "pass_count: 1,", ":total='1'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=1, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("task_result_id: %s" % task_result_id)
    # In baseview.py: assert 0 < self.node <= self.SCRAPYD_SERVERS_AMOUNT
    # Note that AssertionError would be raise directly in test, whereas internal_server_error() would return 500.html
    # instead when the app is actually running, getting '500 error node index error: 2, which should be between 1 and 1'
    req(app, client, view='tasks',
        kws=dict(node=1, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',",
             "node: 2,", "status_code: -1,", "status: 'exception',",
             "node index error", ":total='2'"])
    req(app, client, view='tasks.xhr', kws=dict(node=1, action='delete', task_id=task_id))
def test_log_utf8_stats(app, client):
    """Start a demo job and smoke-test the UTF-8 log page and Stats page (desktop UI)."""
    upload_file_deploy(app, client, filename='demo.egg', project=PROJECT, redirect_project=PROJECT)
    with app.test_request_context():
        url = url_for('api', node=1, opt='start', project=PROJECT, version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']
        sleep()
        # UTF8 page
        url = url_for('log', node=1, opt='utf8', project=PROJECT, spider=SPIDER, job=jobid)
        response = client.get(url)
        assert 'utf8 - ScrapydWeb' in get_text(response) and not is_simple_ui(response)
        client.get(url_for('api', node=1, opt='forcestop', project=PROJECT, version_spider_job=jobid))
        # Stats page
        url = url_for('log', node=1, opt='stats', project=PROJECT, spider=SPIDER, job=jobid)
        response = client.get(url)
        assert 'Stats collection' in get_text(response) and not is_simple_ui(response)
def test_task_start_execute_end(app, client):
    """Create a short-lived */5-second task with start/end dates and track its lifecycle."""
    # Align with the wall clock so the computed start/end offsets are deterministic.
    while True:
        now_datetime = datetime.now()
        if now_datetime.second % 10 != 1:
            sleep(1)
        else:
            break
    start_datetime = now_datetime + timedelta(seconds=8)
    first_execute_datetime = now_datetime + timedelta(seconds=9)
    second_execute_datetime = now_datetime + timedelta(seconds=14)
    end_datetime = now_datetime + timedelta(seconds=18)
    check_data_ = dict(check_data)
    check_data_.update(action='add', hour='*', minute='*', second='*/5',
                       start_date=start_datetime.strftime("%Y-%m-%d %H:%M:%S"),
                       end_date=end_datetime.strftime("%Y-%m-%d %H:%M:%S"))
    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
    text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE),
                   data=run_data_single_scrapyd, location=metadata['location'])
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    task_id = int(m.group(1))
    print("task_id: %s" % task_id)
    with app.test_request_context():
        url_pause = url_for('tasks.xhr', node=NODE, action='pause', task_id=task_id)
        url_resume = url_for('tasks.xhr', node=NODE, action='resume', task_id=task_id)
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
        url_task_results = url_for('tasks', node=NODE, task_id=task_id)
    # Before the first run: pause available, no resume/delete, zero runs.
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_pause, url_task_results, "id: %s," % task_id,
             "prev_run_result: '%s'," % cst.NA, "run_times: 0,"],
        nos=[url_resume, url_delete])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert first_execute_datetime.strftime("%Y-%m-%d %H:%M:%S") in js['data']['apscheduler_job']['next_run_time']
    sleep(10)
    # The first execution may or may not has finished
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "run_times: 1,"])
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_pause, url_task_results, "id: %s," % task_id, "run_times: 1,"],
        nos=[url_resume, url_delete])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id), ins=":total='1'")
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert second_execute_datetime.strftime("%Y-%m-%d %H:%M:%S") in js['data']['apscheduler_job']['next_run_time']
    sleep(10)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "run_times: 2,"])
    # After end_date: job removed from the scheduler, so only delete remains.
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_delete, url_task_results, "id: %s," % task_id,
             "next_run_time: '%s'," % cst.NA, "run_times: 2,"],
        nos=[url_pause, url_resume])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", ":total='2'"],
        nos=["status_code: -1,", "status: 'error',"])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['apscheduler_job'] is None
    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='delete', task_id=task_id))
def test_auto_remove_apscheduler_job_if_task_not_exist(app, client):
    """Delete a task but keep its apscheduler job, then verify the orphan job is auto-removed."""
    check_data_ = dict(check_data)
    check_data_.update(action='add')
    # Trigger the auto-removal two ways: by visiting the timer-tasks page, and by firing the job.
    for kind in ['visit timer tasks', 'execute_task()']:
        req(app, client, view='schedule.check', kws=dict(node=NODE), data=dict(check_data_),
            jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
        text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE),
                       data=run_data_single_scrapyd, location=metadata['location'])
        m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
        task_id = int(m.group(1))
        print("task_id: %s" % task_id)
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
        assert '03:00:00' in js['data']['apscheduler_job']['next_run_time']
        # Delete the task record only; leave the apscheduler job orphaned.
        req(app, client, view='tasks.xhr',
            kws=dict(node=NODE, action='delete', task_id=task_id, ignore_apscheduler_job='True'))
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id),
                     jskws=dict(message="apscheduler_job #{id} found. Task #{id} not found".format(id=task_id)))
        assert js['data']['apscheduler_job'] == task_id
        # apscheduler_job #1 removed since task #1 not exist
        if kind == 'execute_task()':
            req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='fire', task_id=task_id))
            sleep()
        else:
            req(app, client, view='tasks', kws=dict(node=NODE),
                ins="apscheduler_job #{id} removed since task #{id} not exist".format(id=task_id),
                nos="id: %s," % task_id)
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id),
                     jskws=dict(
                         status=cst.ERROR,
                         message="apscheduler_job #{id} not found. Task #{id} not found".format(id=task_id)))
        assert js['data'] is None
def test_run(app, client):
    """Run duplicate jobs and verify the Jobs page ignores already-seen pending/running/finished jobs."""
    node = 1
    req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
        ins=['run results - ScrapydWeb', 'id="checkbox_1"', 'id="checkbox_2"',
             'onclick="passToServers();"'])
    # test handle_unique_constraint() in jobs.py
    sleep()
    req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data_single_scrapyd,
        ins=['run results - ScrapydWeb', 'id="checkbox_1"', 'onclick="passToServers();"'],
        nos='id="checkbox_2"')
    keep_text = ''
    for times in [1, 2]:
        __, js = req(app, client, view='api', kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
        if js['pending']:
            final_pending_job = js['pending'][-1]
            assert final_pending_job['id'] == cst.JOBID
        first_job = js['running'][-1]
        first_job_start = first_job['start_time'][:19]
        # Ignore seen pending job: ScrapydWeb_demo/test/2018-01-01T01_01_02
        flash = "Ignore seen pending job: %s" % KEY
        ins = ["Vue.extend(Main)", "start: '%s'," % first_job_start]
        nos = ['class="table wrap"', "Ignore seen running job"]
        text, __ = req(app, client, view='jobs', kws=dict(node=node, style='database'))
        try:
            if times == 1:
                assert flash in text
            else:
                assert flash not in text
            for i in ins:
                assert i in text
            for n in nos:
                assert n not in text
        except AssertionError:
            # the response containS 'Ignore seen running' instead
            keep_text = text
            break
        else:
            break
    sleep()
    __, js = req(app, client, view='api', kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
    first_job = js['running'][-2]
    first_job_start = first_job['start_time'][:19]
    second_job = js['running'][-1]
    second_job_start = second_job['start_time'][:19]
    assert first_job['id'] == second_job['id'] == cst.JOBID
    # TODO: For compatibility with Debian?! Running job with same key is not ordered by start ASC?!
    # assert second_job_start > first_job_start
    # Ignore seen running job: ScrapydWeb_demo/test/2018-01-01T01_01_02, started at 2019-03-01 20:27:22
    flash = "Ignore seen running job: %s, started at %s" % (KEY, first_job_start)
    if keep_text:
        text = keep_text
    else:
        text, __ = req(app, client, view='jobs', kws=dict(node=node, style='database'))
    for i in [flash, "Vue.extend(Main)", "start: '%s'," % second_job_start]:
        assert i in text
    for n in ['class="table wrap"', "start: '%s'," % first_job_start]:
        assert n not in text
    # flash only once
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins=["Vue.extend(Main)", "start: '%s'," % second_job_start],
        nos=[flash, 'class="table wrap"', "start: '%s'," % first_job_start])
    for i in range(2):
        req(app, client, view='api',
            kws=dict(node=node, opt='forcestop', project=cst.PROJECT, version_spider_job=cst.JOBID))
    sleep()
    __, js = req(app, client, view='api', kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
    last_but_two_finished_job = js['finished'][-2]
    last_but_two_finished_job_start = last_but_two_finished_job['start_time'][:19]
    last_finished_job = js['finished'][-1]
    last_finished_job_start = last_finished_job['start_time'][:19]
    assert last_but_two_finished_job['id'] == last_finished_job['id'] == cst.JOBID
    # Ignore seen finished job: ScrapydWeb_demo/test/2018-01-01T01_01_02, started at 2019-03-01 20:27:22
    flash = "Ignore seen finished job: %s, started at %s" % (KEY, last_but_two_finished_job_start)
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins=[flash, "Vue.extend(Main)", "start: '%s'," % last_finished_job_start],
        nos=['class="table wrap"', "start: '%s'," % last_but_two_finished_job_start])
    # flash only once
    req(app, client, view='jobs', kws=dict(node=node, style='database'),
        ins=["Vue.extend(Main)", "start: '%s'," % last_finished_job_start],
        nos=[flash, 'class="table wrap"', "start: '%s'," % last_but_two_finished_job_start])
def test_telnet_in_stats(app, client):
    """Check telnet-derived sections appear on the Stats page, in both desktop and mobile UI."""
    node = 1
    desktop_ins = [">Log analysis</li>", ">Log categorization</li>", ">View log</li>",
                   ">Progress visualization</li>"]
    mobile_ins = [">Analysis</li>", ">Categories</li>", ">Charts</li>", ">Logs</li>"]
    telnet_ins = [">Crawler.stats</li>", "<td>datetime.datetime(", ">Crawler.engine</li>",
                  "<th>engine.has_capacity()</th>", "<td>telnet</td>"]
    telnet_nos = ["CRITICAL: Unhandled Error", "telnet.OptionRefused"]
    req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
        ins="run results - ScrapydWeb")
    kws = dict(node=node, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=cst.JOBID)
    # Poll up to ~90s for the stats page to contain both desktop and telnet markers.
    for i in range(1, 10):
        sleep(10)
        print(i * 10)
        text, __ = req(app, client, view='log', kws=kws)
        if desktop_ins[-1] in text and telnet_ins[-1] in text:
            print("Found: %s %s" % (desktop_ins[-1], telnet_ins[-1]))
            break
    # test jobs POST data={} to save pages and items in database
    __, js = req(app, client, view='jobs', kws=dict(node=node), data={})
    assert isinstance(js[KEY]['pages'], int)  # and js[KEY]['pages'] > 0
    if scrapy_version > '1.5.1':
        # NOTE(review): string comparison of version numbers — breaks for e.g. '1.10.0'; confirm intent.
        print("telnet not available for scrapy_version: %s" % scrapy_version)
        telnet_ins = []
    req(app, client, view='log', kws=kws, ins=desktop_ins + telnet_ins, nos=telnet_nos)
    kws.update(ui='mobile')
    req(app, client, view='log', kws=kws, ins=mobile_ins + telnet_ins, nos=telnet_nos, mobileui=True)
    req(app, client, view='api',
        kws=dict(node=node, opt='forcestop', project=cst.PROJECT, version_spider_job=cst.JOBID))
def test_log_utf8_stats(app, client):
    """Start a demo job and smoke-test the Log, Stats and Dashboard pages."""
    upload_file_deploy(app, client, filename='demo.egg', project=PROJECT, redirect_project=PROJECT)
    with app.test_request_context():
        __, js = req(app, client, view='api',
                     kws=dict(node=1, opt='start', project=PROJECT, version_spider_job=SPIDER))
        print(js)
        jobid = js['jobid']
        sleep()
        # Log page
        req(app, client, view='log',
            kws=dict(node=1, opt='utf8', project=PROJECT, spider=SPIDER, job=jobid),
            ins='log - ScrapydWeb')
        # Stats page
        req(app, client, view='log',
            kws=dict(node=1, opt='stats', project=PROJECT, spider=SPIDER, job=jobid),
            ins='Stats collection')
        # Dashboard page
        url_stop = url_for('api', node=1, opt='stop', project=PROJECT, version_spider_job=jobid)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_stop)
        client.get(url_for('api', node=1, opt='forcestop', project=PROJECT, version_spider_job=jobid))
        # /1/schedule/ScrapydWeb-demo/default:%20the%20latest%20version/test/
        url_start = url_for('schedule.schedule', node=1, project=PROJECT,
                            version=DEFAULT_LATEST_VERSION, spider=SPIDER)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_start)
def test_email(app, client):
    """Exercise every email-alert code path of the stats-page poll handler.

    Simulates the caching POSTs for the 'Finished', 'ForceStopped',
    'Stopped', 'Triggered' and 'Running' job states by adjusting the
    app.config thresholds between runs. Decomposed into local helpers for
    consistency with test_monitor_alert, which uses the same pattern.
    Skips silently when ENABLE_EMAIL is off.
    """
    with app.test_request_context():
        if not app.config.get('ENABLE_EMAIL', False):
            return

        def start_a_job():
            # Start the demo spider, wait for it to spin up, return its jobid.
            url = url_for('api', node=1, opt='start', project=PROJECT,
                          version_spider_job=SPIDER)
            js = load_json(client.get(url))
            jobid = js['jobid']
            sleep()
            return jobid

        def post_stats(jobid, job_finished=''):
            # Simulate the caching component POSTing to the stats page.
            url = url_for('log', node=1, opt='stats', project=PROJECT, spider=SPIDER,
                          job=jobid, job_finished=job_finished)
            response = client.post(url, content_type='multipart/form-data')
            assert 'Stats collection' in get_text(response)

        def forcestop_a_job(jobid):
            client.get(url_for('api', node=1, opt='forcestop', project=PROJECT,
                               version_spider_job=jobid))

        # Simulate caching post 'Finished'
        jobid = start_a_job()
        post_stats(jobid, job_finished='True')
        forcestop_a_job(jobid)

        # Simulate caching post 'ForceStopped'
        app.config['ON_JOB_FINISHED'] = False
        jobid = start_a_job()
        post_stats(jobid)
        forcestop_a_job(jobid)

        # Simulate caching post 'Stopped'
        app.config['LOG_CRITICAL_THRESHOLD'] = 0
        app.config['LOG_REDIRECT_THRESHOLD'] = 1
        app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
        jobid = start_a_job()
        post_stats(jobid)
        forcestop_a_job(jobid)

        # Simulate caching post 'Triggered'
        app.config['LOG_REDIRECT_THRESHOLD'] = 0
        app.config['LOG_IGNORE_THRESHOLD'] = 1
        jobid = start_a_job()
        post_stats(jobid)
        forcestop_a_job(jobid)

        # Simulate caching post 'Running'
        app.config['LOG_IGNORE_THRESHOLD'] = 0
        app.config['ON_JOB_RUNNING_INTERVAL'] = 5
        jobid = start_a_job()
        post_stats(jobid)  # Would NOT trigger email (interval not yet elapsed)
        sleep()
        post_stats(jobid)  # Would trigger email
        app.config['ON_JOB_RUNNING_INTERVAL'] = 0
        sleep()
        post_stats(jobid)  # Would NOT trigger email (interval disabled)
        forcestop_a_job(jobid)
def test_log_utf8_stats(app, client):
    """Start the demo spider and check the Log, Stats and Dashboard pages
    directly with the test client (desktop UI expected throughout)."""
    upload_file_deploy(app, client, filename='demo.egg', project=PROJECT, redirect_project=PROJECT)
    with app.test_request_context():
        start_url = url_for('api', node=1, opt='start', project=PROJECT, version_spider_job=SPIDER)
        job_id = load_json(client.get(start_url))['jobid']
        sleep()

        # Log page
        log_page = client.get(url_for('log', node=1, opt='utf8', project=PROJECT,
                                      spider=SPIDER, job=job_id))
        assert 'log - ScrapydWeb' in get_text(log_page)
        assert not is_mobileui(log_page)

        # Stats page
        stats_page = client.get(url_for('log', node=1, opt='stats', project=PROJECT,
                                        spider=SPIDER, job=job_id))
        assert 'Stats collection' in get_text(stats_page)
        assert not is_mobileui(stats_page)

        # Dashboard page: links the stop endpoint while the job is running
        dashboard_url = url_for('dashboard', node=1)
        url_stop = url_for('api', node=1, opt='stop', project=PROJECT, version_spider_job=job_id)
        assert url_stop in get_text(client.get(dashboard_url))

        client.get(url_for('api', node=1, opt='forcestop', project=PROJECT,
                           version_spider_job=job_id))

        # /1/schedule/ScrapydWeb-demo/default:%20the%20latest%20version/test/
        # Re-fetch the dashboard: after the stop it should link the start endpoint.
        url_start = url_for('schedule.schedule', node=1, project=PROJECT,
                            version=DEFAULT_LATEST_VERSION, spider=SPIDER)
        assert url_start in get_text(client.get(dashboard_url))
def test_log_utf8_stats(app, client):
    """End-to-end check of the Log, Stats and Jobs pages for a running job.

    Covers: stats/log rendering, the fallback when LOCAL_SCRAPYD_LOGS_DIR is
    missing, classic vs database Jobs styles (including the persisted
    metadata preference), the jobs POST snapshot, and the delete / recover /
    delete-finished / delete-nonexistent flows of JobsXhrView.

    NOTE(review): a test with this exact name appears more than once in this
    chunk — under pytest only the last definition in a module runs; confirm
    the earlier ones are meant to be shadowed.
    """
    # In ScrapydWeb_demo.egg: CONCURRENT_REQUESTS=1, DOWNLOAD_DELAY=10
    upload_file_deploy(app, client, filename='ScrapydWeb_demo.egg', project=cst.PROJECT,
                       redirect_project=cst.PROJECT)
    with app.test_request_context():
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        jobid = js['jobid']
        sleep()
        # the Stats page
        req(app, client, view='log',
            kws=dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='Log analysis')
        # the Log page
        req(app, client, view='log',
            kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')
        # For testing request_scrapy_log() of LogView in log.py
        # (a bad local dir forces the view to fetch the log from Scrapyd instead)
        app.config['LOCAL_SCRAPYD_LOGS_DIR'] = 'non-exist-dir'
        req(app, client, view='log',
            kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')
        # the Jobs page GET
        url_stop = url_for('api', node=1, opt='stop', project=cst.PROJECT, version_spider_job=jobid)
        url_jobs_classic = url_for('jobs', node=1, style='classic')
        url_jobs_database = url_for('jobs', node=1, style='database')
        # classic style renders a plain table; database style renders the Vue app
        req(app, client, view='jobs', kws=dict(node=1, style='classic'),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'], nos="Vue.extend(Main)")
        req(app, client, view='jobs', kws=dict(node=1, style='database'),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"], nos='class="table wrap"')
        # ?raise_exception=True
        # NOTE(review): this request does not pass raise_exception and duplicates
        # the previous one; the raise_exception='True' request appears further
        # down — confirm whether this comment/call pair is misplaced.
        req(app, client, view='jobs', kws=dict(node=1, style='database'),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"], nos='class="table wrap"')
        # no explicit style: falls back to the last-used (database) style
        req(app, client, view='jobs', kws=dict(node=1),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"], nos='class="table wrap"')
        # the preferred style is persisted in metadata
        req(app, client, view='metadata', kws=dict(node=1), jskws=dict(jobs_style='database'))
        req(app, client, view='jobs', kws=dict(node=1, raise_exception='True'),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'],
            nos="Vue.extend(Main)")
        req(app, client, view='metadata', kws=dict(node=1), jskws=dict(jobs_style='classic'))
        req(app, client, view='jobs', kws=dict(node=1),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'], nos="Vue.extend(Main)")
        # jobs POST data={}
        jobs_key = '%s/%s/%s' % (cst.PROJECT, cst.SPIDER, jobid
                                 )  # type unicode in Python 2
        print('######')
        print(repr(jobs_key))
        print(type(jobs_key))
        __, js = req(app, client, view='jobs', kws=dict(node=1), data={}, jskeys=jobs_key)
        jobs_id = js[jobs_key]['id']
        jobs_start = js[jobs_key]['start']
        assert js[jobs_key]['deleted'] == '0'
        # JobsXhrView delete running job
        req(app, client, view='jobs.xhr', kws=dict(node=1, action='delete', id=jobs_id),
            jskws=dict(status=cst.OK))
        # Recover deleted running job
        req(app, client, view='jobs', kws=dict(node=1, style='database'),
            ins=[
                'Recover deleted job:', url_stop, 'id: %s,' % jobs_id, jobs_start
            ])
        # forcestop
        client.get(
            url_for('api', node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=jobid))
        sleep()
        # /1/schedule/ScrapydWeb_demo/default:%20the%20latest%20version/test/ NOT unique
        url_start = url_for('schedule', node=1, project=cst.PROJECT,
                            version=cst.DEFAULT_LATEST_VERSION, spider=cst.SPIDER)
        req(app, client, view='jobs', kws=dict(node=1, style='classic'), ins=url_start)
        req(app, client, view='jobs', kws=dict(node=1, style='database'), ins=url_start)
        # JobsXhrView delete finished
        req(app, client, view='jobs.xhr', kws=dict(node=1, action='delete', id=jobs_id),
            jskws=dict(status=cst.OK))
        # JobsView: query_jobs(): self.jobs = self.Job.query.filter_by(deleted=NOT_DELETED)
        # POST data={}
        req(app, client, view='jobs', kws=dict(node=1), data={},
            nos=['id: %s,' % jobs_id, jobs_start])
        req(app, client, view='jobs', kws=dict(node=1, style='database'),
            nos=['id: %s,' % jobs_id, jobs_start])
        # classic style reads from Scrapyd directly, so the finished job still shows
        req(app, client, view='jobs', kws=dict(node=1, style='classic'), ins=jobs_start[5:])
        # delete id not exist
        req(app, client, view='jobs.xhr', kws=dict(node=1, action='delete',
                                                   id=cst.BIGINT), jskws=dict(status=cst.ERROR))
def test_monitor_alert(app, client):
    """Exercise every alert code path of the poll POST handler.

    Simulates the poll component posting to the stats page for each job
    state — 'Finished', 'ForceStopped', 'Stopped', 'Triggered', 'Running' —
    by adjusting app.config thresholds between runs. Skips silently unless
    both ENABLE_MONITOR and ENABLE_EMAIL_ALERT are on.
    """
    # In ScrapydWeb_demo_no_delay.egg: unset CONCURRENT_REQUESTS, unset DOWNLOAD_DELAY
    upload_file_deploy(app, client, filename='ScrapydWeb_demo_no_delay.egg', project=cst.PROJECT,
                       redirect_project=cst.PROJECT)
    # with app.test_request_context():
    if not (app.config.get('ENABLE_MONITOR', False) and app.config.get('ENABLE_EMAIL_ALERT', False)):
        return

    def start_a_job():
        # Start the demo spider, wait for it to spin up, return its jobid.
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        sleep()
        return js['jobid']

    def forcestop_a_job(job):
        req(app, client, view='api', kws=dict(node=1, opt='forcestop', project=cst.PROJECT,
                                              version_spider_job=job))

    def post_for_poll(job, job_finished=''):
        # Simulate the poll component POSTing the job's stats page.
        kws = dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER,
                   job=job, job_finished=job_finished)
        req(app, client, view='log', kws=kws, data={}, ins='Log analysis')

    # Simulate poll post 'Finished'
    app.config['ON_JOB_FINISHED'] = True
    jobid = start_a_job()
    post_for_poll(jobid, job_finished='True')
    forcestop_a_job(jobid)
    sleep()
    # Simulate poll post 'ForceStopped'
    app.config['ON_JOB_FINISHED'] = False
    app.config['LOG_CRITICAL_THRESHOLD'] = 1
    app.config['LOG_CRITICAL_TRIGGER_FORCESTOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()
    # Simulate poll post 'Stopped'
    app.config['LOG_CRITICAL_THRESHOLD'] = 0
    app.config['LOG_REDIRECT_THRESHOLD'] = 1
    app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()
    # Simulate poll post 'Triggered'
    app.config['LOG_REDIRECT_THRESHOLD'] = 0
    app.config['LOG_IGNORE_THRESHOLD'] = 1
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()
    # Simulate poll post 'Running'
    app.config['LOG_IGNORE_THRESHOLD'] = 0
    app.config['ON_JOB_RUNNING_INTERVAL'] = 5
    jobid = start_a_job()
    post_for_poll(jobid)  # Would not trigger email
    sleep()
    post_for_poll(jobid)  # Would trigger email
    app.config['ON_JOB_RUNNING_INTERVAL'] = 0
    sleep()
    post_for_poll(jobid)  # Would not trigger email
    forcestop_a_job(jobid)