def test_run(app, client):
    set_single_scrapyd(app)
    # ScrapydWeb-demo.egg: custom_settings = {}, also log settings & arguments
    upload_file_deploy(app, client, filename='ScrapydWeb-demo.egg', project=PROJECT, redirect_project=PROJECT)

    with app.test_request_context():
        url = url_for('schedule.run', node=1)
        data = {'filename': '%s_%s_%s.pickle' % (PROJECT, VERSION, SPIDER)}
        response = client.post(url, data=data)
        assert url_for('dashboard', node=1) in get_text(response)

        sleep()
        url = url_for('log', node=1, opt='utf8', project=PROJECT, spider=SPIDER, job=JOBID)
        response = client.get(url)
        text = get_text(response)
        assert 'JOB: %s' % JOBID in text
        assert 'USER_AGENT: Mozilla/5.0' in text
        assert 'COOKIES_ENABLED: False' in text
        assert 'ROBOTSTXT_OBEY: False' in text
        assert 'CONCURRENT_REQUESTS: 1' in text
        assert 'DOWNLOAD_DELAY: 2' in text
        assert 'CLOSESPIDER_TIMEOUT: 60' in text
        assert 'CLOSESPIDER_PAGECOUNT: 10' in text
        assert 'self.arg1: val1' in text

        client.get(url_for('api', node=1, opt='forcestop', project=PROJECT, version_spider_job=JOBID))
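These examples lean on a few small test helpers such as get_text() and load_json(). A minimal sketch of what they might look like, assuming they simply wrap the Flask test-client response (the project's own test utilities may differ):

import json


def get_text(response):
    # Decode the Flask test-client response body to text.
    return response.get_data(as_text=True)


def load_json(response):
    # Parse a JSON response body into a dict.
    return json.loads(response.get_data(as_text=True))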
Example #2
def test_switch_template(app, client):
    task_id = metadata['task_id']
    task_result_id = metadata['task_result_id']
    req(app, client, view='tasks.xhr',
        kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", ":total='1'"],
        nos=["status_code: -1,", "status: 'error',", 'label="Fail count"', 'label="Server"'])

    switch_scrapyd(app)

    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='fire', task_id=task_id))
    sleep(2)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 0, PASS 0',", "run_times: 2,"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=['label="Fail count"', "pass_count: 0,", "fail_count: 0,", "pass_count: 1,", ":total='2'"],
        nos=['label="Server"', "status_code:", "status:"])

    sleep(28)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 0',", "run_times: 'FAIL 1 / 2',"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", "status_code: -1,", "status: 'error',", ":total='2'"],
        nos=['label="Fail count"', 'label="Server"'])

    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='delete', task_id=task_id))
Example #3
def test_email(app, client):
    # with app.test_request_context():
    if not app.config.get('ENABLE_EMAIL', False):
        return

    def start_a_job():
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        sleep()
        return js['jobid']

    def forcestop_a_job(job):
        req(app, client, view='api', kws=dict(node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=job))

    def post_for_poll(job, job_finished=''):
        kws = dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=job, job_finished=job_finished)
        req(app, client, view='log', kws=kws, data={}, ins='Stats collection')

    # Simulate poll post 'Finished'
    app.config['ON_JOB_FINISHED'] = True
    jobid = start_a_job()
    post_for_poll(jobid, job_finished='True')
    forcestop_a_job(jobid)

    # Simulate poll post 'ForceStopped'
    app.config['ON_JOB_FINISHED'] = False
    app.config['LOG_CRITICAL_THRESHOLD'] = 1
    app.config['LOG_CRITICAL_TRIGGER_FORCESTOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Stopped'
    app.config['LOG_CRITICAL_THRESHOLD'] = 0
    app.config['LOG_REDIRECT_THRESHOLD'] = 1
    app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Triggered'
    app.config['LOG_REDIRECT_THRESHOLD'] = 0
    app.config['LOG_IGNORE_THRESHOLD'] = 1
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)

    # Simulate poll post 'Running'
    app.config['LOG_IGNORE_THRESHOLD'] = 0
    app.config['ON_JOB_RUNNING_INTERVAL'] = 5
    jobid = start_a_job()
    post_for_poll(jobid)  # Would NOT trigger email

    sleep()
    post_for_poll(jobid)  # Would trigger email

    app.config['ON_JOB_RUNNING_INTERVAL'] = 0
    sleep()
    post_for_poll(jobid)  # Would NOT trigger email
    forcestop_a_job(jobid)
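Most of these examples go through a req() helper that builds the URL, issues the request, and asserts on the response. A simplified sketch of such a helper, assuming it returns the response text plus parsed JSON and checks the ins/nos substrings (the real helper also handles jskws, jskeys, mobileui, and the single-Scrapyd variant req_single_scrapyd):

import json

from flask import url_for


def req(app, client, view, kws, data=None, ins=None, nos=None, location=None):
    # Build the URL inside a request context, then GET (or POST when data is given).
    with app.test_request_context():
        url = url_for(view, **kws)
    response = client.post(url, data=data) if data is not None else client.get(url)
    text = response.get_data(as_text=True)
    if location is not None:
        assert location in text
    for fragment in ([ins] if isinstance(ins, str) else ins or []):
        assert fragment in text
    for fragment in ([nos] if isinstance(nos, str) else nos or []):
        assert fragment not in text
    try:
        js = json.loads(text)
    except ValueError:
        js = {}
    return text, js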
Example #4
def test_run(app, client):
    with app.test_request_context():
        req_single_scrapyd(app,
                           client,
                           view='schedule.run',
                           kws=dict(node=1),
                           data=dict(filename=FILENAME),
                           location=url_for('jobs', node=1))
    sleep()

    ins = [
        'JOB: %s' % cst.JOBID, 'USER_AGENT: Mozilla/5.0 (Windows',
        'ROBOTSTXT_OBEY: False', 'COOKIES_ENABLED: False',
        'CONCURRENT_REQUESTS: 1', 'DOWNLOAD_DELAY: 2',
        'CLOSESPIDER_TIMEOUT: 60', 'CLOSESPIDER_PAGECOUNT: 10',
        'self.arg1: %s' % metadata['value']
    ]
    req_single_scrapyd(app,
                       client,
                       view='log',
                       kws=dict(node=1,
                                opt='utf8',
                                project=cst.PROJECT,
                                spider=cst.SPIDER,
                                job=cst.JOBID),
                       ins=ins)
    req_single_scrapyd(app,
                       client,
                       view='api',
                       kws=dict(node=1,
                                opt='forcestop',
                                project=cst.PROJECT,
                                version_spider_job=cst.JOBID))
Example #5
def test_enable_logparser(app, client):
    def json_loads_from_file(path):
        with io.open(path, 'r', encoding='utf-8') as f:
            return json.loads(f.read())

    # In conftest.py: ENABLE_LOGPARSER=False
    assert not os.path.exists(app.config['STATS_JSON_PATH'])
    assert not os.path.exists(app.config['DEMO_JSON_PATH'])
    app.config['ENABLE_LOGPARSER'] = True
    app.config['ENABLE_EMAIL'] = False

    # ['username:[email protected]:6800', ]
    app.config['SCRAPYD_SERVERS'] = app.config['_SCRAPYD_SERVERS']
    check_app_config(app.config)

    logparser_pid = app.config['LOGPARSER_PID']
    assert isinstance(logparser_pid, int) and logparser_pid > 0
    assert app.config['POLL_PID'] is None
    req(app, client, view='settings', kws=dict(node=1), ins='logparser_pid: %s' % logparser_pid)

    sleep()

    stats_json = json_loads_from_file(app.config['STATS_JSON_PATH'])
    assert stats_json['logparser_version'] == cst.LOGPARSER_VERSION
    assert cst.DEMO_JOBID in stats_json['datas'][cst.PROJECT][cst.SPIDER]
    demo_json = json_loads_from_file(app.config['DEMO_JSON_PATH'])
    assert demo_json['runtime'] == '0:01:08'
    assert demo_json['finish_reason'] == 'finished'
    assert demo_json['logparser_version'] == cst.LOGPARSER_VERSION
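For reference, the assertions above imply roughly this shape for the JSON files written by LogParser; the concrete values below are placeholders, only the field names come from the test:

stats_json = {
    'logparser_version': '<cst.LOGPARSER_VERSION>',   # placeholder value
    'datas': {
        '<cst.PROJECT>': {                            # project name
            '<cst.SPIDER>': {                         # spider name
                '<cst.DEMO_JOBID>': {},               # per-job stats
            },
        },
    },
}
demo_json = {
    'runtime': '0:01:08',
    'finish_reason': 'finished',
    'logparser_version': '<cst.LOGPARSER_VERSION>',   # placeholder value
}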
Example #6
def test_log_utf8_stats(app, client):
    upload_file_deploy(app, client, filename='demo.egg', project=cst.PROJECT, redirect_project=cst.PROJECT)

    with app.test_request_context():
        kws = dict(node=1, opt='start', project=cst.PROJECT, version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        jobid = js['jobid']

        sleep()

        # the Stats page
        req(app, client, view='log', kws=dict(node=1, opt='stats', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='Stats collection')
        # the Log page
        req(app, client, view='log', kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')

        # For testing request_scrapy_log() of LogView in log.py
        app.config['SCRAPYD_LOGS_DIR'] = 'dir-not-exist'
        req(app, client, view='log', kws=dict(node=1, opt='utf8', project=cst.PROJECT, spider=cst.SPIDER, job=jobid),
            ins='log - ScrapydWeb')

        # the Dashboard page
        url_stop = url_for('api', node=1, opt='stop', project=cst.PROJECT, version_spider_job=jobid)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_stop)

        client.get(url_for('api', node=1, opt='forcestop', project=cst.PROJECT, version_spider_job=jobid))

        # /1/schedule/ScrapydWeb_demo/default:%20the%20latest%20version/test/
        url_start = url_for('schedule.schedule', node=1, project=cst.PROJECT,
                            version=cst.DEFAULT_LATEST_VERSION, spider=cst.SPIDER)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_start)
Example #7
def start_a_job():
    kws = dict(node=1,
               opt='start',
               project=cst.PROJECT,
               version_spider_job=cst.SPIDER)
    __, js = req(app, client, view='api', kws=kws)
    sleep()
    return js['jobid']
Example #8
def test_api_stop(app, client):
    sleep()
    req(app,
        client,
        view='api',
        kws=dict(node=1, opt='stop', project=PROJECT,
                 version_spider_job=jobid),
        jskws=dict(status=OK, prevstate='running'),
        nos='times')
Example #9
def test_api_forcestop(app, client):
    sleep(5)
    req(app,
        client,
        view='api',
        kws=dict(node=1,
                 opt='forcestop',
                 project=PROJECT,
                 version_spider_job=jobid),
        jskws=dict(status=OK, prevstate=None, times=2))
Example #10
def test_delete_task_or_task_result_on_the_fly(app, client):
    for kind in ['delete_task', 'delete_task_result']:
        check_data_ = dict(check_data)

        req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
            jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))

        with app.test_request_context():
            text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
                           location=url_for('tasks', node=NODE))
        m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
        task_id = int(m.group(1))
        print("task_id: %s" % task_id)

        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
        assert js['data']['selected_nodes'] == [1, 2]

        sleep(2)
        # the first execution has not finished yet
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 1
        task_result_id = js['ids'][0]
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 1

        if kind == 'delete_task':
            req(app, client, view='tasks.xhr',
                kws=dict(node=NODE, action='delete', task_id=task_id))
        else:
            req(app, client, view='tasks.xhr',
                kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))

        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list'))
        if kind == 'delete_task':
            assert task_id not in js['ids']
        else:
            assert task_id in js['ids']

        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 0
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 0

        sleep(28)
        req(app, client, view='tasks.xhr',
            kws=dict(node=NODE, action='delete', task_id=task_id, task_result_id=task_result_id))
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='list', task_id=task_id))
        assert len(js['ids']) == 0
        __, js = req(app, client, view='tasks.xhr',
                     kws=dict(node=NODE, action='list', task_id=task_id, task_result_id=task_result_id))
        assert len(js['ids']) == 0

        req(app, client, view='tasks.xhr', kws=dict(node=1, action='delete', task_id=task_id))
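Several of the task examples extract the new task id from the schedule.run response via cst.TASK_NEXT_RUN_TIME_PATTERN. The exact pattern is not shown in this listing; a hypothetical helper that captures the same contract (group(1) is the task id) could look like this:

import re
from urllib.parse import unquote_plus


def extract_task_id(response_text, pattern):
    # `pattern` must expose the newly created task id as group(1),
    # as cst.TASK_NEXT_RUN_TIME_PATTERN does in the tests above.
    m = re.search(pattern, unquote_plus(response_text))
    assert m is not None, "task id not found in the schedule.run response"
    return int(m.group(1))

Usage mirrors the tests: task_id = extract_task_id(text, cst.TASK_NEXT_RUN_TIME_PATTERN).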
Example #11
def test_api_forcestop(app, client):
    sleep(5)
    with app.test_request_context():
        url = url_for('api',
                      node=1,
                      opt='forcestop',
                      project=PROJECT,
                      version_spider_job=jobid,
                      ui='mobile')
        response = client.get(url)
        js = load_json(response)
        assert js['status'] == OK and js['prevstate'] is None and js['times'] == 2
Example #12
def check_pass(recipients=None, subject='Email from #scrapydweb', text=None):
    assert js['status'] == cst.OK
    assert js['result']['reason'] == 'Sent'
    assert js['result']['sender'] == app.config['EMAIL_SENDER']
    if recipients is not None:
        assert js['result']['recipients'] == recipients
    if subject is not None:
        assert js['result']['subject'] == subject
    if text is not None:
        assert js['result']['text'] == text
    assert 'debug' not in js
    assert js['when']
    sleep(10)
Example #13
def test_api_stop(app, client):
    sleep()

    with app.test_request_context():
        url = url_for('api',
                      node=1,
                      opt='stop',
                      project=PROJECT,
                      version_spider_job=jobid,
                      ui='simple')
        response = client.get(url)
        js = load_json(response)
        assert js['status'] == OK and js['prevstate'] == 'running' and 'times' not in js
Example #14
def test_pending_jobs(app, client):
    node = 1
    for i in range(2):
        req(app,
            client,
            view='schedule.run',
            kws=dict(node=NODE),
            data=run_data,
            ins="run results - ScrapydWeb")
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins="Vue.extend(Main)",
        nos='class="table wrap"')
    sleep()
    for i in range(2):
        req(app,
            client,
            view='schedule.run',
            kws=dict(node=NODE),
            data=run_data,
            ins="run results - ScrapydWeb")
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins="Vue.extend(Main)",
        nos='class="table wrap"')
    sleep()
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins=[
            "Ignore seen running job: %s, started at" % KEY, "Vue.extend(Main)"
        ],
        nos='class="table wrap"')
    for i in range(4):
        req(app,
            client,
            view='api',
            kws=dict(node=node,
                     opt='forcestop',
                     project=cst.PROJECT,
                     version_spider_job=cst.JOBID))
Example #15
def test_run(app, client):
    # ScrapydWeb_demo.egg: custom_settings = {}, also log settings & arguments
    upload_file_deploy(app,
                       client,
                       filename='ScrapydWeb_demo.egg',
                       project=cst.PROJECT,
                       redirect_project=cst.PROJECT)

    with app.test_request_context():
        req_single_scrapyd(app,
                           client,
                           view='schedule.run',
                           kws=dict(node=1),
                           data=dict(filename='%s_%s_%s.pickle' %
                                     (cst.PROJECT, cst.VERSION, cst.SPIDER)),
                           location=url_for('dashboard', node=1))

    sleep()

    ins = [
        'JOB: %s' % cst.JOBID, 'USER_AGENT: Mozilla/5.0',
        'COOKIES_ENABLED: False', 'ROBOTSTXT_OBEY: False',
        'CONCURRENT_REQUESTS: 1', 'DOWNLOAD_DELAY: 2',
        'CLOSESPIDER_TIMEOUT: 60', 'CLOSESPIDER_PAGECOUNT: 10',
        'self.arg1: val1'
    ]
    req_single_scrapyd(app,
                       client,
                       view='log',
                       kws=dict(node=1,
                                opt='utf8',
                                project=cst.PROJECT,
                                spider=cst.SPIDER,
                                job=cst.JOBID),
                       ins=ins)
    req_single_scrapyd(app,
                       client,
                       view='api',
                       kws=dict(node=1,
                                opt='forcestop',
                                project=cst.PROJECT,
                                version_spider_job=cst.JOBID))
Example #16
def test_edit_task(app, client):
    task_id = metadata['task_id']
    # http://127.0.0.1:5000/1/schedule/?task_id=1
    req(app, client, view='schedule', kws=dict(node=NODE, task_id=task_id),
        ins=["checked />%s" % app.config['SCRAPYD_SERVERS'][0], "checked />%s" % app.config['SCRAPYD_SERVERS'][-1]])

    check_data_ = dict(check_data)
    check_data_.update(task_id=task_id, hour='6')
    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))

    with app.test_request_context():
        metadata['location'] = url_for('tasks', node=NODE)
    text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data_single_scrapyd,
                   location=metadata['location'])
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    assert int(m.group(1)) == task_id

    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['selected_nodes'] == [1]

    sleep()
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["fail_times: 1,", "run_times: 'FAIL 1 / 2',"])
    text, __ = req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
                   ins=["fail_count: 0,", "fail_count: 1,", "pass_count: 1,", ":total='2'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    new_task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("new_task_result_id: %s" % new_task_result_id)
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id, task_result_id=new_task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',", ":total='1'"])

    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert '06:00:00' in js['data']['apscheduler_job']['next_run_time']

    req(app, client, view='schedule', kws=dict(node=NODE, task_id=task_id),
        ins="checked />%s" % app.config['SCRAPYD_SERVERS'][0],
        nos="checked />%s" % app.config['SCRAPYD_SERVERS'][-1])
Example #17
def test_check_result(app, client):
    task_id = metadata['task_id']
    sleep(2)
    # The first execution has not finished yet: self.sleep_seconds_before_retry = 3
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 0, PASS 0',", "fail_times: 0,", "run_times: 1,"])
    text, __ = req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
                   ins=["fail_count: 0,", "pass_count: 0,", ":total='1'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("task_result_id: %s" % task_result_id)
    metadata['task_result_id'] = task_result_id
    with app.test_request_context():
        url_delete_task_result = url_for('tasks.xhr', node=NODE, action='delete',
                                         task_id=task_id, task_result_id=task_result_id)
    assert url_delete_task_result in text
    sleep(8)
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0],
             "status_code: 200,", "status: 'ok',"])  # , ":total='1'"

    sleep(20)
    # The first execution has finished
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 1',", "fail_times: 1,", "run_times: 'FAIL 1 / 1',"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["fail_count: 1,", "pass_count: 1,", ":total='1'"])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0], "status_code: 200,", "status: 'ok',",
             "node: 2,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][-1], "status_code: -1,", "status: 'error',",
             ":total='2'"])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert '03:00:00' in js['data']['apscheduler_job']['next_run_time']
Example #18
def test_execute_task_exception(app, client):
    check_data_ = dict(check_data)
    check_data_.update(action='add')

    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))

    with app.test_request_context():
        text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data,
                       location=url_for('tasks', node=NODE))
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    task_id = int(m.group(1))
    print("task_id: %s" % task_id)

    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['selected_nodes'] == [1, 2]

    # req_single_scrapyd would set single_scrapyd=True
    req_single_scrapyd(app, client, view='tasks.xhr', kws=dict(node=1, action='fire', task_id=task_id))

    sleep()

    req(app, client, view='tasks', kws=dict(node=1),
        ins=["id: %s," % task_id, "prev_run_result: 'FAIL 1, PASS 1',", "fail_times: 1,", "run_times: 'FAIL 1 / 1',"])
    text, __ = req(app, client, view='tasks', kws=dict(node=1, task_id=task_id),
                   ins=["fail_count: 1,", "pass_count: 1,", ":total='1'"])
    with app.test_request_context():
        url_delete = url_for('tasks.xhr', node=1, action='delete', task_id=task_id)
    # in the task results page: url_action: '/1/tasks/xhr/delete/5/10/',
    task_result_id = int(re.search(r'%s(\d+)/' % url_delete, text).group(1))
    print("task_result_id: %s" % task_result_id)
    # In baseview.py: assert 0 < self.node <= self.SCRAPYD_SERVERS_AMOUNT
    # Note that the AssertionError would be raised directly in the test, whereas internal_server_error() would return
    # 500.html instead when the app is actually running, showing '500 error node index error: 2, which should be between 1 and 1'
    req(app, client, view='tasks', kws=dict(node=1, task_id=task_id, task_result_id=task_result_id),
        ins=["node: 1,", "server: '%s'," % app.config['SCRAPYD_SERVERS'][0], "status_code: 200,", "status: 'ok',",
             "node: 2,", "status_code: -1,", "status: 'exception',", "node index error", ":total='2'"])

    req(app, client, view='tasks.xhr', kws=dict(node=1, action='delete', task_id=task_id))
Example #19
File: test_log.py  Project: wung/scrapydweb
def test_log_utf8_stats(app, client):
    upload_file_deploy(app, client, filename='demo.egg', project=PROJECT, redirect_project=PROJECT)

    with app.test_request_context():
        url = url_for('api', node=1, opt='start', project=PROJECT, version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()

        # UTF8 page
        url = url_for('log', node=1, opt='utf8', project=PROJECT, spider=SPIDER, job=jobid)
        response = client.get(url)
        assert 'utf8 - ScrapydWeb' in get_text(response) and not is_simple_ui(response)

        client.get(url_for('api', node=1, opt='forcestop', project=PROJECT, version_spider_job=jobid))

        # Stats page
        url = url_for('log', node=1, opt='stats', project=PROJECT, spider=SPIDER, job=jobid)
        response = client.get(url)
        assert 'Stats collection' in get_text(response) and not is_simple_ui(response)
Example #20
def test_task_start_execute_end(app, client):
    while True:
        now_datetime = datetime.now()
        if now_datetime.second % 10 != 1:
            sleep(1)
        else:
            break
    start_datetime = now_datetime + timedelta(seconds=8)
    first_execute_datetime = now_datetime + timedelta(seconds=9)
    second_execute_datetime = now_datetime + timedelta(seconds=14)
    end_datetime = now_datetime + timedelta(seconds=18)
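    # Timing sketch: now_datetime ends on a second of the form X1 (e.g. :21).
    # start_date is 8s later (second ending in 9); the cron trigger second='*/5'
    # then fires at seconds ending in 0 and 5, i.e. at roughly now+9 and now+14,
    # and end_date at now+18 cuts the task off after exactly two executions.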
    check_data_ = dict(check_data)
    check_data_.update(action='add', hour='*', minute='*', second='*/5',
                       start_date=start_datetime.strftime("%Y-%m-%d %H:%M:%S"),
                       end_date=end_datetime.strftime("%Y-%m-%d %H:%M:%S"))
    req(app, client, view='schedule.check', kws=dict(node=NODE), data=check_data_,
        jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
    text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data_single_scrapyd,
                   location=metadata['location'])
    m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
    task_id = int(m.group(1))
    print("task_id: %s" % task_id)
    with app.test_request_context():
        url_pause = url_for('tasks.xhr', node=NODE, action='pause', task_id=task_id)
        url_resume = url_for('tasks.xhr', node=NODE, action='resume', task_id=task_id)
        url_delete = url_for('tasks.xhr', node=NODE, action='delete', task_id=task_id)
        url_task_results = url_for('tasks', node=NODE, task_id=task_id)
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_pause, url_task_results,
             "id: %s," % task_id, "prev_run_result: '%s'," % cst.NA, "run_times: 0,"],
        nos=[url_resume, url_delete])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert first_execute_datetime.strftime("%Y-%m-%d %H:%M:%S") in js['data']['apscheduler_job']['next_run_time']

    sleep(10)
    # The first execution may or may not have finished
    req(app, client, view='tasks', kws=dict(node=NODE), ins=["id: %s," % task_id, "run_times: 1,"])
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_pause, url_task_results, "id: %s," % task_id, "run_times: 1,"],
        nos=[url_resume, url_delete])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id), ins=":total='1'")
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert second_execute_datetime.strftime("%Y-%m-%d %H:%M:%S") in js['data']['apscheduler_job']['next_run_time']

    sleep(10)
    req(app, client, view='tasks', kws=dict(node=NODE), ins=["id: %s," % task_id, "run_times: 2,"])
    req(app, client, view='tasks', kws=dict(node=NODE),
        ins=[url_delete, url_task_results, "id: %s," % task_id, "next_run_time: '%s'," % cst.NA, "run_times: 2,"],
        nos=[url_pause, url_resume])
    req(app, client, view='tasks', kws=dict(node=NODE, task_id=task_id),
        ins=["status_code: 200,", "status: 'ok',", ":total='2'"],
        nos=["status_code: -1,", "status: 'error',"])
    __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
    assert js['data']['apscheduler_job'] is None

    req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='delete', task_id=task_id))
Example #21
def test_auto_remove_apscheduler_job_if_task_not_exist(app, client):
    check_data_ = dict(check_data)
    check_data_.update(action='add')

    for kind in ['visit timer tasks', 'execute_task()']:
        req(app, client, view='schedule.check', kws=dict(node=NODE), data=dict(check_data_),
            jskws=dict(cmd="-d _version=%s" % cst.VERSION, filename=FILENAME))
        text, __ = req(app, client, view='schedule.run', kws=dict(node=NODE), data=run_data_single_scrapyd,
                       location=metadata['location'])
        m = re.search(cst.TASK_NEXT_RUN_TIME_PATTERN, unquote_plus(text))
        task_id = int(m.group(1))
        print("task_id: %s" % task_id)

        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id))
        assert '03:00:00' in js['data']['apscheduler_job']['next_run_time']

        req(app, client, view='tasks.xhr',
            kws=dict(node=NODE, action='delete', task_id=task_id, ignore_apscheduler_job='True'))

        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id),
                     jskws=dict(message="apscheduler_job #{id} found. Task #{id} not found".format(id=task_id)))
        assert js['data']['apscheduler_job'] == task_id

        # apscheduler_job #1 removed since task #1 not exist
        if kind == 'execute_task()':
            req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='fire', task_id=task_id))
            sleep()
        else:
            req(app, client, view='tasks', kws=dict(node=NODE),
                ins="apscheduler_job #{id} removed since task #{id} not exist".format(id=task_id),
                nos="id: %s," % task_id)
        __, js = req(app, client, view='tasks.xhr', kws=dict(node=NODE, action='dump', task_id=task_id),
                     jskws=dict(
                         status=cst.ERROR,
                         message="apscheduler_job #{id} not found. Task #{id} not found".format(id=task_id)))
        assert js['data'] is None
Example #22
def test_run(app, client):
    node = 1

    req(app,
        client,
        view='schedule.run',
        kws=dict(node=NODE),
        data=run_data,
        ins=[
            'run results - ScrapydWeb', 'id="checkbox_1"', 'id="checkbox_2"',
            'onclick="passToServers();"'
        ])

    # test handle_unique_constraint() in jobs.py
    sleep()
    req(app,
        client,
        view='schedule.run',
        kws=dict(node=NODE),
        data=run_data_single_scrapyd,
        ins=[
            'run results - ScrapydWeb', 'id="checkbox_1"',
            'onclick="passToServers();"'
        ],
        nos='id="checkbox_2"')
    keep_text = ''
    for times in [1, 2]:
        __, js = req(app,
                     client,
                     view='api',
                     kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
        if js['pending']:
            final_pending_job = js['pending'][-1]
            assert final_pending_job['id'] == cst.JOBID
            first_job = js['running'][-1]
            first_job_start = first_job['start_time'][:19]
            # Ignore seen pending job: ScrapydWeb_demo/test/2018-01-01T01_01_02
            flash = "Ignore seen pending job: %s" % KEY
            ins = ["Vue.extend(Main)", "start: '%s'," % first_job_start]
            nos = ['class="table wrap"', "Ignore seen running job"]
            text, __ = req(app,
                           client,
                           view='jobs',
                           kws=dict(node=node, style='database'))
            try:
                if times == 1:
                    assert flash in text
                else:
                    assert flash not in text
                for i in ins:
                    assert i in text
                for n in nos:
                    assert n not in text
            except AssertionError:
                # the response contains 'Ignore seen running' instead
                keep_text = text
                break
        else:
            break

    sleep()
    __, js = req(app,
                 client,
                 view='api',
                 kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
    first_job = js['running'][-2]
    first_job_start = first_job['start_time'][:19]
    second_job = js['running'][-1]
    second_job_start = second_job['start_time'][:19]
    assert first_job['id'] == second_job['id'] == cst.JOBID
    # TODO: For compatibility with Debian?! Running job with same key is not ordered by start ASC?!
    # assert second_job_start > first_job_start
    # Ignore seen running job: ScrapydWeb_demo/test/2018-01-01T01_01_02, started at 2019-03-01 20:27:22
    flash = "Ignore seen running job: %s, started at %s" % (KEY,
                                                            first_job_start)
    if keep_text:
        text = keep_text
    else:
        text, __ = req(app,
                       client,
                       view='jobs',
                       kws=dict(node=node, style='database'))
    for i in [flash, "Vue.extend(Main)", "start: '%s'," % second_job_start]:
        assert i in text
    for n in ['class="table wrap"', "start: '%s'," % first_job_start]:
        assert n not in text
    # flash only once
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins=["Vue.extend(Main)",
             "start: '%s'," % second_job_start],
        nos=[flash, 'class="table wrap"',
             "start: '%s'," % first_job_start])

    for i in range(2):
        req(app,
            client,
            view='api',
            kws=dict(node=node,
                     opt='forcestop',
                     project=cst.PROJECT,
                     version_spider_job=cst.JOBID))
    sleep()
    __, js = req(app,
                 client,
                 view='api',
                 kws=dict(node=node, opt='listjobs', project=cst.PROJECT))
    last_but_two_finished_job = js['finished'][-2]
    last_but_two_finished_job_start = last_but_two_finished_job['start_time'][:19]
    last_finished_job = js['finished'][-1]
    last_finished_job_start = last_finished_job['start_time'][:19]
    assert last_but_two_finished_job['id'] == last_finished_job['id'] == cst.JOBID
    # Ignore seen finished job: ScrapydWeb_demo/test/2018-01-01T01_01_02, started at 2019-03-01 20:27:22
    flash = "Ignore seen finished job: %s, started at %s" % (KEY, last_but_two_finished_job_start)
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins=[
            flash, "Vue.extend(Main)",
            "start: '%s'," % last_finished_job_start
        ],
        nos=[
            'class="table wrap"',
            "start: '%s'," % last_but_two_finished_job_start
        ])
    # flash only once
    req(app,
        client,
        view='jobs',
        kws=dict(node=node, style='database'),
        ins=["Vue.extend(Main)",
             "start: '%s'," % last_finished_job_start],
        nos=[
            flash, 'class="table wrap"',
            "start: '%s'," % last_but_two_finished_job_start
        ])
Example #23
def test_telnet_in_stats(app, client):
    node = 1
    desktop_ins = [
        ">Log analysis</li>", ">Log categorization</li>", ">View log</li>",
        ">Progress visualization</li>"
    ]
    mobile_ins = [
        ">Analysis</li>", ">Categories</li>", ">Charts</li>", ">Logs</li>"
    ]
    telnet_ins = [
        ">Crawler.stats</li>", "<td>datetime.datetime(",
        ">Crawler.engine</li>", "<th>engine.has_capacity()</th>",
        "<td>telnet</td>"
    ]
    telnet_nos = ["CRITICAL: Unhandled Error", "telnet.OptionRefused"]
    req(app,
        client,
        view='schedule.run',
        kws=dict(node=NODE),
        data=run_data,
        ins="run results - ScrapydWeb")

    kws = dict(node=node,
               opt='stats',
               project=cst.PROJECT,
               spider=cst.SPIDER,
               job=cst.JOBID)
    for i in range(1, 10):
        sleep(10)
        print(i * 10)
        text, __ = req(app, client, view='log', kws=kws)
        if desktop_ins[-1] in text and telnet_ins[-1] in text:
            print("Found: %s %s" % (desktop_ins[-1], telnet_ins[-1]))
            break
    # test jobs POST data={} to save pages and items in database
    __, js = req(app, client, view='jobs', kws=dict(node=node), data={})
    assert isinstance(js[KEY]['pages'], int)  # and js[KEY]['pages'] > 0

    if scrapy_version > '1.5.1':
        print("telnet not available for scrapy_version: %s" % scrapy_version)
        telnet_ins = []

    req(app,
        client,
        view='log',
        kws=kws,
        ins=desktop_ins + telnet_ins,
        nos=telnet_nos)

    kws.update(ui='mobile')
    req(app,
        client,
        view='log',
        kws=kws,
        ins=mobile_ins + telnet_ins,
        nos=telnet_nos,
        mobileui=True)

    req(app,
        client,
        view='api',
        kws=dict(node=node,
                 opt='forcestop',
                 project=cst.PROJECT,
                 version_spider_job=cst.JOBID))
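One caveat in the example above: if scrapy_version is a plain string, the comparison scrapy_version > '1.5.1' orders lexicographically and can misjudge versions such as '1.10.0'. A version-aware check (a sketch using the packaging library, not part of the original test) would be:

from packaging.version import parse as parse_version

# parse_version('1.10.0') > parse_version('1.5.1') is True,
# whereas the plain string comparison '1.10.0' > '1.5.1' is False.
if parse_version(scrapy_version) > parse_version('1.5.1'):
    print("telnet not available for scrapy_version: %s" % scrapy_version)
    telnet_ins = []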
Example #24
def test_log_utf8_stats(app, client):
    upload_file_deploy(app,
                       client,
                       filename='demo.egg',
                       project=PROJECT,
                       redirect_project=PROJECT)

    with app.test_request_context():
        __, js = req(app,
                     client,
                     view='api',
                     kws=dict(node=1,
                              opt='start',
                              project=PROJECT,
                              version_spider_job=SPIDER))
        print(js)
        jobid = js['jobid']

        sleep()

        # Log page
        req(app,
            client,
            view='log',
            kws=dict(node=1,
                     opt='utf8',
                     project=PROJECT,
                     spider=SPIDER,
                     job=jobid),
            ins='log - ScrapydWeb')

        # Stats page
        req(app,
            client,
            view='log',
            kws=dict(node=1,
                     opt='stats',
                     project=PROJECT,
                     spider=SPIDER,
                     job=jobid),
            ins='Stats collection')

        # Dashboard page
        url_stop = url_for('api',
                           node=1,
                           opt='stop',
                           project=PROJECT,
                           version_spider_job=jobid)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_stop)

        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # /1/schedule/ScrapydWeb-demo/default:%20the%20latest%20version/test/
        url_start = url_for('schedule.schedule',
                            node=1,
                            project=PROJECT,
                            version=DEFAULT_LATEST_VERSION,
                            spider=SPIDER)
        req(app, client, view='dashboard', kws=dict(node=1), ins=url_start)
Example #25
def test_email(app, client):
    with app.test_request_context():
        if not app.config.get('ENABLE_EMAIL', False):
            return

        # Simulate caching post 'Finished'
        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='True')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # Simulate caching post 'ForceStopped'
        app.config['ON_JOB_FINISHED'] = False
        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # Simulate caching post 'Stopped'
        app.config['LOG_CRITICAL_THRESHOLD'] = 0
        app.config['LOG_REDIRECT_THRESHOLD'] = 1
        app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # Simulate caching post 'Triggered'
        app.config['LOG_REDIRECT_THRESHOLD'] = 0
        app.config['LOG_IGNORE_THRESHOLD'] = 1

        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # Simulate caching post 'Running'
        app.config['LOG_IGNORE_THRESHOLD'] = 0
        app.config['ON_JOB_RUNNING_INTERVAL'] = 5

        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        # Would NOT trigger email
        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)

        # Would trigger email
        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)

        # Would NOT trigger email
        app.config['ON_JOB_RUNNING_INTERVAL'] = 0
        sleep()
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid,
                      job_finished='')
        response = client.post(url, content_type='multipart/form-data')
        assert 'Stats collection' in get_text(response)
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))
Example #26
def test_log_utf8_stats(app, client):
    upload_file_deploy(app,
                       client,
                       filename='demo.egg',
                       project=PROJECT,
                       redirect_project=PROJECT)

    with app.test_request_context():
        url = url_for('api',
                      node=1,
                      opt='start',
                      project=PROJECT,
                      version_spider_job=SPIDER)
        response = client.get(url)
        js = load_json(response)
        jobid = js['jobid']

        sleep()

        # Log page
        url = url_for('log',
                      node=1,
                      opt='utf8',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid)
        response = client.get(url)
        assert 'log - ScrapydWeb' in get_text(response) and not is_mobileui(response)

        # Stats page
        url = url_for('log',
                      node=1,
                      opt='stats',
                      project=PROJECT,
                      spider=SPIDER,
                      job=jobid)
        response = client.get(url)
        assert 'Stats collection' in get_text(response) and not is_mobileui(response)

        # Dashboard page
        url = url_for('dashboard', node=1)
        response = client.get(url)
        url_stop = url_for('api',
                           node=1,
                           opt='stop',
                           project=PROJECT,
                           version_spider_job=jobid)
        assert url_stop in get_text(response)

        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=PROJECT,
                    version_spider_job=jobid))

        # /1/schedule/ScrapydWeb-demo/default:%20the%20latest%20version/test/
        response = client.get(url)
        url_start = url_for('schedule.schedule',
                            node=1,
                            project=PROJECT,
                            version=DEFAULT_LATEST_VERSION,
                            spider=SPIDER)
        assert url_start in get_text(response)
Example #27
def test_log_utf8_stats(app, client):
    # In ScrapydWeb_demo.egg: CONCURRENT_REQUESTS=1, DOWNLOAD_DELAY=10
    upload_file_deploy(app,
                       client,
                       filename='ScrapydWeb_demo.egg',
                       project=cst.PROJECT,
                       redirect_project=cst.PROJECT)

    with app.test_request_context():
        kws = dict(node=1,
                   opt='start',
                   project=cst.PROJECT,
                   version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        jobid = js['jobid']
        sleep()

        # the Stats page
        req(app,
            client,
            view='log',
            kws=dict(node=1,
                     opt='stats',
                     project=cst.PROJECT,
                     spider=cst.SPIDER,
                     job=jobid),
            ins='Log analysis')
        # the Log page
        req(app,
            client,
            view='log',
            kws=dict(node=1,
                     opt='utf8',
                     project=cst.PROJECT,
                     spider=cst.SPIDER,
                     job=jobid),
            ins='log - ScrapydWeb')

        # For testing request_scrapy_log() of LogView in log.py
        app.config['LOCAL_SCRAPYD_LOGS_DIR'] = 'non-exist-dir'
        req(app,
            client,
            view='log',
            kws=dict(node=1,
                     opt='utf8',
                     project=cst.PROJECT,
                     spider=cst.SPIDER,
                     job=jobid),
            ins='log - ScrapydWeb')

        # the Jobs page GET
        url_stop = url_for('api',
                           node=1,
                           opt='stop',
                           project=cst.PROJECT,
                           version_spider_job=jobid)
        url_jobs_classic = url_for('jobs', node=1, style='classic')
        url_jobs_database = url_for('jobs', node=1, style='database')
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='classic'),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'],
            nos="Vue.extend(Main)")
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='database'),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"],
            nos='class="table wrap"')

        # ?raise_exception=True
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='database'),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"],
            nos='class="table wrap"')
        req(app,
            client,
            view='jobs',
            kws=dict(node=1),
            ins=[url_stop, url_jobs_classic, "Vue.extend(Main)"],
            nos='class="table wrap"')
        req(app,
            client,
            view='metadata',
            kws=dict(node=1),
            jskws=dict(jobs_style='database'))

        req(app,
            client,
            view='jobs',
            kws=dict(node=1, raise_exception='True'),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'],
            nos="Vue.extend(Main)")
        req(app,
            client,
            view='metadata',
            kws=dict(node=1),
            jskws=dict(jobs_style='classic'))
        req(app,
            client,
            view='jobs',
            kws=dict(node=1),
            ins=[url_stop, url_jobs_database, 'class="table wrap"'],
            nos="Vue.extend(Main)")

        # jobs POST data={}
        jobs_key = '%s/%s/%s' % (cst.PROJECT, cst.SPIDER, jobid)  # type unicode in Python 2
        print('######')
        print(repr(jobs_key))
        print(type(jobs_key))
        __, js = req(app,
                     client,
                     view='jobs',
                     kws=dict(node=1),
                     data={},
                     jskeys=jobs_key)
        jobs_id = js[jobs_key]['id']
        jobs_start = js[jobs_key]['start']
        assert js[jobs_key]['deleted'] == '0'

        # JobsXhrView delete running job
        req(app,
            client,
            view='jobs.xhr',
            kws=dict(node=1, action='delete', id=jobs_id),
            jskws=dict(status=cst.OK))
        # Recover deleted running job
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='database'),
            ins=[
                'Recover deleted job:', url_stop,
                'id: %s,' % jobs_id, jobs_start
            ])

        # forcestop
        client.get(
            url_for('api',
                    node=1,
                    opt='forcestop',
                    project=cst.PROJECT,
                    version_spider_job=jobid))
        sleep()

        # /1/schedule/ScrapydWeb_demo/default:%20the%20latest%20version/test/   NOT unique
        url_start = url_for('schedule',
                            node=1,
                            project=cst.PROJECT,
                            version=cst.DEFAULT_LATEST_VERSION,
                            spider=cst.SPIDER)
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='classic'),
            ins=url_start)
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='database'),
            ins=url_start)

        # JobsXhrView delete finished
        req(app,
            client,
            view='jobs.xhr',
            kws=dict(node=1, action='delete', id=jobs_id),
            jskws=dict(status=cst.OK))
        # JobsView: query_jobs(): self.jobs = self.Job.query.filter_by(deleted=NOT_DELETED)
        # POST data={}
        req(app,
            client,
            view='jobs',
            kws=dict(node=1),
            data={},
            nos=['id: %s,' % jobs_id, jobs_start])
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='database'),
            nos=['id: %s,' % jobs_id, jobs_start])
        req(app,
            client,
            view='jobs',
            kws=dict(node=1, style='classic'),
            ins=jobs_start[5:])
        # delete id not exist
        req(app,
            client,
            view='jobs.xhr',
            kws=dict(node=1, action='delete', id=cst.BIGINT),
            jskws=dict(status=cst.ERROR))
Example #28
def test_monitor_alert(app, client):
    # In ScrapydWeb_demo_no_delay.egg: unset CONCURRENT_REQUESTS, unset DOWNLOAD_DELAY
    upload_file_deploy(app,
                       client,
                       filename='ScrapydWeb_demo_no_delay.egg',
                       project=cst.PROJECT,
                       redirect_project=cst.PROJECT)

    # with app.test_request_context():
    if not (app.config.get('ENABLE_MONITOR', False)
            and app.config.get('ENABLE_EMAIL_ALERT', False)):
        return

    def start_a_job():
        kws = dict(node=1,
                   opt='start',
                   project=cst.PROJECT,
                   version_spider_job=cst.SPIDER)
        __, js = req(app, client, view='api', kws=kws)
        sleep()
        return js['jobid']

    def forcestop_a_job(job):
        req(app,
            client,
            view='api',
            kws=dict(node=1,
                     opt='forcestop',
                     project=cst.PROJECT,
                     version_spider_job=job))

    def post_for_poll(job, job_finished=''):
        kws = dict(node=1,
                   opt='stats',
                   project=cst.PROJECT,
                   spider=cst.SPIDER,
                   job=job,
                   job_finished=job_finished)
        req(app, client, view='log', kws=kws, data={}, ins='Log analysis')

    # Simulate poll post 'Finished'
    app.config['ON_JOB_FINISHED'] = True
    jobid = start_a_job()
    post_for_poll(jobid, job_finished='True')
    forcestop_a_job(jobid)
    sleep()

    # Simulate poll post 'ForceStopped'
    app.config['ON_JOB_FINISHED'] = False
    app.config['LOG_CRITICAL_THRESHOLD'] = 1
    app.config['LOG_CRITICAL_TRIGGER_FORCESTOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()

    # Simulate poll post 'Stopped'
    app.config['LOG_CRITICAL_THRESHOLD'] = 0
    app.config['LOG_REDIRECT_THRESHOLD'] = 1
    app.config['LOG_REDIRECT_TRIGGER_STOP'] = True
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()

    # Simulate poll post 'Triggered'
    app.config['LOG_REDIRECT_THRESHOLD'] = 0
    app.config['LOG_IGNORE_THRESHOLD'] = 1
    jobid = start_a_job()
    post_for_poll(jobid)
    forcestop_a_job(jobid)
    sleep()

    # Simulate poll post 'Running'
    app.config['LOG_IGNORE_THRESHOLD'] = 0
    app.config['ON_JOB_RUNNING_INTERVAL'] = 5
    jobid = start_a_job()
    post_for_poll(jobid)  # Would not trigger email

    sleep()
    post_for_poll(jobid)  # Would trigger email

    app.config['ON_JOB_RUNNING_INTERVAL'] = 0
    sleep()
    post_for_poll(jobid)  # Would not trigger email
    forcestop_a_job(jobid)
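To summarize this last example, each simulated poll state corresponds to the config flags set just before it (taken from the comments above; illustrative grouping only):

SIMULATED_STATES = {
    'Finished':     {'ON_JOB_FINISHED': True},
    'ForceStopped': {'LOG_CRITICAL_THRESHOLD': 1, 'LOG_CRITICAL_TRIGGER_FORCESTOP': True},
    'Stopped':      {'LOG_REDIRECT_THRESHOLD': 1, 'LOG_REDIRECT_TRIGGER_STOP': True},
    'Triggered':    {'LOG_IGNORE_THRESHOLD': 1},
    'Running':      {'ON_JOB_RUNNING_INTERVAL': 5},
}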