def test_end_to_end_basic(self):
    """End-to-end smoke test: load a config, reconfigure twice, start a
    job, then verify per-action states and captured stdout.
    """
    self.start_with_config(SINGLE_ECHO_CONFIG)
    client = self.sandbox.client
    # The uploaded config should round-trip through the API unchanged.
    # NOTE(review): this uses self.client while the rest uses the local
    # `client` — presumably both are the same API client; confirm.
    assert_equal(
        self.client.config('MASTER')['config'],
        SINGLE_ECHO_CONFIG,
    )

    # reconfigure and confirm results
    second_config = DOUBLE_ECHO_CONFIG + TOUCH_CLEANUP_FMT
    self.sandbox.tronfig(second_config)
    assert_equal(client.config('MASTER')['config'], second_config)

    # reconfigure, by uploading a third configuration (separate namespace)
    self.sandbox.tronfig(ALT_NAMESPACED_ECHO_CONFIG, name='ohce')
    self.sandbox.client.home()

    # run the job and check its output
    echo_job_name = 'MASTER.echo_job'
    job_url = client.get_url(echo_job_name)
    action_url = client.get_url('MASTER.echo_job.1.echo_action')
    self.sandbox.tronctl('start', echo_job_name)

    def wait_on_cleanup():
        # Done once a second run exists and echo_action has succeeded.
        return (
            len(client.job(job_url)['runs']) >= 2
            and client.action_runs(action_url)['state'] ==
            actionrun.ActionRun.SUCCEEDED
        )
    sandbox.wait_on_sandbox(wait_on_cleanup)

    echo_action_run = client.action_runs(action_url)
    another_action_url = client.get_url(
        'MASTER.echo_job.1.another_echo_action',
    )
    other_act_run = client.action_runs(another_action_url)
    assert_equal(
        echo_action_run['state'],
        actionrun.ActionRun.SUCCEEDED,
    )
    assert_equal(echo_action_run['stdout'], ['Echo!'])
    # another_echo_action is expected to end up FAILED while still having
    # produced the date line below on stdout.
    assert_equal(
        other_act_run['state'],
        actionrun.ActionRun.FAILED,
    )
    now = datetime.datetime.now()
    stdout = now.strftime(
        'Today is %Y-%m-%d, which is the same as %Y-%m-%d',
    )
    assert_equal(other_act_run['stdout'], [stdout])
    # One failed action marks the whole job run as FAILED.
    job_runs_url = client.get_url('%s.1' % echo_job_name)
    assert_equal(
        client.job_runs(job_runs_url)['state'],
        actionrun.ActionRun.FAILED,
    )
def test_failure_on_multi_step_job_doesnt_wedge_tron(self):
    """A constantly-scheduled job whose first action always fails must
    keep scheduling fresh runs instead of wedging the daemon.
    """
    config = BASIC_CONFIG + dedent(
        """
        jobs:
          - name: "random_failure_job"
            node: local
            queueing: true
            schedule: "constant"
            actions:
              - name: "fa"
                command: "sleep 0.1; failplz"
              - name: "sa"
                command: "echo 'you will never see this'"
                requires: [fa]
        """
    )
    self.start_with_config(config)
    job_url = self.client.get_url('MASTER.random_failure_job')

    def enough_runs_exist():
        return len(self.client.job(job_url)['runs']) >= 4
    sandbox.wait_on_sandbox(enough_runs_exist)

    # The three most recent runs should all have failed.
    latest_states = [
        run['state'] for run in self.client.job(job_url)['runs'][-3:]
    ]
    assert_equal(latest_states, [actionrun.ActionRun.FAILED] * 3)
def test_cleanup_on_failure(self):
    """After a job action fails, the cleanup action still runs and a new
    job run gets scheduled.
    """
    FAIL_CONFIG = BASIC_CONFIG + dedent("""
        jobs:
          - name: "failjob"
            node: local
            schedule: "constant"
            actions:
              - name: "failaction"
                command: "failplz"
        """) + TOUCH_CLEANUP_FMT
    client = self.sandbox.client
    self.sandbox.save_config(FAIL_CONFIG)
    self.sandbox.trond()

    # First run's action must reach FAIL.
    fail_url = client.get_url('MASTER.failjob.0.failaction')
    sandbox.wait_on_sandbox(
        lambda: client.action(fail_url)['state'] == 'FAIL')

    # A second run's cleanup must then succeed.
    cleanup_url = client.get_url('MASTER.failjob.1.cleanup')
    sandbox.wait_on_sandbox(
        lambda: client.action(cleanup_url)['state'] == 'SUCC')

    assert_gt(len(client.job(client.get_url('MASTER.failjob'))['runs']), 1)
def test_job_queueing_false_with_overlap(self):
    """Test that a job that has queueing false properly cancels
    an overlapping job run.
    """
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "cancel_overlap"
            schedule: "interval 1s"
            queueing: False
            node: local
            actions:
              - name: "do_something"
                command: "sleep 3s"
              - name: "do_other"
                command: "sleep 3s"
            cleanup_action:
              command: "echo done"
        """)
    self.start_with_config(config)
    job_url = self.client.get_url('MASTER.cancel_overlap')
    job_run_url = self.client.get_url('MASTER.cancel_overlap.1')

    def wait_on_job_schedule():
        return len(self.client.job(job_url)['runs']) == 2
    sandbox.wait_on_sandbox(wait_on_job_schedule)

    # The overlapping second run should be cancelled, not queued.
    sandbox.wait_on_state(
        self.client.job,
        job_run_url,
        actionrun.ActionRun.STATE_CANCELLED.name,
    )

    action_run_states = [
        action_run['state']
        for action_run in self.client.job_runs(job_run_url)['runs']
    ]
    # Fix: use range() — xrange() does not exist on Python 3 (the sibling
    # variant of this test already uses range()).
    expected = [
        actionrun.ActionRun.STATE_CANCELLED.name
        for _ in range(len(action_run_states))
    ]
    assert_equal(action_run_states, expected)
def test_cancel_schedules_a_new_run(self):
    """Cancelling a scheduled run immediately schedules a replacement."""
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "a_job"
            node: local
            schedule: "daily 05:00:00"
            actions:
              - name: "first_action"
                command: "echo OK"
        """)
    self.start_with_config(config)
    job_name = 'MASTER.a_job'
    job_url = self.client.get_url(job_name)
    self.sandbox.tronctl('cancel', '%s.0' % job_name)

    sandbox.wait_on_sandbox(
        lambda: len(self.client.job(job_url)['runs']) == 2)

    observed = [run['state'] for run in self.client.job(job_url)['runs']]
    assert_equal(
        observed,
        [actionrun.ActionRun.SCHEDULED, actionrun.ActionRun.CANCELLED],
    )
def test_skip_failed_actions(self):
    """Skipping a failed upstream action lets its dependent action run
    and the whole job run succeed.
    """
    CONFIG = BASIC_CONFIG + dedent("""
        jobs:
          - name: "multi_step_job"
            node: local
            schedule: "constant"
            actions:
              - name: "broken"
                command: "failingcommand"
              - name: "works"
                command: "echo ok"
                requires: broken
        """)
    client = self.sandbox.client
    self.sandbox.save_config(CONFIG)
    self.sandbox.trond()

    def build_wait_func(name, state):
        # Build a poll predicate for an action of run 0 reaching `state`.
        def wait_on_multi_step_job():
            action_name = 'multi_step_job.0.%s' % name
            return client.action(action_name)['state'] == state
        return wait_on_multi_step_job

    sandbox.wait_on_sandbox(build_wait_func('broken', 'FAIL'))
    self.sandbox.tronctl(['skip', 'multi_step_job.0.broken'])

    # Fix: wait for the state change BEFORE asserting it — the original
    # asserted 'SKIP' immediately after tronctl returned, which races the
    # daemon applying the skip.
    sandbox.wait_on_sandbox(build_wait_func('broken', 'SKIP'))
    assert_equal(client.action('multi_step_job.0.broken')['state'], 'SKIP')

    # Fix: the dependent action only starts after the skip, so wait for
    # it to finish instead of asserting 'SUCC' with no wait (race).
    sandbox.wait_on_sandbox(build_wait_func('works', 'SUCC'))
    assert_equal(client.action('multi_step_job.0.works')['state'], 'SUCC')
    assert_equal(client.job_runs('multi_step_job.0')['state'], 'SUCC')
def test_tronctl_service_zap(self):
    """`tronctl zap` forces a STARTING service straight to DOWN."""
    SERVICE_CONFIG = dedent("""
        nodes:
          - name: local
            hostname: 'localhost'
        services:
          - name: "fake_service"
            node: local
            count: 1
            pid_file: "%%(name)s-%%(instance_number)s.pid"
            command: "echo %(pid)s > %%(pid_file)s"
            monitor_interval: 0.1
        """ % {'pid': os.getpid()})

    client = self.sandbox.client
    self.sandbox.trond()
    self.sandbox.tronfig(SERVICE_CONFIG)

    # Block until the uploaded service appears in the live config.
    sandbox.wait_on_sandbox(lambda: 'fake_service' in client.config())

    self.sandbox.tronctl(['start', 'fake_service'])
    sandbox.wait_on_sandbox(
        lambda: client.service('fake_service')['state'] == 'STARTING')

    self.sandbox.tronctl(['zap', 'fake_service'])
    assert_equal('DOWN', client.service('fake_service')['state'])
def test_failure_on_multi_step_job_doesnt_wedge_tron(self):
    """A job whose first action always fails must keep producing new
    runs rather than wedging the daemon.
    """
    FAIL_CONFIG = BASIC_CONFIG + dedent("""
        jobs:
          - name: "random_failure_job"
            node: local
            queueing: true
            schedule: "constant"
            actions:
              - name: "fa"
                command: "sleep 0.1; failplz"
              - name: "sa"
                command: "echo 'you will never see this'"
                requires: [fa]
        """)
    client = self.sandbox.client
    self.sandbox.save_config(FAIL_CONFIG)
    self.sandbox.trond()

    sandbox.wait_on_sandbox(
        lambda: len(client.job('random_failure_job')['runs']) >= 4)

    last_three = client.job('random_failure_job')['runs'][-3:]
    assert_equal([run['state'] for run in last_three], ['FAIL'] * 3)
def test_job_queueing_false_with_overlap(self):
    """Test that a job that has queueing false properly cancels
    an overlapping job run.
    """
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "cancel_overlap"
            schedule: "interval 1s"
            queueing: False
            node: local
            actions:
              - name: "do_something"
                command: "sleep 3s"
              - name: "do_other"
                command: "sleep 3s"
            cleanup_action:
              command: "echo done"
        """)
    client = self.sandbox.client
    self.sandbox.save_config(config)
    self.sandbox.trond()
    # renamed from the misleading `job_run`: this is the job's URL
    job_url = client.get_url('MASTER.cancel_overlap')
    job_run_url = client.get_url('MASTER.cancel_overlap.1')

    sandbox.wait_on_sandbox(
        lambda: len(client.job(job_url)['runs']) == 2)
    sandbox.wait_on_sandbox(
        lambda: client.job(job_run_url)['state'] == 'CANC')

    states = [run['state'] for run in client.job(job_run_url)['runs']]
    assert_equal(states, ['CANC'] * len(states))
def test_cancel_schedules_a_new_run(self):
    """Cancelling a scheduled run makes a replacement run appear."""
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "a_job"
            node: local
            schedule: "daily 05:00:00"
            actions:
              - name: "first_action"
                command: "echo OK"
        """)
    client = self.sandbox.client
    self.sandbox.save_config(config)
    self.sandbox.trond()
    job_url = client.get_url('MASTER.a_job')
    self.sandbox.tronctl(['cancel', 'MASTER.a_job.0'])

    sandbox.wait_on_sandbox(
        lambda: len(client.job(job_url)['runs']) == 2)

    runs = client.job(job_url)['runs']
    assert_length(runs, 2)
    assert_equal([run['state'] for run in runs], ['SCHE', 'CANC'])
def test_failure_on_multi_step_job_doesnt_wedge_tron(self):
    """Repeated failures of a constant-schedule job must not stop new
    runs from being created.
    """
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "random_failure_job"
            node: local
            queueing: true
            schedule: "constant"
            actions:
              - name: "fa"
                command: "sleep 0.1; failplz"
              - name: "sa"
                command: "echo 'you will never see this'"
                requires: [fa]
        """)
    self.start_with_config(config)
    job_url = self.client.get_url('MASTER.random_failure_job')

    sandbox.wait_on_sandbox(
        lambda: len(self.client.job(job_url)['runs']) >= 4)

    tail = self.client.job(job_url)['runs'][-3:]
    assert_equal(
        [run['state'] for run in tail],
        [actionrun.ActionRun.FAILED] * 3,
    )
def test_cancel_schedules_a_new_run(self):
    """After cancelling run 0, a fresh run is scheduled in its place."""
    config = BASIC_CONFIG + dedent(
        """
        jobs:
          - name: "a_job"
            node: local
            schedule: "daily 05:00:00"
            actions:
              - name: "first_action"
                command: "echo OK"
        """
    )
    self.start_with_config(config)
    job_name = 'MASTER.a_job'
    job_url = self.client.get_url(job_name)
    self.sandbox.tronctl('cancel', '%s.0' % job_name)

    sandbox.wait_on_sandbox(
        lambda: len(self.client.job(job_url)['runs']) == 2)

    states = [run['state'] for run in self.client.job(job_url)['runs']]
    assert_equal(
        states,
        [actionrun.ActionRun.SCHEDULED, actionrun.ActionRun.CANCELLED],
    )
def test_end_to_end_basic(self):
    """End-to-end test: load a config, reconfigure (checking emitted
    events), run a job, and verify action states and stdout.
    """
    self.start_with_config(SINGLE_ECHO_CONFIG)
    client = self.sandbox.client
    # NOTE(review): mixes self.client and the local `client` — presumably
    # the same API client; confirm.
    assert_equal(
        self.client.config('MASTER')['config'],
        SINGLE_ECHO_CONFIG,
    )

    # reconfigure and confirm results
    second_config = DOUBLE_ECHO_CONFIG + TOUCH_CLEANUP_FMT
    self.sandbox.tronfig(second_config)
    # Reconfiguration should have emitted a 'restoring' event and created
    # the first run of echo_job.
    events = summarize_events(client.events())
    assert_in(('', 'restoring'), events)
    assert_in(('MASTER.echo_job.0', 'created'), events)
    assert_equal(client.config('MASTER')['config'], second_config)

    # reconfigure, by uploading a third configuration (own namespace)
    self.sandbox.tronfig(ALT_NAMESPACED_ECHO_CONFIG, name='ohce')
    self.sandbox.client.home()

    # run the job and check its output
    echo_job_name = 'MASTER.echo_job'
    job_url = client.get_url(echo_job_name)
    action_url = client.get_url('MASTER.echo_job.1.echo_action')
    self.sandbox.tronctl('start', echo_job_name)

    def wait_on_cleanup():
        # Done once a second run exists and echo_action has succeeded.
        return (len(client.job(job_url)['runs']) >= 2 and
                client.action_runs(action_url)['state'] ==
                actionrun.ActionRun.STATE_SUCCEEDED.name)
    sandbox.wait_on_sandbox(wait_on_cleanup)

    echo_action_run = client.action_runs(action_url)
    another_action_url = client.get_url(
        'MASTER.echo_job.1.another_echo_action',
    )
    other_act_run = client.action_runs(another_action_url)
    assert_equal(
        echo_action_run['state'],
        actionrun.ActionRun.STATE_SUCCEEDED.name,
    )
    assert_equal(echo_action_run['stdout'], ['Echo!'])
    # another_echo_action fails but still prints the date line below.
    assert_equal(
        other_act_run['state'],
        actionrun.ActionRun.STATE_FAILED.name,
    )
    now = datetime.datetime.now()
    stdout = now.strftime(
        'Today is %Y-%m-%d, which is the same as %Y-%m-%d',
    )
    assert_equal(other_act_run['stdout'], [stdout])
    # One failed action marks the whole job run FAILED.
    job_runs_url = client.get_url('%s.1' % echo_job_name)
    assert_equal(
        client.job_runs(job_runs_url)['state'],
        actionrun.ActionRun.STATE_FAILED.name,
    )
def test_end_to_end_basic(self): client = self.sandbox.client # start with a basic configuration self.sandbox.save_config(SINGLE_ECHO_CONFIG) self.sandbox.trond() # make sure it got in assert_equal(client.config(), SINGLE_ECHO_CONFIG) # reconfigure and confirm results second_config = DOUBLE_ECHO_CONFIG + TOUCH_CLEANUP_FMT self.sandbox.tronfig(second_config) events = client.events() assert_equal(events[0]['name'], 'restoring') assert_equal(events[1]['name'], 'run_created') assert_equal(client.config(), second_config) job = { 'action_names': ['echo_action', 'cleanup', 'another_echo_action'], 'status': 'ENABLED', 'href': '/jobs/echo_job', 'last_success': None, 'name': 'echo_job', 'scheduler': 'INTERVAL:1:00:00', 'node_pool': ['localhost'], 'runs': None } expected = { 'jobs': [job], 'status_href': '/status', 'jobs_href': '/jobs', 'config_href': '/config', 'services': [], 'services_href': '/services' } result = self.sandbox.client.home() assert_equal(result, expected) # run the job and check its output self.sandbox.tronctl(['start', 'echo_job']) def wait_on_cleanup(): return (len(client.job('echo_job')['runs']) >= 2 and client.action('echo_job.1.echo_action')['state'] == 'SUCC') sandbox.wait_on_sandbox(wait_on_cleanup) echo_action_run = client.action('echo_job.1.echo_action') other_act_run = client.action('echo_job.1.another_echo_action') assert_equal(echo_action_run['state'], 'SUCC') assert_equal(echo_action_run['stdout'], ['Echo!']) assert_equal(other_act_run['state'], 'FAIL') now = datetime.datetime.now() stdout = now.strftime('Today is %Y-%m-%d, which is the same as %Y-%m-%d') assert_equal(other_act_run['stdout'], [stdout]) assert_equal(client.job_runs('echo_job.1')['state'], 'FAIL')
def test_node_reconfig(self):
    """Changing the node's hostname via reconfiguration should disable
    the old service instance and move future job runs to the new host.
    """
    job_service_config = dedent("""
        jobs:
          - name: a_job
            node: local
            schedule: "interval 1s"
            actions:
              - name: first_action
                command: "echo something"
        services:
          - name: a_service
            node: local
            pid_file: /tmp/does_not_exist
            command: "echo service start"
            monitor_interval: 1
        """)
    # Same jobs/services, but the 'local' node now points at 127.0.0.1.
    second_config = dedent("""
        ssh_options:
          agent: true
        nodes:
          - name: local
            hostname: '127.0.0.1'
        state_persistence:
          name: "state_data.shelve"
          store_type: shelve
        """) + job_service_config
    self.start_with_config(BASIC_CONFIG + job_service_config)

    service_name = 'MASTER.a_service'
    service_url = self.client.get_url(service_name)
    self.sandbox.tronctl('start', service_name)
    # The service's pid_file never appears, so monitoring drives it to
    # FAILED — presumably the intended precondition here; confirm.
    sandbox.wait_on_state(self.client.service, service_url,
                          service.ServiceState.FAILED)

    job_url = self.client.get_url('MASTER.a_job.0')
    sandbox.wait_on_state(self.client.job_runs, job_url,
                          actionrun.ActionRun.STATE_SUCCEEDED.name)

    self.sandbox.tronfig(second_config)

    # Reconfiguration should disable the service instance on the old node.
    sandbox.wait_on_state(self.client.service, service_url,
                          service.ServiceState.DISABLED)

    job_url = self.client.get_url('MASTER.a_job')

    def wait_on_next_run():
        # The most recent run should be placed on the reconfigured host.
        last_run = self.client.job(job_url)['runs'][0]
        return last_run['node']['hostname'] == '127.0.0.1'
    sandbox.wait_on_sandbox(wait_on_next_run)
def test_tronctl_basic(self):
    """Starting a job via tronctl runs its action and cleanup to SUCC."""
    client = self.sandbox.client
    self.sandbox.save_config(SINGLE_ECHO_CONFIG + TOUCH_CLEANUP_FMT)
    self.sandbox.trond()
    self.sandbox.tronctl(['start', 'echo_job'])

    sandbox.wait_on_sandbox(
        lambda: client.action('echo_job.1.cleanup')['state'] == 'SUCC')

    assert_equal(client.action('echo_job.1.echo_action')['state'], 'SUCC')
    assert_equal(client.job_runs('echo_job.1')['state'], 'SUCC')
def test_node_reconfig(self):
    """Reconfiguring the node hostname moves future runs to the new host."""
    job_config = dedent(
        """
        jobs:
          - name: a_job
            node: local
            schedule: "interval 1s"
            actions:
              - name: first_action
                command: "echo something"
        """
    )
    second_config = dedent(
        """
        ssh_options:
          agent: true
        nodes:
          - name: local
            hostname: '127.0.0.1'
        state_persistence:
          name: "state_data.shelve"
          store_type: shelve
        """
    ) + job_config
    self.start_with_config(BASIC_CONFIG + job_config)

    run_url = self.client.get_url('MASTER.a_job.0')
    sandbox.wait_on_state(
        self.client.job_runs,
        run_url,
        actionrun.ActionRun.SUCCEEDED,
    )

    self.sandbox.tronfig(second_config)

    job_url = self.client.get_url('MASTER.a_job')

    def latest_run_on_new_host():
        newest = self.client.job(job_url)['runs'][0]
        return newest['node']['hostname'] == '127.0.0.1'
    sandbox.wait_on_sandbox(latest_run_on_new_host)
def test_job_queueing_false_with_overlap(self):
    """Test that a job that has queueing false properly cancels
    an overlapping job run.
    """
    config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "cancel_overlap"
            schedule: "cron * * * * *"
            queueing: False
            node: local
            actions:
              - name: "do_something"
                command: "sleep 3s"
              - name: "do_other"
                command: "sleep 3s"
            cleanup_action:
              command: "echo done"
        """)
    self.start_with_config(config)
    job_url = self.client.get_url('MASTER.cancel_overlap')
    job_run_url = self.client.get_url('MASTER.cancel_overlap.1')

    sandbox.wait_on_sandbox(
        lambda: len(self.client.job(job_url)['runs']) == 2)

    # The overlapping run must end up cancelled, not queued.
    sandbox.wait_on_state(
        self.client.job,
        job_run_url,
        actionrun.ActionRun.CANCELLED,
    )

    observed = [
        run['state']
        for run in self.client.job_runs(job_run_url)['runs']
    ]
    assert_equal(observed, [actionrun.ActionRun.CANCELLED] * len(observed))
def test_node_reconfig(self):
    """After swapping the 'local' node to 127.0.0.1, newly scheduled
    runs should land on the new hostname.
    """
    job_config = dedent("""
        jobs:
          - name: a_job
            node: local
            schedule: "cron * * * * *"
            actions:
              - name: first_action
                command: "echo something"
        """)
    second_config = dedent("""
        ssh_options:
          agent: true
        nodes:
          - name: local
            hostname: '127.0.0.1'
        state_persistence:
          name: "state_data.shelve"
          store_type: shelve
        """) + job_config
    self.start_with_config(BASIC_CONFIG + job_config)

    first_run_url = self.client.get_url('MASTER.a_job.0')
    sandbox.wait_on_state(
        self.client.job_runs,
        first_run_url,
        actionrun.ActionRun.SUCCEEDED,
    )

    self.sandbox.tronfig(second_config)

    job_url = self.client.get_url('MASTER.a_job')
    sandbox.wait_on_sandbox(
        lambda: self.client.job(job_url)['runs'][0]['node']['hostname'] ==
        '127.0.0.1')
def test_service_reconfigure(self):
    """A running service survives a reconfiguration (new monitor
    interval) and can still be stopped afterwards.
    """
    # Template is filled twice via str.format; the %(...)s placeholders
    # are left for tron itself to interpolate per instance.
    config_template = BASIC_CONFIG + dedent("""
        services:
          - name: "a_service"
            node: local
            pid_file: "{wd}/%(name)s-%(instance_number)s.pid"
            command: "{command}"
            monitor_interval: {monitor_interval}
            restart_interval: 2
        """)
    # mock_daemon.py writes its pid to %(pid_file)s so monitoring sees UP.
    command = ("cd {path} && PYTHONPATH=. python "
               "{path}/tests/mock_daemon.py %(pid_file)s")
    command = command.format(path=os.path.abspath('.'))
    config = config_template.format(
        command=command, monitor_interval=1, wd=self.sandbox.tmp_dir)
    client = self.sandbox.client
    self.sandbox.save_config(config)
    self.sandbox.trond()
    self.sandbox.tronctl(['start', 'MASTER_a_service'])

    def wait_on_service_start():
        return client.service('MASTER_a_service')['state'] == 'UP'
    sandbox.wait_on_sandbox(wait_on_service_start)

    # Same config except monitor_interval; the service should come back UP
    # after the reconfiguration.
    new_config = config_template.format(
        command=command, monitor_interval=2, wd=self.sandbox.tmp_dir)
    self.sandbox.tronfig(new_config)
    sandbox.wait_on_sandbox(wait_on_service_start)

    self.sandbox.tronctl(['stop', 'MASTER_a_service'])

    def wait_on_service_stop():
        return client.service('MASTER_a_service')['state'] == 'DOWN'
    sandbox.wait_on_sandbox(wait_on_service_stop)