def test_execute_pipeline_raw_twice(self):
    """Execute a raw Pipeline and reuse an ExecRecord."""
    run = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_raw,
                                   [self.dataset_raw],
                                   docker_handler_class=DummyDockerHandler).get_last_run()
    run = Run.objects.get(pk=run.pk)
    self.assertTrue(run.is_successful())

    run2 = Manager.execute_pipeline(self.user_bob,
                                    self.pipeline_raw,
                                    [self.dataset_raw],
                                    docker_handler_class=DummyDockerHandler).get_last_run()
    run2 = Run.objects.get(pk=run2.pk)
    self.assertTrue(run2.is_successful())

def test_method_fails(self,
                      slurm_sched_class=DummySlurmScheduler,
                      docker_handler_class=DummyDockerHandler):
    """Properly handle a failed method in a pipeline."""
    run = Manager.execute_pipeline(
        self.user_grandpa,
        self.pipeline_fubar,
        [self.dataset_grandpa],
        slurm_sched_class=slurm_sched_class,
        docker_handler_class=docker_handler_class
    ).get_last_run()
    self.assertTrue(run.is_failed())
    self.assertIsNone(run.complete_clean())

    runstep1 = run.runsteps.get(pipelinestep__step_num=1)
    self.cable_tester(runstep1)
    self.assertIsNone(runstep1.complete_clean())
    self.assertTrue(runstep1.is_successful())

    runstep2 = run.runsteps.get(pipelinestep__step_num=2)
    self.cable_tester(runstep2)
    self.assertIsNone(runstep2.complete_clean())
    self.assertTrue(runstep2.is_failed())

    log = runstep2.log
    self.assertFalse(log.is_successful())
    self.assertEqual(log.methodoutput.return_code, 1)
    self.assertEqual(log.missing_outputs(),
                     [runstep2.execrecord.execrecordouts.first().dataset])

def test_active_run_stopped(self):
    stop_username = '******'
    User.objects.create(username=stop_username)
    Run.objects.create(start_time=datetime(2000, 12, 21))

    Manager(slurm_sched_class=self.scheduler_class,
            stop_username=stop_username,
            docker_handler_class=self.docker_class)

def test_find_dataset_pipeline_input_and_step_output(self):
    """
    Finding a Dataset which was input to a Pipeline should return None
    as the generator, and the top-level run as the run.

    Finding a Dataset which was output from a step, and also input to a
    cable, should return the step (and in particular, not the cable).
    """
    self.pipeline_noop = Pipeline.objects.get(family__name="simple pipeline")
    self.dataset_words = Dataset.objects.get(name='blahblah')
    self.user_bob = User.objects.get(username='******')

    mgr = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_noop,
                                   [self.dataset_words],
                                   docker_handler_class=DummyDockerHandler)
    x = mgr.history_queue.pop()
    self.assertIsNone(x.run.complete_clean())
    self.assertTrue(x.run.is_successful())

    run, gen = x.first_generator_of_dataset(self.dataset_words)
    self.assertEqual(run, x.run)
    self.assertEqual(gen, None)

    dataset_out_intermediate = x.run.runsteps.first().execrecord.execrecordouts.first().dataset
    run_2, gen_2 = x.first_generator_of_dataset(dataset_out_intermediate)
    self.assertEqual(run_2, x.run)
    self.assertEqual(gen_2, self.pipeline_noop.steps.first())

def test_find_dataset_subpipeline_input_and_intermediate(self):
    """
    Find a dataset in a sub-pipeline which is output from a step.

    Find a dataset in a sub-pipeline which is input to the sub-pipeline
    on a custom cable.
    """
    self.pipeline_nested = Pipeline.objects.get(family__name="nested pipeline")
    self.dataset_backwords = Dataset.objects.get(name='backwords')
    self.user_bob = User.objects.get(username='******')

    mgr = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_nested,
                                   [self.dataset_backwords],
                                   docker_handler_class=DummyDockerHandler)
    sandbox = mgr.history_queue.pop()
    self.assertIsNone(sandbox.run.complete_clean())
    self.assertTrue(sandbox.run.is_successful())

    subpipeline_step = sandbox.run.runsteps.get(pipelinestep__step_num=2)
    subrun = subpipeline_step.child_run
    runstep = subrun.runsteps.first()
    outrecord = runstep.execrecord.execrecordouts.first()
    dataset_to_find = outrecord.dataset

    run, gen = sandbox.first_generator_of_dataset(dataset_to_find)
    self.assertEqual(run, subrun)
    self.assertEqual(gen, runstep.pipelinestep)

    cable = runstep.RSICs.first()
    dataset_to_find_2 = runstep.execrecord.execrecordins.first().dataset
    run_2, gen_2 = sandbox.first_generator_of_dataset(dataset_to_find_2)
    self.assertEqual(run_2, subrun)
    self.assertEqual(gen_2, cable.PSIC)

def test_active_run_aborts(self):
    Run.objects.create(start_time=datetime(2000, 12, 21))

    with self.assertRaises(ActiveRunsException) as result:
        Manager(slurm_sched_class=self.scheduler_class,
                docker_handler_class=self.docker_class)

    self.assertEqual(1, result.exception.count)

def execute_simple_run(environment, slurm_sched_class):
    """
    A helper function that creates a simple pipeline and executes a run.

    This also populates the object -- e.g. a TestCase or a FixtureBuilder --
    with some variables.

    Returns the Manager object that executed the run.
    """
    tools.create_eric_martin_test_environment(environment)
    tools.create_sandbox_testing_tools_environment(environment)
    user = User.objects.get(username='******')

    # Everything in this pipeline will be a no-op, so all can be linked
    # together without remorse.
    p_basic = tools.make_first_pipeline("P_basic", "Innermost pipeline", user)
    tools.create_linear_pipeline(p_basic,
                                 [environment.method_noop, environment.method_noop],
                                 "basic_in",
                                 "basic_out")
    p_basic.family.grant_everyone_access()
    p_basic.grant_everyone_access()
    p_basic.create_outputs()
    p_basic.save()

    # Set up a dataset with words in it called environment.dataset_words.
    tools.make_words_dataset(environment)

    return Manager.execute_pipeline(
        environment.user_bob,
        p_basic,
        [environment.dataset_words],
        groups_allowed=[everyone_group()],
        slurm_sched_class=slurm_sched_class,
        docker_handler_class=DummyDockerHandler
    )

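# A minimal usage sketch (not part of the original suite) of how a test might
# call execute_simple_run; the SimpleRunExample class and its assertions are
# hypothetical, but the helper's signature and return value match the
# definition above.
class SimpleRunExample(TestCase):
    def test_simple_run_completes(self):
        # The helper attaches variables (e.g. dataset_words) to `self` and
        # returns the Manager that executed the run.
        manager = execute_simple_run(self, slurmlib.DummySlurmScheduler)
        run = manager.get_last_run()
        self.assertTrue(run.is_successful())
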
def test_execute_pipeline_raw(self):
    """Execute a raw Pipeline."""
    run = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_raw,
                                   [self.dataset_raw],
                                   docker_handler_class=DummyDockerHandler).get_last_run()
    run.refresh_from_db()
    self.assertTrue(run.is_successful())

def test_recover_intermediate_dataset(self):
    """
    Test recovery of an intermediate dataset.
    """
    # In the fixture, we already ran self.pipeline_revcomp_v2, which discards
    # the intermediate output.  We now run v3, which will recover it.
    run = Manager.execute_pipeline(
        self.user_alice,
        self.pipeline_revcomp_v3,
        [self.dataset_labdata],
        docker_handler_class=DummyDockerHandler
    ).get_last_run()
    self.assertTrue(run.is_successful())

def test_execute_pipeline_raw_with_docker(self):
    """Execute a raw Pipeline using the real Docker handler."""
    self.maxDiff = None
    run = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_raw,
                                   [self.dataset_raw],
                                   docker_handler_class=DockerHandler).get_last_run()
    run.refresh_from_db()

    stderr_path = os.path.join(run.sandbox_path,
                               "step1",
                               "logs",
                               "step1_stderr_slurmID0_node0.txt")
    with open(stderr_path, 'rU') as f:
        stderr_text = f.read()
    self.assertEqual("", stderr_text)
    self.assertTrue(run.is_successful())

def test_find_dataset_pipeline_input_and_intermediate_custom_wire(self):
    """
    Finding a Dataset which was passed through a custom wire to a Pipeline
    should return the cable as the generator, and the top-level run as the
    run.

    Finding a Dataset which was produced by a custom wire as an intermediate
    step should return the cable as the generator, and the top-level run as
    the run.
    """
    self.pipeline_twostep = Pipeline.objects.get(family__name="two-step pipeline")
    self.dataset_backwords = Dataset.objects.get(name='backwords')
    self.user_bob = User.objects.get(username='******')

    mgr = Manager.execute_pipeline(self.user_bob,
                                   self.pipeline_twostep,
                                   [self.dataset_backwords],
                                   docker_handler_class=DummyDockerHandler)
    sandbox = mgr.history_queue.pop()
    self.assertIsNone(sandbox.run.complete_clean())
    self.assertTrue(sandbox.run.is_successful())

    runcable = sandbox.run.runsteps.get(pipelinestep__step_num=1).RSICs.first()
    dataset_to_find = runcable.execrecord.execrecordouts.first().dataset
    run, gen = sandbox.first_generator_of_dataset(dataset_to_find)
    self.assertEqual(run, sandbox.run)
    self.assertEqual(gen, runcable.PSIC)

    # Testing on an intermediate Dataset.
    runcable_2 = sandbox.run.runsteps.get(pipelinestep__step_num=2).RSICs.first()
    dataset_to_find_2 = runcable_2.execrecord.execrecordouts.first().dataset
    run_2, gen_2 = sandbox.first_generator_of_dataset(dataset_to_find_2)
    self.assertEqual(run_2, sandbox.run)
    self.assertEqual(gen_2, runcable_2.PSIC)

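# Taken together, the first_generator_of_dataset() tests above pin down its
# return convention; a schematic recap (the `sandbox` and `dataset` names are
# placeholders):
#
#   run, gen = sandbox.first_generator_of_dataset(dataset)
#   # - dataset was a top-level pipeline input:  gen is None
#   # - dataset was produced by a step:          gen is that PipelineStep
#   # - dataset was produced by a custom cable:  gen is that cable's PSIC
#   # and `run` is the (sub)run in which the generator executed.
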
def setUp(self):
    self.man = Manager(quit_idle=False,
                       history=0,
                       slurm_sched_class=slurmlib.DummySlurmScheduler,
                       docker_handler_class=dockerlib.DummyDockerHandler)

def test_bad_slurm(self):
    self.scheduler_class.slurm_is_alive.return_value = False

    with self.assertRaisesRegexp(RuntimeError,
                                 'Slurm is down or badly configured.'):
        Manager(slurm_sched_class=self.scheduler_class,
                docker_handler_class=self.docker_class)

def test_active_run_not_stopped(self):
    Run.objects.create(start_time=datetime(2000, 12, 21))

    Manager(slurm_sched_class=self.scheduler_class,
            no_stop=True,
            docker_handler_class=self.docker_class)

def test_completed_run_does_not_abort(self):
    Run.objects.create(start_time=datetime(2000, 12, 21),
                       end_time=datetime(2000, 12, 22))

    Manager(slurm_sched_class=self.scheduler_class,
            docker_handler_class=self.docker_class)

def test_stopping_run_does_not_abort(self):
    stop_user = User.objects.create(username='******')
    Run.objects.create(start_time=datetime(2000, 12, 21),
                       stopped_by=stop_user)

    Manager(slurm_sched_class=self.scheduler_class,
            docker_handler_class=self.docker_class)

def test_unknown_user(self):
    stop_username = '******'

    with self.assertRaises(User.DoesNotExist):
        Manager(slurm_sched_class=self.scheduler_class,
                stop_username=stop_username,
                docker_handler_class=self.docker_class)

class IdleTaskTests(TestCase):
    def setUp(self):
        self.man = Manager(quit_idle=False,
                           history=0,
                           slurm_sched_class=slurmlib.DummySlurmScheduler,
                           docker_handler_class=dockerlib.DummyDockerHandler)

    def tearDown(self):
        self.man.slurm_sched_class.shutdown()

    def test_manager_ok(self):
        """Make sure we have a manager class."""
        self.assertIsNotNone(self.man)

    def test_add_idletask01(self):
        """Adding a non-generator should raise an exception."""
        def test_func(myargs):
            return myargs + 1000.0

        with self.assertRaises(RuntimeError):
            self.man._add_idletask(test_func)

    def test_add_idletask02(self):
        """Adding a generator should work."""
        def test_generator(myargs):
            while True:
                bla = (yield myargs)
                bla += 1

        gen = test_generator(100)
        # Just make sure our test is valid.
        self.assertTrue(inspect.isgenerator(gen),
                        "The test is broken: test_generator is not a generator")
        self.man._add_idletask(gen)

    def do_idle_tasks_test(self, lst, time_limit):
        """Add three generators and call _do_idle_tasks.

        The generators modify lst if they are called.
        """
        def gen1(target):
            while True:
                (yield None)
                target.append(1)

        def gen2(target):
            while True:
                (yield None)
                target.append(2)

        def gen3(target):
            while True:
                (yield None)
                target.append(3)

        self.man._add_idletask(gen1(lst))
        self.man._add_idletask(gen2(lst))
        self.man._add_idletask(gen3(lst))
        self.man._do_idle_tasks(time_limit)

    def test_add_do_idle_tasks01(self):
        """Add three generators.

        Calling _do_idle_tasks with a big time_limit should result in all of
        them being called exactly once.
        """
        lst, time_limit = [], time.time() + 1000.0
        self.do_idle_tasks_test(lst, time_limit)
        self.assertTrue(len(lst) == 3, "unexpected lst length")
        self.assertTrue(set(lst) == {1, 2, 3}, "unexpected set")

    def test_add_do_idle_tasks02(self):
        """Add three generators.

        Calling _do_idle_tasks with a negative time_limit should result in
        none of them being called.
        """
        lst, time_limit = [], time.time() - 1000.0
        self.do_idle_tasks_test(lst, time_limit)
        self.assertTrue(len(lst) == 0, "unexpected lst length")
        self.assertTrue(set(lst) == set(), "unexpected set")

    def test_add_do_idle_tasks03(self):
        """
        Add four time-delayed generators.

        Waiting a specific time should result in some of them being called
        and others not.
        """
        def sleep_generator(target, task_id, secs_to_sleep):
            while True:
                (yield None)
                target.append(task_id)
                time.sleep(secs_to_sleep)

        wait_secs = 1.0
        lst = []
        self.man._add_idletask(sleep_generator(lst, 1, wait_secs))
        self.man._add_idletask(sleep_generator(lst, 2, wait_secs))
        self.man._add_idletask(sleep_generator(lst, 3, wait_secs))
        self.man._add_idletask(sleep_generator(lst, 4, wait_secs))
        time_limit = time.time() + 1.5*wait_secs
        self.man._do_idle_tasks(time_limit)
        self.assertTrue(len(lst) == 2, "unexpected lst length")
        # NOTE: the order of the idle tasks is not defined by the interface.
        # However, in fact the queue is rotated to the right...
        self.assertTrue(set(lst) == {1, 4}, "unexpected set")

    def test_create_next_month_upload_dir01(self):
        """Test the creation of a monthly directory when the Dataset dir is not present."""
        dataset_dir = os.path.join(settings.MEDIA_ROOT, Dataset.UPLOAD_DIR)
        date_str = (date.today() + timedelta(days=30)).strftime('%Y_%m')
        next_dirname = os.path.join(dataset_dir, date_str)
        # Delete the dir iff it exists.
        try:
            shutil.rmtree(dataset_dir)
        except os.error as e:
            if e.errno != errno.ENOENT:
                raise

        gg = Dataset.idle_create_next_month_upload_dir()
        self.man._add_idletask(gg)
        time_limit = time.time() + 1000.0
        self.man._do_idle_tasks(time_limit)
        self.assertTrue(os.path.exists(next_dirname), "directory was not made")

    def test_create_next_month_upload_dir02(self):
        """Test the creation of a monthly directory where the Dataset dir may be present."""
        dataset_dir = os.path.join(settings.MEDIA_ROOT, Dataset.UPLOAD_DIR)
        date_str = (date.today() + timedelta(days=30)).strftime('%Y_%m')
        next_dirname = os.path.join(dataset_dir, date_str)
        # Delete the dir iff it exists.
        try:
            shutil.rmtree(next_dirname)
        except os.error as e:
            if e.errno != errno.ENOENT:
                raise

        gg = Dataset.idle_create_next_month_upload_dir()
        self.man._add_idletask(gg)
        time_limit = time.time() + 1000.0
        self.man._do_idle_tasks(time_limit)
        self.assertTrue(os.path.exists(next_dirname), "directory was not made")

    def test_create_next_month_upload_dir03(self):
        """Test the creation of a monthly dir where the dir is already present."""
        dataset_dir = os.path.join(settings.MEDIA_ROOT, Dataset.UPLOAD_DIR)
        date_str = (date.today() + timedelta(days=30)).strftime('%Y_%m')
        next_dirname = os.path.join(dataset_dir, date_str)
        # Make the directory iff it doesn't exist.
        if not os.path.exists(next_dirname):
            os.makedirs(next_dirname)

        gg = Dataset.idle_create_next_month_upload_dir()
        self.man._add_idletask(gg)
        time_limit = time.time() + 1000.0
        self.man._do_idle_tasks(time_limit)
        self.assertTrue(os.path.exists(next_dirname), "directory was not made")

    def test_logfile_purge01(self):
        # dataset_dir = os.path.join(settings.MEDIA_ROOT, Dataset.UPLOAD_DIR)
        gg = MethodOutput.idle_logfile_purge()
        self.man._add_idletask(gg)
        for i in range(10):
            # print "TEST", i
            time_limit = time.time() + 10.0
            self.man._do_idle_tasks(time_limit)

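# A minimal sketch (not from the suite above) of the idle-task protocol these
# tests exercise: an idle task is a generator that does one slice of work per
# yield, and Manager._do_idle_tasks(time_limit) advances each registered
# generator while time.time() is still below time_limit. The task body and
# its arguments are hypothetical.
def example_idle_task(label, target):
    while True:
        (yield None)            # wait to be granted an idle time slice
        target.append(label)    # one slice of (placeholder) idle work

# Wiring it into a Manager, mirroring the setUp() above:
#   man = Manager(quit_idle=False, history=0,
#                 slurm_sched_class=slurmlib.DummySlurmScheduler,
#                 docker_handler_class=dockerlib.DummyDockerHandler)
#   man._add_idletask(example_idle_task("cleanup", []))
#   man._do_idle_tasks(time.time() + 5.0)  # run slices until the deadline
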
def test_simple(self):
    Manager(slurm_sched_class=self.scheduler_class,
            docker_handler_class=self.docker_class)