def test_go_skip_dir(self):
    # A task depending on a TrackedDirectory must leave its target alone on
    # back-to-back runs (same mtime) and must rewrite the target once a new
    # file is added to that directory (different mtime).
    a, b, c, d = [os.path.join(self.workdir, letter+".txt")
                  for letter in ("a", "b", "c", "d")]
    dirdep = anadama2.tracked.TrackedDirectory(self.workdir)
    # The target is a fixed path rather than a file under self.workdir;
    # presumably so that writing it does not alter the tracked directory.
    out = "/tmp/foobaz"
    self.ctx.already_exists(dirdep)
    for path in (a, b, c, d):
        anadama2.util.sh("echo {a} > {a}".format(a=path), shell=True)
    self.ctx.add_task("ls [depends[0]] > [targets[0]]",
                      depends=dirdep, targets=out)
    try:
        with capture(stderr=StringIO()):
            self.ctx.go()
        mtime = os.stat(out).st_mtime

        # Nothing changed: the target must not be rewritten.
        with capture(stderr=StringIO()):
            self.ctx.go()
        self.assertEqual(mtime, os.stat(out).st_mtime)

        # Add a file to the tracked directory; the target must be rewritten.
        with open(os.path.join(self.workdir, "f.txt"), 'w') as f:
            f.write("hi mom\n")
        time.sleep(SLEEPTIME)
        with capture(stderr=StringIO()):
            self.ctx.go()
        new_mtime = os.stat(out).st_mtime
        self.assertNotEqual(mtime, new_mtime)

        # And once more with nothing changed.
        with capture(stderr=StringIO()):
            self.ctx.go()
        self.assertEqual(new_mtime, os.stat(out).st_mtime)
    finally:
        # The target lives at a shared fixed path, so remove it even when
        # an assertion or go() above fails.
        if os.path.exists(out):
            os.remove(out)
def test_go_skip_config(self):
    # Uses the target's mtime to observe whether the task was re-executed
    # as values in a tracked Container are changed between runs.
    target = os.path.join(self.workdir, "a.txt")
    conf = anadama2.tracked.Container(alpha="5", beta=2)

    def quiet_go():
        with capture(stderr=StringIO()):
            self.ctx.go()

    def target_mtime():
        return os.stat(target).st_mtime

    self.ctx.add_task("echo beta:[depends[0]] > [targets[0]]",
                      depends=conf.beta, targets=target)
    quiet_go()
    first_mtime = target_mtime()

    # Identical rerun: mtime is expected to stay the same.
    quiet_go()
    self.assertEqual(first_mtime, target_mtime())

    # Changing the value the task depends on: mtime is expected to move.
    conf.beta = 7
    time.sleep(SLEEPTIME)
    quiet_go()
    second_mtime = target_mtime()
    self.assertNotEqual(first_mtime, second_mtime)

    # Adding a value the task does not depend on: mtime is expected to stay.
    conf.gamma = 10
    time.sleep(SLEEPTIME)
    quiet_go()
    self.assertEqual(second_mtime, target_mtime())

    # Registering another task for the same target that depends on every
    # item in the container: mtime is expected to move again.
    self.ctx.add_task("echo beta > [targets[0]]",
                      depends=list(conf.items()), targets=target)
    time.sleep(SLEEPTIME)
    quiet_go()
    self.assertNotEqual(second_mtime, target_mtime())
def test_go_skip(self):
    # Runs the same single-task workflow twice and checks that the
    # target's ctime is identical after the second run.
    target = os.path.join(self.workdir, "blah.txt")
    self.ctx.add_task("touch [targets[0]]", targets=[target])

    with capture(stderr=StringIO()):
        self.ctx.go()
    ctime_first_run = os.stat(target).st_ctime

    # Sleep so that a second "touch" would yield a distinguishable ctime.
    time.sleep(SLEEPTIME)
    with capture(stderr=StringIO()):
        self.ctx.go()
    ctime_second_run = os.stat(target).st_ctime

    self.assertEqual(ctime_first_run, ctime_second_run)
def test_go_skip_nothing(self):
    # A plain rerun must leave the target's mtime untouched, while a rerun
    # with skip_nothing=True must change it.
    target = os.path.join(self.workdir, "a.txt")

    def target_mtime():
        return os.stat(target).st_mtime

    self.ctx.add_task("touch [targets[0]]", targets=[target])
    with capture(stderr=StringIO()):
        self.ctx.go()
    original_mtime = target_mtime()

    time.sleep(SLEEPTIME)
    with capture(stderr=StringIO()):
        self.ctx.go()
    self.assertEqual(original_mtime, target_mtime())

    with capture(stderr=StringIO()):
        self.ctx.go(skip_nothing=True)
    self.assertNotEqual(original_mtime, target_mtime())
def test_go_skip_notargets(self):
    # Three tasks, the last of which declares dependencies but no targets.
    # The first task's target keeps its mtime on a plain rerun and gets a
    # new mtime after the file is deleted and the workflow is run again.
    a, b, c, d = (os.path.join(self.workdir, stem + ".txt")
                  for stem in ("a", "b", "c", "d"))

    def quiet_go():
        with capture(stderr=StringIO()):
            self.ctx.go()

    self.ctx.add_task("touch [targets[0]]", targets=[a])
    self.ctx.add_task("touch [targets[0]] [targets[1]]", targets=[b, c])
    self.ctx.add_task("touch {}".format(d), depends=[a, b])

    quiet_go()
    mtime_before = os.stat(a).st_mtime

    quiet_go()
    self.assertEqual(mtime_before, os.stat(a).st_mtime)

    # Delete the first target so the next run has to recreate it.
    os.remove(a)
    time.sleep(SLEEPTIME)
    quiet_go()
    self.assertNotEqual(mtime_before, os.stat(a).st_mtime)
def test_go_parallel(self):
    # Ten half-second sleeps run with jobs=10 must finish in under five
    # seconds of wall-clock time (the sum of the individual sleeps).
    task_count = 10
    for _ in range(task_count):
        self.ctx.add_task("sleep 0.5")

    started = datetime.now()
    with capture(stderr=StringIO()):
        self.ctx.go(jobs=10)
    finished = datetime.now()

    elapsed = finished - started
    self.assertLess(elapsed, timedelta(seconds=5))
def test_go(self):
    # Smoke test: one shell task reading /etc/hosts must produce its
    # declared target file.
    hosts = "/etc/hosts"
    wordcount = os.path.join(self.workdir, "wordcount.txt")

    self.ctx.already_exists(hosts)
    self.ctx.add_task(
        "wc -l [depends[0]] > [targets[0]]",
        depends=[hosts],
        targets=[wordcount],
    )
    with capture(stderr=StringIO()):
        self.ctx.go()

    created = os.path.exists(wordcount)
    self.assertTrue(created, "should create wordcount.txt")
def test_slurm_do(self):
    # do_gridable with an "@{...}" target: the file must not exist before
    # go() and must exist afterwards.
    produced = "true.txt"
    self.ctx.do_gridable("echo true > @{true.txt}", time=5, mem=50, cores=1)
    self.assertFalse(os.path.exists(produced))
    try:
        with capture(stderr=StringIO()):
            self.ctx.go()
        self.assertTrue(os.path.exists(produced))
    finally:
        # The file is written relative to the current directory, not the
        # per-test workdir, so remove it even if go() raises part-way.
        if os.path.exists(produced):
            os.remove(produced)
def test_go_exclude_task(self):
    # Running with exclude_task="bc" must create only a.txt: neither the
    # excluded task's targets (b, c) nor d.txt may appear.
    a, b, c, d = (os.path.join(self.workdir, stem + ".txt")
                  for stem in ("a", "b", "c", "d"))

    self.ctx.add_task("touch [targets[0]]", name="a", targets=[a])
    self.ctx.add_task("touch [targets[0]] [targets[1]]",
                      name="bc", targets=[b, c])
    self.ctx.add_task("touch "+d, name="d", depends=[a, b])

    with capture(stderr=StringIO()):
        self.ctx.go(exclude_task="bc")

    self.assertTrue(os.path.exists(a), "should quit at a")
    for absent in (b, c, d):
        self.assertFalse(os.path.exists(absent), "should quit at a")
def test_issue1(self):
    # Two independent chains (a -> b -> c and d -> e -> f) where the task
    # producing b exits non-zero. Running with jobs=2 must raise RunFailed.
    a, b, c, d, e, f = (os.path.join(self.workdir, stem + ".txt")
                        for stem in ("a", "b", "c", "d", "e", "f"))
    touch = "touch [targets[0]]"

    # First chain, with the failing task in the middle.
    self.ctx.add_task(touch, targets=[a], name="a")
    self.ctx.add_task("touch [targets[0]]; exit 1",
                      depends=[a], targets=[b], name="task should fail")
    self.ctx.add_task(touch, depends=[b], targets=[c], name="c")

    # Second chain, with no failing task.
    self.ctx.add_task(touch, targets=[d], name="d")
    self.ctx.add_task(touch, depends=[d], targets=[e], name="e")
    self.ctx.add_task(touch, depends=[e], targets=[f], name="f")

    with capture(stderr=StringIO()), \
            self.assertRaises(anadama2.workflow.RunFailed):
        self.ctx.go(jobs=2)
def test_issue36(self):
    # Step 2 depends on step 1's output file. After the constant that
    # step 1 depends on is changed, a second run must not report step 2
    # as skipped.
    ctx = self.ctx
    step1_const = anadama2.tracked.Container(a=12)
    step1_out = os.path.join(self.workdir, "step1.txt")
    step1_cmd = " ".join(["echo", str(step1_const.a), ">", step1_out])
    ctx.already_exists(anadama2.tracked.TrackedString(step1_cmd))
    ctx.add_task(step1_cmd,
                 depends=[step1_const.a,
                          anadama2.tracked.TrackedString(step1_cmd)],
                 targets=[step1_out])

    step2_out = os.path.join(self.workdir, "step2.txt")
    step2_cmd = "; ".join(["p=$(cat " + step1_out + ")",
                           "echo $p > " + step2_out])
    ctx.already_exists(anadama2.tracked.TrackedString(step2_cmd))
    step2 = ctx.add_task(step2_cmd,
                         depends=[step1_out],
                         targets=[step2_out],
                         name="Step 2")
    with capture(stderr=StringIO()):
        ctx.go()

    # Record skips in a mutable object shared with the reporter. A plain
    # assignment inside the method would only bind a method-local name and
    # leave the outer value unchanged, making the final assertion vacuous.
    skipped_task_nos = []

    class CustomReporter(anadama2.reporters.ConsoleReporter):
        def task_skipped(self, task_no, *args, **kwargs):
            skipped_task_nos.append(task_no)
            return super(CustomReporter, self).task_skipped(
                task_no, *args, **kwargs)

        def task_running(self, task_no):
            pass

    step1_const.a = 10
    with capture(stderr=StringIO()):
        ctx.go(reporter=CustomReporter(ctx))
    self.assertNotIn(step2.task_no, skipped_task_nos,
                     "Shouldn't skip step 2; parent dep changed")
def test_go_until_task(self):
    # With until_task="bc" on the chain a -> bc -> d, the files a, b and c
    # must exist afterwards and d must not.
    a, b, c, d = (os.path.join(self.workdir, stem + ".txt")
                  for stem in ("a", "b", "c", "d"))

    self.ctx.add_task("touch [targets[0]]", name="a", targets=[a])
    self.ctx.add_task("touch [targets[0]] [targets[1]]",
                      name="bc", depends=a, targets=[b, c])
    self.ctx.add_task("touch {}".format(d),
                      name="d", depends=[c], targets=d)

    with capture(stderr=StringIO()):
        self.ctx.go(until_task="bc")

    for expected in (a, b, c):
        self.assertTrue(os.path.isfile(expected))
    self.assertFalse(os.path.isfile(d))
def test_go_quit_early(self):
    # The first task writes its target and then exits non-zero; the second
    # task depends on that target. With quit_early=True the run must raise
    # RunFailed and the second task's target must never be created.
    outf = os.path.join(self.workdir, "blah.txt")
    out2 = os.path.join(self.workdir, "shouldntexist.txt")
    self.ctx.add_task("echo blah > [targets[0]]; exit 1",
                      targets=[outf], name="task should fail")
    # The dependent task must target out2 (not outf again); otherwise the
    # existence check on out2 below could never fail.
    self.ctx.add_task("cat [depends[0]] > [targets[0]]",
                      depends=[outf], targets=[out2])
    with capture(stderr=StringIO()):
        with self.assertRaises(anadama2.workflow.RunFailed):
            self.ctx.go(quit_early=True)
    self.assertFalse(
        os.path.exists(out2),
        "quit_early failed to stop before the second task was run")
def test_randomgraph_files(self):
    # Builds a workflow from a random 20-node directed graph where every
    # edge is a file produced by one task and consumed by another. A few
    # randomly chosen tasks exit non-zero. After the run: each chosen task
    # and everything reachable from it must be in failed_tasks, and every
    # other task must be in completed_tasks with no error.
    import anadama2.reporters

    G = nx.gn_graph(20)
    targets = defaultdict(dict)
    depends = defaultdict(dict)
    allfiles = []
    for a, b in G.edges():
        f = os.path.join(self.workdir, "{}_{}.txt".format(a, b))
        allfiles.append(f)
        # Task a writes the file, task b reads it.
        targets[a][b] = depends[b][a] = f

    # Materialise nodes and the topological order as lists so this works
    # whether networkx returns lists or views/generators.
    all_nodes = list(G.nodes())
    shall_fail = set(random.choice(all_nodes)
                     for _ in range(int(len(G)**.5)))
    nodes = list(nx.algorithms.dag.topological_sort(G))
    task_nos = [None] * len(nodes)
    for n in nodes:
        cmd = "touch /dev/null " + " ".join(targets[n].values())
        if n in shall_fail:
            cmd += " ;exit 1"
        t = self.ctx.add_task(cmd, name=cmd,
                              targets=list(targets[n].values()),
                              depends=list(depends[n].values()))
        task_nos[n] = t.task_no

    self.assertFalse(any(map(os.path.exists, allfiles)))
    with capture(stderr=StringIO()):
        with self.assertRaises(anadama2.workflow.RunFailed):
            rep = anadama2.reporters.LoggerReporter("debug", "/tmp/analog")
            self.ctx.go(reporter=rep)

    child_fail = set()
    for n in shall_fail:
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.failed_tasks,
                      ("tasks that raise exceptions should be marked"
                       " as failed"))
        self.assertTrue(bool(self.ctx.task_results[task_no].error),
                        "Failed tasks should have errors in task_results")
        # Everything reachable from a failing node must have failed too.
        for _, succ in dfs_edges(G, n):
            s_no = task_nos[succ]
            child_fail.add(succ)
            self.assertIn(s_no, self.ctx.failed_tasks,
                          "all children of failed tasks should fail")
            self.assertIn("parent task", self.ctx.task_results[s_no].error,
                          ("children of failed tasks should have errors"
                           " in task_results"))

    for n in set(nodes).difference(shall_fail.union(child_fail)):
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.completed_tasks)
        self.assertFalse(bool(self.ctx.task_results[task_no].error))
def test_randomgraph_files(self):
    # Random-graph failure propagation test in which each task is added
    # either with add_task or with add_task_gridable (chosen by
    # bern(0.5)), and the workflow is run with grid_jobs=2. Edges are
    # files produced by one task and consumed by another.
    #
    # NOTE(review): another method with this same name appears earlier in
    # this view; if both sit in the same class, this definition replaces
    # the earlier one -- confirm.
    G = nx.gn_graph(20)
    targets = defaultdict(dict)
    depends = defaultdict(dict)
    allfiles = []
    for a, b in G.edges():
        f = os.path.join(self.workdir, "{}_{}.txt".format(a, b))
        allfiles.append(f)
        # Task a writes the file, task b reads it.
        targets[a][b] = depends[b][a] = f

    # Materialise nodes and the topological order as lists so this works
    # whether networkx returns lists or views/generators.
    all_nodes = list(G.nodes())
    shall_fail = set(random.choice(all_nodes)
                     for _ in range(int(len(G)**.5)))
    nodes = list(nx.algorithms.dag.topological_sort(G))
    task_nos = [None] * len(nodes)

    # Loop-invariant wrapper, defined once instead of once per node.
    def slurm_add_task(*args, **kwargs):
        return self.ctx.add_task_gridable(
            mem=50, time=5, cores=1, *args, **kwargs)

    for n in nodes:
        cmd = "touch /dev/null " + " ".join(targets[n].values())
        if n in shall_fail:
            cmd += " ;exit 1"
        add_task = self.ctx.add_task if bern(0.5) else slurm_add_task
        t = add_task(cmd, name=cmd,
                     targets=list(targets[n].values()),
                     depends=list(depends[n].values()))
        task_nos[n] = t.task_no

    self.assertFalse(any(map(os.path.exists, allfiles)))
    with capture(stderr=StringIO()):
        with self.assertRaises(anadama2.workflow.RunFailed):
            self.ctx.go(grid_jobs=2)

    child_fail = set()
    for n in shall_fail:
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.failed_tasks,
                      ("tasks that raise exceptions should be marked"
                       " as failed"))
        self.assertTrue(bool(self.ctx.task_results[task_no].error),
                        "Failed tasks should have errors in task_results")
        # Everything reachable from a failing node must have failed too.
        for _, succ in dfs_edges(G, n):
            s_no = task_nos[succ]
            child_fail.add(succ)
            self.assertIn(s_no, self.ctx.failed_tasks,
                          "all children of failed tasks should fail")
            self.assertIn("parent task", self.ctx.task_results[s_no].error,
                          ("children of failed tasks should have errors"
                           " in task_results"))

    for n in set(nodes).difference(shall_fail.union(child_fail)):
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.completed_tasks)
        self.assertFalse(bool(self.ctx.task_results[task_no].error))
def test_randomgraph_tasks(self):
    # Random-graph failure propagation test where dependencies are the
    # predecessor Task objects themselves rather than files. Each task is
    # added with add_task or add_task_gridable (chosen by bern(0.5)) and
    # the workflow is run with grid_jobs=2.
    G = nx.gn_graph(20)

    # Materialise nodes and the topological order as lists so this works
    # whether networkx returns lists or views/generators.
    all_nodes = list(G.nodes())
    shall_fail = set(random.choice(all_nodes)
                     for _ in range(int(len(G)**.5)))
    nodes = list(nx.algorithms.dag.topological_sort(G))
    task_nos = [None] * len(nodes)

    # Loop-invariant wrapper, defined once instead of once per node.
    def slurm_add_task(*args, **kwargs):
        return self.ctx.add_task_gridable(
            mem=50, time=5, cores=1, *args, **kwargs)

    for n in nodes:
        cmd = "touch /dev/null "
        name = None
        if n in shall_fail:
            cmd += " ;exit 1"
            name = "should fail"
        add_task = self.ctx.add_task if bern(0.5) else slurm_add_task
        # Topological order means every predecessor already has a task
        # number by the time it is looked up here.
        t = add_task(cmd, name=name,
                     depends=[self.ctx.tasks[task_nos[a]]
                              for a in G.predecessors(n)])
        task_nos[n] = t.task_no

    with capture(stderr=StringIO()):
        with self.assertRaises(anadama2.workflow.RunFailed):
            self.ctx.go(grid_jobs=2)

    child_fail = set()
    for n in shall_fail:
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.failed_tasks,
                      ("tasks that raise exceptions should be marked"
                       " as failed"))
        self.assertTrue(bool(self.ctx.task_results[task_no].error),
                        "Failed tasks should have errors in task_results")
        # Everything reachable from a failing node must have failed too.
        for _, succ in dfs_edges(G, n):
            s_no = task_nos[succ]
            child_fail.add(succ)
            self.assertIn(s_no, self.ctx.failed_tasks,
                          "all children of failed tasks should fail")
            self.assertIn("parent task", self.ctx.task_results[s_no].error,
                          ("children of failed tasks should have errors"
                           " in task_results"))

    for n in set(nodes).difference(shall_fail.union(child_fail)):
        task_no = task_nos[n]
        self.assertIn(task_no, self.ctx.completed_tasks)
        self.assertFalse(bool(self.ctx.task_results[task_no].error))
def test_print_within_function_action(self):
    # A function action that writes to sys.stdout and sys.stderr must not
    # fail, and both messages must show up in the captured streams.
    def random_message():
        # Ten random characters from the ASCII range 32..126.
        chars = [chr(random.randint(32, 126)) for _ in range(10)]
        return six.u("".join(chars))

    # Generated in this order so the random sequence is consumed the same
    # way: stderr message first, stdout message second.
    stderr_msg = random_message()
    stdout_msg = random_message()

    def printer(task):
        sys.stderr.write(stderr_msg+"\n")
        sys.stdout.write(stdout_msg+"\n")

    task = self.ctx.add_task(printer)
    captured_out = StringIO()
    captured_err = StringIO()
    with capture(stdout=captured_out, stderr=captured_err):
        self.ctx.go()

    self.assertNotIn(task.task_no, self.ctx.failed_tasks)
    self.assertIn(stdout_msg, captured_out.getvalue())
    self.assertIn(stderr_msg, captured_err.getvalue())
def test_add_task_kwargs(self):
    # Extra keyword arguments to add_task (here msg="foobar") must be
    # available for "[msg]" substitution in the command string, and a bare
    # string target must end up as a single tracked-file target.
    outf = os.path.join(self.workdir, "test.txt")
    t1 = self.ctx.add_task("echo [msg] > [targets[0]]",
                           targets=outf, msg="foobar")
    self.assertEqual(len(t1.depends), 0)
    self.assertEqual(len(t1.targets), 1)
    # Identity check: constructing HugeTrackedFile for the same path is
    # expected to hand back the very object stored on the task.
    self.assertIs(t1.targets[0], anadama2.tracked.HugeTrackedFile(outf),
                  "the target should be a filedependency test.txt")
    with capture(stderr=StringIO()):
        self.ctx.go()
    self.assertTrue(os.path.exists(outf), "should create test.txt")
    with open(outf) as f:
        data = f.read().strip()
    # assertEqual works on every supported interpreter, so there is no
    # need to branch on the Python version to call the deprecated
    # assertEquals alias.
    self.assertEqual(data, "foobar")
def test_add_task_custom_dependency(self):
    # A user-defined subclass of anadama2.tracked.Base used as a target:
    # its init() must have run by the time add_task returns, and its
    # compare() must not have been consumed until the workflow is run.
    class CustomDependency(anadama2.tracked.Base):

        def init(self, key):
            # Flags inspected by the assertions below.
            self.compared = False
            self.initialized = True

        @staticmethod
        def key(the_key):
            return str(the_key)

        def compare(self):
            # Generator: the flag flips only once iteration begins.
            self.compared = True
            yield str(self.name)

    dep = CustomDependency("blah")
    self.ctx.add_task(anadama2.util.noop, targets=dep)
    self.assertTrue(dep.initialized)
    self.assertFalse(dep.compared)

    with capture(stdout=StringIO(), stderr=StringIO()):
        self.ctx.go()
    self.assertTrue(dep.compared)