def test_cleanup_crashed_jobs(self):
    # Exercise "cleanup-crashed-jobs --delete" on a job whose lock has been
    # dropped while it still owns a task record that was never stopped.
    config_opt = "--server-config-dir=%s" % self.dir
    status = djmctl.main(["add-node", config_opt, "--private-ip=127.0.0.1",
                          "--bootstrap", "localhost"])
    self.assertEqual(status, 0)

    conn = client.get_local_connection(self.dir)
    job_id = conn.start_job("test", JobType.ONE_TIME_JOB, 1,
                            "this is a test job",
                            requested_nodes=["localhost"])
    _started_task = conn.run_task(job_id, "Test", "test task", "localhost")

    # This reaches past the client API into the model, which breaks the
    # abstraction, but it is the only good way to get a task that has been
    # started but not finished.
    model = conn.model
    model.begin_transaction()
    node = model.find_node(name="localhost")
    job_record = model.query_jobs(job_id=job_id)[0]
    model.create_task("bad task", job_record, "Test", node,
                      "this task will not be stopped")
    model.commit_transaction()

    # Simulate the crashing of the owning process: release the job lock
    # and then forget it.
    conn.job_locks[job_id].release()
    del conn.job_locks[job_id]

    status = djmctl.main(["cleanup-crashed-jobs", config_opt, "--debug",
                          "--delete"])
    self.assertEqual(status, 0)

    # Looking the job up by id is expected to exit with 1 after the
    # cleanup (presumably because the job no longer exists).
    status = djmctl.main(["list-jobs", config_opt, "--debug",
                          "--job-id=%s" % job_id])
    self.assertEqual(status, 1)
def test_delete_job_with_force(self):
    # Exercise "delete-job --force" on a job that still owns a task record
    # which was never stopped.
    config_opt = "--server-config-dir=%s" % self.dir
    status = djmctl.main(["add-node", config_opt, "--private-ip=127.0.0.1",
                          "--bootstrap", "localhost"])
    self.assertEqual(status, 0)

    conn = client.get_local_connection(self.dir)
    job_id = conn.start_job("test", JobType.ONE_TIME_JOB, 1,
                            "this is a test job",
                            requested_nodes=["localhost"])
    _started_task = conn.run_task(job_id, "Test", "test task", "localhost")

    # This reaches past the client API into the model, which breaks the
    # abstraction, but it is the only good way to get a task that has been
    # started but not finished.
    model = conn.model
    model.begin_transaction()
    node = model.find_node(name="localhost")
    job_record = model.query_jobs(job_id=job_id)[0]
    model.create_task("bad task", job_record, "Test", node,
                      "this task will not be stopped")
    model.commit_transaction()

    # Our own lock on the job has to be released before running delete-job.
    conn.job_locks[job_id].release()

    status = djmctl.main(["delete-job", config_opt, "--force", job_id])
    self.assertEqual(status, 0)
    status = djmctl.main(["list-jobs", config_opt])
    self.assertEqual(status, 0)
def test_delete_completed_jobs(self):
    # Create some jobs through the worker start/stop commands, then check
    # that "delete-completed-jobs" and the surrounding "list-jobs" calls
    # all exit with 0.
    config_opt = "--server-config-dir=%s" % self.dir

    def run_ok(command, *rest):
        # Run one djmctl command against this test's server directory and
        # require a zero exit code.
        self.assertEqual(djmctl.main([command, config_opt] + list(rest)), 0)

    # First some work is needed to create jobs.
    run_ok("create-static-pool", "p1")
    run_ok("add-node", "--private-ip=127.0.0.1", "--pool=p1", "--bootstrap",
           "localhost")
    run_ok("start-worker", "--pool", "p1")
    run_ok("stop-worker", "--pool", "p1")

    # This listing should print jobs.
    run_ok("list-jobs")
    run_ok("delete-completed-jobs", "--pool-name=p1")
    # This listing should not find any jobs.
    run_ok("list-jobs")
def test_node_tasks(self):
    # Bootstrap a single node, start a worker on it, run a command there and
    # stop the worker again; every djmctl command must exit with 0.
    config_opt = "--server-config-dir=%s" % self.dir
    commands = (
        ["add-node", config_opt, "--private-ip=127.0.0.1", "--bootstrap",
         "localhost"],
        ["start-worker", config_opt, "localhost"],
        ["run-command", config_opt, "localhost", "/bin/ls", "/"],
        ["stop-worker", config_opt, "localhost"],
    )
    # The commands run strictly in order; the first non-zero exit code
    # fails the test and skips the remaining commands.
    for argv in commands:
        self.assertEqual(djmctl.main(argv), 0)
def run(nodes):
    """Run the multi-node smoke test against the given nodes.

    Sets up the server, creates the static pool ``p1``, bootstraps every
    node into that pool, then starts a worker, runs ``/bin/ls /`` and stops
    the worker across the pool.

    Args:
        nodes: Re-iterable collection of node records. Each record must
            unpack as ``(name, ip, user)`` and also expose a ``name``
            attribute (used for the log line).

    Raises:
        Exception: If ``add-node``, ``start-worker``, ``run-command`` or
            ``stop-worker`` returns a non-zero exit code.
    """
    logger.info("Running test on nodes %s" % [node.name for node in nodes])
    djm_package = setup()
    # NOTE(review): the return codes of setup-server and create-static-pool
    # are not checked; confirm whether that is intentional.
    main(["setup-server", "--djm-package=%s" % djm_package])
    main(["create-static-pool", "p1"])
    for (name, ip, user) in nodes:
        rc = main(["add-node", "--public-ip=%s" % ip, "--os-user=%s" % user,
                   "--pool=p1", "--bootstrap", name])
        if rc != 0:
            raise Exception("Node bootstrap failed for %s" % name)
    rc = main(["start-worker", "--pool", "p1"])
    if rc != 0:
        raise Exception("start-worker failed")
    # Capture each command's own return code so the check below it tests
    # that command and not the result of an earlier one.
    rc = main(["run-command", "--pool", "p1", "/bin/ls", "/"])
    if rc != 0:
        raise Exception("run-command failed")
    rc = main(["stop-worker", "--pool", "p1"])
    if rc != 0:
        raise Exception("stop-worker failed")
    logger.debug("multinode test successful")
def test_pool_tasks(self):
    # Run a command across a one-node static pool and then list the
    # resulting jobs and tasks; every djmctl command must exit with 0.
    config_opt = "--server-config-dir=%s" % self.dir
    pool_opts = ["--pool", "p1"]

    status = djmctl.main(["create-static-pool", config_opt, "p1"])
    self.assertEqual(status, 0)
    status = djmctl.main(["add-node", config_opt, "--private-ip=127.0.0.1",
                          "--pool=p1", "--bootstrap", "localhost"])
    self.assertEqual(status, 0)

    status = djmctl.main(["start-worker", config_opt] + pool_opts)
    self.assertEqual(status, 0)
    status = djmctl.main(["run-command", config_opt] + pool_opts
                         + ["/bin/ls", "/"])
    self.assertEqual(status, 0)
    status = djmctl.main(["stop-worker", config_opt] + pool_opts)
    self.assertEqual(status, 0)

    status = djmctl.main(["list-jobs", config_opt])
    self.assertEqual(status, 0)

    # Note that the list-tasks call depends on how job ids are assigned:
    # it looks up the job by the id "run-command-1".
    status = djmctl.main(["list-tasks", config_opt, "run-command-1"])
    self.assertEqual(status, 0)
def setUp(self):
    # Give the test its own freshly created temporary directory and
    # initialise a server configuration in it via "setup-server".
    self.dir = tempfile.mkdtemp(prefix="test_djmctl")
    rc = djmctl.main(["setup-server", "--server-config-dir=%s" % self.dir])
    # Use the TestCase assertion instead of a bare assert so the check is
    # not stripped under "python -O" and matches the style of the tests.
    self.assertEqual(rc, 0)