def setup_class(cls):
  client = make_logged_in_client('test')
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)

  # Make home directory
  cluster.fs.setuser(cluster.superuser)
  if not cluster.fs.exists("/user/test"):
    cluster.fs.mkdir("/user/test")
  cluster.fs.chown("/user/test", "test", "test")

  if not cluster.fs.exists("/tmp"):
    cluster.fs.mkdir("/tmp")
  cluster.fs.chmod("/tmp", int('777', 8))

  cluster.fs.setuser("test")

  cls.cluster = cluster
  cls.client = client
  cls.jobsubd = jobsubd
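
# The tests below call setup_cluster_fs(cluster) to create the test user's home
# directory before submitting jobs, but its definition is not shown in this
# section. The sketch below is an assumption of what it needs to do, mirroring
# the HDFS setup performed in setup_class above; the function body is
# hypothetical, not the original implementation.
def setup_cluster_fs(cluster):
  """Hypothetical sketch: make /user/test exist and be owned by the test user."""
  cluster.fs.setuser(cluster.superuser)
  if not cluster.fs.exists("/user/test"):
    cluster.fs.mkdir("/user/test")
  cluster.fs.chown("/user/test", "test", "test")
  cluster.fs.setuser("test")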
def test_jobsub_setup_and_samples():
  """
  Merely exercises jobsub_setup, and then runs all the examples.
  """
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  try:
    c = make_logged_in_client()

    # Create a job, to make sure that it sticks around
    response = c.post("/jobsub/new/jar", dict(
      name="should_stick_around",
      jarfile="foo",
      arguments="foo",
      submit="Save"))
    design_id = response.context["saved"]

    import jobsub.management.commands.jobsub_setup as jobsub_setup
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()

    # Make sure we have three job designs now.
    assert_equal(3, JobDesign.objects.filter(name__startswith="Example: ").count())

    # Make sure "should_stick_around" is still there
    assert_equal(1, JobDesign.objects.filter(name="should_stick_around").count())

    # Make sure sample user got created.
    assert_equal(1, User.objects.filter(username="******").count())
    assert_equal(1, User.objects.filter(username="******").count())

    # And now submit and run the samples.
    # Pi example.
    # Irritatingly, /user/test needs to exist first.
    setup_cluster_fs(cluster)
    id = JobDesign.objects.get(name__contains="Example: Pi").id
    response = c.get("/jobsub/submit/%d" % id)
    assert_true("Iterations per mapper" in response.content)
    assert_true("Num of mappers" in response.content)
    response = c.post("/jobsub/submit/%d" % id, dict(
      iterations_per_mapper=10,
      num_of_mappers=1))
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("Estimated value of Pi is" in response.context["job_data"].stdout_tail)
    assert_true("bin/hadoop returned 0" in response.content)

    # Wordcount example
    id = JobDesign.objects.get(name__contains="Example: Streaming Wordcount").id
    response = c.get("/jobsub/submit/%d" % id)
    response = c.post("/jobsub/submit/%d" % id, dict(
      output="/user/test/jobsub-streaming-test"))
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("streaming.StreamJob: Job complete:" in response.context["job_data"].stderr_tail)
    assert_true(cluster.fs.exists("/user/test/jobsub-streaming-test/part-00000"))

    # Not running the sleep example, since it adds little.
  finally:
    jobsubd.exit()
    cluster.shutdown()
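
# Both tests rely on two helpers that are not defined in this section:
# parse_out_id(response), which pulls the submission id out of the response to
# a submit POST, and watch_till_complete(client, watch_id), which polls the
# watch page until the job finishes. The sketch below is an assumption about
# their behavior (the redirect format and the is_running flag are guesses),
# not the actual implementation.
import time

def parse_out_id(response):
  """Hypothetical sketch: extract the watch id from the redirect URL."""
  # e.g. a redirect to "/jobsub/watch/42" would yield 42
  return int(response["Location"].rsplit("/", 1)[1])

def watch_till_complete(client, watch_id, sleep_seconds=1):
  """Hypothetical sketch: poll the watch page until the job stops running."""
  while True:
    response = client.get("/jobsub/watch/%d" % watch_id)
    if not response.context["job_data"].is_running:  # assumed attribute
      return response
    time.sleep(sleep_seconds)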
def test_job_submission():
  JARNAME = posixpath.basename(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
  c = make_logged_in_client()
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)

  # Remember the number of pending jobs beforehand
  n_pending = c.get("/jobsub/status_bar/").context["pending_count"]

  try:
    # Create a job
    response = c.post("/jobsub/new/jar", dict(
      name="wordcount",
      jarfile="/user/test/%s" % JARNAME,
      arguments="wordcount $input $output",
      submit="Save"))
    design_id = response.context["saved"]

    # Submission should get a parameterization form
    response = c.get("/jobsub/submit/%d" % design_id)
    assert_true("<form " in response.content)

    # Create home dir
    setup_cluster_fs(cluster)

    # Prepare sample data
    f = cluster.fs.open("/user/test/input", "w")
    f.write("alpha beta gamma\nepsilon zeta theta\nalpha beta\n")
    f.close()

    # We also have to upload the jar file
    src = file(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
    try:
      dst = cluster.fs.open("/user/test/%s" % JARNAME, "w")
      try:
        shutil.copyfileobj(src, dst)
      finally:
        dst.close()
    finally:
      src.close()

    # Status bar should be at the original count
    assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's parameterize and submit
    INPUT, OUTPUT = "/user/test/input", "/user/test/output"
    response = c.post("/jobsub/submit/%d" % design_id,
                      dict(input=INPUT, output=OUTPUT))
    watch_id = parse_out_id(response)

    # Status bar at original + 1
    assert_equal(n_pending + 1, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's take a look
    response = watch_till_complete(c, watch_id)
    assert_equal(1, len(response.context["job_data"].hadoop_job_ids),
                 "Should have launched and captured exactly one Hadoop job")
    submission = Submission.objects.get(id=watch_id)
    assert_equal(["wordcount", INPUT, OUTPUT],
                 submission.submission_plan.steps[1].bin_hadoop_step.arguments[2:])
    hadoop_job_id = response.context["job_data"].hadoop_job_ids[0]

    # Status bar back to original
    assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

    # Make sure the counts are right
    lines = cluster.fs.open("/user/test/output/part-r-00000").read().splitlines()
    counts = {}
    for line in lines:
      word, count = line.split("\t", 2)
      count = int(count)
      counts[word] = count
    assert_equal(dict(alpha=2, beta=2, gamma=1, epsilon=1, zeta=1, theta=1), counts)

    # And check that the output file has correct ownership.
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["user"],
                 "Wrong username for job output.")
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["group"],
                 "Wrong groupname for job output.")

    # Just to be sure it really happened, check the Job struct.
    # There's no way to get just one job (eek!)...
    job_map = dict([(x.jobID.asString, x) for x in cluster.jt.completed_jobs().jobs])
    this_job = job_map[hadoop_job_id]

    # Check username and group
    assert_equal("test", this_job.profile.user)

    # Let's kill the temporary directory, and make sure watch
    # output still works.  We do file deletion very explicitly,
    # because tests that might mistakenly delete your home directory
    # tend to cause unhappiness.
    server_id = Submission.objects.get(id=watch_id).submission_handle.id
    tmp_dir = ServerSubmissionState.objects.get(id=server_id).tmp_dir
    for filename in ("jobs", "stderr", "stdout", os.path.join("work", "tmp.jar")):
      os.remove(os.path.join(tmp_dir, filename))
    os.rmdir(os.path.join(tmp_dir, "work"))
    os.rmdir(tmp_dir)
    response = c.get("/jobsub/watch/%d" % watch_id)
    assert_true("No longer available" in response.content)
  finally:
    cluster.shutdown()
    jobsubd.exit()
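
# setup_class above acquires a shared cluster and an in-process jobsubd, but no
# matching class-level teardown appears in this section. A hedged sketch of what
# such a counterpart could look like, assuming the cls attributes set in
# setup_class and the same shutdown calls the tests use in their finally blocks
# (this is an assumption, not code from the original file):
def teardown_class(cls):
  """Hypothetical sketch: release resources acquired in setup_class."""
  cls.jobsubd.exit()       # stop the in-process jobsub daemon
  cls.cluster.shutdown()   # shut down the shared mini cluster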