Example #1
File: tests.py Project: anutron/hue
  def setup_class(cls):
    client = make_logged_in_client('test')
    cluster = mini_cluster.shared_cluster(conf=True)
    jobsubd = in_process_jobsubd(cluster.config_dir)

    # Make home directory
    cluster.fs.setuser(cluster.superuser)
    if not cluster.fs.exists("/user/test"):
      cluster.fs.mkdir("/user/test")
    cluster.fs.chown("/user/test", "test", "test")
    cluster.fs.setuser("test")

    cls.cluster = cluster
    cls.client = client
    cls.jobsubd = jobsubd
Example #2
    def setup_class(cls):
        client = make_logged_in_client('test')
        cluster = mini_cluster.shared_cluster(conf=True)
        jobsubd = in_process_jobsubd(cluster.config_dir)

        # Make home directory
        cluster.fs.setuser(cluster.superuser)
        if not cluster.fs.exists("/user/test"):
            cluster.fs.mkdir("/user/test")
        cluster.fs.chown("/user/test", "test", "test")

        if not cluster.fs.exists("/tmp"):
            cluster.fs.mkdir("/tmp")
        cluster.fs.chmod("/tmp", int('777', 8))

        cluster.fs.setuser("test")

        cls.cluster = cluster
        cls.client = client
        cls.jobsubd = jobsubd
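Neither example shows the matching teardown, but since the function-level tests below clean up with jobsubd.exit() and cluster.shutdown() in their finally blocks, the class-level counterpart presumably looks roughly like this (a sketch under that assumption, not the project's actual code):

  def teardown_class(cls):
    # Mirror the cleanup the function-level tests below do in their finally blocks.
    cls.jobsubd.exit()
    cls.cluster.shutdown()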
Example #3
def test_jobsub_setup_and_samples():
  """
  Merely exercises jobsub_setup, and then runs
  all the examples.
  """
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  try:
    c = make_logged_in_client()

    # Create a job, to make sure that it sticks around
    response = c.post("/jobsub/new/jar", dict(
      name="should_stick_around", 
      jarfile="foo",
      arguments="foo", submit="Save"))
    design_id = response.context["saved"]

    import jobsub.management.commands.jobsub_setup as jobsub_setup
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()

    # Make sure we have three job designs now.
    assert_equal(3, JobDesign.objects.filter(name__startswith="Example: ").count())

    # Make sure "should_stick_around" is still there
    assert_equal(1, JobDesign.objects.filter(name="should_stick_around").count())

    # Make sure sample user got created.
    assert_equal(1, User.objects.filter(username="******").count())
    assert_equal(1, User.objects.filter(username="******").count())

    # And now submit and run the samples
    # pi Example
    # Irritatingly, /user/test needs to exist first
    setup_cluster_fs(cluster)
    id = JobDesign.objects.get(name__contains="Example: Pi").id
    response = c.get("/jobsub/submit/%d" % id)
    assert_true("Iterations per mapper" in response.content)
    assert_true("Num of mappers" in response.content)
    response = c.post("/jobsub/submit/%d" % id, dict(
      iterations_per_mapper=10,
      num_of_mappers=1))
    response = watch_till_complete(c, parse_out_id(response))

    assert_true("Estimated value of Pi is" in response.context["job_data"].stdout_tail)
    assert_true("bin/hadoop returned 0" in response.content)

    # Wordcount example
    id = JobDesign.objects.get(name__contains="Example: Streaming Wordcount").id
    response = c.get("/jobsub/submit/%d" % id)
    response = c.post("/jobsub/submit/%d" % id, dict(
      output="/user/test/jobsub-streaming-test"))
    response = watch_till_complete(c, parse_out_id(response))

    assert_true("streaming.StreamJob: Job complete:" in response.context["job_data"].stderr_tail)
    assert_true(cluster.fs.exists("/user/test/jobsub-streaming-test/part-00000"))

    # Not running sleep example, since it adds little.
  finally:
    jobsubd.exit()
    cluster.shutdown()
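Both submission tests call setup_cluster_fs(cluster), which is not shown on this page. Judging from the setup_class examples above and the comment that /user/test needs to exist first, a minimal sketch of what it has to do would be:

def setup_cluster_fs(cluster):
  # Sketch only: create the test user's home directory (and a world-writable /tmp)
  # as the superuser, then switch back to the "test" user, mirroring setup_class above.
  cluster.fs.setuser(cluster.superuser)
  if not cluster.fs.exists("/user/test"):
    cluster.fs.mkdir("/user/test")
  cluster.fs.chown("/user/test", "test", "test")
  if not cluster.fs.exists("/tmp"):
    cluster.fs.mkdir("/tmp")
  cluster.fs.chmod("/tmp", int('777', 8))
  cluster.fs.setuser("test")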
Example #4
def test_job_submission():
  JARNAME = posixpath.basename(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
  c = make_logged_in_client()
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)

  # Remember the number of pending jobs beforehand
  n_pending = c.get("/jobsub/status_bar/").context["pending_count"]

  try:
      # Create a job
      response = c.post("/jobsub/new/jar", dict(
        name="wordcount", 
        jarfile="/user/test/%s" % JARNAME,
        arguments="wordcount $input $output", submit="Save"))
      design_id = response.context["saved"]

      # Submission should get a parameterization form
      response = c.get("/jobsub/submit/%d" % design_id)
      assert_true("<form " in response.content)

      # Create home dir
      setup_cluster_fs(cluster)

      # Prepare sample data
      f = cluster.fs.open("/user/test/input", "w")
      f.write("alpha beta gamma\nepsilon zeta theta\nalpha beta\n")
      f.close()
      # We also have to upload the jar file
      src = open(hadoop.conf.HADOOP_EXAMPLES_JAR.get(), "rb")
      try:
        dst = cluster.fs.open("/user/test/%s" % JARNAME, "w")
        try:
          shutil.copyfileobj(src, dst)
        finally:
          dst.close()
      finally:
        src.close()

      # Status_bar should be at original
      assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

      # Let's parameterize and submit
      INPUT, OUTPUT = "/user/test/input", "/user/test/output"
      response = c.post("/jobsub/submit/%d" % design_id, 
        dict(input=INPUT, output=OUTPUT))
      watch_id = parse_out_id(response)

      # Status bar at original + 1
      assert_equal(n_pending + 1, c.get("/jobsub/status_bar/").context["pending_count"])

      # Let's take a look
      response = watch_till_complete(c, watch_id)
      assert_equal(1, len(response.context["job_data"].hadoop_job_ids), 
        "Should have launched and captured exactly one Hadoop job")
      submission = Submission.objects.get(id=watch_id)
      assert_equal(["wordcount", INPUT, OUTPUT],
                   submission.submission_plan.steps[1].bin_hadoop_step.arguments[2:])

      hadoop_job_id = response.context["job_data"].hadoop_job_ids[0]

      # Status bar back to original
      assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

      # Make sure the counts are right:
      lines = cluster.fs.open("/user/test/output/part-r-00000").read().splitlines()
      counts = {}
      for line in lines:
        word, count = line.split("\t", 2)
        count = int(count)
        counts[word] = count
      assert_equal(dict(alpha=2, beta=2, gamma=1, epsilon=1, zeta=1, theta=1), counts)

      # And check that the output file has correct permissions.
      assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["user"],
        "Wrong username for job output.")
      assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["group"],
        "Wrong groupname for job output.")

      # Just to be sure it really happened, check the Job struct
      # There's no way to get just one job (eek!)...
      job_map = dict([ (x.jobID.asString, x) for x in cluster.jt.completed_jobs().jobs ])
      this_job = job_map[hadoop_job_id]
      # Check username and group
      assert_equal("test", this_job.profile.user)

      # Let's kill the temporary directory, and make sure watch
      # output still works.  We do file deletion very explicitly,
      # because tests that might mistakenly delete your home directory
      # tend to cause unhappiness.
      server_id = Submission.objects.get(id=watch_id).submission_handle.id
      tmp_dir = ServerSubmissionState.objects.get(id=server_id).tmp_dir
      for filename in ("jobs", "stderr", "stdout", os.path.join("work", "tmp.jar")):
        os.remove(os.path.join(tmp_dir, filename))
      os.rmdir(os.path.join(tmp_dir, "work"))
      os.rmdir(tmp_dir)
      response = c.get("/jobsub/watch/%d" % watch_id)
      assert_true("No longer available" in response.content)
  finally:
    cluster.shutdown()
    jobsubd.exit()
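parse_out_id and watch_till_complete are likewise not shown here. Based on how they are used above (the submit POST leads to a /jobsub/watch/<id> page, and the returned response exposes job_data), they could be sketched roughly as follows; the redirect parsing and the job_data.complete attribute are assumptions, not the project's actual implementation:

import re
import time

def parse_out_id(response):
  # Assumption: the submit view redirects to /jobsub/watch/<id>, so pull the id
  # out of the Location header of the Django test-client response.
  return int(re.search(r"/jobsub/watch/(\d+)", response["Location"]).group(1))

def watch_till_complete(client, watch_id, timeout=300):
  # Assumption: poll the watch page until job_data reports completion.
  deadline = time.time() + timeout
  while time.time() < deadline:
    response = client.get("/jobsub/watch/%d" % watch_id)
    if response.context["job_data"].complete:  # attribute name is an assumption
      return response
    time.sleep(1)
  raise AssertionError("Submission %d did not complete in time" % watch_id)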