Ejemplo n.º 1
0
    def test_capure_stderr(self):
        config["jobtype_capture_process_output"] = True
        jobtype = JobType(fake_assignment())

        with patch.object(process_stderr, "info") as mocked:
            jobtype.log_stderr_line(Mock(id=2), "stderr")

        mocked.assert_called_once_with("task %r: %s", 2, "stderr")
Ejemplo n.º 2
0
    def test_none(self):
        jobtype = JobType(fake_assignment())

        with patch.object(logger, "error") as mocked_error:
            self.assertIsNone(jobtype.format_error(None))

        mocked_error.assert_called_with(
            "No error was defined for this failure.")
Ejemplo n.º 3
0
    def test_process_stderr_line_calls_stdout_line_processing(self):
        jobtype = JobType(fake_assignment())
        logpool.open_log(jobtype.uuid, self.create_file(), ignore_existing=True)
        protocol = Mock(id=4)

        with patch.object(jobtype, "process_stdout_line") as process_mock:
            jobtype.process_stderr_line(protocol, "stderr 4")

        process_mock.assert_called_with(protocol, "stderr 4")
Ejemplo n.º 4
0
    def test_no_capure_stderr(self):
        config["jobtype_capture_process_output"] = False
        protocol = Mock(id=3, pid=33)
        jobtype = JobType(fake_assignment())
        logpool.open_log(jobtype.uuid, self.create_file(), ignore_existing=True)

        with patch.object(logpool, "log") as mocked:
            jobtype.log_stderr_line(protocol, "stderr")

        mocked.assert_called_once_with(jobtype.uuid, STDERR, "stderr", 33)
Ejemplo n.º 5
0
    def test_process_done(self):
        error = Failure(exc_value=Exception())
        error.type = ProcessDone
        error.value = Mock(exitCode=1, message="foobar")

        jobtype = JobType(fake_assignment())
        self.assertEqual(
            jobtype.format_error(error),
            "Process has finished with no apparent errors."
        )
Ejemplo n.º 6
0
    def test_preprocess_can_stop_handling(self):
        jobtype = JobType(fake_assignment())
        protocol = Mock(id=1)

        with nested(
            patch.object(jobtype, "preprocess_stderr_line", return_value=False),
            patch.object(jobtype, "format_stderr_line"),
        ) as (_, mocked_format):
            jobtype.handle_stderr_line(protocol, "stderr 1")

        self.assertEqual(mocked_format.call_count, 0)
Ejemplo n.º 7
0
    def test_preprocess_replaces_output(self):
        jobtype = JobType(fake_assignment())
        logpool.open_log(jobtype.uuid, self.create_file(), ignore_existing=True)
        protocol = Mock(id=2)

        with nested(
            patch.object(jobtype, "preprocess_stderr_line", return_value="foo"),
            patch.object(jobtype, "format_stderr_line"),
        ) as (_, mocked):
            jobtype.handle_stderr_line(protocol, "stderr 2")

        mocked.assert_called_with(protocol, "foo")
Ejemplo n.º 8
0
    def test_process_terminated(self):
        error = Failure(exc_value=Exception())
        error.type = ProcessTerminated
        error.value = Mock(exitCode=1, message="foobar")

        jobtype = JobType(fake_assignment())
        self.assertEqual(
            jobtype.format_error(error),
            "Process may have terminated abnormally, please check "
            "the logs.  Message from error "
            "was 'foobar'"
        )
Ejemplo n.º 9
0
    def test_format_replaces_output(self):
        jobtype = JobType(fake_assignment())
        logpool.open_log(jobtype.uuid, self.create_file(), ignore_existing=True)
        protocol = Mock(id=3)

        with nested(
            patch.object(jobtype, "format_stdout_line", return_value="bar"),
            patch.object(jobtype, "log_stdout_line"),
            patch.object(jobtype, "process_stdout_line"),
        ) as (_, log_mock, process_mock):
            jobtype.handle_stdout_line(protocol, "stdout 3")

        log_mock.assert_called_with(protocol, "bar")
        process_mock.assert_called_with(protocol, "bar")
Ejemplo n.º 10
0
    def test_other_conversion_problem(self):
        jobtype = JobType(fake_assignment())

        class OtherType(object):
            def __str__(self):
                raise NotImplementedError("__str__ not implemented")

        other = OtherType()

        try:
            str(other)
        except NotImplementedError as str_error:
            pass

        with patch.object(logger, "error") as mocked_error:
            self.assertIsNone(jobtype.format_error(other))

        mocked_error.assert_called_with(
            "Don't know how to format %r as a string.  Error while calling "
            "str(%r) was %s.", other, str(str_error))
Ejemplo n.º 11
0
 def test_other(self):
     jobtype = JobType(fake_assignment())
     self.assertEqual(jobtype.format_error(42), str(42))
Ejemplo n.º 12
0
 def test_string(self):
     jobtype = JobType(fake_assignment())
     self.assertEqual(
         jobtype.format_error("This is a string"),
         "This is a string"
     )
Ejemplo n.º 13
0
 def test_exception(self):
     jobtype = JobType(fake_assignment())
     self.assertEqual(
         jobtype.format_error(TypeError("foobar 2")),
         "foobar 2"
     )
Ejemplo n.º 14
0
 def test_schema(self):
     with self.assertRaises(MultipleInvalid):
         JobType.load({})
Ejemplo n.º 15
0
    def post(self, **kwargs):
        if request_from_master(kwargs["request"]):
            config.master_contacted()

        request = kwargs["request"]
        request_data = kwargs["data"]

        # First, get the resources we have *right now*.  In some cases
        # this means using the functions in pyfarm.core.sysinfo because
        # entries in `config` could be slightly out of sync with the system.
        memory_free = free_ram()
        cpus = config["agent_cpus"]
        requires_ram = request_data["job"].get("ram")
        requires_cpus = request_data["job"].get("cpus")

        if ("agent_id" in request_data and
            request_data["agent_id"] != config["agent_id"]):
            logger.error("Wrong agent_id in assignment: %s. Our id is %s",
                         request_data["agent_id"], config["agent_id"])
            return (
                dumps({"error": "You have the wrong agent. "
                                "I am %s." % config["agent_id"],
                       "agent_id": config["agent_id"]}),
                BAD_REQUEST
            )

        elif self.agent.reannounce_lock.locked:
            logger.warning("Temporarily rejecting assignment because we "
                           "are in the middle of a reannounce.")
            return (
                dumps({"error": "Agent cannot accept assignments because of a "
                                "reannounce in progress. Try again shortly."}),
                SERVICE_UNAVAILABLE
            )

        elif self.agent.shutting_down:
            logger.error("Rejecting assignment because the agent is in the "
                         "process of shutting down.")
            return (
                dumps({"error": "Agent cannot accept assignments because it is "
                                "shutting down."}),
                SERVICE_UNAVAILABLE
            )

        elif "restart_requested" in config \
                and config["restart_requested"] is True:
            logger.error("Rejecting assignment because of scheduled restart.")
            return (
                dumps({"error": "Agent cannot accept assignments because of a "
                                "pending restart."}),
                SERVICE_UNAVAILABLE
            )

        elif "agent_id" not in config:
            logger.error(
                "Agent has not yet connected to the master or `agent_id` "
                "has not been set yet.")
            return (
                dumps({"error": "agent_id has not been set in the config"}),
                SERVICE_UNAVAILABLE
            )

        # Do we have enough ram?
        elif requires_ram is not None and requires_ram > memory_free:
            logger.error(
                "Task %s requires %sMB of ram, this agent has %sMB free.  "
                "Rejecting Task %s.",
                request_data["job"]["id"], requires_ram, memory_free,
                request_data["job"]["id"])
            config["free_ram"] = memory_free
            return (
                dumps({"error": "Not enough ram",
                       "agent_ram": memory_free,
                       "requires_ram": requires_ram}),
                BAD_REQUEST
            )

        # Do we have enough cpus (count wise)?
        elif requires_cpus is not None and requires_cpus > cpus:
            logger.error(
                "Task %s requires %s CPUs, this agent has %s CPUs.  "
                "Rejecting Task %s.",
                request_data["job"]["id"], requires_cpus, cpus,
                request_data["job"]["id"])
            return (
                dumps({"error": "Not enough cpus",
                       "agent_cpus": cpus,
                       "requires_cpus": requires_cpus}),
                BAD_REQUEST
            )

        new_task_ids = set(task["id"] for task in request_data["tasks"])

        for assignment in config["current_assignments"].itervalues():
            existing_task_ids = set(x["id"] for x in assignment["tasks"])

            # If the assignment is identical to one we already have
            if existing_task_ids == new_task_ids:
                logger.debug(
                    "Ignoring repeated assignment of the same batch")
                return dumps({"id": assignment["id"]}), ACCEPTED

            # If there is only a partial overlap
            elif existing_task_ids & new_task_ids:
                logger.error("Rejecting assignment with partial overlap with "
                             "existing assignment.")
                unknown_task_ids = new_task_ids - existing_task_ids
                return (
                    dumps({"error": "Partial overlap of tasks",
                           "rejected_task_ids": list(unknown_task_ids)}),
                    CONFLICT
                )

        if not config["agent_allow_sharing"]:
            for jobtype in config["jobtypes"].itervalues():
                num_finished_tasks = (len(jobtype.finished_tasks) +
                                      len(jobtype.failed_tasks))
                if len(jobtype.assignment["tasks"]) > num_finished_tasks:
                    logger.error("Rejecting an assignment that would require "
                                 "agent sharing")
                    return (
                        dumps({
                            "error": "Agent does not allow multiple "
                                     "assignments",
                            "rejected_task_ids": list(new_task_ids)}),
                        CONFLICT
                    )

        assignment_uuid = uuid4()
        request_data.update(id=assignment_uuid)
        config["current_assignments"][assignment_uuid] = request_data
        logger.debug("Accepted assignment %s: %r",
                     assignment_uuid, request_data)
        logger.info("Accept assignment from job %s with %s tasks",
                    request_data["job"]["title"], len(request_data["tasks"]))

        def assignment_failed(result, assign_id):
            logger.error(
                "Assignment %s failed, result: %r, removing.", assign_id, result)
            logger.error(result.getTraceback())
            if (len(config["current_assignments"]) <= 1 and
                not self.agent.shutting_down):
                config["state"] = AgentState.ONLINE
                self.agent.reannounce(force=True)
            # Do not mark the assignment as failed if the reason for failing
            # was that we ran out of disk space
            failed = not isinstance(result.value, InsufficientSpaceError)
            assignment = config["current_assignments"].pop(assign_id)
            if "jobtype" in assignment:
                jobtype_id = assignment["jobtype"].pop("id", None)
                if jobtype_id:
                    instance = config["jobtypes"].pop(jobtype_id, None)
                    instance.stop(
                        assignment_failed=failed,
                        avoid_reassignment=not failed,
                        error="Error in jobtype: %r. "
                              "Traceback: %s" % (result,
                                                 traceback.format_exc()))

        def assignment_started(_, assign_id):
            logger.debug("Assignment %s has started", assign_id)
            config["state"] = AgentState.RUNNING
            self.agent.reannounce(force=True)

        def remove_assignment(_, assign_id):
            assignment = config["current_assignments"].pop(assign_id)
            if "jobtype" in assignment:
                jobtype_id = assignment["jobtype"].pop("id", None)
                if jobtype_id:
                    config["jobtypes"].pop(jobtype_id, None)

        def assignment_stopped(_, assign_id):
            logger.debug("Assignment %s has stopped", assign_id)
            if (len(config["current_assignments"]) <= 1 and
                not self.agent.shutting_down):
                config["state"] = AgentState.ONLINE
                self.agent.reannounce(force=True)
            assignment = config["current_assignments"][assign_id]
            if "jobtype" in assignment:
                jobtype_id = assignment["jobtype"].pop("id", None)
                if jobtype_id:
                    jobtype = config["jobtypes"].pop(jobtype_id, None)
                    updates_deferred = DeferredList(
                        jobtype.task_update_deferreds)
                    updates_deferred.addBoth(remove_assignment, assign_id)
            else:
                config["current_assignments"].pop(assign_id)

        def restart_if_necessary(_):  # pragma: no cover
            if "restart_requested" in config and config["restart_requested"]:
                stopping = config["agent"].stop()
                stopping.addCallbacks(lambda _: reactor.stop(),
                                      lambda _: reactor.stop())

        def load_jobtype_failed(result, assign_id):
            logger.error(
                "Loading jobtype for assignment %s failed, removing.", assign_id)
            traceback = result.getTraceback()
            logger.debug("Got traceback")
            logger.error(traceback)
            assignment = config["current_assignments"].pop(assign_id)

            # Mark all tasks as failed on master and set an error message
            logger.debug("Marking tasks in assignment as failed")
            def post_update(post_url, post_data, task, delay=0):
                post_func = partial(post, post_url, data=post_data,
                    callback=lambda x: result_callback(
                        post_url, post_data, task, x),
                    errback=lambda x: error_callback(
                        post_url, post_data, task, x))
                reactor.callLater(delay, post_func)

            def result_callback(cburl, cbdata, task, response):
                if 500 <= response.code < 600:
                    logger.error(
                        "Error while marking task %s as failed on master, "
                        "retrying", task["id"])
                    post_update(cburl, cbdata, task, delay=http_retry_delay())

                elif response.code != OK:
                    logger.error(
                        "Could not mark task %s as failed, server response "
                        "code was %s", task["id"], response.code)

                else:
                    logger.info(
                        "Marked task %s as failed on master", task["id"])

            def error_callback(cburl, cbdata, task, failure_reason):
                logger.error(
                    "Error while marking task %s as failed, retrying",
                    task["id"], failure_reason)
                post_update(cburl, cbdata, task, delay=http_retry_delay())

            for task in assignment["tasks"]:
                url = "%s/jobs/%s/tasks/%s" % (
                    config["master_api"], assignment["job"]["id"], task["id"])
                data = {
                    "state": WorkState.FAILED,
                    "last_error": traceback}
                post_update(url, data, task)

            # If the loading was partially successful for some reason, there
            # might already be an entry for this jobtype in the config.
            # Remove it if it exists.
            if "jobtype" in assignment:
                jobtype_id = assignment["jobtype"].pop("id", None)
                if jobtype_id:
                    config["jobtypes"].pop(jobtype_id, None)

        def loaded_jobtype(jobtype_class, assign_id):
            # TODO: report error to master
            if hasattr(jobtype_class, "getTraceback"):
                logger.error(jobtype_class.getTraceback())
                return

            # TODO: add call to prepare_for_job
            # TODO: add call to spawn_persistent_process

            # Instance the job type and pass in the assignment data.
            instance = jobtype_class(request_data)

            if not isinstance(instance, JobType):
                raise TypeError(
                    "Expected a subclass of "
                    "pyfarm.jobtypes.core.jobtype.JobType")

            # TODO: add callback to cleanup_after_job
            # TODO: add callback to stop persistent process
            try:
                started_deferred, stopped_deferred = instance._start()
                started_deferred.addCallback(assignment_started, assign_id)
                started_deferred.addErrback(assignment_failed, assign_id)
                stopped_deferred.addCallback(assignment_stopped, assign_id)
                stopped_deferred.addErrback(assignment_failed, assign_id)
                stopped_deferred.addBoth(restart_if_necessary)
                stopped_deferred.addBoth(
                    lambda *args: instance._remove_tempdirs())
                stopped_deferred.addBoth(
                    lambda *args: instance._close_logs())
                stopped_deferred.addBoth(
                    lambda *args: instance._upload_logfile())
            except Exception as e:
                logger.error("Error on starting jobtype, stopping it now.  "
                             "Error was: %r. Traceback: %s", e,
                             traceback.format_exc())
                instance.stop(assignment_failed=True,
                              error="Error while loading jobtype: %r. "
                                    "Traceback: %s" %
                                    (e, traceback.format_exc()))
                assignment = config["current_assignments"].pop(assign_id)
                if "jobtype" in assignment:
                    jobtype_id = assignment["jobtype"].pop("id", None)
                    if jobtype_id:
                        config["jobtypes"].pop(jobtype_id, None)

        # Load the job type then pass the class along to the
        # callback.  No errback here because all the errors
        # are handled internally in this case.
        jobtype_loader = JobType.load(request_data)
        jobtype_loader.addCallback(loaded_jobtype, assignment_uuid)
        jobtype_loader.addErrback(load_jobtype_failed, assignment_uuid)

        return dumps({"id": assignment_uuid}), ACCEPTED