def error(self, request, code, message):
    """
    Log ``message`` and write an error response to ``request``, choosing
    the response format from the content types the request accepts.

    * ``text/html``: the ``error.html`` template is rendered with the
      code, its entry in ``responses`` and the message; the response
      code is ``code``.
    * ``application/json``: ``{"error": message}`` is written; the
      response code is ``code``.
    * anything else: the response code is ``UNSUPPORTED_MEDIA_TYPE`` and
      the JSON body names ``self.ALLOWED_ACCEPT``.

    The request is finished in every case.
    """
    accepted = self.get_accept(request)
    logger.error(message)

    if "text/html" in accepted:
        request.setResponseCode(code)
        page = template.load("error.html").render(
            code=code, code_msg=responses[code], message=message)
        request.write(page.encode())

    elif "application/json" in accepted:
        request.setResponseCode(code)
        body = dumps({"error": message})
        request.write(body)

    else:
        # None of the formats we can produce were requested.
        request.setResponseCode(UNSUPPORTED_MEDIA_TYPE)
        unsupported = "Can only handle one of %s here" % self.ALLOWED_ACCEPT
        request.write(dumps({"error": unsupported}))

    request.finish()
def get(self, **kwargs):
    """
    Return the JSON document ``{"versions": [1]}`` together with ``OK``.

    When a ``request`` keyword is supplied and ``request_from_master``
    reports true for it, ``config.master_contacted()`` is called first.
    """
    incoming = kwargs.get("request")
    from_master = incoming is not None and request_from_master(incoming)
    if from_master:
        config.master_contacted()

    body = dumps(versions=[1])
    return body, OK
def request(self, method, **kwargs):
    """
    Build and return a ``DummyRequest`` for use in a test.

    :param method:
        The HTTP method; stored upper-cased on the request.

    :keyword data:
        Optional object which is serialized with ``dumps`` and placed in
        ``request.content`` (rewound to the start).

    :keyword headers:
        Optional dictionary of headers.  String values are wrapped in a
        list before being set as raw headers.  The dictionary passed in
        by the caller is never modified.

    :keyword uri:
        The uri of the request, defaults to ``self.URI``.

    Any other keyword fails the test.  When ``self.DEFAULT_HEADERS`` is
    set its entries are merged over the supplied headers.
    """
    data = kwargs.pop("data", None)
    headers = kwargs.pop("headers", {})
    uri = kwargs.pop("uri", self.URI)

    request = DummyRequest(uri)
    request.method = method.upper()

    if data is not None:
        request.content = StringIO()
        request.content.write(dumps(data))
        request.content.seek(0)

    if self.DEFAULT_HEADERS is not NotImplemented:
        # Validate before merging so a bad ``headers`` value is reported
        # as a test failure, then merge into a copy so the caller's
        # dictionary is left untouched.
        self.failUnlessIsInstance(headers, dict)
        merged = dict(headers)
        merged.update(self.DEFAULT_HEADERS)
        headers = merged

    if headers:
        self.failUnlessIsInstance(headers, dict)
        for key, value in headers.items():
            if isinstance(value, STRING_TYPES):
                value = [value]

            self.failUnlessIsInstance(value, list)
            request.requestHeaders.setRawHeaders(key, value)

    # Everything we understand has been popped, anything left is a typo
    # or an unsupported keyword.
    self.failUnlessEqual(kwargs, {}, "Unknown keywords %s" % kwargs.keys())
    return request
def get(self, **_):
    """
    Return a JSON document describing the current status of the agent:
    state, hostname, memory figures, child and grandchild process
    counts, pids, agent id, time since the last master contact and last
    announce (when those are datetimes), lock file, uptime and the keys
    of ``config["jobtypes"]``.  ``farm_name`` is included only when it
    is set to a truthy value.
    """
    def elapsed_since(moment):
        # Values that are not datetimes (never contacted/announced)
        # are passed through untouched.
        if isinstance(moment, datetime):  # pragma: no cover
            return datetime.utcnow() - moment
        return moment

    # Direct children are counted separately so grandchildren can be
    # derived from the recursive total.
    agent_process = psutil.Process()
    direct_child_processes = len(agent_process.children(recursive=False))
    all_child_processes = len(agent_process.children(recursive=True))
    grandchild_processes = all_child_processes - direct_child_processes

    # How long ago did we last talk to the master (if ever)?
    contacted = elapsed_since(config.master_contacted(update=False))

    # How long ago did we last announce ourselves to the master
    # (if ever)?
    last_announce = elapsed_since(config.get("last_announce", None))

    data = {
        "state": config["state"],
        "agent_hostname": config["agent_hostname"],
        "free_ram": memory.free_ram(),
        "agent_process_ram": memory.process_memory(),
        "consumed_ram": memory.total_consumption(),
        "child_processes": direct_child_processes,
        "grandchild_processes": grandchild_processes,
        "pids": config["pids"],
        "agent_id": config["agent_id"],
        "last_master_contact": contacted,
        "last_announce": last_announce,
        "agent_lock_file": config["agent_lock_file"]}

    running_for = timedelta(seconds=time.time() - config["start"])
    data["uptime"] = total_seconds(running_for)
    data["jobs"] = list(config["jobtypes"].keys())

    farm_name = config["farm_name"]
    if farm_name:
        data["farm_name"] = farm_name

    return dumps(data)
def fake_work():
    """
    Command line entry point which creates a job type and a job on the
    master (unless they already exist) and then posts the usable tasks
    of that job directly to an agent's ``/assign`` endpoint.

    Only tasks which are ``queued`` or ``failed`` and which do not
    already have an agent assigned are sent.  The function returns
    without posting anything when the requested job does not exist or
    when no usable tasks are found.
    """
    parser = argparse.ArgumentParser(
        description="Quick and dirty script to create a job type, a job, and "
                    "some tasks which are then posted directly to the "
                    "agent. The primary purpose of this script is to test "
                    "the internal of the job types")
    parser.add_argument(
        "--master-api", default="http://127.0.0.1/api/v1",
        help="The url to the master's api [default: %(default)s]")
    parser.add_argument(
        "--agent-api", default="http://127.0.0.1:50000/api/v1",
        help="The url to the agent's api [default: %(default)s]")
    parser.add_argument(
        "--jobtype", default="FakeRender",
        help="The job type to use [default: %(default)s]")
    parser.add_argument(
        "--job", type=int,
        help="If provided then this will be the job we pull tasks from "
             "and assign to the agent. Please note we'll only be pulling "
             "tasks that aren't running or assigned.")
    args = parser.parse_args()
    logger.info("Master args.master_api: %s", args.master_api)
    logger.info("Agent args.agent_api: %s", args.agent_api)

    # Urls are joined by plain concatenation below so a trailing slash
    # would produce a double slash.
    assert not args.agent_api.endswith("/")
    assert not args.master_api.endswith("/")

    # session to make requests with
    session = requests.Session()
    session.headers.update({"content-type": "application/json"})

    existing_jobtype = session.get(
        args.master_api + "/jobtypes/%s" % args.jobtype)

    # Create a FakeRender job type if one does not exist
    if not existing_jobtype.ok:
        sourcecode = dedent("""
        from pyfarm.jobtypes.examples import %s as _%s

        class %s(_%s):
            pass""" % (args.jobtype, args.jobtype, args.jobtype, args.jobtype))
        response = session.post(
            args.master_api + "/jobtypes/",
            data=dumps({
                "name": args.jobtype,
                "classname": args.jobtype,
                "code": sourcecode,
                "max_batch": 1}))
        assert response.ok, response.json()
        jobtype_data = response.json()
        logger.info(
            "Created job type %r, id %r", args.jobtype, jobtype_data["id"])

    else:
        jobtype_data = existing_jobtype.json()
        logger.info(
            "Job type %r already exists, id %r",
            args.jobtype, jobtype_data["id"])

    jobtype_version = jobtype_data["version"]

    if args.job is None:
        # No job requested, create a single frame job to work with.
        job = session.post(
            args.master_api + "/jobs/",
            data=dumps({
                "start": 1,
                "end": 1,
                "title": "Fake Job - %s" % int(time.time()),
                "jobtype": args.jobtype}))
        assert job.ok, job.json()
        job = job.json()
        logger.info("Job %r created", job["id"])

    else:
        job = session.get(args.master_api + "/jobs/%s" % args.job)
        if not job.ok:
            logger.error("No such job with id %r", args.job)
            return
        else:
            job = job.json()
            logger.info("Job %r exists", job["id"])

    tasks = session.get(args.master_api + "/jobs/%s/tasks/" % job["id"])
    assert tasks.ok

    job_tasks = []
    for task in tasks.json():
        if task["state"] not in ("queued", "failed"):
            logger.info(
                "Can't use task %s, it's state is not 'queued' or 'failed'",
                task["id"])
            continue

        if task["agent_id"] is not None:
            logger.info(
                "Can't use task %s, it already has an agent assigned",
                task["id"])
            # Skip the task, it belongs to another agent.
            continue

        job_tasks.append({"id": task["id"], "frame": task["frame"]})

    if not job_tasks:
        logger.error("Could not find any tasks to send for job %s", job["id"])
        return

    logger.info(
        "Found %s tasks from job %s to assign to %r",
        len(job_tasks), job["id"], args.agent_api)

    assignment_data = {
        "job": {
            "id": job["id"],
            "by": job["by"],
            "ram": job["ram"],
            "ram_warning": job["ram_warning"],
            "ram_max": job["ram_max"],
            "cpus": job["cpus"],
            "batch": job["batch"],
            "user": job["user"],
            "data": job["data"],
            "environ": job["environ"],
            "title": job["title"]},
        "jobtype": {
            "name": args.jobtype,
            "version": jobtype_version},
        "tasks": job_tasks}

    # Drop any keys which don't have values since this
    # would break the schema validation in the agent.
    # Iterate over a copy of the keys because we delete while looping.
    for key in list(assignment_data["job"]):
        if assignment_data["job"][key] is None:
            del assignment_data["job"][key]

    response = session.post(
        args.agent_api + "/assign",
        data=dumps(assignment_data))
    assert response.ok, response.json()
    logger.info("Tasks posted to agent")
def test_dumps_uuid(self):
    """A UUID value is dumped the same way as its string form."""
    identifier = uuid4()
    actual = dumps({"uuid": identifier})
    expected = dumps({"uuid": str(identifier)})
    self.assertEqual(actual, expected)
def test_dumps_decimal(self):
    """
    With pretty printing disabled, dumping a Decimal matches ``dumps_``
    called with ``default_json_encoder``.
    """
    config["agent_pretty_json"] = False
    payload = {"decimal": Decimal("1.2")}
    actual = dumps(payload)
    expected = dumps_(payload, default=default_json_encoder)
    self.assertEqual(actual, expected)
def test_dumps_datetime(self):
    """
    With pretty printing disabled, dumping a datetime matches ``dumps_``
    called with ``default_json_encoder``.
    """
    config["agent_pretty_json"] = False
    payload = {"datetime": datetime.utcnow()}
    actual = dumps(payload)
    expected = dumps_(payload, default=default_json_encoder)
    self.assertEqual(actual, expected)
def test_dumps_single_argument(self):
    """
    With pretty printing disabled, dumping a single key taken from
    ``self.data`` matches a plain ``dumps_`` call.
    """
    config["agent_pretty_json"] = False
    # ``keys()`` returns a non-indexable view on Python 3, building a
    # list from the mapping works on both Python 2 and 3.
    data = list(self.data)[0]
    self.assertEqual(dumps(data), dumps_(data))
def test_dumps_not_pretty(self):
    """With pretty printing disabled, output matches plain ``dumps_``."""
    config["agent_pretty_json"] = False
    actual = dumps(self.data)
    expected = dumps_(self.data)
    self.assertEqual(actual, expected)
def test_dumps_pretty(self):
    """With pretty printing enabled, output matches ``dumps_(indent=2)``."""
    config["agent_pretty_json"] = True
    actual = dumps(self.data)
    expected = dumps_(self.data, indent=2)
    self.assertEqual(actual, expected)
def post(self, **kwargs):
    """
    Accept or reject an assignment of tasks to this agent.

    :keyword request:
        The incoming request.  When ``request_from_master`` reports true
        for it, ``config.master_contacted()`` is called.

    :keyword data:
        The assignment: a dictionary with ``job``, ``tasks`` and
        (optionally) ``agent_id`` and ``jobtype`` entries.

    :return:
        A ``(json_body, response_code)`` tuple:

        * ``BAD_REQUEST`` for a mismatched ``agent_id`` or when the job
          requires more ram or cpus than are available
        * ``SERVICE_UNAVAILABLE`` during a reannounce, a shutdown, a
          pending restart or while ``agent_id`` is not configured
        * ``CONFLICT`` when the tasks partially overlap an existing
          assignment or when agent sharing is disabled and another
          assignment still has unfinished tasks
        * ``ACCEPTED`` with the assignment id otherwise, including when
          the exact same batch of tasks was already assigned

    On acceptance the assignment is stored in
    ``config["current_assignments"]`` and the job type is loaded
    asynchronously; the callbacks defined below keep the agent state and
    the stored assignments up to date as the job type starts, stops or
    fails.
    """
    request = kwargs["request"]
    if request_from_master(request):
        config.master_contacted()

    request_data = kwargs["data"]

    # First, get the resources we have *right now*.  In some cases
    # this means using the functions in pyfarm.core.sysinfo because
    # entries in `config` could be slightly out of sync with the system.
    memory_free = free_ram()
    cpus = config["agent_cpus"]
    requires_ram = request_data["job"].get("ram")
    requires_cpus = request_data["job"].get("cpus")

    # Only compare ids when we actually have one.  A missing `agent_id`
    # in the config is reported by its own branch further down instead
    # of raising a KeyError here.
    if ("agent_id" in request_data and "agent_id" in config and
            request_data["agent_id"] != config["agent_id"]):
        logger.error("Wrong agent_id in assignment: %s. Our id is %s",
                     request_data["agent_id"], config["agent_id"])
        return (
            dumps({"error": "You have the wrong agent. "
                            "I am %s." % config["agent_id"],
                   "agent_id": config["agent_id"]}),
            BAD_REQUEST
        )

    elif self.agent.reannounce_lock.locked:
        logger.warning("Temporarily rejecting assignment because we "
                       "are in the middle of a reannounce.")
        return (
            dumps({"error": "Agent cannot accept assignments because of a "
                            "reannounce in progress. Try again shortly."}),
            SERVICE_UNAVAILABLE
        )

    elif self.agent.shutting_down:
        logger.error("Rejecting assignment because the agent is in the "
                     "process of shutting down.")
        return (
            dumps({"error": "Agent cannot accept assignments because it is "
                            "shutting down."}),
            SERVICE_UNAVAILABLE
        )

    elif "restart_requested" in config \
            and config["restart_requested"] is True:
        logger.error("Rejecting assignment because of scheduled restart.")
        return (
            dumps({"error": "Agent cannot accept assignments because of a "
                            "pending restart."}),
            SERVICE_UNAVAILABLE
        )

    elif "agent_id" not in config:
        logger.error(
            "Agent has not yet connected to the master or `agent_id` "
            "has not been set yet.")
        return (
            dumps({"error": "agent_id has not been set in the config"}),
            SERVICE_UNAVAILABLE
        )

    # Do we have enough ram?
    elif requires_ram is not None and requires_ram > memory_free:
        logger.error(
            "Task %s requires %sMB of ram, this agent has %sMB free. "
            "Rejecting Task %s.",
            request_data["job"]["id"], requires_ram, memory_free,
            request_data["job"]["id"])
        config["free_ram"] = memory_free
        return (
            dumps({"error": "Not enough ram",
                   "agent_ram": memory_free,
                   "requires_ram": requires_ram}),
            BAD_REQUEST
        )

    # Do we have enough cpus (count wise)?
    elif requires_cpus is not None and requires_cpus > cpus:
        logger.error(
            "Task %s requires %s CPUs, this agent has %s CPUs. "
            "Rejecting Task %s.",
            request_data["job"]["id"], requires_cpus, cpus,
            request_data["job"]["id"])
        return (
            dumps({"error": "Not enough cpus",
                   "agent_cpus": cpus,
                   "requires_cpus": requires_cpus}),
            BAD_REQUEST
        )

    new_task_ids = set(task["id"] for task in request_data["tasks"])

    for assignment in config["current_assignments"].values():
        existing_task_ids = set(x["id"] for x in assignment["tasks"])

        # If the assignment is identical to one we already have
        if existing_task_ids == new_task_ids:
            logger.debug(
                "Ignoring repeated assignment of the same batch")
            return dumps({"id": assignment["id"]}), ACCEPTED

        # If there is only a partial overlap
        elif existing_task_ids & new_task_ids:
            logger.error("Rejecting assignment with partial overlap with "
                         "existing assignment.")
            unknown_task_ids = new_task_ids - existing_task_ids
            return (
                dumps({"error": "Partial overlap of tasks",
                       "rejected_task_ids": list(unknown_task_ids)}),
                CONFLICT
            )

    if not config["agent_allow_sharing"]:
        for jobtype in config["jobtypes"].values():
            num_finished_tasks = (len(jobtype.finished_tasks) +
                                  len(jobtype.failed_tasks))

            # A job type with unfinished tasks is still using the agent.
            if len(jobtype.assignment["tasks"]) > num_finished_tasks:
                logger.error("Rejecting an assignment that would require "
                             "agent sharing")
                return (
                    dumps({
                        "error": "Agent does not allow multiple "
                                 "assignments",
                        "rejected_task_ids": list(new_task_ids)}),
                    CONFLICT
                )

    assignment_uuid = uuid4()
    request_data.update(id=assignment_uuid)
    config["current_assignments"][assignment_uuid] = request_data
    logger.debug("Accepted assignment %s: %r",
                 assignment_uuid, request_data)
    logger.info("Accept assignment from job %s with %s tasks",
                request_data["job"]["title"], len(request_data["tasks"]))

    def assignment_failed(result, assign_id):
        """
        Errback for the started/stopped deferreds: removes the
        assignment and stops its job type instance, if there is one.
        """
        # This runs as an errback, outside of any `except` block, so
        # the traceback has to come from the failure itself.
        failure_traceback = result.getTraceback()
        logger.error(
            "Assignment %s failed, result: %r, removing.",
            assign_id, result)
        logger.error(failure_traceback)
        if (len(config["current_assignments"]) <= 1 and
                not self.agent.shutting_down):
            config["state"] = AgentState.ONLINE
            self.agent.reannounce(force=True)

        # Do not mark the assignment as failed if the reason for failing
        # was that we ran out of disk space
        failed = not isinstance(result.value, InsufficientSpaceError)
        assignment = config["current_assignments"].pop(assign_id)
        if "jobtype" in assignment:
            jobtype_id = assignment["jobtype"].pop("id", None)
            if jobtype_id:
                instance = config["jobtypes"].pop(jobtype_id, None)

                # The instance may already have been removed.
                if instance is not None:
                    instance.stop(
                        assignment_failed=failed,
                        avoid_reassignment=not failed,
                        error="Error in jobtype: %r. "
                              "Traceback: %s" % (result, failure_traceback))

    def assignment_started(_, assign_id):
        """Callback for the started deferred: marks the agent running."""
        logger.debug("Assignment %s has started", assign_id)
        config["state"] = AgentState.RUNNING
        self.agent.reannounce(force=True)

    def remove_assignment(_, assign_id):
        """Drops the assignment and any job type still registered for it."""
        assignment = config["current_assignments"].pop(assign_id)
        if "jobtype" in assignment:
            jobtype_id = assignment["jobtype"].pop("id", None)
            if jobtype_id:
                config["jobtypes"].pop(jobtype_id, None)

    def assignment_stopped(_, assign_id):
        """
        Callback for the stopped deferred: removes the assignment once
        the job type's pending task updates have fired.
        """
        logger.debug("Assignment %s has stopped", assign_id)
        if (len(config["current_assignments"]) <= 1 and
                not self.agent.shutting_down):
            config["state"] = AgentState.ONLINE
            self.agent.reannounce(force=True)

        assignment = config["current_assignments"][assign_id]
        jobtype = None
        if "jobtype" in assignment:
            jobtype_id = assignment["jobtype"].pop("id", None)
            if jobtype_id:
                jobtype = config["jobtypes"].pop(jobtype_id, None)

        if jobtype is not None:
            # Keep the assignment around until the outstanding task
            # updates are done.
            updates_deferred = DeferredList(jobtype.task_update_deferreds)
            updates_deferred.addBoth(remove_assignment, assign_id)
        else:
            # Nothing to wait for, remove the assignment right away.
            config["current_assignments"].pop(assign_id)

    def restart_if_necessary(_):  # pragma: no cover
        """Stops the agent and the reactor if a restart was requested."""
        if "restart_requested" in config and config["restart_requested"]:
            stopping = config["agent"].stop()
            stopping.addCallbacks(lambda _: reactor.stop(),
                                  lambda _: reactor.stop())

    def load_jobtype_failed(result, assign_id):
        """
        Errback for the job type loader: removes the assignment and
        posts a FAILED state for each of its tasks to the master.
        """
        logger.error(
            "Loading jobtype for assignment %s failed, removing.",
            assign_id)
        failure_traceback = result.getTraceback()
        logger.debug("Got traceback")
        logger.error(failure_traceback)
        assignment = config["current_assignments"].pop(assign_id)

        # Mark all tasks as failed on master and set an error message
        logger.debug("Marking tasks in assignment as failed")

        def post_update(post_url, post_data, task, delay=0):
            # `post` here is the module level http function, not this
            # method.
            post_func = partial(
                post, post_url, data=post_data,
                callback=lambda x: result_callback(
                    post_url, post_data, task, x),
                errback=lambda x: error_callback(
                    post_url, post_data, task, x))
            reactor.callLater(delay, post_func)

        def result_callback(cburl, cbdata, task, response):
            # Server side errors are retried, anything else which is
            # not OK is only logged.
            if 500 <= response.code < 600:
                logger.error(
                    "Error while marking task %s as failed on master, "
                    "retrying", task["id"])
                post_update(cburl, cbdata, task, delay=http_retry_delay())

            elif response.code != OK:
                logger.error(
                    "Could not mark task %s as failed, server response "
                    "code was %s", task["id"], response.code)

            else:
                logger.info(
                    "Marked task %s as failed on master", task["id"])

        def error_callback(cburl, cbdata, task, failure_reason):
            logger.error(
                "Error while marking task %s as failed, retrying: %s",
                task["id"], failure_reason)
            post_update(cburl, cbdata, task, delay=http_retry_delay())

        for task in assignment["tasks"]:
            url = "%s/jobs/%s/tasks/%s" % (
                config["master_api"], assignment["job"]["id"], task["id"])
            data = {
                "state": WorkState.FAILED,
                "last_error": failure_traceback}
            post_update(url, data, task)

        # If the loading was partially successful for some reason, there
        # might already be an entry for this jobtype in the config.
        # Remove it if it exists.
        if "jobtype" in assignment:
            jobtype_id = assignment["jobtype"].pop("id", None)
            if jobtype_id:
                config["jobtypes"].pop(jobtype_id, None)

    def loaded_jobtype(jobtype_class, assign_id):
        """
        Callback for the job type loader: instances the job type, starts
        it and wires up the started/stopped callbacks.
        """
        # TODO: report error to master
        if hasattr(jobtype_class, "getTraceback"):
            logger.error(jobtype_class.getTraceback())
            return

        # TODO: add call to prepare_for_job
        # TODO: add call to spawn_persistent_process

        # Instance the job type and pass in the assignment data.
        instance = jobtype_class(request_data)

        if not isinstance(instance, JobType):
            raise TypeError(
                "Expected a subclass of "
                "pyfarm.jobtypes.core.jobtype.JobType")

        # TODO: add callback to cleanup_after_job
        # TODO: add callback to stop persistent process
        try:
            started_deferred, stopped_deferred = instance._start()
            started_deferred.addCallback(assignment_started, assign_id)
            started_deferred.addErrback(assignment_failed, assign_id)
            stopped_deferred.addCallback(assignment_stopped, assign_id)
            stopped_deferred.addErrback(assignment_failed, assign_id)
            stopped_deferred.addBoth(restart_if_necessary)
            stopped_deferred.addBoth(
                lambda *args: instance._remove_tempdirs())
            stopped_deferred.addBoth(
                lambda *args: instance._close_logs())
            stopped_deferred.addBoth(
                lambda *args: instance._upload_logfile())
        except Exception as e:
            logger.error("Error on starting jobtype, stopping it now.  "
                         "Error was: %r. Traceback: %s", e,
                         traceback.format_exc())
            instance.stop(assignment_failed=True,
                          error="Error while loading jobtype: %r. "
                                "Traceback: %s" %
                                (e, traceback.format_exc()))
            assignment = config["current_assignments"].pop(assign_id)
            if "jobtype" in assignment:
                jobtype_id = assignment["jobtype"].pop("id", None)
                if jobtype_id:
                    config["jobtypes"].pop(jobtype_id, None)

    # Load the job type then pass the class along to the
    # callback.  No errback here because all the errors
    # are handled internally in this case.
    jobtype_loader = JobType.load(request_data)
    jobtype_loader.addCallback(loaded_jobtype, assignment_uuid)
    jobtype_loader.addErrback(load_jobtype_failed, assignment_uuid)

    return dumps({"id": assignment_uuid}), ACCEPTED