Example #1
0
    def __run(self):
        """Wait for the task's scheduled run to finish, then collect
        its results in every supported format plus a detailed copy of
        the task itself, storing everything in self.results."""

        # Wait for the run time to have passed

        try:
            # The end time comes back as ISO 8601.  Parse it.
            end_time = dateutil.parser.parse(self.run_data["end-time"])
        except ValueError as ex:
            self.__diag(
                "Server did not return a valid end time for the task: %s" %
                (str(ex)))
            # BUG FIX: without a valid end time there is nothing more
            # to do; falling through would raise NameError below.
            return

        now = datetime.datetime.now(tzlocal())
        sleep_time = end_time - now if end_time > now else datetime.timedelta()

        # The extra five seconds is a breather for the server to
        # assemble the final result.
        sleep_seconds = sleep_time.total_seconds() + 5

        self.__debug("Sleeping %f seconds" % (sleep_seconds))
        time.sleep(sleep_seconds)

        # Fetch the results in all formats we return.

        # TODO: Need to handle run failures

        self.results["results"] = {}

        result_href = self.run_data["result-href"]
        for fmt in ["application/json", "text/plain", "text/html"]:
            status, result = pscheduler.url_get(result_href,
                                                params={
                                                    "wait-merged": True,
                                                    "format": fmt
                                                },
                                                json=fmt == "application/json",
                                                throw=False)
            if status != 200:
                self.__diag("Failed to get %s result: %s" % (fmt, result))
                return

            self.results["results"][fmt] = result

        # Grab a final copy of the task and its details for posterity

        status, task_detail = pscheduler.url_get(self.task_href,
                                                 params={"detail": True},
                                                 throw=False)
        if status != 200:
            # BUG FIX: this formerly interpolated the undefined name
            # 'task_data', which would have raised NameError.
            self.__diag("Unable to get detailed task data: %s" % (task_detail))
            return

        self.results["task"] = task_detail
Example #2
0
    def __populate_cidrs_update__(self):
        """
        Update the CIDR list.  It is assumed that the caller will have
        protected against calling two of these at once.
        """

        status, text = pscheduler.url_get(self.source,
                                          bind=self.bind,
                                          json=False,
                                          throw=False)

        retry_time = datetime.datetime.now() + self.retry

        if status != 200:
            # TODO: Would be nice if we could log the failure
            with self.data_lock:
                self.next_attempt = retry_time
            return

        # Apply the transform, if one was configured.
        if self.transform is not None:
            try:
                text = self.transform(pscheduler.json_load(text))
            except (ValueError, pscheduler.jqfilter.JQRuntimeError):
                # TODO: Would be nice if we could log the failure
                with self.data_lock:
                    self.next_attempt = retry_time
                return

        # TODO: Consider caching this on disk someplace so that it can
        # be retrieved if we fail to fetch at startup.

        # Assemble the replacement tree off to the side so readers
        # never see a partially-populated list.
        replacement = radix.Radix()
        count = 0

        for line in text.split('\n'):

            # Strip comments and surrounding whitespace.
            entry = line.split('#', 1)[0].strip()
            if not entry:
                continue
            try:
                replacement.add(entry)
                count += 1
            except ValueError:
                # Just ignore anything that looks fishy.
                # TODO: Log it?
                pass

        with self.data_lock:
            self.cidrs = replacement
            self.length = count
            self.next_attempt = datetime.datetime.now() + self.update
Example #3
0
    def __populate_cidrs__(self):
        """Refresh the CIDR list from the source if it's time to do so."""

        # TODO: Turn this into a thread so checks aren't delayed.

        if self.next_attempt > datetime.datetime.now():
            # Not time yet.
            return

        status, text = pscheduler.url_get(self.source,
                                          bind=self.bind,
                                          json=False,
                                          throw=False)

        possible_next_attempt = datetime.datetime.now() + self.retry

        if status != 200:
            # TODO: Would be nice if we could log the failure
            self.next_attempt = possible_next_attempt
            return

        # If there's a transform, apply it.
        if self.transform is not None:
            try:
                # Renamed from 'json' to avoid shadowing the stdlib
                # module name.
                loaded = pscheduler.json_load(text)
                text = self.transform(loaded)
            except (ValueError, pscheduler.jqfilter.JQRuntimeError):
                # TODO: Would be nice if we could log the failure
                self.next_attempt = possible_next_attempt
                return

        # TODO: Consider caching this on disk someplace so that it can
        # be retrieved if we fail to fetch at startup.

        # Build the new list separately and swap it in only when it's
        # complete, so the old list stays intact (and consistent) while
        # the replacement is being assembled.
        new_cidrs = radix.Radix()
        new_length = 0

        for cidr in text.split('\n'):

            # Remove comments and ditch excess whitespace
            cidr = cidr.split('#', 1)[0].strip()
            if len(cidr) == 0:
                continue
            try:
                new_cidrs.add(cidr)
                new_length += 1
            except ValueError:
                # Just ignore anything that looks fishy.
                # TODO: Log it?
                pass

        self.cidrs = new_cidrs
        self.length = new_length
        self.next_attempt = datetime.datetime.now() + self.update
Example #4
0
def tasks():
    """REST endpoint for the task collection.

    GET returns the list of tasks, optionally filtered by a 'json'
    query argument and expanded to full records.  POST validates a new
    task, determines participants, picks a common tool, posts the task
    to every participant and enables it for scheduling.
    """

    if request.method == 'GET':

        expanded = is_expanded()

        query = """
            SELECT json, uuid
            FROM task
            """
        args = []

        try:
            json_query = arg_json("json")
        except ValueError as ex:
            return bad_request(str(ex))

        if json_query is not None:
            query += "WHERE json @> %s"
            args.append(request.args.get("json"))

        query += " ORDER BY added"

        try:
            cursor = dbcursor_query(query, args)
        except Exception as ex:
            return error(str(ex))

        result = []
        for row in cursor:
            url = base_url(row[1])
            if not expanded:
                result.append(url)
                continue
            row[0]['href'] = url
            result.append(row[0])
        return json_response(result)

    elif request.method == 'POST':

        try:
            task = pscheduler.json_load(request.data)
        except ValueError:
            return bad_request("Invalid JSON:" + request.data)

        # TODO: Validate the JSON against a TaskSpecification


        # See if the task spec is valid

        try:
            returncode, stdout, stderr = pscheduler.run_program(
                [ "pscheduler", "internal", "invoke", "test",
                  task['test']['type'], "spec-is-valid" ],
                stdin = pscheduler.json_dump(task['test']['spec'])
                )

            if returncode != 0:
                return error("Invalid test specification: " + stderr)
        except Exception as ex:
            return error("Unable to validate test spec: " + str(ex))

        log.debug("Validated test: %s", pscheduler.json_dump(task['test']))


        # Find the participants

        try:
            returncode, stdout, stderr = pscheduler.run_program(
                [ "pscheduler", "internal", "invoke", "test",
                  task['test']['type'], "participants" ],
                stdin = pscheduler.json_dump(task['test']['spec'])
                )

            if returncode != 0:
                return error("Unable to determine participants: " + stderr)

            # A null participant means "this host."
            participants = [ host if host is not None
                             else pscheduler.api_this_host()
                             for host in pscheduler.json_load(stdout)["participants"] ]
        except Exception as ex:
            return error("Unable to determine participants: " + str(ex))
        nparticipants = len(participants)

        # TODO: The participants must be unique.  This should be
        # verified by fetching the host name from each one.

        #
        # TOOL SELECTION
        #

        # TODO: Need to provide for tool being specified by the task
        # package.

        tools = []

        for participant in participants:

            try:
                # TODO: This will fail with a very large test spec.
                status, result = pscheduler.url_get(
                    pscheduler.api_url(participant, "tools"),
                    params={ 'test': pscheduler.json_dump(task['test']) }
                    )
                if status != 200:
                    raise Exception("%d: %s" % (status, result))
                tools.append(result)
            except Exception as ex:
                return error("Error getting tools from %s: %s" \
                                     % (participant, str(ex)))
            log.debug("Participant %s offers tools %s", participant, tools)

        if len(tools) != nparticipants:
            return error("Didn't get a full set of tool responses")

        if "tools" in task:
            tool = pick_tool(tools, pick_from=task['tools'])
        else:
            tool = pick_tool(tools)

        if tool is None:
            # TODO: This could stand some additional diagnostics.
            return no_can_do("Couldn't find a tool in common among the participants.")

        task['tool'] = tool

        #
        # TASK CREATION
        #

        task_data = pscheduler.json_dump(task)
        log.debug("Task data: %s", task_data)

        tasks_posted = []

        # Evaluate the task against the limits and reject the request
        # if it doesn't pass.

        log.debug("Checking limits on %s", task["test"])

        (processor, whynot) = limitprocessor()
        if processor is None:
            log.debug("Limit processor is not initialized. %s", whynot)
            return no_can_do("Limit processor is not initialized: %s" % whynot)

        # TODO: This is cooked up in two places.  Make a function of it.
        hints = {
            "ip": request.remote_addr
            }
        hints_data = pscheduler.json_dump(hints)

        log.debug("Processor = %s" % processor)
        passed, diags = processor.process(task["test"], hints)

        if not passed:
            return forbidden("Task forbidden by limits:\n" + diags)

        # Post the lead with the local database, which also assigns
        # its UUID.  Make it disabled so the scheduler doesn't try to
        # do anything with it until the task has been submitted to all
        # of the other participants.

        try:
            cursor = dbcursor_query("SELECT * FROM api_task_post(%s, %s, 0, NULL, FALSE)",
                                    [task_data, hints_data], onerow=True)
        except Exception as ex:
            return error(str(ex.diag.message_primary))

        if cursor.rowcount == 0:
            return error("Task post failed; poster returned nothing.")

        task_uuid = cursor.fetchone()[0]

        log.debug("Tasked lead, UUID %s", task_uuid)

        # Other participants get the UUID forced upon them.

        for participant in range(1,nparticipants):
            part_name = participants[participant]
            try:
                log.debug("Tasking %d@%s: %s", participant, part_name, task_data)
                post_url = pscheduler.api_url(part_name,
                                              'tasks/' + task_uuid)
                log.debug("Posting task to %s", post_url)
                status, result = pscheduler.url_post(
                    post_url,
                    params={ 'participant': participant },
                    data=task_data,
                    json=False,
                    throw=False)
                log.debug("Remote returned %d: %s", status, result)
                if status != 200:
                    raise Exception("Unable to post task to %s: %s"
                                    % (part_name, result))
                tasks_posted.append(result)

            except Exception as ex:

                log.exception()

                # Best-effort removal from participants already tasked.
                # BUG FIX: requests.delete() returns a single Response
                # object; the old code unpacked it into two names.
                for url in tasks_posted:
                    # TODO: Handle failure?
                    requests.delete(url)

                # BUG FIX: delete the local copy exactly once, not once
                # per posted URL, and don't rebind 'ex' here so the
                # error below reports the original posting failure.
                try:
                    dbcursor_query("SELECT api_task_delete(%s)",
                                   [task_uuid])
                except Exception:
                    log.exception()

                return error("Error while tasking %d@%s: %s" % (participant, part_name, ex))


        # Enable the task so the scheduler will schedule it.
        try:
            dbcursor_query("SELECT api_task_enable(%s)", [task_uuid])
        except Exception as ex:
            log.exception()
            return error("Failed to enable task %s.  See system logs." % task_uuid)
        log.debug("Task enabled for scheduling.")

        return ok_json("%s/%s" % (request.base_url, task_uuid))

    else:

        return not_allowed()
Example #5
0
    def evaluate(
            self,
            proposal  # Task and hints
    ):
        """Evaluate the proposal against the remote service and return
        a dictionary with 'passed' and, when available, 'reasons'."""

        private = {"hints": proposal["hints"]}

        def fail_with(reason):
            # Uniform failure result, honoring the configured
            # fail-open/fail-closed behavior.
            return {
                "passed": self.fail_result,
                "reasons": [reason]
            }

        # Generate the URL
        url = self.url
        if self.url_transform is not None:
            try:
                transformed = self.url_transform({
                    "url": self.url,
                    "run": proposal["task"],
                    PRIVATE_KEY: private
                })
                url = transformed[0]
                if not isinstance(url, basestring):
                    raise ValueError("Transform did not return a string")
            except Exception as ex:
                return fail_with("URL transform failed: %s" % (str(ex)))

        # Generate the headers
        headers = {}
        if self.headers_transform is not None:
            try:
                headers = self.headers_transform({
                    "headers": self.headers,
                    "run": proposal["task"],
                    PRIVATE_KEY: private
                })[0].get("headers", {})
            except Exception as ex:
                return fail_with("Header transform failed: %s" % (str(ex)))

        # Generate the parameters
        params = {}
        if self.params_transform is not None:
            try:
                params = self.params_transform({
                    "params": self.params,
                    "run": proposal["task"],
                    PRIVATE_KEY: private
                })[0].get("params", {})
            except Exception as ex:
                return fail_with("Parameter transform failed: %s" % (str(ex)))

        # Fetch the result
        status, text = pscheduler.url_get(url,
                                          bind=self.bind,
                                          headers=headers,
                                          params=params,
                                          json=False,
                                          throw=False,
                                          timeout=self.timeout,
                                          allow_redirects=self.follow,
                                          verify_keys=self.verify)

        if self.success_only:
            if status == 200:
                return {"passed": True}
            if status == 404:
                return {"passed": False, "reasons": ["Resource not found"]}
            # Anything else falls through to the generic error
            # handling below.

        # Take errors at face value
        if status != 200:
            return fail_with("Fetch %s failed: %d: %s" % (url, status, text))

        try:
            returned = pscheduler.json_load(text)
        except ValueError:
            return fail_with("Server returned invalid JSON '%s'" % (text))

        # A missing or non-boolean 'result' is treated as invalid.
        passed = returned.get("result")
        if not isinstance(passed, bool):
            return fail_with("Server returned an invalid result '%s'" % (text))

        result = {"passed": passed}
        if "message" in returned:
            result["reasons"] = [returned["message"]]

        return result
Example #6
0
def tasks():
    """REST endpoint for the task collection.

    GET returns the (optionally filtered and expanded) list of tasks.
    POST validates a new task, verifies participants are running
    pScheduler, selects a common tool, distributes the task with its
    UUID to all participants and enables it for scheduling.
    """

    if request.method == 'GET':

        where_clause = "TRUE"
        args = []

        try:
            json_query = arg_json("json")
        except ValueError as ex:
            return bad_request(str(ex))

        if json_query is not None:
            where_clause += " AND task.json @> %s"
            args.append(request.args.get("json"))

        where_clause += " ORDER BY added"

        try:
            tasks = __tasks_get_filtered(request.base_url,
                                         where_clause=where_clause,
                                         args=args,
                                         expanded=is_expanded(),
                                         detail=arg_boolean("detail"),
                                         single=False)
        except Exception as ex:
            return error(str(ex))

        return ok_json(tasks)

    elif request.method == 'POST':

        try:
            task = pscheduler.json_load(request.data, max_schema=1)
        except ValueError as ex:
            return bad_request("Invalid task specification: %s" % (str(ex)))

        # Validate the JSON against a TaskSpecification
        # TODO: Figure out how to do this without the intermediate object

        valid, message = pscheduler.json_validate({"": task}, {
            "type": "object",
            "properties": {
                "": {
                    "$ref": "#/pScheduler/TaskSpecification"
                }
            },
            "required": [""]
        })

        if not valid:
            return bad_request("Invalid task specification: %s" % (message))

        # See if the test spec is valid

        try:
            returncode, stdout, stderr = pscheduler.run_program(
                [
                    "pscheduler", "internal", "invoke", "test",
                    task['test']['type'], "spec-is-valid"
                ],
                stdin=pscheduler.json_dump(task['test']['spec']))

            if returncode != 0:
                return error("Unable to validate test spec: %s" % (stderr))
            validate_json = pscheduler.json_load(stdout, max_schema=1)
            if not validate_json["valid"]:
                return bad_request(
                    "Invalid test specification: %s" %
                    (validate_json.get("error", "Unspecified error")))
        except Exception as ex:
            return error("Unable to validate test spec: " + str(ex))

        log.debug("Validated test: %s", pscheduler.json_dump(task['test']))

        # Reject tasks that have archive specs that use transforms.
        # See ticket #330.

        try:
            for archive in task['archives']:
                if "transform" in archive:
                    return bad_request(
                        "Use of transforms in archives is not yet supported.")
        except KeyError:
            pass  # Not there

        # Find the participants

        try:

            # HACK: BWCTLBC
            if "lead-bind" in task:
                lead_bind_env = {
                    "PSCHEDULER_LEAD_BIND_HACK": task["lead-bind"]
                }
            else:
                lead_bind_env = None

            returncode, stdout, stderr = pscheduler.run_program(
                [
                    "pscheduler", "internal", "invoke", "test",
                    task['test']['type'], "participants"
                ],
                stdin=pscheduler.json_dump(task['test']['spec']),
                timeout=5,
                env_add=lead_bind_env)

            if returncode != 0:
                return error("Unable to determine participants: " + stderr)

            # A null participant means "this host."
            participants = [
                host if host is not None else server_netloc() for host in
                pscheduler.json_load(stdout, max_schema=1)["participants"]
            ]
        except Exception as ex:
            return error("Exception while determining participants: " +
                         str(ex))
        nparticipants = len(participants)

        # TODO: The participants must be unique.  This should be
        # verified by fetching the host name from each one.

        #
        # TOOL SELECTION
        #

        lead_bind = task.get("lead-bind", None)

        # TODO: Need to provide for tool being specified by the task
        # package.

        tools = []

        tool_params = {"test": pscheduler.json_dump(task["test"])}
        # HACK: BWCTLBC
        if lead_bind is not None:
            log.debug("Using lead bind of %s" % str(lead_bind))
            tool_params["lead-bind"] = lead_bind

        for participant_no in range(0, len(participants)):

            participant = participants[participant_no]

            try:

                # Make sure the other participants are running pScheduler

                participant_api = pscheduler.api_url_hostport(participant)

                log.debug("Pinging %s" % (participant))
                status, result = pscheduler.url_get(participant_api,
                                                    throw=False,
                                                    timeout=10,
                                                    bind=lead_bind)

                if status == 400:
                    raise TaskPostingException(result)
                elif status in [ 202, 204, 205, 206, 207, 208, 226,
                                 300, 301, 302, 303, 304, 305, 306, 307, 308 ] \
                    or ( (status >= 400) and (status <=499) ):
                    # BUG FIX: 305 was previously mistyped as a second
                    # 205 in this list.
                    raise TaskPostingException(
                        "Host is not running pScheduler")
                elif status != 200:
                    raise TaskPostingException("returned status %d: %s" %
                                               (status, result))

                # TODO: This will fail with a very large test spec.
                status, result = pscheduler.url_get("%s/tools" %
                                                    (participant_api),
                                                    params=tool_params,
                                                    throw=False,
                                                    bind=lead_bind)
                if status != 200:
                    raise TaskPostingException("%d: %s" % (status, result))
                tools.append(result)
            except TaskPostingException as ex:
                return error("Error getting tools from %s: %s" \
                                     % (participant, str(ex)))
            log.debug("Participant %s offers tools %s", participant, result)

        if len(tools) != nparticipants:
            return error("Didn't get a full set of tool responses")

        if "tools" in task:
            tool = pick_tool(tools, pick_from=task['tools'])
        else:
            tool = pick_tool(tools)

        if tool is None:
            # TODO: This could stand some additional diagnostics.
            return no_can_do(
                "Couldn't find a tool in common among the participants.")

        task['tool'] = tool

        #
        # TASK CREATION
        #

        tasks_posted = []

        # Evaluate the task against the limits and reject the request
        # if it doesn't pass.

        log.debug("Checking limits on %s", task["test"])

        (processor, whynot) = limitprocessor()
        if processor is None:
            log.debug("Limit processor is not initialized. %s", whynot)
            return no_can_do("Limit processor is not initialized: %s" % whynot)

        hints = request_hints()
        hints_data = pscheduler.json_dump(hints)

        log.debug("Processor = %s" % processor)
        passed, limits_passed, diags = processor.process(task["test"], hints)

        if not passed:
            return forbidden("Task forbidden by limits:\n" + diags)

        # Post the lead with the local database, which also assigns
        # its UUID.  Make it disabled so the scheduler doesn't try to
        # do anything with it until the task has been submitted to all
        # of the other participants.

        try:
            cursor = dbcursor_query(
                "SELECT * FROM api_task_post(%s, %s, %s, %s, 0, NULL, FALSE)",
                [
                    pscheduler.json_dump(task), participants, hints_data,
                    pscheduler.json_dump(limits_passed)
                ],
                onerow=True)
        except Exception as ex:
            return error(str(ex.diag.message_primary))

        if cursor.rowcount == 0:
            return error("Task post failed; poster returned nothing.")

        task_uuid = cursor.fetchone()[0]

        log.debug("Tasked lead, UUID %s", task_uuid)

        # Other participants get the UUID and participant list forced upon them.

        task["participants"] = participants
        task_data = pscheduler.json_dump(task)

        for participant in range(1, nparticipants):

            part_name = participants[participant]
            log.debug("Tasking participant %s", part_name)
            try:

                # Post the task

                log.debug("Tasking %d@%s: %s", participant, part_name,
                          task_data)
                post_url = pscheduler.api_url_hostport(part_name,
                                                       'tasks/' + task_uuid)
                log.debug("Posting task to %s", post_url)
                status, result = pscheduler.url_post(
                    post_url,
                    params={'participant': participant},
                    data=task_data,
                    bind=lead_bind,
                    json=False,
                    throw=False)
                log.debug("Remote returned %d: %s", status, result)
                if status != 200:
                    raise TaskPostingException(
                        "Unable to post task to %s: %s" % (part_name, result))
                tasks_posted.append(result)

                # Fetch the task's details and add the list of limits
                # passed to our own.

                status, result = pscheduler.url_get(post_url,
                                                    params={"detail": True},
                                                    bind=lead_bind,
                                                    throw=False)
                if status != 200:
                    raise TaskPostingException(
                        "Unable to fetch posted task from %s: %s" %
                        (part_name, result))
                log.debug("Fetched %s", result)
                try:
                    details = result["detail"]["spec-limits-passed"]
                    log.debug("Details from %s: %s", post_url, details)
                    limits_passed.extend(details)
                except KeyError:
                    pass

            except TaskPostingException as ex:

                # Disable the task locally and let it get rid of the
                # other participants.

                posted_to = "%s/%s" % (request.url, task_uuid)
                parsed = list(urlparse.urlsplit(posted_to))
                parsed[1] = "%s"
                template = urlparse.urlunsplit(parsed)

                try:
                    dbcursor_query("SELECT api_task_disable(%s, %s)",
                                   [task_uuid, template])
                except Exception:
                    log.exception()

                return error("Error while tasking %s: %s" % (part_name, ex))

        # Update the list of limits passed in the local database
        # TODO: How do the other participants know about this?
        log.debug("Limits passed: %s", limits_passed)
        try:
            cursor = dbcursor_query(
                "UPDATE task SET limits_passed = %s::JSON WHERE uuid = %s",
                [pscheduler.json_dump(limits_passed), task_uuid])
        except Exception as ex:
            return error(str(ex.diag.message_primary))

        # Enable the task so the scheduler will schedule it.
        try:
            dbcursor_query("SELECT api_task_enable(%s)", [task_uuid])
        except Exception as ex:
            log.exception()
            return error("Failed to enable task %s.  See system logs." %
                         task_uuid)
        log.debug("Task enabled for scheduling.")

        return ok_json("%s/%s" % (request.base_url, task_uuid))

    else:

        return not_allowed()
Example #7
0
File: tasks.py — Project: krihal/pscheduler
def tasks():
    """Dispatch the /tasks endpoint.

    GET  -- Return a list of task URLs, optionally filtered by a JSON
            containment query and optionally expanded with details.
    POST -- Validate a submitted task specification, run it through the
            limit processor, pick a tool common to all participants,
            post the task to every participant and enable it for
            scheduling.  Returns the new task's URL, or a redirect to
            its expanded form when 'expanded' was requested.

    Any other method is answered with 405 Not Allowed.
    """

    if request.method == 'GET':

        where_clause = "TRUE"
        args = []

        try:
            json_query = arg_json("json")
        except ValueError as ex:
            return bad_request(str(ex))

        if json_query is not None:
            # JSON containment query against the task's stored detail
            where_clause += " AND task.json_detail @> %s"
            args.append(request.args.get("json"))

        where_clause += " ORDER BY added"

        # Renamed from 'tasks' to avoid shadowing this function's name.
        task_list = __tasks_get_filtered(request.base_url,
                                         where_clause=where_clause,
                                         args=args,
                                         expanded=is_expanded(),
                                         detail=arg_boolean("detail"),
                                         single=False)

        return ok_json(task_list)

    elif request.method == 'POST':

        data = request.data.decode("ascii")

        try:
            task = pscheduler.json_load(data, max_schema=3)
        except ValueError as ex:
            return bad_request("Invalid task specification: %s" % (str(ex)))

        # Validate the JSON against a TaskSpecification
        # TODO: Figure out how to do this without the intermediate object

        valid, message = pscheduler.json_validate({"": task}, {
            "type": "object",
            "properties": {
                "": {
                    "$ref": "#/pScheduler/TaskSpecification"
                }
            },
            "required": [""]
        })

        if not valid:
            return bad_request("Invalid task specification: %s" % (message))

        # See if the test spec is valid

        try:
            returncode, stdout, stderr = pscheduler.plugin_invoke(
                "test",
                task['test']['type'],
                "spec-is-valid",
                stdin=pscheduler.json_dump(task['test']['spec']))

            if returncode != 0:
                return error("Unable to validate test spec: %s" % (stderr))
            validate_json = pscheduler.json_load(stdout, max_schema=1)
            if not validate_json["valid"]:
                return bad_request(
                    "Invalid test specification: %s" %
                    (validate_json.get("error", "Unspecified error")))
        except Exception as ex:
            return error("Unable to validate test spec: " + str(ex))

        log.debug("Validated test: %s", pscheduler.json_dump(task['test']))

        # Validate the schedule

        try:
            # Construction alone validates the expression; the object
            # itself isn't needed.  (Was assigned to an unused local.)
            crontab.CronTab(task["schedule"]["repeat-cron"])
        except (AttributeError, ValueError):
            return error("Cron repeat specification is invalid.")
        except KeyError:
            pass  # No repeat-cron; that's fine.

        # Validate the archives

        for archive in task.get("archives", []):

            # Data

            try:
                returncode, stdout, stderr = pscheduler.plugin_invoke(
                    "archiver",
                    archive["archiver"],
                    "data-is-valid",
                    stdin=pscheduler.json_dump(archive["data"]),
                )
                if returncode != 0:
                    return error("Unable to validate archive spec: %s" %
                                 (stderr))
            except Exception as ex:
                # Fixed: this message used to say "test spec" although
                # it reports an archive-validation failure.
                return error("Unable to validate archive spec: " + str(ex))

            try:
                returned_json = pscheduler.json_load(stdout)
                if not returned_json["valid"]:
                    return bad_request("Invalid archiver data: %s" %
                                       (returned_json["error"]))
            except Exception as ex:
                return error("Internal problem validating archiver data: %s" %
                             (str(ex)))

            # Transform, if there was one.

            if "transform" in archive:
                transform = archive["transform"]
                try:
                    _ = pscheduler.JQFilter(filter_spec=transform["script"],
                                            args=transform.get("args", {}))

                except ValueError as ex:
                    return error("Invalid transform: %s" % (str(ex)))

        # Validate the lead binding if there was one.

        lead_bind = task.get("lead-bind", None)
        if lead_bind is not None \
           and (pscheduler.address_interface(lead_bind) is None):
            return bad_request("Lead bind '%s' is not on this host" %
                               (lead_bind))

        # Evaluate the task against the limits and reject the request
        # if it doesn't pass.  We do this early so anything else in
        # the process gets any rewrites.

        log.debug("Checking limits on %s", task)

        (processor, whynot) = limitprocessor()
        if processor is None:
            log.debug("Limit processor is not initialized. %s", whynot)
            return no_can_do("Limit processor is not initialized: %s" % whynot)

        hints, error_response = request_hints()
        if hints is None:
            log.debug("Can't come up with valid hints for lead task limits.")
            return error_response

        hints_data = pscheduler.json_dump(hints)

        log.debug("Processor = %s" % processor)
        passed, limits_passed, diags, new_task, _priority \
            = processor.process(task, hints)

        if not passed:
            return forbidden("Task forbidden by limits:\n" + diags)

        # If the limit system rewrote the task, re-validate the new spec.
        if new_task is not None:
            try:
                task = new_task
                returncode, stdout, stderr = pscheduler.plugin_invoke(
                    "test",
                    task['test']['type'],
                    "spec-is-valid",
                    stdin=pscheduler.json_dump(task["test"]["spec"]))

                if returncode != 0:
                    return error(
                        "Failed to validate rewritten test specification: %s" %
                        (stderr))
                validate_json = pscheduler.json_load(stdout, max_schema=1)
                if not validate_json["valid"]:
                    return bad_request(
                        "Rewritten test specification is invalid: %s" %
                        (validate_json.get("error", "Unspecified error")))
            except Exception as ex:
                return error(
                    "Unable to validate rewritten test specification: " +
                    str(ex))

        # Find the participants

        try:

            returncode, stdout, stderr = pscheduler.plugin_invoke(
                "test",
                task['test']['type'],
                "participants",
                stdin=pscheduler.json_dump(task['test']['spec']),
                timeout=5)

            if returncode != 0:
                return error("Unable to determine participants: " + stderr)

            # A null participant means "this host".
            participants = [
                host if host is not None else server_netloc() for host in
                pscheduler.json_load(stdout, max_schema=1)["participants"]
            ]
        except Exception as ex:
            return error("Exception while determining participants: " +
                         str(ex))
        nparticipants = len(participants)

        # TODO: The participants must be unique.  This should be
        # verified by fetching the host name from each one.

        #
        # TOOL SELECTION
        #

        # TODO: Need to provide for tool being specified by the task
        # package.

        tools = []

        tool_params = {"test": pscheduler.json_dump(task["test"])}

        tool_offers = {}

        # (Direct iteration; the former index variable was never used.)
        for participant in participants:

            try:

                # Make sure the other participants are running pScheduler

                participant_api = pscheduler.api_url_hostport(participant)

                log.debug("Pinging %s" % (participant))
                status, result = pscheduler.url_get(participant_api,
                                                    throw=False,
                                                    timeout=10,
                                                    bind=lead_bind)

                if status == 400:
                    raise TaskPostingException(result)
                # Fixed: the 3xx run below had a duplicate 205 where
                # 305 (Use Proxy) was clearly intended.
                elif status in [ 202, 204, 205, 206, 207, 208, 226,
                                 300, 301, 302, 303, 304, 305, 306, 307, 308 ] \
                    or ( (status >= 400) and (status <=499) ):
                    raise TaskPostingException(
                        "Host is not running pScheduler")
                elif status != 200:
                    raise TaskPostingException("returned status %d: %s" %
                                               (status, result))

                # TODO: This will fail with a very large test spec.
                status, result = pscheduler.url_get("%s/tools" %
                                                    (participant_api),
                                                    params=tool_params,
                                                    throw=False,
                                                    bind=lead_bind)
                if status != 200:
                    raise TaskPostingException("%d: %s" % (status, result))
                tools.append(result)
            except TaskPostingException as ex:
                return error("Error getting tools from %s: %s" \
                                     % (participant, str(ex)))
            log.debug("Participant %s offers tools %s", participant, result)
            tool_offers[participant] = result

        if len(tools) != nparticipants:
            return error("Didn't get a full set of tool responses")

        if "tools" in task:
            tool = pick_tool(tools, pick_from=task['tools'])
        else:
            tool = pick_tool(tools)

        # Complain if no usable tool was found

        if tool is None:

            offers = []
            for participant in participants:
                participant_offers = tool_offers.get(participant,
                                                     [{
                                                         "name": "nothing"
                                                     }])
                if participant_offers is not None:
                    offer_set = [offer["name"] for offer in participant_offers]
                else:
                    offer_set = ["nothing"]
                offers.append("%s offered %s" %
                              (participant, ", ".join(offer_set)))

            return no_can_do("No tool in common among the participants:  %s." %
                             (";  ".join(offers)))

        task['tool'] = tool

        #
        # TASK CREATION
        #

        tasks_posted = []

        # Post the lead with the local database, which also assigns
        # its UUID.  Make it disabled so the scheduler doesn't try to
        # do anything with it until the task has been submitted to all
        # of the other participants.

        cursor = dbcursor_query(
            "SELECT * FROM api_task_post(%s, %s, %s, %s, 0, %s, NULL, FALSE, %s)",
            [
                pscheduler.json_dump(task), participants, hints_data,
                pscheduler.json_dump(limits_passed),
                task.get("priority", None), diags
            ],
            onerow=True)

        if cursor.rowcount == 0:
            return error("Task post failed; poster returned nothing.")

        task_uuid = cursor.fetchone()[0]

        log.debug("Tasked lead, UUID %s", task_uuid)

        # Other participants get the UUID and participant list forced upon them.

        task["participants"] = participants

        task_params = {"key": task["_key"]} if "_key" in task else {}

        for participant in range(1, nparticipants):

            part_name = participants[participant]
            log.debug("Tasking participant %s", part_name)
            try:

                # Post the task

                log.debug("Tasking %d@%s: %s", participant, part_name, task)
                post_url = pscheduler.api_url_hostport(part_name,
                                                       'tasks/' + task_uuid)

                task_params["participant"] = participant

                log.debug("Posting task to %s", post_url)
                status, result = pscheduler.url_post(post_url,
                                                     params=task_params,
                                                     data=task,
                                                     bind=lead_bind,
                                                     json=False,
                                                     throw=False)
                log.debug("Remote returned %d: %s", status, result)
                if status != 200:
                    raise TaskPostingException(
                        "Unable to post task to %s: %s" % (part_name, result))
                tasks_posted.append(result)

                # Fetch the task's details and add the list of limits
                # passed to our own.

                status, result = pscheduler.url_get(post_url,
                                                    params={"detail": True},
                                                    bind=lead_bind,
                                                    throw=False)
                if status != 200:
                    raise TaskPostingException(
                        "Unable to fetch posted task from %s: %s" %
                        (part_name, result))
                log.debug("Fetched %s", result)
                try:
                    details = result["detail"]["spec-limits-passed"]
                    log.debug("Details from %s: %s", post_url, details)
                    limits_passed.extend(details)
                except KeyError:
                    pass  # Remote had no limit details; nothing to merge.

            except TaskPostingException as ex:

                # Disable the task locally and let it get rid of the
                # other participants.

                posted_to = "%s/%s" % (request.url, task_uuid)
                parsed = list(urllib.parse.urlsplit(posted_to))
                parsed[1] = "%s"
                template = urllib.parse.urlunsplit(parsed)

                try:
                    dbcursor_query("SELECT api_task_disable(%s, %s)",
                                   [task_uuid, template])
                except Exception:
                    log.exception()

                return error("Error while tasking %s: %s" % (part_name, ex))

        # Update the list of limits passed in the local database
        # TODO: How do the other participants know about this?
        log.debug("Limits passed: %s", limits_passed)
        cursor = dbcursor_query(
            "UPDATE task SET limits_passed = %s::JSON WHERE uuid = %s",
            [pscheduler.json_dump(limits_passed), task_uuid])

        # Enable the task so the scheduler will schedule it.
        try:
            dbcursor_query("SELECT api_task_enable(%s)", [task_uuid])
        except Exception:
            log.exception()
            return error("Failed to enable task %s.  See system logs." %
                         task_uuid)
        log.debug("Task enabled for scheduling.")

        task_url = "%s/%s" % (request.base_url, task_uuid)

        # Non-expanded gets just the URL
        if not arg_boolean("expanded"):
            return ok_json(task_url)

        # Expanded gets a redirect to GET+expanded

        params = []
        for arg in ["detail", "pretty"]:
            if arg_boolean(arg):
                params.append(arg)

        if params:
            task_url += "?%s" % ("&".join(params))

        return see_other(task_url)

    else:

        return not_allowed()
예제 #8
0
    def __init__(self, test, nparticipants, a, z, debug=False):
        """Construct a task runner and start it in a background thread.

        Arguments:
        test          -- Test specification template; the "__A__" and
                         "__Z__" placeholders are substituted from a and z.
        nparticipants -- Number of participants in the test.
        a             -- Dict describing the A end; must carry "pscheduler".
        z             -- Dict describing the Z end; must carry "host" and,
                         for a two-participant test, "pscheduler".
        debug         -- Emit debug diagnostics when True.

        On any failure a diagnostic is recorded in self.results["diags"]
        and construction returns early without starting the worker.
        """

        self.debug = debug

        self.results = {
            "hosts": {
                "a": a,
                "z": z
            },
            "nparticipants": nparticipants,
            "diags": []
        }
        self.diags = self.results["diags"]

        # Make sure we have sufficient pSchedulers to cover the participants
        if (nparticipants == 2) and ("pscheduler" not in z):
            # TODO: Assert that Z has a host?
            self.__diag("No pScheduler for or on %s." % (z["host"]))
            return

        # Fill in the test's blanks and construct a task spec

        test = copy.deepcopy(test)
        test = pscheduler.json_substitute(test, "__A__", a["pscheduler"])

        # Single-participant tests address Z by its plain host; otherwise
        # prefer Z's pScheduler address and fall back to the host.
        z_end = z["host"] if nparticipants == 1 else z.get(
            "pscheduler", z["host"])
        test = pscheduler.json_substitute(test, "__Z__", z_end)

        task = {
            "schema": 1,
            "test": test,
            # This is required; empty is fine.
            "schedule": {
                # TODO: Don't hard-wire this.
                "slip": "PT10M"
            }
        }

        # Post the task

        task_post = pscheduler.api_url(host=a["pscheduler"], path="/tasks")

        status, task_href = pscheduler.url_post(
            task_post, data=pscheduler.json_dump(task), throw=False)
        if status != 200:
            self.__diag("Unable to post task: %s" % (task_href))
            return

        self.__debug("Posted task %s" % (task_href))

        self.task_href = task_href

        # Get the task from the server with full details

        status, task_data = pscheduler.url_get(task_href,
                                               params={"detail": True},
                                               throw=False)
        if status != 200:
            self.__diag("Unable to get detailed task data: %s" % (task_data))
            return

        # Wait for the first run to be scheduled

        first_run_url = task_data["detail"]["first-run-href"]

        status, run_data = pscheduler.url_get(first_run_url, throw=False)

        if status == 404:
            self.__diag("The server never scheduled a run for the task.")
            return
        if status != 200:
            self.__diag("Error %d: %s" % (status, run_data))
            return

        # These keys are required later by the run watcher.
        for key in ["start-time", "end-time", "result-href"]:
            if key not in run_data:
                self.__diag("Server did not return %s with run data" % (key))
                return

        self.results["href"] = run_data["href"]
        self.run_data = run_data
        self.__debug(
            "Run times: %s to %s" \
            % (run_data["start-time"], run_data["end-time"]))

        # NOTE(review): only a private __run() method is visible in this
        # class; confirm a public run() exists, otherwise this thread
        # will die with AttributeError when started.  -- TODO confirm
        self.worker = threading.Thread(target=lambda: self.run())
        # setDaemon() is deprecated (Python 3.10+); set the attribute.
        self.worker.daemon = True
        self.worker.start()