Example #1
    def predict_arrival_rates(
            self,
            topology_id: str,
            cluster: str,
            environ: str,
            spout_traffic: Dict[int, Dict[str, float]],
            start: dt.datetime,
            end: dt.datetime,
            metric_bucket_length: int,
            topology_ref: Optional[str] = None,
            **kwargs: Any) -> Tuple[pd.DataFrame, pd.DataFrame]:

        if not topology_ref:
            # Get the reference of the latest physical graph entry for this
            # topology, or create a physical graph if there are none.
            topology_ref = graph_check(self.graph_client, self.config,
                                       self.tracker_url, cluster, environ,
                                       topology_id)

        # Predict Arrival Rates for all elements
        instance_ars: pd.DataFrame
        strmgr_ars: pd.DataFrame
        instance_ars, strmgr_ars =  \
            arrival_rates.calculate(
                self.graph_client, self.metrics_client, topology_id, cluster,
                environ, topology_ref, start, end, metric_bucket_length,
                self.tracker_url, spout_traffic, **kwargs)

        # Sum the arrivals from each source component of each incoming stream
        in_ars: pd.DataFrame = \
            (instance_ars.groupby(["task", "incoming_stream"]).sum()
             .reset_index().rename(index=str,
                                   columns={"incoming_stream": "stream"}))

        return in_ars, strmgr_ars
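
A minimal usage sketch of the method above, under stated assumptions: "model" stands in for an instance of the class that defines predict_arrival_rates, and the topology, cluster, task IDs and stream names are illustrative placeholders. The spout_traffic mapping follows the Dict[int, Dict[str, float]] shape of task ID to per-stream tuples-per-second values.

import datetime as dt

# Hypothetical spout traffic: tasks 1 and 2 each emit 100 tuples per second
# on an illustrative stream named "output".
spout_traffic = {1: {"output": 100.0}, 2: {"output": 100.0}}

end = dt.datetime.now()
start = end - dt.timedelta(minutes=30)

# "model" is assumed to be an instance of the class defining the method
# above; the 60 second metric bucket length is illustrative.
in_ars, strmgr_ars = model.predict_arrival_rates(
    topology_id="example-topology", cluster="example-cluster", environ="prod",
    spout_traffic=spout_traffic, start=start, end=end,
    metric_bucket_length=60)

# in_ars holds one row per (task, stream) pair with the summed arrival rate;
# strmgr_ars holds the stream manager arrival rates.
print(in_ars.head())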
Example #2
    cluster = ARGS.cluster
    environ = ARGS.environ
    topology = ARGS.topology

    topology_latencies: pd.DataFrame = pd.DataFrame(columns=[
        'topology', 'av_actual_latency', 'std_actual_latency',
        'av_calculated_latency', 'std_predicted_latency'
    ])
    system_metrics: pd.DataFrame = pd.DataFrame(columns=[
        'topology', 'component', 'av_gc', 'std_gc', 'av_cpu_load',
        'std_cpu_load'
    ])

    # Make sure we have a current graph representing the physical plan for
    # the topology
    graph_check(graph_client, CONFIG["heron.topology.models.config"],
                CONFIG["heron.tracker.url"], cluster, environ, topology)

    # Make sure we have a file containing all paths for the job
    paths_check(graph_client, CONFIG["heron.topology.models.config"], cluster,
                environ, topology)

    model_kwargs: Dict[str, Any] = {}

    model_kwargs["zk.time.offset"] = CONFIG["heron.topology.models.config"][
        "zk.time.offset"]
    model_kwargs["heron.statemgr.root.path"] = CONFIG[
        "heron.topology.models.config"]["heron.statemgr.root.path"]
    model_kwargs["heron.statemgr.connection.string"] = \
        CONFIG["heron.topology.models.config"]["heron.statemgr.connection.string"]

    now = dt.datetime.now()
    start, end = now - dt.timedelta(minutes=HISTORICAL_METRICS_DURATION), now
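
The configuration lookups above assume a nested CONFIG dictionary of roughly the following shape; the key names come from the code, while the values shown here are illustrative placeholders only.

CONFIG = {
    "heron.tracker.url": "http://heron-tracker.example.com:8888",  # placeholder
    "heron.topology.models.config": {
        "zk.time.offset": 0,                                        # illustrative
        "heron.statemgr.root.path": "/heron/statemgr",              # illustrative
        "heron.statemgr.connection.string": "zk.example.com:2181",  # placeholder
    },
}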
Example #3
    def get(self, topology_id: str) -> Tuple[Dict[str, Any], int]:
        """ Method handling GET requests to the traffic prediction
        endpoint."""

        # Make sure we have the args we need
        errors: List[Dict[str, str]] = []

        if "cluster" not in request.args:
            errors.append({"type": "MissingParameter",
                           "error": "'cluster' parameter should be supplied"})

        if "environ" not in request.args:
            errors.append({"type": "MissingParameter",
                           "error": "'environ' parameter should be supplied"})

        if "model" not in request.args:
            errors.append({"type": "MissingParameter",
                           "error": ("At least one 'model' parameter should "
                                     "be supplied. Supply 'all' to run all "
                                     "configured models")})

        # Return useful errors to the client if any parameters are missing
        if errors:
            return {"errors": errors}, 400

        LOG.info("Traffic prediction requested for Heron topology: %s on "
                 "cluster: %s in environment: %s", topology_id,
                 request.args["cluster"], request.args["environ"])

        # Make sure we have a current graph representing the physical plan for
        # the topology
        try:
            graph_check(self.graph_client, self.model_config, self.tracker_url,
                        request.args["cluster"], request.args["environ"],
                        topology_id)
        except Exception as err:
            LOG.error("Error running graph check for topology: %s -> %s",
                      topology_id, str(err))
            errors.append({"topology": topology_id,
                           "type": str(type(err)),
                           "error": str(err)})
            return {"errors": errors}, 400

        output: Dict[str, Any] = {}

        if "all" in request.args.getlist("model"):
            LOG.info("Running all configured Heron traffic performance models")
            models = self.models.keys()
        else:
            models = request.args.getlist("model")

        # Convert the request.args to a dict suitable for passing as **kwargs
        model_kwargs: Dict[str, Any] = \
            utils.convert_wimd_to_dict(request.args)

        # Remove the model list from the kwargs as it is only needed by this
        # method; the same goes for the cluster and environ values
        model_kwargs.pop("model")
        model_kwargs.pop("cluster")
        model_kwargs.pop("environ")

        for model_name in models:
            LOG.info("Running traffic performance model %s", model_name)

            model: HeronTrafficModel = self.models[model_name]

            try:
                results: Dict[str, Any] = model.predict_traffic(
                    topology_id=topology_id,
                    cluster=request.args.get("cluster"),
                    environ=request.args.get("environ"),
                    **model_kwargs)
            except Exception as err:
                LOG.error("Error running model: %s -> %s", model.name,
                          str(err))
                errors.append({"model": model.name, "type": str(type(err)),
                               "error": str(err)})
            else:
                output[model_name] = results

        if errors:
            return {"errors": errors}, 500

        return output, 200
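
A hedged sketch of how a client might call this traffic prediction endpoint. The host and route here are assumptions, not taken from the code above; the query parameters mirror the validation performed by the handler (cluster, environ and one or more model values, or model=all).

import requests

# Assumed host and route for illustration only.
response = requests.get(
    "http://caladrius.example.com:5000/model/traffic/example-topology",
    params={
        "cluster": "example-cluster",
        "environ": "prod",
        "model": "all",  # or repeat the parameter for specific model names
    })

if response.status_code == 200:
    print(response.json())            # per-model prediction results
else:
    print(response.json()["errors"])  # validation or model errors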
Example #4
    def predict_current_performance(self, topology_id: str, cluster: str,
                                    environ: str,
                                    spout_traffic: Dict[int, Dict[str, float]],
                                    **kwargs: Any) -> pd.DataFrame:
        """
        Arguments:
            topology_id (str): The topology identification string
            spout_traffic (dict):   The expected output of the spout instances.
                                    These emit values should be in tuples per
                                    second (tps) otherwise they will not match
                                    with the service time measurements.
        """
        # TODO: check spout traffic keys are integers!
        start, end = get_start_end_times(**kwargs)

        metric_bucket_length: int = cast(int,
                                         self.config["metric.bucket.length"])

        LOG.info(
            "Predicting traffic levels and backpressure of currently running "
            "topology %s using queueing theory model", topology_id)

        # Remove the start and end time kwargs so we don't supply them twice to
        # the metrics client.
        # TODO: We need to make this cleaner? Add start and end to topo model?
        other_kwargs: Dict[str, Any] = {
            key: value
            for key, value in kwargs.items() if key not in ["start", "end"]
        }

        # Get the service time for all elements
        service_times: pd.DataFrame = self.metrics_client.get_service_times(
            topology_id, cluster, environ, start, end, **other_kwargs)
        if service_times.empty:
            raise Exception(
                "Metric client returned empty data frame for service times.")

        # Calculate the service rate for each instance
        service_times["tuples_per_sec"] = 1.0 / (service_times["latency_ms"] /
                                                 1000.0)

        # Drop the system streams
        service_times = (
            service_times[~service_times["stream"].str.contains("__")])

        # Calculate the median service time and rate
        service_time_summary: pd.DataFrame = \
            (service_times[["task", "stream", "latency_ms", "tuples_per_sec"]]
             .groupby(["task", "stream"]).median().reset_index())

        # Get the reference of the latest physical graph entry for this
        # topology, or create a physical graph if there are none.
        topology_ref: str = graph_check(self.graph_client, self.config,
                                        self.tracker_url, cluster, environ,
                                        topology_id)

        # Predict the arrival rate at all instances with the supplied spout
        # traffic
        in_ars, strmgr_ars = self.predict_arrival_rates(
            topology_id, cluster, environ, spout_traffic, start, end,
            metric_bucket_length, topology_ref)

        combined: pd.DataFrame = service_time_summary.merge(
            in_ars, on=["task", "stream"])

        combined["capacity"] = (combined["arrival_rate"] /
                                combined["tuples_per_sec"]) * 100.0

        combined["back_pressure"] = combined["capacity"] > 100.0

        return combined
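
A small worked example of the service rate, capacity and back pressure calculation above, using made-up numbers: an instance with a 5 ms median service latency can process 1 / 0.005 = 200 tuples per second, so a predicted arrival rate of 250 tuples per second gives a capacity of (250 / 200) * 100 = 125% and the instance is flagged as being under back pressure.

import pandas as pd

# Illustrative median service times and predicted arrival rates.
combined = pd.DataFrame({
    "task": [1, 2],
    "stream": ["output", "output"],
    "latency_ms": [5.0, 2.0],
    "arrival_rate": [250.0, 400.0],
})

combined["tuples_per_sec"] = 1.0 / (combined["latency_ms"] / 1000.0)
combined["capacity"] = (combined["arrival_rate"] /
                        combined["tuples_per_sec"]) * 100.0
combined["back_pressure"] = combined["capacity"] > 100.0

# task 1: capacity 125.0 -> back_pressure True
# task 2: capacity  80.0 -> back_pressure False
print(combined[["task", "capacity", "back_pressure"]])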
Example #5
    def post(self, topology_id: str) -> Tuple[Dict[str, Any], int]:
        """ Method handling POST requests to the current topology performance
        modelling endpoint."""

        # Make sure we have the args we need
        errors: List[Dict[str, str]] = []
        if "cluster" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'cluster' parameter should be supplied"
            })

        if "environ" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'environ' parameter should be supplied"
            })

        if "model" not in request.args:
            errors.append({
                "type":
                "MissingParameter",
                "error": ("At least one 'model' parameter should "
                          "be supplied. Supply 'all' to run all "
                          "configured models")
            })

        # Return useful errors to the client if any parameters are missing
        if errors:
            return {"errors": errors}, 400

        LOG.info(
            "Processing performance modelling request for topology: %s, "
            "cluster: %s, environment: %s, using model: %s", topology_id,
            request.args.get("cluster"), request.args.get("environ"),
            str(request.args.getlist("model")))

        # Make sure we have a current graph representing the physical plan for
        # the topology
        try:
            graph_check(self.graph_client, self.model_config, self.tracker_url,
                        request.args["cluster"], request.args["environ"],
                        topology_id)
        except Exception as err:
            LOG.error("Error running graph check for topology: %s -> %s",
                      topology_id, str(err))
            errors.append({
                "topology": topology_id,
                "type": str(type(err)),
                "error": str(err)
            })
            return {"errors": errors}, 400

        # Get the spout traffic state and convert the JSON string task IDs to
        # integers
        json_traffic: Dict[str, Dict[str, float]] = request.get_json()
        traffic: Dict[int, Dict[str, float]] = \
            {int(key): value for key, value in json_traffic.items()}

        if "all" in request.args.getlist("model"):
            LOG.info("Running all configured Heron topology performance "
                     "models")
            models = self.models.keys()
        else:
            models = request.args.getlist("model")

        # Convert the request.args to a dict suitable for passing as **kwargs
        model_kwargs: Dict[str, Any] = \
            utils.convert_wimd_to_dict(request.args)

        # Remove the model list and the other keys that are only needed by
        # this method from the kwargs
        model_kwargs.pop("model")
        model_kwargs.pop("cluster")
        model_kwargs.pop("environ")
        cluster = request.args.get("cluster")
        environ = request.args.get("environ")

        output = {}
        for model_name in models:
            LOG.info("Running topology performance model %s", model_name)

            model = self.models[model_name]

            try:
                results: pd.DataFrame = model.predict_current_performance(
                    topology_id=topology_id,
                    cluster=cluster,
                    environ=environ,
                    spout_traffic=traffic,
                    **model_kwargs)
            except Exception as err:
                LOG.error("Error running model: %s -> %s", model.name,
                          str(err))
                errors.append({
                    "model": model.name,
                    "type": str(type(err)),
                    "error": str(err)
                })
            else:
                output[model_name] = results.to_json()

        if errors:
            return {"errors": errors}, 500

        return output, 200
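
A hedged sketch of a client POST to this endpoint. The spout traffic is sent as the JSON body, keyed by task ID as a string (the handler converts the keys to integers) and then by stream name, with tuples-per-second values. The host, route and model name here are assumptions.

import requests

# Task IDs are JSON object keys and therefore strings; the handler converts
# them to integers. Stream names and rates are illustrative.
spout_traffic = {
    "1": {"default": 150.0},
    "2": {"default": 150.0},
}

response = requests.post(
    "http://caladrius.example.com:5000/model/topology/current/example-topology",  # assumed route
    params={"cluster": "example-cluster", "environ": "prod",
            "model": "queueing_theory"},  # assumed model name
    json=spout_traffic)

print(response.status_code, response.json())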
Example #6
    def get(self, topology_id: str, traffic_source: str):
        """ Method handling get requests to the current topology packing plan
            modelling endpoint."""

        # Checking to make sure we have required arguments
        errors: List[Dict[str, str]] = []
        if "cluster" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'cluster' parameter should be supplied"
            })

        if "environ" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'environ' parameter should be supplied"
            })

        if "model" not in request.args:
            errors.append({
                "type":
                "MissingParameter",
                "error": ("At least one 'model' parameter should "
                          "be supplied. Supply 'all' to run all "
                          "configured models")
            })

        # Return useful errors to the client if any parameters are missing
        if errors:
            return {"errors": errors}, 400

        LOG.info(
            "Processing performance modelling request for topology: %s, "
            "cluster: %s, environment: %s, using model: %s", topology_id,
            request.args.get("cluster"), request.args.get("environ"),
            str(request.args.getlist("model")))

        cluster = request.args.get("cluster")
        environ = request.args.get("environ")

        # Make sure we have a current graph representing the physical plan for
        # the topology
        graph_check(self.graph_client, self.model_config, self.tracker_url,
                    cluster, environ, topology_id)

        # Make sure we have a file containing all paths for the job
        paths_check(self.graph_client, self.model_config, cluster, environ,
                    topology_id)

        if "all" in request.args.getlist("model"):
            LOG.info("Running all configured Heron topology performance "
                     "models")
            models = self.models.keys()
        else:
            models = request.args.getlist("model")

        # Convert the request.args to a dict suitable for passing as **kwargs
        model_kwargs: Dict[str, Any] = \
            utils.convert_wimd_to_dict(request.args)

        # Remove the models list from the kwargs as it is only needed by this
        # method
        model_kwargs.pop("model")
        model_kwargs.pop("cluster")
        model_kwargs.pop("environ")

        start, end = get_start_end_times(**model_kwargs)

        # The traffic source can be one of two values: current or future. For
        # a future traffic source we first create a provider that gathers
        # together the predicted traffic information. For a current traffic
        # source we simply propose a packing plan based on current metrics.
        traffic_provider: Union[CurrentTraffic, PredictedTraffic]
        if traffic_source == self.CURRENT:
            traffic_provider = CurrentTraffic(
                self.metrics_client, self.graph_client, topology_id, cluster,
                environ, start, end, {}, **model_kwargs)
        elif traffic_source == self.FUTURE:
            # The predicted traffic provider is initialised from the future
            # traffic model and provides methods to convert the predicted
            # traffic into arrival rates.
            traffic_provider = PredictedTraffic(
                self.metrics_client, self.graph_client, topology_id, cluster,
                environ, start, end, self.traffic_config, **model_kwargs)

        else:
            errors.append({
                "type": "ValueError",
                "error": (f"'{traffic_source}' is not a valid traffic source. "
                          "Please specify either 'current' or 'future' and "
                          "provide parameters accordingly.")
            })
            return {"errors": errors}, 400

        model_kwargs["zk.time.offset"] = self.model_config["zk.time.offset"]
        model_kwargs["heron.statemgr.root.path"] = self.model_config[
            "heron.statemgr.root.path"]
        model_kwargs["heron.statemgr.connection.string"] = self.model_config[
            "heron.statemgr.connection.string"]

        output: Dict[str, Any] = {}
        for model_name in models:
            LOG.info("Running topology packing plan model %s", model_name)
            model = self.models[model_name]
            results: list = model.predict_packing_plan(
                topology_id=topology_id,
                cluster=cluster,
                environ=environ,
                start=start,
                end=end,
                traffic_provider=traffic_provider,
                **model_kwargs)
            output[model_name] = results

        return output, 200
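
A hedged sketch of calling this packing plan endpoint. The traffic_source path segment selects between the current and predicted traffic providers (the strings are assumed to match self.CURRENT and self.FUTURE), and the host and route are assumptions.

import requests

for traffic_source in ("current", "future"):
    # Assumed host and route for illustration only.
    url = ("http://caladrius.example.com:5000/model/packing_plan/"
           f"example-topology/{traffic_source}")
    response = requests.get(
        url, params={"cluster": "example-cluster", "environ": "prod",
                     "model": "all"})
    print(traffic_source, response.status_code)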
Example #7
    def get(self, topology_id: str) -> Tuple[Dict[str, Any], int]:
        """ Method handling requests for the currently running topology's end to
        end latency"""

        # Make sure we have the args we need
        errors: List[Dict[str, str]] = []
        if "cluster" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'cluster' parameter should be supplied"
            })

        if "environ" not in request.args:
            errors.append({
                "type": "MissingParameter",
                "error": "'environ' parameter should be supplied"
            })

        if "model" not in request.args:
            errors.append({
                "type":
                "MissingParameter",
                "error": ("At least one 'model' parameter should "
                          "be supplied. Supply 'all' to run all "
                          "configured models")
            })

        # Return useful errors to the client if any parameters are missing
        if errors:
            return {"errors": errors}, 400

        LOG.info(
            "Processing performance modelling request for topology: %s, "
            "cluster: %s, environment: %s, using model: %s", topology_id,
            request.args.get("cluster"), request.args.get("environ"),
            str(request.args.getlist("model")))

        cluster = request.args.get("cluster")
        environ = request.args.get("environ")
        # Make sure we have a current graph representing the physical plan for
        # the topology
        graph_check(self.graph_client, self.model_config, self.tracker_url,
                    cluster, environ, topology_id)

        # Make sure we have a file containing all paths for the job
        paths_check(self.graph_client, self.model_config, cluster, environ,
                    topology_id)

        if "all" in request.args.getlist("model"):
            LOG.info("Running all configured Heron topology performance "
                     "models")
            models = self.models.keys()
        else:
            models = request.args.getlist("model")

        # Convert the request.args to a dict suitable for passing as **kwargs
        model_kwargs: Dict[str, Any] = \
            utils.convert_wimd_to_dict(request.args)

        # Remove the models list from the kwargs as it is only needed by this
        # method
        model_kwargs.pop("model")
        model_kwargs.pop("cluster")
        model_kwargs.pop("environ")
        model_kwargs["zk.time.offset"] = self.model_config["zk.time.offset"]
        model_kwargs["heron.statemgr.root.path"] = self.model_config[
            "heron.statemgr.root.path"]
        model_kwargs["heron.statemgr.connection.string"] = self.model_config[
            "heron.statemgr.connection.string"]

        start, end = get_start_end_times(**model_kwargs)
        traffic_provider: CurrentTraffic = CurrentTraffic(
            self.metrics_client, self.graph_client, topology_id, cluster,
            environ, start, end, {}, **model_kwargs)
        output = {}
        for model_name in models:
            LOG.info("Running topology performance model %s", model_name)

            model = self.models[model_name]

            try:
                results: list = model.find_current_instance_waiting_times(
                    topology_id=topology_id,
                    cluster=cluster,
                    environ=environ,
                    traffic_source=traffic_provider,
                    start=start,
                    end=end,
                    **model_kwargs)
            except Exception as err:
                LOG.error("Error running model: %s -> %s", model.name,
                          str(err))
                errors.append({
                    "model": model.name,
                    "type": str(type(err)),
                    "error": str(err)
                })
            else:
                output[model_name] = json.dumps(results)

        if errors:
            return {"errors": errors}, 500

        return output, 200
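
Because each model's waiting time results are serialised with json.dumps before being added to the response, a client has to decode twice: once for the HTTP body and once per model entry. A minimal sketch, with the host, route and query parameters assumed.

import json
import requests

response = requests.get(
    "http://caladrius.example.com:5000/model/end_to_end_latency/example-topology",  # assumed route
    params={"cluster": "example-cluster", "environ": "prod", "model": "all"})

body = response.json()
for model_name, encoded_results in body.items():
    # Each entry is itself a JSON string produced by json.dumps(results).
    results = json.loads(encoded_results)
    print(model_name, results)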