def predict_arrival_rates(
        self, topology_id: str, cluster: str, environ: str,
        spout_traffic: Dict[int, Dict[str, float]],
        start: dt.datetime, end: dt.datetime,
        metric_bucket_length: int, topology_ref: str = None,
        **kwargs: Any) -> Tuple[pd.DataFrame, pd.DataFrame]:

    if not topology_ref:
        # Get the reference of the latest physical graph entry for this
        # topology, or create a physical graph if there is none.
        topology_ref = graph_check(self.graph_client, self.config,
                                   self.tracker_url, cluster, environ,
                                   topology_id)

    # Predict arrival rates for all elements
    instance_ars: pd.DataFrame
    strmgr_ars: pd.DataFrame
    instance_ars, strmgr_ars = arrival_rates.calculate(
        self.graph_client, self.metrics_client, topology_id, cluster,
        environ, topology_ref, start, end, metric_bucket_length,
        self.tracker_url, spout_traffic, **kwargs)

    # Sum the arrivals from each source component of each incoming stream
    in_ars: pd.DataFrame = \
        (instance_ars.groupby(["task", "incoming_stream"]).sum()
         .reset_index()
         .rename(index=str, columns={"incoming_stream": "stream"}))

    return in_ars, strmgr_ars
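# A minimal, self-contained sketch (toy task and stream names, made-up
# rates) of the aggregation performed above: arrivals from multiple source
# components on the same incoming stream collapse into one row per
# (task, stream) pair.
import pandas as pd

instance_ars = pd.DataFrame({
    "task": [1, 1, 2],
    "incoming_stream": ["word", "word", "count"],
    "source_component": ["spout-a", "spout-b", "bolt-a"],
    "arrival_rate": [100.0, 50.0, 75.0],
})

in_ars = (instance_ars[["task", "incoming_stream", "arrival_rate"]]
          .groupby(["task", "incoming_stream"]).sum()
          .reset_index()
          .rename(columns={"incoming_stream": "stream"}))

print(in_ars)
#    task stream  arrival_rate
# 0     1   word         150.0
# 1     2  count          75.0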
cluster = ARGS.cluster
environ = ARGS.environ
topology = ARGS.topology

topology_latencies: pd.DataFrame = pd.DataFrame(columns=[
    'topology', 'av_actual_latency', 'std_actual_latency',
    'av_calculated_latency', 'std_predicted_latency'])

system_metrics: pd.DataFrame = pd.DataFrame(columns=[
    'topology', 'component', 'av_gc', 'std_gc', 'av_cpu_load',
    'std_cpu_load'])

# Make sure we have a current graph representing the physical plan for
# the topology
graph_check(graph_client, CONFIG["heron.topology.models.config"],
            CONFIG["heron.tracker.url"], cluster, environ, topology)

# Make sure we have a file containing all paths for the job
paths_check(graph_client, CONFIG["heron.topology.models.config"], cluster,
            environ, topology)

model_kwargs: Dict[str, Any] = {}
model_kwargs["zk.time.offset"] = \
    CONFIG["heron.topology.models.config"]["zk.time.offset"]
model_kwargs["heron.statemgr.root.path"] = \
    CONFIG["heron.topology.models.config"]["heron.statemgr.root.path"]
model_kwargs["heron.statemgr.connection.string"] = \
    CONFIG["heron.topology.models.config"]["heron.statemgr.connection.string"]

now = dt.datetime.now()
start, end = now - dt.timedelta(minutes=HISTORICAL_METRICS_DURATION), now
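# For reference, a hypothetical sketch of the nested CONFIG structure the
# lookups above imply; all values here are placeholders, not real
# deployment settings.
import datetime as dt

CONFIG = {
    "heron.tracker.url": "http://heron-tracker.example.com:8888",
    "heron.topology.models.config": {
        "zk.time.offset": 0,
        "heron.statemgr.root.path": "/heron/statemgr",
        "heron.statemgr.connection.string": "zookeeper.example.com:2181",
    },
}

# Assumed window length in minutes; the real constant is defined elsewhere
# in the script.
HISTORICAL_METRICS_DURATION = 120

now = dt.datetime.now()
start, end = now - dt.timedelta(minutes=HISTORICAL_METRICS_DURATION), now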
def get(self, topology_id: str) -> Tuple[Dict[str, Any], int]:

    # Make sure we have the args we need
    errors: List[Dict[str, str]] = []
    if "cluster" not in request.args:
        errors.append({"type": "MissingParameter",
                       "error": "'cluster' parameter should be supplied"})
    if "environ" not in request.args:
        errors.append({"type": "MissingParameter",
                       "error": "'environ' parameter should be supplied"})
    if "model" not in request.args:
        errors.append({"type": "MissingParameter",
                       "error": ("At least one 'model' parameter should "
                                 "be supplied. Supply 'all' to run all "
                                 "configured models")})

    # Return useful errors to the client if any parameters are missing
    if errors:
        return {"errors": errors}, 400

    LOG.info("Traffic prediction requested for Heron topology: %s on "
             "cluster: %s in environment: %s", topology_id,
             request.args["cluster"], request.args["environ"])

    # Make sure we have a current graph representing the physical plan for
    # the topology
    try:
        graph_check(self.graph_client, self.model_config, self.tracker_url,
                    request.args["cluster"], request.args["environ"],
                    topology_id)
    except Exception as err:
        LOG.error("Error running graph check for topology: %s -> %s",
                  topology_id, str(err))
        errors.append({"topology": topology_id, "type": str(type(err)),
                       "error": str(err)})
        return {"errors": errors}, 400

    if "all" in request.args.getlist("model"):
        LOG.info("Running all configured Heron traffic performance models")
        models = self.models.keys()
    else:
        models = request.args.getlist("model")

    # Convert the request.args to a dict suitable for passing as **kwargs
    model_kwargs: Dict[str, Any] = \
        utils.convert_wimd_to_dict(request.args)

    # Remove the model list from the kwargs, as it is only needed by this
    # method; the same goes for the cluster and environ values
    model_kwargs.pop("model")
    model_kwargs.pop("cluster")
    model_kwargs.pop("environ")

    output: Dict[str, Any] = {}
    for model_name in models:
        LOG.info("Running traffic performance model %s", model_name)
        model: HeronTrafficModel = self.models[model_name]
        try:
            results: Dict[str, Any] = model.predict_traffic(
                topology_id=topology_id,
                cluster=request.args.get("cluster"),
                environ=request.args.get("environ"),
                **model_kwargs)
        except Exception as err:
            LOG.error("Error running model: %s -> %s", model.name,
                      str(err))
            errors.append({"model": model.name, "type": str(type(err)),
                           "error": str(err)})
        else:
            output[model_name] = results

    if errors:
        return {"errors": errors}, 500

    return output, 200
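# A hedged client-side example of calling this endpoint with the requests
# library. The host and route below are assumptions for illustration;
# repeated 'model' query parameters select multiple models, and 'all' runs
# every configured model.
import requests

BASE_URL = "http://modelling-service.example.com:5000"  # hypothetical host

response = requests.get(
    f"{BASE_URL}/model/traffic/heron/WordCountTopology",  # assumed route
    params={"cluster": "cluster1", "environ": "prod", "model": ["all"]})

if response.status_code == 200:
    print(response.json())  # {model_name: results, ...}
else:
    print(response.json())  # {"errors": [...]} with a 400 or 500 status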
def predict_current_performance(
        self, topology_id: str, cluster: str, environ: str,
        spout_traffic: Dict[int, Dict[str, float]],
        **kwargs: Any) -> pd.DataFrame:
    """ Predicts the arrival rates, capacity and back pressure of the
    currently running topology under the supplied spout traffic.

    Arguments:
        topology_id (str): The topology identification string.
        cluster (str): The cluster the topology is running on.
        environ (str): The environment the topology is running in.
        spout_traffic (dict): The expected output of the spout instances.
            These emit values should be in tuples per second (tps),
            otherwise they will not match the service time measurements.
    """
    # TODO: check spout traffic keys are integers!

    start, end = get_start_end_times(**kwargs)

    metric_bucket_length: int = \
        cast(int, self.config["metric.bucket.length"])

    LOG.info("Predicting traffic levels and backpressure of currently "
             "running topology %s using queueing theory model",
             topology_id)

    # Remove the start and end time kwargs so we don't supply them twice
    # to the metrics client.
    # TODO: We need to make this cleaner? Add start and end to topo model?
    other_kwargs: Dict[str, Any] = {key: value
                                    for key, value in kwargs.items()
                                    if key not in ["start", "end"]}

    # Get the service time for all elements
    service_times: pd.DataFrame = self.metrics_client.get_service_times(
        topology_id, cluster, environ, start, end, **other_kwargs)

    if service_times.empty:
        raise RuntimeError(
            "Metric client returned empty data frame for service times.")

    # Calculate the service rate for each instance
    service_times["tuples_per_sec"] = \
        1.0 / (service_times["latency_ms"] / 1000.0)

    # Drop the system streams
    service_times = \
        service_times[~service_times["stream"].str.contains("__")]

    # Calculate the median service time and rate
    service_time_summary: pd.DataFrame = \
        (service_times[["task", "stream", "latency_ms", "tuples_per_sec"]]
         .groupby(["task", "stream"]).median().reset_index())

    # Get the reference of the latest physical graph entry for this
    # topology, or create a physical graph if there is none.
    topology_ref: str = graph_check(self.graph_client, self.config,
                                    self.tracker_url, cluster, environ,
                                    topology_id)

    # Predict the arrival rate at all instances with the supplied spout
    # traffic
    in_ars, strmgr_ars = self.predict_arrival_rates(
        topology_id, cluster, environ, spout_traffic, start, end,
        metric_bucket_length, topology_ref)

    # Merge the arrival rates with the service rates and flag any instance
    # whose utilisation (capacity) exceeds 100%
    combined: pd.DataFrame = service_time_summary.merge(
        in_ars, on=["task", "stream"])
    combined["capacity"] = (combined["arrival_rate"] /
                            combined["tuples_per_sec"]) * 100.0
    combined["back_pressure"] = combined["capacity"] > 100.0

    return combined
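# A small, self-contained sketch (toy numbers) of the final merge and
# capacity calculation above: capacity is the arrival rate as a percentage
# of the service rate, and anything over 100% is flagged as back pressure.
import pandas as pd

service_time_summary = pd.DataFrame({
    "task": [1, 2],
    "stream": ["word", "word"],
    "latency_ms": [2.0, 10.0],
    "tuples_per_sec": [500.0, 100.0],  # 1 / (latency_ms / 1000)
})

in_ars = pd.DataFrame({
    "task": [1, 2],
    "stream": ["word", "word"],
    "arrival_rate": [400.0, 120.0],  # tuples per second
})

combined = service_time_summary.merge(in_ars, on=["task", "stream"])
combined["capacity"] = (combined["arrival_rate"] /
                        combined["tuples_per_sec"]) * 100.0
combined["back_pressure"] = combined["capacity"] > 100.0

print(combined[["task", "capacity", "back_pressure"]])
#    task  capacity  back_pressure
# 0     1      80.0          False
# 1     2     120.0           True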
def post(self, topology_id: str) -> Tuple[Dict[str, Any], int]:
    """ Method handling POST requests to the current topology performance
    modelling endpoint."""

    # Make sure we have the args we need
    errors: List[Dict[str, str]] = []
    if "cluster" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'cluster' parameter should be supplied"
        })
    if "environ" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'environ' parameter should be supplied"
        })
    if "model" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": ("At least one 'model' parameter should "
                      "be supplied. Supply 'all' to run all "
                      "configured models")
        })

    # Return useful errors to the client if any parameters are missing
    if errors:
        return {"errors": errors}, 400

    LOG.info(
        "Processing performance modelling request for topology: %s, "
        "cluster: %s, environment: %s, using model: %s", topology_id,
        request.args.get("cluster"), request.args.get("environ"),
        str(request.args.getlist("model")))

    # Make sure we have a current graph representing the physical plan for
    # the topology
    try:
        graph_check(self.graph_client, self.model_config, self.tracker_url,
                    request.args["cluster"], request.args["environ"],
                    topology_id)
    except Exception as err:
        LOG.error("Error running graph check for topology: %s -> %s",
                  topology_id, str(err))
        errors.append({
            "topology": topology_id,
            "type": str(type(err)),
            "error": str(err)
        })
        return {"errors": errors}, 400

    # Get the spout traffic state and convert the JSON string task IDs to
    # integers
    json_traffic: Dict[str, Dict[str, float]] = request.get_json()
    traffic: Dict[int, Dict[str, float]] = \
        {int(key): value for key, value in json_traffic.items()}

    if "all" in request.args.getlist("model"):
        LOG.info("Running all configured Heron topology performance "
                 "models")
        models = self.models.keys()
    else:
        models = request.args.getlist("model")

    # Convert the request.args to a dict suitable for passing as **kwargs
    model_kwargs: Dict[str, Any] = \
        utils.convert_wimd_to_dict(request.args)

    # Remove the model list and the other keys that are only needed by
    # this method from the kwargs
    model_kwargs.pop("model")
    model_kwargs.pop("cluster")
    model_kwargs.pop("environ")

    cluster = request.args.get("cluster")
    environ = request.args.get("environ")

    output = {}
    for model_name in models:
        LOG.info("Running topology performance model %s", model_name)
        model = self.models[model_name]
        try:
            results: pd.DataFrame = model.predict_current_performance(
                topology_id=topology_id,
                cluster=cluster,
                environ=environ,
                spout_traffic=traffic,
                **model_kwargs)
        except Exception as err:
            LOG.error("Error running model: %s -> %s", model.name,
                      str(err))
            errors.append({
                "model": model.name,
                "type": str(type(err)),
                "error": str(err)
            })
        else:
            output[model_name] = results.to_json()

    if errors:
        return {"errors": errors}, 500

    return output, 200
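# A hedged example of POSTing spout traffic to this endpoint with the
# requests library. The host and route are assumptions; note that JSON
# object keys are strings, which is why the handler converts the task IDs
# back to integers.
import requests

BASE_URL = "http://modelling-service.example.com:5000"  # hypothetical host

# Spout task IDs mapped to per-stream emit rates in tuples per second
spout_traffic = {
    "1": {"word": 150.0},
    "2": {"word": 145.0},
}

response = requests.post(
    f"{BASE_URL}/model/topology/heron/current/WordCountTopology",  # assumed
    params={"cluster": "cluster1", "environ": "prod", "model": "all"},
    json=spout_traffic)

print(response.status_code, response.json())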
def get(self, topology_id: str, traffic_source: str):
    """ Method handling GET requests to the current topology packing plan
    modelling endpoint."""

    # Check that we have the required arguments
    errors: List[Dict[str, str]] = []
    if "cluster" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'cluster' parameter should be supplied"
        })
    if "environ" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'environ' parameter should be supplied"
        })
    if "model" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": ("At least one 'model' parameter should "
                      "be supplied. Supply 'all' to run all "
                      "configured models")
        })

    # Return useful errors to the client if any parameters are missing
    if errors:
        return {"errors": errors}, 400

    LOG.info(
        "Processing performance modelling request for topology: %s, "
        "cluster: %s, environment: %s, using model: %s", topology_id,
        request.args.get("cluster"), request.args.get("environ"),
        str(request.args.getlist("model")))

    cluster = request.args.get("cluster")
    environ = request.args.get("environ")

    # Make sure we have a current graph representing the physical plan for
    # the topology
    graph_check(self.graph_client, self.model_config, self.tracker_url,
                cluster, environ, topology_id)

    # Make sure we have a file containing all paths for the job
    paths_check(self.graph_client, self.model_config, cluster, environ,
                topology_id)

    if "all" in request.args.getlist("model"):
        LOG.info("Running all configured Heron topology performance "
                 "models")
        models = self.models.keys()
    else:
        models = request.args.getlist("model")

    # Convert the request.args to a dict suitable for passing as **kwargs
    model_kwargs: Dict[str, Any] = \
        utils.convert_wimd_to_dict(request.args)

    # Remove the model list from the kwargs, as it is only needed by this
    # method
    model_kwargs.pop("model")
    model_kwargs.pop("cluster")
    model_kwargs.pop("environ")

    start, end = get_start_end_times(**model_kwargs)

    # The traffic source can be one of two values: 'current' or 'future'.
    # For future traffic we must first create an object that gathers
    # together the predicted traffic information; for current traffic we
    # simply propose a packing plan based on the current metrics.
    if traffic_source == self.CURRENT:
        traffic_provider: CurrentTraffic = CurrentTraffic(
            self.metrics_client, self.graph_client, topology_id, cluster,
            environ, start, end, {}, **model_kwargs)
    elif traffic_source == self.FUTURE:
        # The predicted traffic provider is initialised with the future
        # traffic and contains functions to convert the predicted traffic
        # into arrival rates
        traffic_provider: PredictedTraffic = PredictedTraffic(
            self.metrics_client, self.graph_client, topology_id, cluster,
            environ, start, end, self.traffic_config, **model_kwargs)
    else:
        errors.append({
            "type": "ValueError",
            "error": (f"{traffic_source} is not a valid traffic source. "
                      f"Please specify either 'current' or 'future' and "
                      f"provide parameters accordingly.")
        })
        return {"errors": errors}, 400

    model_kwargs["zk.time.offset"] = self.model_config["zk.time.offset"]
    model_kwargs["heron.statemgr.root.path"] = self.model_config[
        "heron.statemgr.root.path"]
    model_kwargs["heron.statemgr.connection.string"] = self.model_config[
        "heron.statemgr.connection.string"]

    for model_name in models:
        LOG.info("Running topology packing plan model %s", model_name)
        model = self.models[model_name]
        results: list = model.predict_packing_plan(
            topology_id=topology_id,
            cluster=cluster,
            environ=environ,
            start=start,
            end=end,
            traffic_provider=traffic_provider,
            **model_kwargs)

    return results
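# A hedged example of requesting a packing plan from this endpoint; the
# host and route are assumptions, and the final path segment selects the
# traffic source ('current' or 'future') as handled above.
import requests

BASE_URL = "http://modelling-service.example.com:5000"  # hypothetical host

response = requests.get(
    f"{BASE_URL}/model/packing_plan/heron/WordCountTopology/current",
    params={"cluster": "cluster1", "environ": "prod", "model": "all"})

print(response.json())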
def get(self, topology_id: str) -> Tuple[Dict[str, Any], int]:
    """ Method handling requests for the currently running topology's
    end-to-end latency."""

    # Make sure we have the args we need
    errors: List[Dict[str, str]] = []
    if "cluster" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'cluster' parameter should be supplied"
        })
    if "environ" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": "'environ' parameter should be supplied"
        })
    if "model" not in request.args:
        errors.append({
            "type": "MissingParameter",
            "error": ("At least one 'model' parameter should "
                      "be supplied. Supply 'all' to run all "
                      "configured models")
        })

    # Return useful errors to the client if any parameters are missing
    if errors:
        return {"errors": errors}, 400

    LOG.info(
        "Processing performance modelling request for topology: %s, "
        "cluster: %s, environment: %s, using model: %s", topology_id,
        request.args.get("cluster"), request.args.get("environ"),
        str(request.args.getlist("model")))

    cluster = request.args.get("cluster")
    environ = request.args.get("environ")

    # Make sure we have a current graph representing the physical plan for
    # the topology
    graph_check(self.graph_client, self.model_config, self.tracker_url,
                cluster, environ, topology_id)

    # Make sure we have a file containing all paths for the job
    paths_check(self.graph_client, self.model_config, cluster, environ,
                topology_id)

    if "all" in request.args.getlist("model"):
        LOG.info("Running all configured Heron topology performance "
                 "models")
        models = self.models.keys()
    else:
        models = request.args.getlist("model")

    # Convert the request.args to a dict suitable for passing as **kwargs
    model_kwargs: Dict[str, Any] = \
        utils.convert_wimd_to_dict(request.args)

    # Remove the model list from the kwargs, as it is only needed by this
    # method
    model_kwargs.pop("model")
    model_kwargs.pop("cluster")
    model_kwargs.pop("environ")

    model_kwargs["zk.time.offset"] = self.model_config["zk.time.offset"]
    model_kwargs["heron.statemgr.root.path"] = self.model_config[
        "heron.statemgr.root.path"]
    model_kwargs["heron.statemgr.connection.string"] = self.model_config[
        "heron.statemgr.connection.string"]

    start, end = get_start_end_times(**model_kwargs)

    traffic_provider: CurrentTraffic = CurrentTraffic(
        self.metrics_client, self.graph_client, topology_id, cluster,
        environ, start, end, {}, **model_kwargs)

    output = {}
    for model_name in models:
        LOG.info("Running topology performance model %s", model_name)
        model = self.models[model_name]
        try:
            results: list = model.find_current_instance_waiting_times(
                topology_id=topology_id, cluster=cluster, environ=environ,
                traffic_source=traffic_provider, start=start, end=end,
                **model_kwargs)
        except Exception as err:
            LOG.error("Error running model: %s -> %s", model.name,
                      str(err))
            errors.append({
                "model": model.name,
                "type": str(type(err)),
                "error": str(err)
            })
        else:
            output[model_name] = json.dumps(results)

    if errors:
        return {"errors": errors}, 500

    return output, 200
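# The utils.convert_wimd_to_dict helper used throughout these handlers is
# not shown in this section. The sketch below is an assumption about its
# behaviour, not the project's actual implementation: it flattens a
# werkzeug ImmutableMultiDict (the type of Flask's request.args) into a
# plain dict, keeping a list only for repeated keys such as 'model'.
from typing import Any, Dict

from werkzeug.datastructures import ImmutableMultiDict


def convert_wimd_to_dict(wimd: ImmutableMultiDict) -> Dict[str, Any]:
    output: Dict[str, Any] = {}
    for key in wimd.keys():
        values = wimd.getlist(key)
        output[key] = values[0] if len(values) == 1 else values
    return output


args = ImmutableMultiDict([("cluster", "c1"), ("model", "a"), ("model", "b")])
print(convert_wimd_to_dict(args))  # {'cluster': 'c1', 'model': ['a', 'b']}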