Example #1
def summary(
    tracker_url: str, topology_id: str, cluster: str, environ: str
) -> Dict[str, int]:
    """ Gets a summary of the numbers of each stream grouping type in the
    specified topology.

    Arguments:
        tracker_url (str):  The URL for the Heron Tracker API
        topology_id (str):  The topology ID string
        cluster (str):  The name of the cluster the topology is running on
        environ (str):  The environment the topology is running in

    Returns:
        A dictionary mapping from stream grouping name to the number of
        streams with that grouping in the topology. It also includes counts
        for grouping combinations, e.g. SHUFFLE->FIELDS : 2 implies that
        there are 2 cases where the source component of a fields-grouped
        stream has an incoming shuffle-grouped stream.
    """
    lplan: Dict[str, Any] = tracker.get_logical_plan(
        tracker_url, cluster, environ, topology_id
    )

    stream_set: Set[Tuple[str, str, str]] = set()

    for bolt_details in lplan["bolts"].values():
        for input_stream in bolt_details["inputs"]:
            stream_set.add(
                (
                    input_stream["stream_name"],
                    input_stream["component_name"],
                    input_stream["grouping"],
                )
            )

    grouping_counts: DefaultDict[str, int] = defaultdict(int)
    for _, source_component, grouping in stream_set:
        grouping_counts[grouping] += 1

        # Now look at the inputs into this source component and count the
        # types of input grouping
        if source_component in lplan["bolts"]:
            for in_stream in lplan["bolts"][source_component]["inputs"]:
                in_grouping: str = in_stream["grouping"]
                grouping_counts[in_grouping + "->" + grouping] += 1

    return dict(grouping_counts)
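
A minimal usage sketch (not part of the original example): the Tracker URL and the topology identifiers below are placeholder assumptions.

# Placeholder values -- substitute a reachable Heron Tracker and a real topology.
TRACKER_URL = "http://heron-tracker.example.com:8888"

grouping_summary = summary(TRACKER_URL, "WordCountTopology", "local", "default")
for grouping, count in grouping_summary.items():
    # e.g. "SHUFFLE: 3" or "SHUFFLE->FIELDS: 1"
    print(f"{grouping}: {count}")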
Example #2
    def _query_setup(
        self,
        topology_id: str,
        cluster: str,
        environ: str,
        start: dt.datetime,
        end: dt.datetime,
    ) -> Tuple[Dict[str, Any], int, int]:
        """ Helper method for setting up each of the query methods with the
        required variables."""

        time_check(start, end, self.time_limit_hrs)

        start_time: int = int(round(start.timestamp()))
        end_time: int = int(round(end.timestamp()))

        logical_plan: Dict[str, Any] = tracker.get_logical_plan(
            self.tracker_url, cluster, environ, topology_id
        )

        return logical_plan, start_time, end_time
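
A self-contained sketch of the timestamp conversion this helper performs, assuming a two-hour UTC window (the dates are illustrative only):

import datetime as dt

# Illustrative window only -- the Tracker metrics endpoints expect POSIX second integers.
start = dt.datetime(2024, 1, 1, 12, 0, tzinfo=dt.timezone.utc)
end = start + dt.timedelta(hours=2)

start_time = int(round(start.timestamp()))
end_time = int(round(end.timestamp()))
print(start_time, end_time)  # 1704110400 1704117600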
Example #3
def get_spout_emissions(
    metric_client: HeronMetricsClient,
    tracker_url: str,
    topology_id: str,
    cluster: str,
    environ: str,
    start: dt.datetime,
    end: dt.datetime,
) -> pd.DataFrame:

    emit_counts: pd.DataFrame = metric_client.get_emit_counts(
        topology_id, cluster, environ, start, end
    )

    lplan: Dict[str, Any] = tracker.get_logical_plan(
        tracker_url, cluster, environ, topology_id
    )

    spout_emits: pd.DataFrame = emit_counts[
        emit_counts.component.isin(lplan["spouts"].keys())
    ]

    return spout_emits
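
To illustrate just the spout filtering step with something runnable, a toy DataFrame and logical plan stand in for the metrics client output and the Tracker response (both are assumptions):

import pandas as pd

# Toy stand-ins -- component names and counts are made up for illustration.
emit_counts = pd.DataFrame(
    {"component": ["sentence-spout", "split-bolt"], "emit_count": [120, 118]}
)
lplan = {"spouts": {"sentence-spout": {}}, "bolts": {"split-bolt": {}}}

spout_emits = emit_counts[emit_counts.component.isin(lplan["spouts"].keys())]
print(spout_emits)  # only the spout row remains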
Example #4
def _build_graph(
    graph_client: GremlinClient,
    tracker_url: str,
    cluster: str,
    environ: str,
    topology_id: str,
    ref_prefix: str = "heron",
) -> str:

    topology_ref: str = create_graph_ref(cluster, environ, topology_id, ref_prefix)

    logical_plan: Dict[str, Any] = tracker.get_logical_plan(
        tracker_url, cluster, environ, topology_id
    )

    physical_plan: Dict[str, Any] = tracker.get_physical_plan(
        tracker_url, cluster, environ, topology_id
    )

    builder.create_physical_graph(
        graph_client, topology_id, topology_ref, logical_plan, physical_plan
    )

    return topology_ref
Example #5
def get_spout_state(
    metrics_client: HeronMetricsClient,
    topology_id: str,
    cluster: str,
    environ: str,
    tracker_url: str,
    start: dt.datetime,
    end: dt.datetime,
    metrics_sample_period: float,
    summary_method: str = "median",
    **kwargs: Union[str, int, float],
) -> Dict[int, Dict[str, float]]:
    """ Helper script that will fetch the median or mean spout emission rates
    and format them into the dictionary structure expected by the topology
    performance prediction methods.

    Arguments:
        metrics_client (HeronMetricsClient):    The client for the metrics
                                                database.
        topology_id (str):  The topology identification string.
        cluster (str):  The cluster that the topology is running on.
        environ (str): The environment that the topology is running in.
        tracker_url (str):  The URL for the Heron Tracker API.
        start (datetime):   The UTC datetime for the start of the metrics
                            gathering period.
        end (datetime): The UTC datetime for the end of the metrics
                        gathering period.
        metrics_sample_period (float):  The period that metrics are sampled
                                        into, e.g. 60 secs (1 min) or 300
                                        secs (5 mins).
        summary_method (str):   The method used to summarise the emit counts.
                                Either "mean" or "median". Defaults to median.
        **kwargs:   Any additional keyword arguments required by the metrics
                    client.

    Returns:
        Dict[int, Dict[str, float]]:    A dictionary mapping from task ID to a
        dict that maps from output stream name to an emission rate in tuples
        per second.
    """

    LOG.info(
        "Getting spout emission state dictionary for topology %s over a"
        "period of %d seconds from %s to %s",
        topology_id,
        (end - start).total_seconds(),
        start.isoformat(),
        end.isoformat(),
    )

    lplan: Dict[str, Any] = tracker.get_logical_plan(
        tracker_url, cluster, environ, topology_id
    )

    emit_counts: pd.DataFrame = metrics_client.get_emit_counts(
        topology_id, cluster, environ, start, end, **kwargs
    )

    spout_groups: pd.core.groupby.DataFrameGroupBy = emit_counts[
        emit_counts["component"].isin(lplan["spouts"])
    ].groupby(["task", "stream"])

    if summary_method == "median":

        spout_emits: pd.Series = spout_groups.emit_count.median()

    elif summary_method == "mean":

        spout_emits = spout_groups.emit_count.mean()

    else:
        msg: str = f"Unknown summary method: {summary_method}"
        LOG.error(msg)
        raise RuntimeError(msg)

    output: DefaultDict[int, Dict[str, float]] = defaultdict(dict)

    for (task_id, stream), emit_count in spout_emits.items():

        output[task_id][stream] = emit_count / metrics_sample_period

    return dict(output)
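
A self-contained sketch of the final rate calculation above, using a toy pandas Series in place of the summarised spout emit counts (the values and the 60-second sample period are assumptions):

from collections import defaultdict
from typing import DefaultDict, Dict

import pandas as pd

# Toy summarised emit counts indexed by (task, stream) -- illustrative values only.
spout_emits = pd.Series(
    [600.0, 300.0],
    index=pd.MultiIndex.from_tuples([(1, "word"), (2, "word")], names=["task", "stream"]),
    name="emit_count",
)
metrics_sample_period = 60.0  # assumed sample period in seconds

output: DefaultDict[int, Dict[str, float]] = defaultdict(dict)
for (task_id, stream), emit_count in spout_emits.items():
    output[task_id][stream] = emit_count / metrics_sample_period

print(dict(output))  # {1: {'word': 10.0}, 2: {'word': 5.0}}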
Example #6
    def get_spout_complete_latencies(
        self,
        topology_id: str,
        cluster: str,
        environ: str,
        component_name: str,
        start: int,
        end: int,
        logical_plan: Optional[Dict[str, Any]] = None,
    ) -> pd.DataFrame:
        """ Gets the complete latency, as a timeseries, for every instance of
        the specified component of the specified topology. The start and end
        times define the window over which to gather the metrics. The window
        duration should be less than 3 hours as this is the limit of what the
        Topology Master stores.

        Arguments:
            topology_id (str):    The topology identification string.
            cluster (str):  The cluster the topology is running in.
            environ (str):  The environment the topology is running in (e.g.
                            prod, devel, test, etc.).
            component_name (str):   The name of the spout component whose
                                    metrics are required.
            start (int):    Start time for the time period the query is run
                            against. This should be a UTC POSIX time integer
                            (seconds since epoch).
            end (int):  End time for the time period the query is run against.
                        This should be a UTC POSIX time integer (seconds since
                        epoch).
            logical_plan (dict):    Optional dictionary logical plan returned
                                    by the Heron Tracker API. If not supplied
                                    this method will call the API to get the
                                    logical plan.

        Returns:
            pandas.DataFrame:   A DataFrame containing the complete latency
            measurements as a timeseries. Each row represents a measurement
            (averaged over one minute) with the following columns:

            * timestamp:  The UTC timestamp for the metric,
            * component: The component this metric comes from,
            * task: The instance ID number for the instance that the metric
              comes from,
            * container:  The ID for the container this metric comes from,
            * stream: The name of the outgoing stream from which the tuples
              that lead to this metric came,
            * latency_ms: The average complete latency measurement in
              milliseconds for that metric time period.
        """

        LOG.info(
            "Getting complete latency metrics for component %s of " "topology %s",
            component_name,
            topology_id,
        )

        if not logical_plan:
            LOG.debug("Logical plan not supplied, fetching from Heron Tracker")
            logical_plan = tracker.get_logical_plan(
                self.tracker_url, cluster, environ, topology_id
            )

        outgoing_streams: List[str] = tracker.get_outgoing_streams(
            logical_plan, component_name
        )

        metrics: List[str] = [
            "__complete-latency/" + stream for stream in outgoing_streams
        ]

        results: Dict[str, Any] = tracker.get_metrics_timeline(
            self.tracker_url,
            cluster,
            environ,
            topology_id,
            component_name,
            start,
            end,
            metrics,
        )

        output: Optional[pd.DataFrame] = None

        for stream_metric, instance_timelines in results["timeline"].items():
            metric_list: List[str] = stream_metric.split("/")
            outgoing_stream: str = metric_list[1]

            instance_tls_df: pd.DataFrame = instance_timelines_to_dataframe(
                instance_timelines,
                outgoing_stream,
                "latency_ms",
                str_nano_to_float_milli,
            )

            if output is None:
                output = instance_tls_df
            else:
                output = pd.concat([output, instance_tls_df], ignore_index=True)

        return output
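
A toy DataFrame with the columns the docstring describes, showing how the returned complete latency timeseries might be summarised per stream (all values are made up):

import pandas as pd

# Made-up rows that follow the documented column layout.
complete_latencies = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(["2024-01-01 12:00", "2024-01-01 12:01"], utc=True),
        "component": ["sentence-spout", "sentence-spout"],
        "task": [1, 1],
        "container": [1, 1],
        "stream": ["word", "word"],
        "latency_ms": [12.5, 11.8],
    }
)

# Average complete latency per outgoing stream.
print(complete_latencies.groupby("stream").latency_ms.mean())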
Example #7
    except FileNotFoundError:
        MSG2: str = f"The config file: {ARGS.config} was not found. Aborting"

        if ARGS.quiet:
            print(MSG2)
        else:
            LOG.error(MSG2)

        sys.exit(1)

    TIMER_START = dt.datetime.now()

    TRACKER_URL: str = cast(str, CONFIG[ConfKeys.HERON_TRACKER_URL.value])

    LPLAN: Dict[str, Any] = tracker.get_logical_plan(
        TRACKER_URL, ARGS.zone, ARGS.environment, ARGS.topology
    )

    PPLAN: Dict[str, Any] = tracker.get_physical_plan(
        TRACKER_URL, ARGS.zone, ARGS.environment, ARGS.topology
    )

    GRAPH_CLIENT: GremlinClient = GremlinClient(CONFIG["graph.client.config"])

    builder.create_physical_graph(
        GRAPH_CLIENT, ARGS.topology, ARGS.reference, LPLAN, PPLAN
    )

    if ARGS.populate and ARGS.duration:

        METRIC_CLIENT_CLASS: Type = loader.get_class(CONFIG["heron.metrics.client"])