Example #1
0
async def get_component_metrics(
    tmanager,
    component: str,
    metric_names: List[str],
    instances: List[str],
    interval: int,
) -> ComponentMetrics:
    """
    Return metrics from the Tmanager over the given interval.

    The `metrics` field of the result is a nested mapping of
    metric name -> instance id -> value, built from the Tmanager response.

    Only the metrics named in `metric_names` are requested.

    Args:
      tmanager: object exposing `host` and `stats_port`, used to build the
        stats URL.
      component: component name placed on the request.
      metric_names: metric names to request.
      instances: instance ids to restrict the request to; when empty, no
        instance filter is set on the request.
      interval: value copied into the request's `interval` field.

    Returns:
      A ComponentMetrics carrying the interval reported in the response, the
      requested component name and the nested metrics mapping.

    Raises:
      Exception: if `tmanager` is missing or has no host / stats port, or if
        the Tmanager answers with an HTTP status of 400 or above.
    """
    if not (tmanager and tmanager.host and tmanager.stats_port):
        raise Exception("No Tmanager found")

    metric_request = tmanager_pb2.MetricRequest()
    metric_request.component_name = component
    if instances:
        metric_request.instance_id.extend(instances)
    metric_request.metric.extend(metric_names)
    metric_request.interval = interval

    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    # NOTE(review): newer httpx releases prefer `content=` for raw bytes;
    # `data=` is kept here to match the rest of the file - confirm against
    # the pinned httpx version.
    async with httpx.AsyncClient() as client:
        response = await client.post(url,
                                     data=metric_request.SerializeToString())

    # Fail loudly on 4xx/5xx instead of trying to parse an error body as a
    # MetricResponse; this mirrors the check in the sibling fetch helpers.
    if response.status_code >= 400:
        raise Exception(
            "Error in getting metrics from Tmanager, "
            f"code: {response.status_code}")

    metric_response = tmanager_pb2.MetricResponse()
    metric_response.ParseFromString(response.content)

    # A NOTOK status is only logged; whatever metrics came back are still
    # returned to the caller.
    status = metric_response.status
    if status.status == common_pb2.NOTOK and status.HasField("message"):
        Log.warn("Received response from Tmanager: %s", status.message)

    # Each entry of the response carries one instance id together with the
    # individual metrics reported for that instance.
    metrics = {}
    for metric in metric_response.metric:
        instance = metric.instance_id
        for instance_metric in metric.metric:
            metrics.setdefault(instance_metric.name,
                               {})[instance] = instance_metric.value

    return ComponentMetrics(
        interval=metric_response.interval,
        component=component,
        metrics=metrics,
    )
Example #2
0
    def getComponentMetrics(self,
                            tmanager,
                            componentName,
                            metric_names,
                            instances,
                            interval,
                            callback=None):
        """
        Get the specified metrics for the given component name of this topology.

        This is a generator-based coroutine: it yields the pending HTTP fetch
        and delivers its result by raising `tornado.gen.Return`. It is
        presumably wrapped by a tornado coroutine decorator that is not
        visible in this chunk - confirm.

        The delivered result is the following dict:
        {
          "metrics": {
            <metricname>: {
              <instance>: <numeric value>,
              <instance>: <numeric value>,
              ...
            }, ...
          },
          "interval": <numeric value>,
          "component": "..."
        }

        `tmanager` must expose `host` and `stats_port`. When `instances` is
        empty (or None) no instance filter is put on the request. `callback`
        is accepted but not used by this method.

        Raises Exception if no usable Tmanager is given, if the HTTP fetch
        raises `tornado.httpclient.HTTPError`, or if the response code is
        400 or above.
        """
        if not tmanager or not tmanager.host or not tmanager.stats_port:
            raise Exception("No Tmanager found")

        host = tmanager.host
        port = tmanager.stats_port

        metricRequest = tmanager_pb2.MetricRequest()
        metricRequest.component_name = componentName
        if instances:
            metricRequest.instance_id.extend(instances)
        metricRequest.metric.extend(metric_names)
        metricRequest.interval = interval

        # Serialize the metricRequest to send as a payload
        # with the HTTP request.
        metricRequestString = metricRequest.SerializeToString()

        url = "http://{0}:{1}/stats".format(host, port)
        request = tornado.httpclient.HTTPRequest(url,
                                                 body=metricRequestString,
                                                 method='POST',
                                                 request_timeout=5)

        Log.debug("Making HTTP call to fetch metrics")
        Log.debug("url: " + url)
        try:
            client = tornado.httpclient.AsyncHTTPClient()
            result = yield client.fetch(request)
            Log.debug("HTTP call complete.")
        except tornado.httpclient.HTTPError as e:
            raise Exception(str(e))

        # Check the response code - error if it is in 400s or 500s
        responseCode = result.code
        if responseCode >= 400:
            # The code is an integer, so it has to be formatted into the
            # message; plain `+` concatenation would raise TypeError here.
            message = "Error in getting metrics from Tmanager, code: {0}".format(
                responseCode)
            Log.error(message)
            raise Exception(message)

        # Parse the response from tmanager.
        metricResponse = tmanager_pb2.MetricResponse()
        metricResponse.ParseFromString(result.body)

        # A NOTOK status is only logged; the metrics that came back are still
        # returned.
        status = metricResponse.status
        if status.status == common_pb2.NOTOK and status.HasField("message"):
            Log.warn("Received response from Tmanager: %s", status.message)

        # Form the response: metric name -> instance id -> value.
        metrics = {}
        for metric in metricResponse.metric:
            instance = metric.instance_id
            for im in metric.metric:
                metrics.setdefault(im.name, {})[instance] = im.value

        ret = {
            "interval": metricResponse.interval,
            "component": componentName,
            "metrics": metrics,
        }

        raise tornado.gen.Return(ret)
Example #3
0
async def get_metrics_timeline(
    tmanager: tmanager_pb2.TManagerLocation,
    component_name: str,
    metric_names: List[str],
    instances: List[str],
    start_time: int,
    end_time: int,
    callback=None,
) -> MetricsTimeline:
    """
    Get the specified metrics for the given component name of this topology.

    Args:
      tmanager: Tmanager location proto; must have `host` and `stats_port`.
      component_name: component name placed on the request.
      metric_names: metric names to request.
      instances: instance ids to request; if none are given, metrics for all
        instances are fetched by default.
      start_time: start of the explicit interval sent to the Tmanager.
      end_time: end of the explicit interval sent to the Tmanager.
      callback: accepted for compatibility; not used by this function.

    Returns:
      A MetricsTimeline whose `timeline` is a nested mapping of
      metric name -> instance id -> interval start -> value.

    Raises:
      Exception: if no usable Tmanager is given, or if the Tmanager answers
        with an HTTP status of 400 or above.
    """

    # Tmanager is the proto object and must have host and port for stats.
    if not tmanager or not tmanager.host or not tmanager.stats_port:
        raise Exception("No Tmanager found")

    # Create the proto request object to get metrics.
    request_parameters = tmanager_pb2.MetricRequest()
    request_parameters.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    request_parameters.instance_id.extend(instances)
    request_parameters.metric.extend(metric_names)

    request_parameters.explicit_interval.start = start_time
    request_parameters.explicit_interval.end = end_time
    request_parameters.minutely = True

    # Form and send the http request.
    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    Log.debug(f"Making HTTP call to fetch metrics: {url}")
    # NOTE(review): newer httpx releases prefer `content=` for raw bytes;
    # `data=` is kept to avoid changing the request - confirm against the
    # pinned httpx version.
    async with httpx.AsyncClient() as client:
        result = await client.post(url,
                                   data=request_parameters.SerializeToString())

    # Check the response code - error if it is in 400s or 500s.
    # httpx responses expose the code as `status_code`, so the message must
    # use the same attribute as the check.
    if result.status_code >= 400:
        message = (
            "Error in getting metrics from Tmanager, "
            f"code: {result.status_code}")
        raise Exception(message)

    # Parse the response from tmanager.
    response_data = tmanager_pb2.MetricResponse()
    response_data.ParseFromString(result.content)

    # A NOTOK status is only logged; the data that came back is still used.
    status = response_data.status
    if status.status == common_pb2.NOTOK and status.HasField("message"):
        Log.warn("Received response from Tmanager: %s", status.message)

    timeline = {}
    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    for metric in response_data.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            # The per-instance entry is created even when no interval values
            # follow, so every reported (metric, instance) pair shows up.
            points = timeline.setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of the interval
            # it belongs to.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    return MetricsTimeline(
        starttime=start_time,
        endtime=end_time,
        component=component_name,
        timeline=timeline,
    )
Example #4
0
def get_metrics_timeline(
    tmanager: tmanager_pb2.TManagerLocation,
    component_name: str,
    metric_names: List[str],
    instances: List[str],
    start_time: int,
    end_time: int,
    callback=None,
) -> dict:
    """
    Fetch minutely metrics for one component of this topology from the
    Tmanager and arrange them as a timeline.

    This is a generator-based coroutine: it yields the pending HTTP fetch and
    delivers its result by raising `tornado.gen.Return`. It is presumably
    wrapped by a tornado coroutine decorator not visible in this chunk -
    confirm.

    The delivered result has this shape:
    {
      "timeline": {
        <metricname>: {
          <instance>: {
            <start_time> : <numeric value>,
            <start_time> : <numeric value>,
            ...
          }
          ...
        }, ...
      },
      "starttime": <numeric value>,
      "endtime": <numeric value>,
      "component": "..."
    }

    `callback` is accepted but not used.

    On failure an Exception is raised: when no usable Tmanager is given, when
    the fetch raises `tornado.httpclient.HTTPError`, or when the response
    code is 400 or above.
    """
    # The Tmanager proto must carry both a host and a stats port.
    if not (tmanager and tmanager.host and tmanager.stats_port):
        raise Exception("No Tmanager found")

    host, port = tmanager.host, tmanager.stats_port

    # Build the metrics request proto.
    request_parameters = tmanager_pb2.MetricRequest()
    request_parameters.component_name = component_name

    # An empty instance list means metrics for all instances are fetched.
    request_parameters.instance_id.extend(instances)
    request_parameters.metric.extend(metric_names)

    explicit_interval = request_parameters.explicit_interval
    explicit_interval.start = start_time
    explicit_interval.end = end_time
    request_parameters.minutely = True

    # POST the serialized request to the stats endpoint.
    url = f"http://{host}:{port}/stats"
    payload = request_parameters.SerializeToString()
    request = tornado.httpclient.HTTPRequest(
        url, body=payload, method='POST', request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: " + url)
    try:
        client = tornado.httpclient.AsyncHTTPClient()
        result = yield client.fetch(request)
        Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
        raise Exception(str(e))

    # Treat any 4xx / 5xx response code as an error.
    if result.code >= 400:
        raise Exception(
            f"Error in getting metrics from Tmanager, code: {result.code}")

    # Decode the Tmanager reply.
    response_data = tmanager_pb2.MetricResponse()
    response_data.ParseFromString(result.body)

    # A NOTOK status is logged only; processing continues.
    status = response_data.status
    if status.status == common_pb2.NOTOK and status.HasField("message"):
        Log.warn("Received response from Tmanager: %s", status.message)

    # Assemble the result dict.
    timeline = {}
    ret = {
        "starttime": start_time,
        "endtime": end_time,
        "component": component_name,
        "timeline": timeline,
    }

    # Each response entry belongs to one instance and holds one
    # IndividualMetric per requested metric name.
    for metric in response_data.metric:
        instance = metric.instance_id

        for individual in metric.metric:
            by_instance = timeline.setdefault(individual.name, {})
            points = by_instance.setdefault(instance, {})

            # Minutely values are keyed by the start of their interval.
            for interval_value in individual.interval_values:
                points[interval_value.interval.start] = interval_value.value

    raise tornado.gen.Return(ret)