Example #1
0
async def get_component_metrics(
    tmanager,
    component: str,
    metric_names: List[str],
    instances: List[str],
    interval: int,
) -> ComponentMetrics:
    """
    Return metrics from the Tmanager over the given interval.

    A MetricRequest is serialized and POSTed to the Tmanager's `/stats`
    endpoint; the reply body is parsed as a MetricResponse. Only the metrics
    named in `metric_names` are put in the request.

    Args:
      tmanager: object with truthy `host` and `stats_port` attributes.
      component: component name placed in the request.
      metric_names: names of the metrics to request.
      instances: instance ids to add to the request; when empty, no
        instance ids are added.
      interval: value copied into the request's `interval` field.

    Returns:
      ComponentMetrics carrying the interval reported in the response, the
      component name, and `metrics` as {metric name: {instance id: value}}.

    Raises:
      Exception: if `tmanager` is missing or lacks a host/stats port, or if
        the HTTP reply has a status code of 400 or above.
    """
    if not (tmanager and tmanager.host and tmanager.stats_port):
        raise Exception("No Tmanager found")

    metric_request = tmanager_pb2.MetricRequest()
    metric_request.component_name = component
    if instances:
        metric_request.instance_id.extend(instances)
    metric_request.metric.extend(metric_names)
    metric_request.interval = interval

    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    async with httpx.AsyncClient() as client:
        response = await client.post(url,
                                     data=metric_request.SerializeToString())

    # An error reply does not carry a MetricResponse payload, so fail loudly
    # here instead of parsing it as if it were one.
    if response.status_code >= 400:
        raise Exception(
            "Error in getting metrics from Tmanager, "
            f"code: {response.status_code}")

    metric_response = tmanager_pb2.MetricResponse()
    metric_response.ParseFromString(response.content)

    # A NOTOK status is only logged; whatever metrics came back are returned.
    status = metric_response.status
    if status.status == common_pb2.NOTOK and status.HasField("message"):
        Log.warn("Received response from Tmanager: %s", status.message)

    # Regroup the per-instance response into {metric name: {instance: value}}.
    metrics = {}
    for metric in metric_response.metric:
        instance = metric.instance_id
        for instance_metric in metric.metric:
            by_instance = metrics.setdefault(instance_metric.name, {})
            by_instance[instance] = instance_metric.value

    return ComponentMetrics(
        interval=metric_response.interval,
        component=component,
        metrics=metrics,
    )
Example #2
0
    def initializeFromRC(cls, rcfile):
        """
        Populate `cls.cmdmap` from a heronrc file, at most once.

        Each line matching `heron_command_pattern` contributes its argument
        string to `cls.cmdmap[app][command][env]`; arguments of repeated
        (app, command, env) entries are joined with a single space. Lines
        that do not match the pattern are ignored, and matching lines with
        an empty app, command or env are skipped with a warning.

        Args:
          rcfile: path of the RC file to read; `HERON_RC` is used when None.

        Returns:
          None. Does nothing if `cls.cmdmap` is already non-empty, and only
          logs a debug message if the file does not exist.
        """
        if len(cls.cmdmap) > 0:
            return

        effective_rc = HERON_RC if rcfile is None else rcfile
        Log.debug('Effective RC file is %s', effective_rc)
        if not os.path.exists(effective_rc):
            Log.debug("%s is not an existing file", effective_rc)
            return

        with open(effective_rc) as f:
            # Seed the wildcard entry so '*':'*':'*' always exists.
            cls.cmdmap['*']['*'] = collections.defaultdict(dict)
            cls.cmdmap['*']['*']['*'] = ''
            for line in f:
                m = heron_command_pattern.match(line)
                if m is None:
                    continue

                # Each key falls back to '' based on its OWN group (the
                # previous code tested a neighbouring group by mistake).
                app = m.group(1) or ''
                command = m.group(2) or ''
                env = m.group(3) or ''
                if not command or not app or not env:
                    Log.warn(
                        "heronrc config entry %s does not have key parameters (command:app:env) ",
                        line)
                    continue

                value = cls.remove_comments(m.group(4).rstrip(os.linesep))
                # make sure that all the single args have a boolean value
                # associated so that we can load the args to a key value
                # structure
                args_list = config.insert_bool_values(value.split())
                args_list_string = ' '.join(args_list)

                if app not in cls.cmdmap:
                    cls.cmdmap[app] = collections.defaultdict(dict)

                if command in cls.cmdmap[app] and env in cls.cmdmap[app][
                        command]:
                    # Repeated entry: append to the arguments seen so far.
                    cls.cmdmap[app][command][env] = cls.cmdmap[app][
                        command][env] + ' ' + args_list_string
                else:
                    cls.cmdmap[app][command][env] = args_list_string
        Log.debug("RC cmdmap %s", json.dumps(cls.cmdmap))
Example #3
0
  def initializeFromRC(cls, rcfile):
    """
    Populate `cls.cmdmap` from a heronrc file, at most once.

    Each line matching `heron_command_pattern` contributes its argument
    string to `cls.cmdmap[app][command][env]`; arguments of repeated
    (app, command, env) entries are joined with a single space. Lines that
    do not match the pattern are ignored, and matching lines with an empty
    app, command or env are skipped with a warning.

    Args:
      rcfile: path of the RC file to read; `HERON_RC` is used when None.

    Returns:
      None. Does nothing if `cls.cmdmap` is already non-empty, and only logs
      a debug message if the file does not exist.
    """
    if len(cls.cmdmap) > 0:
      return

    effective_rc = HERON_RC if rcfile is None else rcfile
    Log.debug('Effective RC file is %s', effective_rc)
    if not os.path.exists(effective_rc):
      Log.debug("%s is not an existing file", effective_rc)
      return

    with open(effective_rc) as f:
      # Seed the wildcard entry so '*':'*':'*' always exists.
      cls.cmdmap['*']['*'] = collections.defaultdict(dict)
      cls.cmdmap['*']['*']['*'] = ''
      for line in f:
        m = heron_command_pattern.match(line)
        if m is None:
          continue

        # Each key falls back to '' based on its OWN group (the previous
        # code tested a neighbouring group by mistake).
        app = m.group(1) or ''
        command = m.group(2) or ''
        env = m.group(3) or ''
        if not command or not app or not env:
          Log.warn("heronrc config entry %s does not have key parameters (command:app:env) ",
                   line)
          continue

        value = cls.remove_comments(m.group(4).rstrip(os.linesep))
        # make sure that all the single args have a boolean value
        # associated so that we can load the args to a key value
        # structure
        args_list = config.insert_bool_values(value.split())
        args_list_string = ' '.join(args_list)

        if app not in cls.cmdmap:
          cls.cmdmap[app] = collections.defaultdict(dict)

        if command in cls.cmdmap[app] and env in cls.cmdmap[app][command]:
          # Repeated entry: append to the arguments seen so far.
          cls.cmdmap[app][command][env] = cls.cmdmap[app][command][env] + ' ' + args_list_string
        else:
          cls.cmdmap[app][command][env] = args_list_string
    Log.debug("RC cmdmap %s", json.dumps(cls.cmdmap))
Example #4
0
  def getComponentMetrics(self,
                          tmaster,
                          componentName,
                          metricNames,
                          instances,
                          interval,
                          callback=None):
    """
    Get the specified metrics for the given component name of this topology.

    This is a generator-style coroutine: it yields the pending HTTP fetch
    and hands its result back through `tornado.gen.Return`.

    Args:
      tmaster: object with truthy `host` and `stats_port` attributes.
      componentName: component name placed in the request.
      metricNames: names of the metrics to request.
      instances: instance ids to add to the request; when empty (or None),
        no instance ids are added.
      interval: value copied into the request's `interval` field.
      callback: not used by this function body.

    Returns the following dict on success:
    {
      "metrics": {
        <metricname>: {
          <instance>: <numeric value>,
          <instance>: <numeric value>,
          ...
        }, ...
      },
      "interval": <numeric value>,
      "component": "..."
    }

    Raises exception on failure.
    """
    if not tmaster or not tmaster.host or not tmaster.stats_port:
      raise Exception("No Tmaster found")

    host = tmaster.host
    port = tmaster.stats_port

    metricRequest = tmaster_pb2.MetricRequest()
    metricRequest.component_name = componentName
    if instances:
      metricRequest.instance_id.extend(instances)
    metricRequest.metric.extend(metricNames)
    metricRequest.interval = interval

    # Serialize the metricRequest to send as a payload
    # with the HTTP request.
    metricRequestString = metricRequest.SerializeToString()

    url = "http://{0}:{1}/stats".format(host, port)
    request = tornado.httpclient.HTTPRequest(url,
                                             body=metricRequestString,
                                             method='POST',
                                             request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: " + url)
    try:
      client = tornado.httpclient.AsyncHTTPClient()
      result = yield client.fetch(request)
      Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
      raise Exception(str(e))

    # Check the response code - error if it is in 400s or 500s
    responseCode = result.code
    if responseCode >= 400:
      # The code is an int, so it must be formatted rather than concatenated
      # (str + int would raise TypeError and hide the real error).
      message = "Error in getting metrics from Tmaster, code: {0}".format(responseCode)
      Log.error(message)
      raise Exception(message)

    # Parse the response from tmaster.
    metricResponse = tmaster_pb2.MetricResponse()
    metricResponse.ParseFromString(result.body)

    # A NOTOK status is only logged; whatever metrics came back are returned.
    if metricResponse.status.status == common_pb2.NOTOK:
      if metricResponse.status.HasField("message"):
        Log.warn("Received response from Tmaster: %s", metricResponse.status.message)

    # Form the response: regroup per-instance data by metric name.
    metrics = {}
    for metric in metricResponse.metric:
      instance = metric.instance_id
      for im in metric.metric:
        metrics.setdefault(im.name, {})[instance] = im.value

    ret = {
        "interval": metricResponse.interval,
        "component": componentName,
        "metrics": metrics,
    }

    raise tornado.gen.Return(ret)
Example #5
0
def getMetricsTimeline(tmaster,
                       component_name,
                       metric_names,
                       instances,
                       start_time,
                       end_time,
                       callback=None):
  """
  Get the specified metrics for the given component name of this topology.

  This is a generator-style coroutine: it yields the pending HTTP fetch
  and hands its result back through `tornado.gen.Return`.

  Args:
    tmaster: object with truthy `host` and `stats_port` attributes.
    component_name: component name placed in the request.
    metric_names: names of the metrics to request.
    instances: instance ids to add to the request; when empty (or None),
      no instance ids are added.
    start_time: start of the requested explicit interval.
    end_time: end of the requested explicit interval.
    callback: not used by this function body.

  Returns the following dict on success:
  {
    "timeline": {
      <metricname>: {
        <instance>: {
          <start_time> : <numeric value>,
          <start_time> : <numeric value>,
          ...
        }
        ...
      }, ...
    },
    "starttime": <numeric value>,
    "endtime": <numeric value>,
    "component": "..."
  }

  Raises exception on failure.
  """
  # Tmaster is the proto object and must have host and port for stats.
  if not tmaster or not tmaster.host or not tmaster.stats_port:
    raise Exception("No Tmaster found")

  host = tmaster.host
  port = tmaster.stats_port

  # Create the proto request object to get metrics.
  metricRequest = tmaster_pb2.MetricRequest()
  metricRequest.component_name = component_name

  # If no instances are given, metrics for all instances
  # are fetched by default.
  if instances:
    metricRequest.instance_id.extend(instances)
  metricRequest.metric.extend(metric_names)

  metricRequest.explicit_interval.start = start_time
  metricRequest.explicit_interval.end = end_time
  metricRequest.minutely = True

  # Serialize the metricRequest to send as a payload
  # with the HTTP request.
  metricRequestString = metricRequest.SerializeToString()

  # Form and send the http request.
  url = "http://{0}:{1}/stats".format(host, port)
  request = tornado.httpclient.HTTPRequest(url,
                                           body=metricRequestString,
                                           method='POST',
                                           request_timeout=5)

  Log.debug("Making HTTP call to fetch metrics")
  Log.debug("url: " + url)
  try:
    client = tornado.httpclient.AsyncHTTPClient()
    result = yield client.fetch(request)
    Log.debug("HTTP call complete.")
  except tornado.httpclient.HTTPError as e:
    raise Exception(str(e))

  # Check the response code - error if it is in 400s or 500s
  responseCode = result.code
  if responseCode >= 400:
    # The code is an int, so it must be formatted rather than concatenated
    # (str + int would raise TypeError and hide the real error).
    message = "Error in getting metrics from Tmaster, code: {0}".format(responseCode)
    Log.error(message)
    raise Exception(message)

  # Parse the response from tmaster.
  metricResponse = tmaster_pb2.MetricResponse()
  metricResponse.ParseFromString(result.body)

  # A NOTOK status is only logged; whatever data came back is returned.
  if metricResponse.status.status == common_pb2.NOTOK:
    if metricResponse.status.HasField("message"):
      Log.warn("Received response from Tmaster: %s", metricResponse.status.message)

  # Loop through all the metrics
  # One instance corresponds to one metric, which can have
  # multiple IndividualMetrics for each metricname requested.
  timeline = {}
  for metric in metricResponse.metric:
    instance = metric.instance_id

    # Loop through all individual metrics.
    for im in metric.metric:
      # The (metric name, instance) slot is created even when there are no
      # interval values, so it still shows up as an empty dict.
      points = timeline.setdefault(im.name, {}).setdefault(instance, {})

      # We get minutely metrics.
      # Each interval value is stored under the start of the interval it
      # belongs to.
      for interval_value in im.interval_values:
        points[interval_value.interval.start] = interval_value.value

  # Form the response.
  ret = {
      "starttime": start_time,
      "endtime": end_time,
      "component": component_name,
      "timeline": timeline,
  }

  raise tornado.gen.Return(ret)
Example #6
0
    def getComponentMetrics(self,
                            tmaster,
                            componentName,
                            metricNames,
                            instances,
                            interval,
                            callback=None):
        """
        Get the specified metrics for the given component name of this topology.

        This is a generator-style coroutine: it yields the pending HTTP fetch
        and hands its result back through `tornado.gen.Return`.

        Args:
          tmaster: object with truthy `host` and `stats_port` attributes.
          componentName: component name placed in the request.
          metricNames: names of the metrics to request.
          instances: instance ids to add to the request; when empty (or
            None), no instance ids are added.
          interval: value copied into the request's `interval` field.
          callback: not used by this function body.

        Returns the following dict on success:
        {
          "metrics": {
            <metricname>: {
              <instance>: <numeric value>,
              <instance>: <numeric value>,
              ...
            }, ...
          },
          "interval": <numeric value>,
          "component": "..."
        }

        Raises exception on failure.
        """
        if not tmaster or not tmaster.host or not tmaster.stats_port:
            raise Exception("No Tmaster found")

        host = tmaster.host
        port = tmaster.stats_port

        metricRequest = tmaster_pb2.MetricRequest()
        metricRequest.component_name = componentName
        if instances:
            metricRequest.instance_id.extend(instances)
        metricRequest.metric.extend(metricNames)
        metricRequest.interval = interval

        # Serialize the metricRequest to send as a payload
        # with the HTTP request.
        metricRequestString = metricRequest.SerializeToString()

        url = "http://{0}:{1}/stats".format(host, port)
        request = tornado.httpclient.HTTPRequest(url,
                                                 body=metricRequestString,
                                                 method='POST',
                                                 request_timeout=5)

        Log.debug("Making HTTP call to fetch metrics")
        Log.debug("url: " + url)
        try:
            client = tornado.httpclient.AsyncHTTPClient()
            result = yield client.fetch(request)
            Log.debug("HTTP call complete.")
        except tornado.httpclient.HTTPError as e:
            raise Exception(str(e))

        # Check the response code - error if it is in 400s or 500s
        responseCode = result.code
        if responseCode >= 400:
            # The code is an int, so it must be formatted rather than
            # concatenated (str + int would raise TypeError and hide the
            # real error).
            message = "Error in getting metrics from Tmaster, code: {0}".format(
                responseCode)
            Log.error(message)
            raise Exception(message)

        # Parse the response from tmaster.
        metricResponse = tmaster_pb2.MetricResponse()
        metricResponse.ParseFromString(result.body)

        # A NOTOK status is only logged; whatever metrics came back are
        # returned.
        if metricResponse.status.status == common_pb2.NOTOK:
            if metricResponse.status.HasField("message"):
                Log.warn("Received response from Tmaster: %s",
                         metricResponse.status.message)

        # Form the response: regroup per-instance data by metric name.
        metrics = {}
        for metric in metricResponse.metric:
            instance = metric.instance_id
            for im in metric.metric:
                metrics.setdefault(im.name, {})[instance] = im.value

        ret = {
            "interval": metricResponse.interval,
            "component": componentName,
            "metrics": metrics,
        }

        raise tornado.gen.Return(ret)
Example #7
0
async def get_metrics_timeline(
    tmanager: tmanager_pb2.TManagerLocation,
    component_name: str,
    metric_names: List[str],
    instances: List[str],
    start_time: int,
    end_time: int,
    callback=None,
) -> MetricsTimeline:
    """
    Get the specified metrics for the given component name of this topology.

    A minutely MetricRequest covering [start_time, end_time] is serialized
    and POSTed to the Tmanager's `/stats` endpoint; the reply body is parsed
    as a MetricResponse.

    Args:
      tmanager: Tmanager location; must have a truthy host and stats port.
      component_name: component name placed in the request.
      metric_names: names of the metrics to request.
      instances: instance ids to add to the request.
      start_time: start of the requested explicit interval.
      end_time: end of the requested explicit interval.
      callback: not used by this function body.

    Returns:
      MetricsTimeline with the requested start/end times, the component
      name, and `timeline` as
      {metric name: {instance id: {interval start: value}}}.

    Raises:
      Exception: if `tmanager` is missing or lacks a host/stats port, or if
        the HTTP reply has a status code of 400 or above.
    """

    # Tmanager is the proto object and must have host and port for stats.
    if not tmanager or not tmanager.host or not tmanager.stats_port:
        raise Exception("No Tmanager found")

    # Create the proto request object to get metrics.
    request_parameters = tmanager_pb2.MetricRequest()
    request_parameters.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    request_parameters.instance_id.extend(instances)
    request_parameters.metric.extend(metric_names)

    request_parameters.explicit_interval.start = start_time
    request_parameters.explicit_interval.end = end_time
    request_parameters.minutely = True

    # Form and send the http request.
    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    Log.debug(f"Making HTTP call to fetch metrics: {url}")
    async with httpx.AsyncClient() as client:
        result = await client.post(url,
                                   data=request_parameters.SerializeToString())

    # Check the response code - error if it is in 400s or 500s
    if result.status_code >= 400:
        # Report the same attribute that was just tested; the response
        # object exposes `status_code`, not `code`.
        message = (
            "Error in getting metrics from Tmanager, "
            f"code: {result.status_code}")
        raise Exception(message)

    # Parse the response from tmanager.
    response_data = tmanager_pb2.MetricResponse()
    response_data.ParseFromString(result.content)

    # A NOTOK status is only logged; whatever data came back is returned.
    if response_data.status.status == common_pb2.NOTOK:
        if response_data.status.HasField("message"):
            Log.warn("Received response from Tmanager: %s",
                     response_data.status.message)

    timeline = {}
    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    for metric in response_data.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            # The (metric name, instance) slot is created even when there
            # are no interval values, so it still shows up as an empty dict.
            points = timeline.setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of the interval
            # it belongs to.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    return MetricsTimeline(
        starttime=start_time,
        endtime=end_time,
        component=component_name,
        timeline=timeline,
    )
Example #8
0
def getMetricsTimeline(tmaster,
                       component_name,
                       metric_names,
                       instances,
                       start_time,
                       end_time,
                       callback=None):
    """
    Get the specified metrics for the given component name of this topology.

    This is a generator-style coroutine: it yields the pending HTTP fetch
    and hands its result back through `tornado.gen.Return`.

    Args:
      tmaster: object with truthy `host` and `stats_port` attributes.
      component_name: component name placed in the request.
      metric_names: names of the metrics to request.
      instances: instance ids to add to the request; when empty (or None),
        no instance ids are added.
      start_time: start of the requested explicit interval.
      end_time: end of the requested explicit interval.
      callback: not used by this function body.

    Returns the following dict on success:
    {
      "timeline": {
        <metricname>: {
          <instance>: {
            <start_time> : <numeric value>,
            <start_time> : <numeric value>,
            ...
          }
          ...
        }, ...
      },
      "starttime": <numeric value>,
      "endtime": <numeric value>,
      "component": "..."
    }

    Raises exception on failure.
    """
    # Tmaster is the proto object and must have host and port for stats.
    if not tmaster or not tmaster.host or not tmaster.stats_port:
        raise Exception("No Tmaster found")

    host = tmaster.host
    port = tmaster.stats_port

    # Create the proto request object to get metrics.
    metricRequest = tmaster_pb2.MetricRequest()
    metricRequest.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    if instances:
        metricRequest.instance_id.extend(instances)
    metricRequest.metric.extend(metric_names)

    metricRequest.explicit_interval.start = start_time
    metricRequest.explicit_interval.end = end_time
    metricRequest.minutely = True

    # Serialize the metricRequest to send as a payload
    # with the HTTP request.
    metricRequestString = metricRequest.SerializeToString()

    # Form and send the http request.
    url = "http://{0}:{1}/stats".format(host, port)
    request = tornado.httpclient.HTTPRequest(url,
                                             body=metricRequestString,
                                             method='POST',
                                             request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: " + url)
    try:
        client = tornado.httpclient.AsyncHTTPClient()
        result = yield client.fetch(request)
        Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
        raise Exception(str(e))

    # Check the response code - error if it is in 400s or 500s
    responseCode = result.code
    if responseCode >= 400:
        # The code is an int, so it must be formatted rather than
        # concatenated (str + int would raise TypeError and hide the real
        # error).
        message = "Error in getting metrics from Tmaster, code: {0}".format(
            responseCode)
        Log.error(message)
        raise Exception(message)

    # Parse the response from tmaster.
    metricResponse = tmaster_pb2.MetricResponse()
    metricResponse.ParseFromString(result.body)

    # A NOTOK status is only logged; whatever data came back is returned.
    if metricResponse.status.status == common_pb2.NOTOK:
        if metricResponse.status.HasField("message"):
            Log.warn("Received response from Tmaster: %s",
                     metricResponse.status.message)

    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    timeline = {}
    for metric in metricResponse.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            # The (metric name, instance) slot is created even when there
            # are no interval values, so it still shows up as an empty dict.
            points = timeline.setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of the interval
            # it belongs to.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    # Form the response.
    ret = {
        "starttime": start_time,
        "endtime": end_time,
        "component": component_name,
        "timeline": timeline,
    }

    raise tornado.gen.Return(ret)