Example #1
    def test_epoch_tz_aware(self):
        dt = pytz.utc.localize(datetime(1970, 1, 1, 0, 10, 0, 0))
        self.assertEqual(util.epoch(dt), 600)

        dt = pytz.timezone('Europe/Berlin').localize(
            datetime(1970, 1, 1, 1, 10, 0, 0))
        self.assertEqual(util.epoch(dt), 600)
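
These tests assume a util.epoch() helper that converts a timezone-aware datetime to a Unix timestamp. A minimal sketch of such a helper (the behavior matches the assertions above; the implementation itself is an assumption, not necessarily Graphite's):

    import calendar

    import pytz

    def epoch(dt):
        # Sketch only: assumes dt is timezone-aware, as in the tests above.
        # Normalize to UTC, then count seconds since 1970-01-01T00:00:00Z.
        return calendar.timegm(dt.astimezone(pytz.utc).timetuple())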
Example #2
def fetchData(requestContext, pathExpr):

  seriesList = []
  startTime = int( epoch( requestContext['startTime'] ) )
  endTime   = int( epoch( requestContext['endTime'] ) )

  def _fetchData(pathExpr,startTime, endTime, requestContext, seriesList):
    # Materialize the nodes up front: STORE.find() may return a generator,
    # and the debug logging below would otherwise exhaust it before the fetch.
    matching_nodes = list(STORE.find(pathExpr, startTime, endTime, local=requestContext['localOnly']))
    log.info("matching_nodes: %s" % matching_nodes)
    log.info("is_leaf flags: %s" % [node.is_leaf for node in matching_nodes])
    fetches = [(node, node.fetch(startTime, endTime)) for node in matching_nodes if node.is_leaf]

    for node, results in fetches:
      if isinstance(results, FetchInProgress):
        results = results.waitForResults()

      if not results:
        log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (node, startTime, endTime))
        continue

      try:
        (timeInfo, values) = results
      except ValueError as e:
        raise Exception("could not parse timeInfo/values from metric '%s': %s" % (node.path, e))
      (start, end, step) = timeInfo

      series = TimeSeries(node.path, start, end, step, values)
      series.pathExpression = pathExpr #hack to pass expressions through to render functions
      seriesList.append(series)

    # Prune empty series with duplicate metric paths to avoid showing empty graph elements for old whisper data
    names = set([ s.name for s in seriesList ])
    for name in names:
      series_with_duplicate_names = [ s for s in seriesList if s.name == name ]
      empty_duplicates = [ s for s in series_with_duplicate_names if not nonempty(s) ]

      if series_with_duplicate_names == empty_duplicates and len(empty_duplicates) > 0: # if they're all empty
        empty_duplicates.pop() # make sure we leave one in seriesList

      for series in empty_duplicates:
        seriesList.remove(series)

    return seriesList

  retries = 1 # start counting at one to make log output and settings more readable
  while True:
    try:
      seriesList = _fetchData(pathExpr,startTime, endTime, requestContext, seriesList)
      return seriesList
    except Exception as e:
      if retries >= settings.MAX_FETCH_RETRIES:
        log.exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
        raise Exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
      else:
        log.exception("Got an exception when fetching data! See: %s Will retry. Run: %i of %i" %
                     (e, retries, settings.MAX_FETCH_RETRIES))
        retries += 1
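
The pruning step above depends on a nonempty() predicate that is not shown. A plausible minimal implementation, assumed from how it is used (a series counts as empty when every datapoint is None):

    def nonempty(series):
        # TimeSeries iterates over its datapoints, so any() is sufficient.
        return any(value is not None for value in series)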
Example #3
    def test_epoch_naive(self, mock_log):
        with self.settings(TIME_ZONE='UTC'):
            dt = datetime(1970, 1, 1, 0, 10, 0, 0)
            self.assertEqual(util.epoch(dt), 600)
            self.assertEqual(mock_log.call_count, 1)
            self.assertEqual(len(mock_log.call_args[0]), 1)
            self.assertRegexpMatches(mock_log.call_args[0][0], r'epoch\(\) called with non-timezone-aware datetime in test_epoch_naive at .+/webapp/tests/test_util\.py:[0-9]+')

        with self.settings(TIME_ZONE='Europe/Berlin'):
            dt = datetime(1970, 1, 1, 1, 10, 0, 0)
            self.assertEqual(util.epoch(dt), 600)
            self.assertEqual(mock_log.call_count, 2)
            self.assertEqual(len(mock_log.call_args[0]), 1)
            self.assertRegexpMatches(mock_log.call_args[0][0], r'epoch\(\) called with non-timezone-aware datetime in test_epoch_naive at .+/webapp/tests/test_util\.py:[0-9]+')
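
The naive-datetime test expects epoch() to interpret naive input in the configured TIME_ZONE and to log a warning that names the call site. A sketch of that branch, hedged: the log wording matches the regex asserted above, but the use of traceback to find the caller and the logger setup are assumptions:

    import calendar
    import logging
    import traceback

    import pytz
    from django.conf import settings

    log = logging.getLogger(__name__)  # assumed; Graphite uses its own logger

    def epoch(dt):
        if dt.tzinfo is None:
            # One frame up from here is the caller:
            # (filename, lineno, funcname, source line).
            filename, lineno, funcname, _ = traceback.extract_stack()[-2]
            log.warning(
                'epoch() called with non-timezone-aware datetime in %s at %s:%d'
                % (funcname, filename, lineno))
            # Interpret the naive datetime in the configured local zone.
            dt = pytz.timezone(settings.TIME_ZONE).localize(dt)
        return calendar.timegm(dt.astimezone(pytz.utc).timetuple())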
Example #4
 def default(self, obj):
     if isinstance(obj, datetime.datetime):
         return epoch(obj)
     return json.JSONEncoder.default(self, obj)
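
This default() hook belongs on a json.JSONEncoder subclass. A hedged usage sketch (the class name is illustrative, and the inlined epoch() stand-in assumes timezone-aware input):

    import calendar
    import datetime
    import json

    def epoch(dt):  # minimal stand-in for the helper in the other examples
        return calendar.timegm(dt.astimezone(datetime.timezone.utc).timetuple())

    class DatetimeJSONEncoder(json.JSONEncoder):  # illustrative name
        def default(self, obj):
            if isinstance(obj, datetime.datetime):
                return epoch(obj)
            return json.JSONEncoder.default(self, obj)

    payload = {'ts': datetime.datetime(1970, 1, 1, 0, 10, tzinfo=datetime.timezone.utc)}
    print(json.dumps(payload, cls=DatetimeJSONEncoder))  # {"ts": 600}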
Example #5
def find_view(request):
    "View for finding metrics matching a given pattern"

    queryParams = request.GET.copy()
    queryParams.update(request.POST)

    format = queryParams.get('format', 'treejson')
    leaves_only = queryParamAsInt(queryParams, 'leavesOnly', 0)
    local_only = queryParamAsInt(queryParams, 'local', 0)
    wildcards = queryParamAsInt(queryParams, 'wildcards', 0)

    tzinfo = pytz.timezone(settings.TIME_ZONE)
    if 'tz' in queryParams:
        try:
            value = queryParams['tz']
            tzinfo = pytz.timezone(value)
        except pytz.UnknownTimeZoneError:
            pass
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param tz: {err}'.format(
                    value=repr(value), err=str(e)))

    if 'now' in queryParams:
        try:
            value = queryParams['now']
            now = parseATTime(value, tzinfo)
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param now: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        now = datetime.now(tzinfo)

    if 'from' in queryParams and str(queryParams['from']) != '-1':
        try:
            value = queryParams['from']
            fromTime = int(epoch(parseATTime(value, tzinfo, now)))
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param from: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        fromTime = -1

    if 'until' in queryParams and str(queryParams['until']) != '-1':
        try:
            value = queryParams['until']
            untilTime = int(epoch(parseATTime(value, tzinfo, now)))
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param until: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        untilTime = -1

    nodePosition = queryParamAsInt(queryParams, 'position', -1)
    jsonp = queryParams.get('jsonp', False)
    forward_headers = extractForwardHeaders(request)

    if fromTime == -1:
        fromTime = None
    if untilTime == -1:
        untilTime = None

    automatic_variants = queryParamAsInt(queryParams, 'automatic_variants', 0)

    try:
        query = str(queryParams['query'])
    except KeyError:
        raise InputParameterError('Missing required parameter \'query\'')

    if query == '':
        raise InputParameterError('Required parameter \'query\' is empty')

    if '.' in query:
        base_path = query.rsplit('.', 1)[0] + '.'
    else:
        base_path = ''

    if format == 'completer':
        query = query.replace('..', '*.')
        if not query.endswith('*'):
            query += '*'

        if automatic_variants:
            query_parts = query.split('.')
            for i, part in enumerate(query_parts):
                if ',' in part and '{' not in part:
                    query_parts[i] = '{%s}' % part
            query = '.'.join(query_parts)

    try:
        matches = list(
            STORE.find(
                query,
                fromTime,
                untilTime,
                local=local_only,
                headers=forward_headers,
                leaves_only=leaves_only,
            ))
    except Exception:
        log.exception()
        raise

    log.info('find_view query=%s local_only=%s matches=%d' %
             (query, local_only, len(matches)))
    matches.sort(key=lambda node: node.name)
    log.info(
        "received remote find request: pattern=%s from=%s until=%s local_only=%s format=%s matches=%d"
        % (query, fromTime, untilTime, local_only, format, len(matches)))

    if format == 'treejson':
        profile = getProfile(request)
        content = tree_json(matches,
                            base_path,
                            wildcards=profile.advancedUI or wildcards)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'nodelist':
        content = nodes_by_position(matches, nodePosition)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'pickle':
        content = pickle_nodes(matches)
        response = HttpResponse(content, content_type='application/pickle')

    elif format == 'msgpack':
        content = msgpack_nodes(matches)
        response = HttpResponse(content, content_type='application/x-msgpack')

    elif format == 'json':
        content = json_nodes(matches)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'completer':
        results = []
        for node in matches:
            node_info = dict(path=node.path,
                             name=node.name,
                             is_leaf=str(int(node.is_leaf)))
            if not node.is_leaf:
                node_info['path'] += '.'
            results.append(node_info)

        if len(results) > 1 and wildcards:
            wildcardNode = {'name': '*'}
            results.append(wildcardNode)

        response = json_response_for(request, {'metrics': results},
                                     jsonp=jsonp)

    else:
        return HttpResponseBadRequest(
            content="Invalid value for 'format' parameter",
            content_type='text/plain')

    response['Pragma'] = 'no-cache'
    response['Cache-Control'] = 'no-cache'
    return response
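
find_view() also relies on a queryParamAsInt() helper that is not shown here. A plausible sketch, assumed to follow the view's own validation style (same InputParameterError as the other parameter checks):

    def queryParamAsInt(queryParams, name, default):
        # Assumed helper: coerce a query parameter to int, keeping the
        # default when the parameter is absent.
        if name not in queryParams:
            return default
        try:
            return int(queryParams[name])
        except ValueError as e:
            raise InputParameterError(
                'Invalid int value {value} for param {name}: {err}'.format(
                    value=repr(queryParams[name]), name=name, err=str(e)))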
Example #6
def fetchData(requestContext, pathExpr):
    seriesList = {}
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))

    def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
        matching_nodes = STORE.find(pathExpr,
                                    startTime,
                                    endTime,
                                    local=requestContext['localOnly'])
        fetches = [(node, node.fetch(startTime, endTime))
                   for node in matching_nodes if node.is_leaf]

        # Cache of best-known None counts per series name; lives outside the
        # node loop so the counts persist across fetches.
        series_best_nones = {}

        for node, results in fetches:
            if isinstance(results, FetchInProgress):
                results = results.waitForResults()

            if not results:
                log.info(
                    "render.datalib.fetchData :: no results for %s.fetch(%s, %s)"
                    % (node, startTime, endTime))
                continue

            try:
                (timeInfo, values) = results
            except ValueError as e:
                raise Exception(
                    "could not parse timeInfo/values from metric '%s': %s" %
                    (node.path, e))
            (start, end, step) = timeInfo

            series = TimeSeries(node.path, start, end, step, values)
            series.pathExpression = pathExpr  # hack to pass expressions through to render functions

            if series.name in seriesList:
                # This counts the Nones in each series, and is unfortunately O(n) for each
                # series, which may be worth further optimization. The value of doing this
                # at all is to avoid the "flipping" effect of loading a graph multiple times
                # and having inconsistent data returned if one of the backing stores has
                # inconsistent data. This is imperfect as a validity test, but in practice
                # nicely keeps us using the "most complete" dataset available. Think of it
                # as a very weak CRDT resolver.
                candidate_nones = 0
                if not settings.REMOTE_STORE_MERGE_RESULTS:
                    candidate_nones = len(
                        [val for val in series if val is None])

                known = seriesList[series.name]
                # To avoid repeatedly recounting the 'Nones' in series we've already seen,
                # cache the best known count so far in a dict.
                if known.name in series_best_nones:
                    known_nones = series_best_nones[known.name]
                else:
                    known_nones = len([val for val in known if val is None])

                if known_nones > candidate_nones:
                    if settings.REMOTE_STORE_MERGE_RESULTS:
                        # This series has potential data that might be missing from
                        # earlier series.  Attempt to merge in useful data and update
                        # the cache count.
                        log.info("Merging multiple TimeSeries for %s" %
                                 known.name)
                        for i, j in enumerate(known):
                            if j is None and series[i] is not None:
                                known[i] = series[i]
                                known_nones -= 1
                        # Store known_nones in our cache
                        series_best_nones[known.name] = known_nones
                    else:
                        # Not merging data -
                        # we've found a series better than what we've already seen. Update
                        # the count cache and replace the given series in the array.
                        series_best_nones[known.name] = candidate_nones
                        seriesList[known.name] = series
                else:
                    # If we are merging data: the existing series has no gaps,
                    # so there is nothing to merge from this candidate.
                    #
                    # If we are picking the best series: the candidate is no
                    # better than what we already have, so there is nothing
                    # left to compare. Either way, move on to the next node
                    # (a `break` here would also discard fetches for other,
                    # unrelated metrics).
                    continue

            # If the series matched a 'known' series above, it was handled
            # (merged, swapped in, or ignored). Otherwise, add it here.
            else:
                seriesList[series.name] = series

        # Stabilize the order of the results by ordering the resulting series by name.
        # This returns the result ordering to the behavior observed pre PR#1010.
        return [seriesList[k] for k in sorted(seriesList)]

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime,
                                    requestContext, seriesList)
            return seriesList
        except Exception as e:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %s retries! Root cause:\n%s" %
                              (settings.MAX_FETCH_RETRIES, format_exc()))
                raise e
            else:
                log.exception(
                    "Got an exception when fetching data! Try: %i of %i. Root cause:\n%s"
                    % (retries, settings.MAX_FETCH_RETRIES, format_exc()))
                retries += 1
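
Stripped of the surrounding machinery, the REMOTE_STORE_MERGE_RESULTS branch above fills gaps in the known series from the candidate. A standalone sketch of that step (names are illustrative; assumes both value lists are aligned on the same time steps):

    def merge_series_values(known, candidate):
        # Fill None gaps in `known` with values from `candidate`, in place,
        # and return the number of gaps that remain.
        for i, value in enumerate(known):
            if value is None and candidate[i] is not None:
                known[i] = candidate[i]
        return sum(1 for value in known if value is None)

    known = [1, None, 3, None]
    print(merge_series_values(known, [None, 2, 30, None]))  # 1
    print(known)  # [1, 2, 3, None]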
Example #7
 def default(self, obj):
     if isinstance(obj, datetime.datetime):
         return epoch(obj)
     return json.JSONEncoder.default(self, obj)
Example #8
def find_view(request):
  "View for finding metrics matching a given pattern"

  queryParams = request.GET.copy()
  queryParams.update(request.POST)

  format = queryParams.get('format', 'treejson')
  leaves_only = int( queryParams.get('leavesOnly', 0) )
  local_only = int( queryParams.get('local', 0) )
  wildcards = int( queryParams.get('wildcards', 0) )

  tzinfo = pytz.timezone(settings.TIME_ZONE)
  if 'tz' in queryParams:
    try:
      tzinfo = pytz.timezone(queryParams['tz'])
    except pytz.UnknownTimeZoneError:
      pass

  if 'now' in queryParams:
    now = parseATTime(queryParams['now'], tzinfo)
  else:
    now = datetime.now(tzinfo)

  if 'from' in queryParams and str(queryParams['from']) != '-1':
    fromTime = int(epoch(parseATTime(queryParams['from'], tzinfo, now)))
  else:
    fromTime = -1

  if 'until' in queryParams and str(queryParams['until']) != '-1':
    untilTime = int(epoch(parseATTime(queryParams['until'], tzinfo, now)))
  else:
    untilTime = -1

  nodePosition = int( queryParams.get('position', -1) )
  jsonp = queryParams.get('jsonp', False)
  forward_headers = extractForwardHeaders(request)

  if fromTime == -1:
    fromTime = None
  if untilTime == -1:
    untilTime = None

  automatic_variants = int( queryParams.get('automatic_variants', 0) )

  try:
    query = str(queryParams['query'])
  except KeyError:
    return HttpResponseBadRequest(content="Missing required parameter 'query'",
                                  content_type='text/plain')

  if query == '':
    return HttpResponseBadRequest(content="Required parameter 'query' is empty",
                                  content_type='text/plain')

  if '.' in query:
    base_path = query.rsplit('.', 1)[0] + '.'
  else:
    base_path = ''

  if format == 'completer':
    query = query.replace('..', '*.')
    if not query.endswith('*'):
      query += '*'

    if automatic_variants:
      query_parts = query.split('.')
      for i,part in enumerate(query_parts):
        if ',' in part and '{' not in part:
          query_parts[i] = '{%s}' % part
      query = '.'.join(query_parts)

  try:
    matches = list(STORE.find(
      query, fromTime, untilTime,
      local=local_only,
      headers=forward_headers,
      leaves_only=leaves_only,
    ))
  except Exception:
    log.exception()
    raise

  log.info('find_view query=%s local_only=%s matches=%d' % (query, local_only, len(matches)))
  matches.sort(key=lambda node: node.name)
  log.info("received remote find request: pattern=%s from=%s until=%s local_only=%s format=%s matches=%d" % (query, fromTime, untilTime, local_only, format, len(matches)))

  if format == 'treejson':
    profile = getProfile(request)
    content = tree_json(matches, base_path, wildcards=profile.advancedUI or wildcards)
    response = json_response_for(request, content, jsonp=jsonp)

  elif format == 'nodelist':
    content = nodes_by_position(matches, nodePosition)
    response = json_response_for(request, content, jsonp=jsonp)

  elif format == 'pickle':
    content = pickle_nodes(matches)
    response = HttpResponse(content, content_type='application/pickle')

  elif format == 'msgpack':
    content = msgpack_nodes(matches)
    response = HttpResponse(content, content_type='application/x-msgpack')

  elif format == 'json':
    content = json_nodes(matches)
    response = json_response_for(request, content, jsonp=jsonp)

  elif format == 'completer':
    results = []
    for node in matches:
      node_info = dict(path=node.path, name=node.name, is_leaf=str(int(node.is_leaf)))
      if not node.is_leaf:
        node_info['path'] += '.'
      results.append(node_info)

    if len(results) > 1 and wildcards:
      wildcardNode = {'name' : '*'}
      results.append(wildcardNode)

    response = json_response_for(request, { 'metrics' : results }, jsonp=jsonp)

  else:
    return HttpResponseBadRequest(
        content="Invalid value for 'format' parameter",
        content_type='text/plain')

  response['Pragma'] = 'no-cache'
  response['Cache-Control'] = 'no-cache'
  return response
Example #9
def fetchData(requestContext, pathExpr):

    seriesList = []
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))

    def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
        matching_nodes = STORE.find(pathExpr,
                                    startTime,
                                    endTime,
                                    local=requestContext['localOnly'])
        fetches = [(node, node.fetch(startTime, endTime))
                   for node in matching_nodes if node.is_leaf]

        for node, results in fetches:
            if isinstance(results, FetchInProgress):
                results = results.waitForResults()

            if not results:
                log.info(
                    "render.datalib.fetchData :: no results for %s.fetch(%s, %s)"
                    % (node, startTime, endTime))
                continue

            try:
                (timeInfo, values) = results
            except ValueError as e:
                raise Exception(
                    "could not parse timeInfo/values from metric '%s': %s" %
                    (node.path, e))
            (start, end, step) = timeInfo

            series = TimeSeries(node.path, start, end, step, values)
            series.pathExpression = pathExpr  #hack to pass expressions through to render functions
            seriesList.append(series)

        # Prune empty series with duplicate metric paths to avoid showing empty graph elements for old whisper data
        names = set([s.name for s in seriesList])
        for name in names:
            series_with_duplicate_names = [
                s for s in seriesList if s.name == name
            ]
            empty_duplicates = [
                s for s in series_with_duplicate_names if not nonempty(s)
            ]

            if series_with_duplicate_names == empty_duplicates and len(
                    empty_duplicates) > 0:  # if they're all empty
                empty_duplicates.pop()  # make sure we leave one in seriesList

            for series in empty_duplicates:
                seriesList.remove(series)

        return seriesList

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime,
                                    requestContext, seriesList)
            return seriesList
        except Exception as e:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %i retries! See: %s" %
                              (settings.MAX_FETCH_RETRIES, e))
                raise Exception("Failed after %i retries! See: %s" %
                                (settings.MAX_FETCH_RETRIES, e))
            else:
                log.exception(
                    "Got an exception when fetching data! See: %s Will retry. Run: %i of %i"
                    % (e, retries, settings.MAX_FETCH_RETRIES))
                retries += 1
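
Both fetchData variants special-case FetchInProgress results. Only waitForResults() is required by the callers above; everything else in this sketch (the constructor and the callable it wraps) is an assumption:

    class FetchInProgress(object):
        # Deferred fetch result: the backend hands back a callable that
        # yields (timeInfo, values) once the remote fetch completes.
        def __init__(self, wait_callback):
            self.wait_callback = wait_callback

        def waitForResults(self):
            return self.wait_callback()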
Example #10
    def test_epoch_tz_aware(self):
        dt = pytz.utc.localize(datetime(1970, 1, 1, 0, 10, 0, 0))
        self.assertEqual(util.epoch(dt), 600)

        dt = pytz.timezone('Europe/Berlin').localize(datetime(1970, 1, 1, 1, 10, 0, 0))
        self.assertEqual(util.epoch(dt), 600)
Example #11
def fetchData(requestContext, pathExpr):
  seriesList = {}
  startTime = int( epoch( requestContext['startTime'] ) )
  endTime   = int( epoch( requestContext['endTime'] ) )

  def _fetchData(pathExpr,startTime, endTime, requestContext, seriesList):
    matching_nodes = STORE.find(pathExpr, startTime, endTime, local=requestContext['localOnly'])
    fetches = [(node, node.fetch(startTime, endTime)) for node in matching_nodes if node.is_leaf]

    # Cache of best-known None counts per series name; lives outside the node
    # loop so the counts persist across fetches.
    series_best_nones = {}

    for node, results in fetches:
      if isinstance(results, FetchInProgress):
        results = results.waitForResults()

      if not results:
        log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (node, startTime, endTime))
        continue

      try:
        (timeInfo, values) = results
      except ValueError as e:
        raise Exception("could not parse timeInfo/values from metric '%s': %s" % (node.path, e))
      (start, end, step) = timeInfo

      series = TimeSeries(node.path, start, end, step, values)
      series.pathExpression = pathExpr  # hack to pass expressions through to render functions

      if series.name in seriesList:
        # This counts the Nones in each series, and is unfortunately O(n) for each
        # series, which may be worth further optimization. The value of doing this
        # at all is to avoid the "flipping" effect of loading a graph multiple times
        # and having inconsistent data returned if one of the backing stores has
        # inconsistent data. This is imperfect as a validity test, but in practice
        # nicely keeps us using the "most complete" dataset available. Think of it
        # as a very weak CRDT resolver.
        candidate_nones = 0
        if not settings.REMOTE_STORE_MERGE_RESULTS:
          candidate_nones = len(
            [val for val in series if val is None])

        known = seriesList[series.name]
        # To avoid repeatedly recounting the 'Nones' in series we've already seen,
        # cache the best known count so far in a dict.
        if known.name in series_best_nones:
          known_nones = series_best_nones[known.name]
        else:
          known_nones = len([val for val in known if val is None])

        if known_nones > candidate_nones:
          if settings.REMOTE_STORE_MERGE_RESULTS:
            # This series has potential data that might be missing from
            # earlier series.  Attempt to merge in useful data and update
            # the cache count.
            log.info("Merging multiple TimeSeries for %s" % known.name)
            for i, j in enumerate(known):
              if j is None and series[i] is not None:
                known[i] = series[i]
                known_nones -= 1
            # Store known_nones in our cache
            series_best_nones[known.name] = known_nones
          else:
            # Not merging data -
            # we've found a series better than what we've already seen. Update
            # the count cache and replace the given series in the array.
            series_best_nones[known.name] = candidate_nones
            seriesList[known.name] = series
        else:
          # If we are merging data: the existing series has no gaps, so there
          # is nothing to merge from this candidate.
          #
          # If we are picking the best series: the candidate is no better than
          # what we already have, so there is nothing left to compare. Either
          # way, move on to the next node (a `break` here would also discard
          # fetches for other, unrelated metrics).
          continue

      # If the series matched a 'known' series above, it was handled (merged,
      # swapped in, or ignored). Otherwise, add it here.
      else:
        seriesList[series.name] = series

    # Stabilize the order of the results by ordering the resulting series by name.
    # This returns the result ordering to the behavior observed pre PR#1010.
    return [seriesList[k] for k in sorted(seriesList)]

  retries = 1 # start counting at one to make log output and settings more readable
  while True:
    try:
      seriesList = _fetchData(pathExpr,startTime, endTime, requestContext, seriesList)
      return seriesList
    except Exception as e:
      if retries >= settings.MAX_FETCH_RETRIES:
        log.exception("Failed after %s retries! Root cause:\n%s" %
            (settings.MAX_FETCH_RETRIES, format_exc()))
        raise e
      else:
        log.exception("Got an exception when fetching data! Try: %i of %i. Root cause:\n%s" %
                     (retries, settings.MAX_FETCH_RETRIES, format_exc()))
        retries += 1
Example #12
def _timebounds(requestContext):
  startTime = int(epoch(requestContext['startTime']))
  endTime = int(epoch(requestContext['endTime']))
  now = int(epoch(requestContext['now']))

  return (startTime, endTime, now)
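
A quick usage sketch for _timebounds(), assuming a requestContext populated with timezone-aware datetimes as in the fetchData examples:

    from datetime import datetime

    import pytz

    requestContext = {
        'startTime': pytz.utc.localize(datetime(1970, 1, 1, 0, 0)),
        'endTime': pytz.utc.localize(datetime(1970, 1, 1, 0, 10)),
        'now': pytz.utc.localize(datetime(1970, 1, 1, 0, 10)),
    }
    print(_timebounds(requestContext))  # (0, 600, 600)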