Example 1
def prefetchData(requestContext, pathExpressions):
  """Prefetch a bunch of path expressions and stores them in the context.

  The idea is that this will allow more batching than doing a query
  each time evaluateTarget() needs to fetch a path. All the prefetched
  data is stored in the requestContext, to be accessed later by fetchData.
  """
  if not pathExpressions:
    return

  start = time.time()
  log.debug("Fetching data for [%s]" % (', '.join(pathExpressions)))

  (startTime, endTime, now) = timebounds(requestContext)

  prefetched = collections.defaultdict(list)

  for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext):
    if result is None:
      continue

    prefetched[result['pathExpression']].append((
      result['name'],
      (
        result['time_info'],
        result['values'],
      ),
    ))

  if not requestContext.get('prefetched'):
    requestContext['prefetched'] = {}

  requestContext['prefetched'][(startTime, endTime, now)] = prefetched

  log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
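
A minimal sketch of how the prefetched data stored above might be read back later. The helper name lookup_prefetched and its argument order are illustrative assumptions; only the shape of requestContext['prefetched'] comes from the snippet above, and this is not graphite-web's actual fetchData.

def lookup_prefetched(requestContext, pathExpression, startTime, endTime, now):
    # Return the list of (name, (time_info, values)) tuples stored by
    # prefetchData() for this path expression and time bounds, or an
    # empty list if nothing was prefetched for them.
    prefetched = requestContext.get('prefetched', {})
    bucket = prefetched.get((startTime, endTime, now), {})
    return bucket.get(pathExpression, [])
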
Example 2
    def _find(self, query):
        jobs = [
            Job(finder.find_nodes, query)
            for finder in self.get_finders(query.local)
        ]

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        done = 0
        errors = 0

        # Start finds
        start = time.time()
        try:
          for job in self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT):
            done += 1

            if job.exception:
              errors += 1
              log.info("Find for %s failed after %fs: %s" % (str(query), time.time() - start, str(job.exception)))
              continue

            log.debug("Got a find result for %s after %fs" % (str(query), time.time() - start))
            for node in job.result or []:
              nodes_by_path[node.path].append(node)
        except PoolTimeoutError:
          log.info("Timed out in find after %fs" % (time.time() - start))

        if errors == done:
          raise Exception('All finds failed for %s' % (str(query)))

        log.debug("Got all find results for %s in %fs" % (str(query), time.time() - start))
        return self._list_nodes(query, nodes_by_path)
Example 3
    def wait_jobs(self, jobs, timeout, context):
        if not jobs:
            return []

        start = time.time()
        results = []
        failed = []
        done = 0
        try:
            for job in self.pool_exec(jobs, timeout):
                elapsed = time.time() - start
                done += 1
                if job.exception:
                    failed.append(job)
                    log.info("Exception during %s after %fs: %s" %
                             (job, elapsed, str(job.exception)))
                else:
                    log.debug("Got a result for %s after %fs" % (job, elapsed))
                    results.append(job.result)
        except PoolTimeoutError:
            message = "Timed out after %fs for %s" % (time.time() - start,
                                                      context)
            log.info(message)
            if done == 0:
                raise Exception(message)

        if len(failed) == done:
            message = "All requests failed for %s (%d)" % (context,
                                                           len(failed))
            for job in failed:
                message += "\n\n%s: %s: %s" % (job, job.exception, '\n'.join(
                    traceback.format_exception(*job.exception_info)))
            raise Exception(message)

        return results
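
A self-contained restatement of the error-accounting rule that wait_jobs applies, using plain (result, exception) pairs instead of the pool's Job objects; the pair format and the function name are assumptions made purely for illustration.

def summarize_jobs(job_results):
    # job_results: list of (result, exception) pairs, one per finished job.
    # Mirrors the "len(failed) == done" check above: raise only when every
    # job failed, otherwise return the successful results.
    results = [r for r, exc in job_results if exc is None]
    failed = [exc for _, exc in job_results if exc is not None]
    if job_results and len(failed) == len(job_results):
        raise Exception("All requests failed (%d)" % len(failed))
    return results
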
Example 4
    def request(self, path, fields=None, headers=None, timeout=None):
        url = "%s%s" % (self.url, path)
        url_full = "%s?%s" % (url, urlencode(fields))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=fields,
                headers=headers,
                timeout=timeout,
                preload_content=False)
        except BaseException as err:
            self.fail()
            log.exception("RemoteFinder[%s] Error requesting %s: %s" %
                          (self.host, url_full, err))
            raise Exception("Error requesting %s: %s" % (url_full, err))

        if result.status != 200:
            result.release_conn()
            self.fail()
            log.exception("RemoteFinder[%s] Error response %d from %s" %
                          (self.host, result.status, url_full))
            raise Exception("Error response %d from %s" %
                            (result.status, url_full))

        result.url_full = url_full

        # reset last failure time so that retried fetches can re-enable a remote
        self.last_failure = 0

        log.debug("RemoteFinder[%s] Fetched %s" % (self.host, url_full))
        return result
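
A minimal sketch of the failure bookkeeping that request() relies on through self.fail() and self.last_failure. The class name and the cooldown value below are assumptions for illustration only, not graphite-web's RemoteFinder implementation; the snippet above simply resets last_failure to 0 after any successful response.

import time

class FailureTracker(object):
    def __init__(self, cooldown=60):
        # cooldown: seconds a host stays sidelined after a failure (assumed value).
        self.cooldown = cooldown
        self.last_failure = 0

    def fail(self):
        # Record the time of the most recent failure.
        self.last_failure = time.time()

    @property
    def available(self):
        # A host becomes usable again once the cooldown has elapsed,
        # or immediately if last_failure was reset to 0.
        return time.time() - self.last_failure > self.cooldown
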
Example 5
    def fetch_remote(self, patterns, startTime, endTime, now, requestContext):
        patterns = set(patterns)

        # TODO: Change this to simply `fetch()` in order to support optimizations
        # for local finders too. This also requires using the thread pool and
        # limiting the number of results using the warning and failure thresholds.
        # Also support the nice merging features of MultiReader.
        if requestContext['localOnly']:
            return []

        if not patterns:
            return []

        log.debug(
            'prefetchRemoteData:: Starting fetch_list on all backends')

        results = []
        for finder in self.finders:
            is_local = getattr(finder, 'local', True)
            if is_local:
                continue

            result = finder.fetch(
                patterns, startTime, endTime,
                now=now, requestContext=requestContext
            )
            results.append(result)
        return results
Example 6
    def __init__(self,
                 name,
                 start,
                 end,
                 step,
                 values,
                 consolidate='average',
                 tags=None):
        list.__init__(self, values)
        self.name = name
        self.start = start
        self.end = end
        self.step = step
        self.consolidationFunc = consolidate
        self.valuesPerPoint = 1
        self.options = {}
        self.pathExpression = name

        if tags:
            self.tags = tags
        else:
            self.tags = {'name': name}
            # parse for tags if a tagdb is configured and name doesn't look like a function-wrapped name
            if STORE.tagdb and not re.match('^[a-z]+[(].+[)]$', name,
                                            re.IGNORECASE):
                try:
                    self.tags = STORE.tagdb.parse(name).tags
                except Exception as err:
                    # tags couldn't be parsed, just use "name" tag
                    log.debug("Couldn't parse tags for %s: %s" % (name, err))
Example 7
    def fetch_remote(self, patterns, startTime, endTime, now, requestContext):
        patterns = set(patterns)

        # TODO: Change this to simply `fetch()` in order to support optimizations
        # for local finders too. This also requires using the thread pool and
        # limiting the number of results using the warning and failure thresholds.
        # Also support the nice merging features of MultiReader.
        if requestContext['localOnly']:
            return []

        if not patterns:
            return []

        log.debug('prefetchRemoteData:: Starting fetch_list on all backends')

        results = []
        for finder in self.finders:
            is_local = getattr(finder, 'local', True)
            if is_local:
                continue

            result = finder.fetch(patterns,
                                  startTime,
                                  endTime,
                                  now=now,
                                  requestContext=requestContext)
            results.append(result)
        return results
Example 8
def prefetchData(requestContext, pathExpressions):
  """Prefetch a bunch of path expressions and stores them in the context.

  The idea is that this will allow more batching than doing a query
  each time evaluateTarget() needs to fetch a path. All the prefetched
  data is stored in the requestContext, to be accessed later by fetchData.
  """
  if not pathExpressions:
    return

  start = time.time()
  log.debug("Fetching data for [%s]" % (', '.join(pathExpressions)))

  (startTime, endTime, now) = timebounds(requestContext)

  prefetched = collections.defaultdict(list)

  for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext):
    if result is None:
      continue

    prefetched[result['pathExpression']].append((
      result['name'],
      (
        result['time_info'],
        result['values'],
      ),
    ))

  if not requestContext.get('prefetched'):
    requestContext['prefetched'] = {}

  requestContext['prefetched'][(startTime, endTime, now)] = prefetched

  log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
Example 9
    def request(self, path, fields=None, headers=None, timeout=None):
        url = "%s%s" % (self.url, path)
        url_full = "%s?%s" % (url, urlencode(fields))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=fields,
                headers=headers,
                timeout=timeout,
                preload_content=False)
        except BaseException as err:
            self.fail()
            log.exception("RemoteFinder[%s] Error requesting %s: %s" % (self.host, url_full, err))
            raise Exception("Error requesting %s: %s" % (url_full, err))

        if result.status != 200:
            result.release_conn()
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error response %d from %s" % (self.host, result.status, url_full))
            raise Exception("Error response %d from %s" % (result.status, url_full))

        result.url_full = url_full

        # reset last failure time so that retried fetches can re-enable a remote
        self.last_failure = 0

        log.debug("RemoteFinder[%s] Fetched %s" % (self.host, url_full))
        return result
Example 10
    def fetch(self, patterns, startTime, endTime, now, requestContext):
        # deduplicate patterns
        patterns = sorted(set(patterns))

        if not patterns:
            return []

        log.debug(
            'graphite.storage.Store.fetch :: Starting fetch on all backends')

        jobs = []
        tag_patterns = None
        pattern_aliases = defaultdict(list)
        for finder in self.get_finders(requestContext.get('localOnly')):
            # if the finder supports tags, just pass the patterns through
            if getattr(finder, 'tags', False):
                job = Job(finder.fetch,
                          'fetch for %s' % patterns,
                          patterns,
                          startTime,
                          endTime,
                          now=now,
                          requestContext=requestContext)
                jobs.append(job)
                continue

            # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table
            if tag_patterns is None:
                tag_patterns, pattern_aliases = self._tag_patterns(
                    patterns, requestContext)

            # dispatch resolved patterns to finder
            job = Job(finder.fetch,
                      'fetch for %s' % tag_patterns,
                      tag_patterns,
                      startTime,
                      endTime,
                      now=now,
                      requestContext=requestContext)
            jobs.append(job)

        # Start fetches
        start = time.time()
        results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT,
                                 'fetch for %s' % str(patterns))
        results = [i for l in results for i in l]  # flatten

        # translate path expressions for responses from resolved seriesByTag patterns
        for result in results:
            if result['name'] == result['pathExpression'] and result[
                    'pathExpression'] in pattern_aliases:
                for pathExpr in pattern_aliases[result['pathExpression']]:
                    newresult = deepcopy(result)
                    newresult['pathExpression'] = pathExpr
                    results.append(newresult)

        log.debug("Got all fetch results for %s in %fs" %
                  (str(patterns), time.time() - start))
        return results
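
A hedged sketch of the path-expression translation done at the end of fetch() above: results that came back for a resolved seriesByTag pattern are copied once per original alias so callers can still match them to the targets they asked for. The input shapes are assumptions based on the snippet, not a documented API.

from copy import deepcopy

def translate_path_expressions(results, pattern_aliases):
    # pattern_aliases: resolved pattern -> list of original pattern strings.
    translated = list(results)
    for result in results:
        if result['name'] != result['pathExpression']:
            continue
        for alias in pattern_aliases.get(result['pathExpression'], []):
            copied = deepcopy(result)
            copied['pathExpression'] = alias
            translated.append(copied)
    return translated
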
Example 11
    def fetch(self, patterns, startTime, endTime, now, requestContext):
        # deduplicate patterns
        patterns = sorted(set(patterns))

        if not patterns:
            return []

        log.debug(
            'graphite.storage.Store.fetch :: Starting fetch on all backends')

        jobs = []
        tag_patterns = None
        pattern_aliases = defaultdict(list)
        for finder in self.get_finders(requestContext.get('localOnly')):
          # if the finder supports tags, just pass the patterns through
          if getattr(finder, 'tags', False):
            job = Job(
                self._observed_fetch,
                'fetch for %s' % patterns,
                finder,
                patterns, startTime, endTime,
                now=now, requestContext=requestContext
            )
            jobs.append(job)
            continue

          # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table
          if tag_patterns is None:
            tag_patterns, pattern_aliases = self._tag_patterns(patterns, requestContext)

          # dispatch resolved patterns to finder
          job = Job(
              self._observed_fetch,
              'fetch for %s' % tag_patterns,
              finder,
              tag_patterns, startTime, endTime,
              now=now, requestContext=requestContext
          )
          jobs.append(job)

        done = 0
        errors = 0

        # Start fetches
        start = time.time()
        results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT,
                                 'fetch for %s' % str(patterns))
        results = [i for l in results for i in l]  # flatten

        # translate path expressions for responses from resolved seriesByTag patterns
        for result in results:
          if result['name'] == result['pathExpression'] and result['pathExpression'] in pattern_aliases:
            for pathExpr in pattern_aliases[result['pathExpression']]:
              newresult = deepcopy(result)
              newresult['pathExpression'] = pathExpr
              results.append(newresult)

        log.debug("Got all fetch results for %s in %fs" % (str(patterns), time.time() - start))
        return results
Example 12
    def _fetch_list_locked(self, url, query_string, query_params, headers):
        url_full = "%s?%s" % (url, query_string)

        jobs = [(self._fetch, url, query_string, query_params, headers)]
        q = pool_apply(self.store.finder.worker_pool(), jobs)

        log.debug('RemoteReader:: Storing FetchInProgress for %s' % url_full)
        return FetchInProgress(_Results(q))
Example 13
    def _fetch_list_locked(self, url, query_string, query_params, headers):
        url_full = "%s?%s" % (url, query_string)

        jobs = [(self._fetch, url, query_string, query_params, headers)]
        q = pool_apply(self.store.finder.worker_pool(), jobs)

        log.debug('RemoteReader:: Storing FetchInProgress for %s' % url_full)
        return FetchInProgress(_Results(q))
Example 14
    def prepare_slow_pool(self, req_key):
        self.pool_name = 'graphouse_slow_requests_pool'

        if settings.USE_WORKER_POOL:
            self.thread_count = min(parallel_jobs_for_slow_pool,
                                    settings.POOL_MAX_WORKERS)
        log.debug('DEBUG[{}]: Using slow pool with "{}" threads'.format(
            req_key, self.thread_count))
Example 15
    def tagdb_auto_complete_values(self,
                                   exprs,
                                   tag,
                                   valuePrefix=None,
                                   limit=None,
                                   requestContext=None):
        log.debug(
            'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends'
        )

        if requestContext is None:
            requestContext = {}

        context = 'values for %s %s %s' % (str(exprs), tag, valuePrefix or '')
        jobs = []
        use_tagdb = False
        for finder in self.get_finders(requestContext.get('localOnly')):
            if getattr(finder, 'tags', False):
                job = Job(finder.auto_complete_values,
                          context,
                          exprs,
                          tag,
                          valuePrefix=valuePrefix,
                          limit=limit,
                          requestContext=requestContext)
                jobs.append(job)
            else:
                use_tagdb = True

        # start finder jobs
        start = time.time()
        results = set()

        # if we're using the local tagdb then execute it (in the main thread
        # so that LocalDatabaseTagDB will work)
        if use_tagdb:
            results.update(
                self.tagdb.auto_complete_values(exprs,
                                                tag,
                                                valuePrefix=valuePrefix,
                                                limit=limit,
                                                requestContext=requestContext))

        for result in self.wait_jobs(jobs, settings.FIND_TIMEOUT, context):
            results.update(result)

        # sort & limit results
        results = sorted(results)
        if limit:
            results = results[:int(limit)]

        log.debug("Got all autocomplete %s in %fs" %
                  (context, time.time() - start))
        return results
Example 16
    def fetch(self, patterns, startTime, endTime, now, requestContext):
        # deduplicate patterns
        patterns = list(set(patterns))

        if not patterns:
            return []

        log.debug(
            'graphite.storage.Store.fetch :: Starting fetch on all backends')

        jobs = [
            Job(finder.fetch,
                patterns,
                startTime,
                endTime,
                now=now,
                requestContext=requestContext)
            for finder in self.get_finders(requestContext.get('localOnly'))
        ]

        results = []

        done = 0
        errors = 0

        # Start fetches
        start = time.time()
        try:
            for job in pool_exec(get_pool(), jobs,
                                 settings.REMOTE_FETCH_TIMEOUT):
                done += 1

                if job.exception:
                    errors += 1
                    log.debug("Fetch for %s failed after %fs: %s" %
                              (str(patterns), time.time() - start,
                               str(job.exception)))
                    continue

                log.debug("Got a fetch result for %s after %fs" %
                          (str(patterns), time.time() - start))
                results.extend(job.result)
        except PoolTimeoutError:
            log.debug("Timed out in fetch after %fs" % (time.time() - start))

        if errors == done:
            raise Exception('All fetches failed for %s' % (str(patterns)))

        log.debug("Got all fetch results for %s in %fs" %
                  (str(patterns), time.time() - start))
        return results
Example 17
    def wait_jobs(self, jobs, timeout, context):
        if not jobs:
            return []

        start = time.time()
        results = []
        failed = []
        done = 0
        try:
            for job in self.pool_exec(jobs, timeout):
                elapsed = time.time() - start
                done += 1
                if job.exception:
                    failed.append(job)
                    log.info("Exception during %s after %fs: %s" % (
                        job, elapsed, str(job.exception))
                    )
                else:
                    log.debug("Got a result for %s after %fs" % (job, elapsed))
                    results.append(job.result)
        except PoolTimeoutError:
            message = "Timed out after %fs for %s" % (
                time.time() - start, context
            )
            log.info(message)
            if done == 0:
                raise Exception(message)

        if len(failed) == done:
            message = "All requests failed for %s (%d)" % (
                context, len(failed)
            )
            for job in failed:
                message += "\n\n%s: %s: %s" % (
                    job, job.exception,
                    '\n'.join(traceback.format_exception(*job.exception_info))
                )
            raise Exception(message)

        if len(results) < len(jobs) and settings.STORE_FAIL_ON_ERROR:
            message = "%s request(s) failed for %s (%d)" % (
                len(jobs) - len(results), context, len(jobs)
            )
            for job in failed:
                message += "\n\n%s: %s: %s" % (
                    job, job.exception,
                    '\n'.join(traceback.format_exception(*job.exception_info))
                )
            raise Exception(message)

        return results
Example 18
 def _request(self, url, query, flatbuffers=False):
     tag_headers = copy.deepcopy(self.headers)
     if flatbuffers:
         tag_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
     if not isinstance(query, dict):
         query = {'query': query}
     source = ""
     if settings.DEBUG:
         source = sys._getframe().f_back.f_code.co_name
     for i in range(0, self.max_retries):
         try:
             if self.zipkin_enabled == True:
                 traceheader = binascii.hexlify(os.urandom(8))
                 tag_headers['X-B3-TraceId'] = traceheader
                 tag_headers['X-B3-SpanId'] = traceheader
                 if self.zipkin_event_trace_level == 1:
                     tag_headers['X-Mtev-Trace-Event'] = '1'
                 elif self.zipkin_event_trace_level == 2:
                     tag_headers['X-Mtev-Trace-Event'] = '2'
             r = requests.get(url, params=query, headers=tag_headers,
                                  timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0)))
             r.raise_for_status()
             if flatbuffers:
                 r = irondb_flatbuf.metric_find_results(r.content)
             else:
                 r = r.json()
             if settings.DEBUG:
                 log.debug("IRONdbTagFetcher.%s, result: %s" % (source, json.dumps(r)))
             return r
         except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
             # on down nodes, try again on another node until "tries"
             log.exception("IRONdbTagFetcher.%s ConnectionError %s" % (source, ex))
         except requests.exceptions.ConnectTimeout as ex:
             # on down nodes, try again on another node until "tries"
             log.exception("IRONdbTagFetcher.%s ConnectTimeout %s" % (source, ex))
         except irondb_flatbuf.FlatBufferError as ex:
             # flatbuffer error, try again
             log.exception("IRONdbTagFetcher.%s FlatBufferError %s" % (source, ex))
         except JSONDecodeError as ex:
             # json error, try again
             log.exception("IRONdbTagFetcher.%s JSONDecodeError %s" % (source, ex))
         except requests.exceptions.ReadTimeout as ex:
             # on down nodes, try again on another node until "tries"
             log.exception("IRONdbTagFetcher.%s ReadTimeout %s" % (source, ex))
         except requests.exceptions.HTTPError as ex:
             # http status code errors are failures, stop immediately
             log.exception("IRONdbTagFetcher.%s HTTPError %s %s" % (source, ex, r.content))
             break
     return ()
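
A generic restatement of the retry policy used by _request() above: transient failures (connection problems, decode errors) are retried up to max_retries times, while HTTP status errors abort immediately and the function falls back to an empty result. All names below are illustrative and logging is omitted.

def retry_request(send, max_retries, transient_exceptions, fatal_exceptions):
    # send: zero-argument callable performing one attempt.
    # transient_exceptions / fatal_exceptions: exception classes or tuples.
    for _ in range(max_retries):
        try:
            return send()
        except transient_exceptions:
            continue
        except fatal_exceptions:
            break
    return ()
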
Example 19
def prefetchData(requestContext, pathExpressions):
    """Prefetch a bunch of path expressions and stores them in the context.

  The idea is that this will allow more batching than doing a query
  each time evaluateTarget() needs to fetch a path. All the prefetched
  data is stored in the requestContext, to be accessed later by fetchData.
  """
    if not pathExpressions:
        return

    start = time.time()
    log.debug("Fetching data for [%s]" % (', '.join(pathExpressions)))

    (startTime, endTime, now) = timebounds(requestContext)

    prefetched = collections.defaultdict(list)

    for result in STORE.fetch(pathExpressions, startTime, endTime, now,
                              requestContext):
        if result is None:
            continue

        prefetched[result['pathExpression']].append((
            result['name'],
            (
                result['time_info'],
                result['values'],
            ),
        ))

    # Several third-party readers including rrdtool and biggraphite return values in a
    # generator which can only be iterated on once. These must be converted to a list.
    for pathExpression, items in prefetched.items():
        for i, (name, (time_info, values)) in enumerate(items):
            if isinstance(values, types.GeneratorType):
                prefetched[pathExpression][i] = (name, (time_info,
                                                        list(values)))

    if not requestContext.get('prefetched'):
        requestContext['prefetched'] = {}

    if (startTime, endTime, now) in requestContext['prefetched']:
        requestContext['prefetched'][(startTime, endTime,
                                      now)].update(prefetched)
    else:
        requestContext['prefetched'][(startTime, endTime, now)] = prefetched

    log.rendering("Fetched data for [%s] in %fs" %
                  (', '.join(pathExpressions), time.time() - start))
Example 20
    def tagdb_auto_complete_tags(self, exprs, tagPrefix=None, limit=None, requestContext=None):
        log.debug(
            'graphite.storage.Store.auto_complete_tags :: Starting lookup on all backends')

        if requestContext is None:
          requestContext = {}

        context = 'tags for %s %s' % (str(exprs), tagPrefix or '')
        jobs = []
        use_tagdb = False
        for finder in self.get_finders(requestContext.get('localOnly')):
          if getattr(finder, 'tags', False):
            job = Job(
                finder.auto_complete_tags, context,
                exprs, tagPrefix=tagPrefix,
                limit=limit, requestContext=requestContext
            )
            jobs.append(job)
          else:
            use_tagdb = True


        results = set()

        # if we're using the local tagdb then execute it (in the main thread
        # so that LocalDatabaseTagDB will work)
        if use_tagdb:
          results.update(self.tagdb.auto_complete_tags(
              exprs, tagPrefix=tagPrefix,
              limit=limit, requestContext=requestContext
          ))

        # Start fetches
        start = time.time()
        for result in self.wait_jobs(jobs, settings.FIND_TIMEOUT, context):
            results.update(result)

        # sort & limit results
        results = sorted(results)
        if limit:
          results = results[:int(limit)]

        log.debug("Got all autocomplete %s in %fs" % (
            context, time.time() - start)
        )
        return results
Example 21
    def get_index(self, requestContext=None):
        log.debug('graphite.storage.Store.get_index :: Starting get_index on all backends')

        if not requestContext:
          requestContext = {}

        context = 'get_index'
        jobs = [
            Job(finder.get_index, context, requestContext=requestContext)
            for finder in self.get_finders(local=requestContext.get('localOnly'))
        ]

        start = time.time()
        results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, context)
        results = [i for l in results if l is not None for i in l]  # flatten

        log.debug("Got all index results in %fs" % (time.time() - start))
        return sorted(list(set(results)))
Example 22
def prefetchData(requestContext, pathExpressions):
  """Prefetch a bunch of path expressions and stores them in the context.

  The idea is that this will allow more batching than doing a query
  each time evaluateTarget() needs to fetch a path. All the prefetched
  data is stored in the requestContext, to be accessed later by fetchData.
  """
  if not pathExpressions:
    return

  start = time.time()
  log.debug("Fetching data for [%s]" % (', '.join(pathExpressions)))

  (startTime, endTime, now) = timebounds(requestContext)

  prefetched = collections.defaultdict(list)

  for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext):
    if result is None:
      continue

    prefetched[result['pathExpression']].append((
      result['name'],
      (
        result['time_info'],
        result['values'],
      ),
    ))

  # Several third-party readers including rrdtool and biggraphite return values in a
  # generator which can only be iterated on once. These must be converted to a list.
  for pathExpression, items in prefetched.items():
    for i, (name, (time_info, values)) in enumerate(items):
      if isinstance(values, types.GeneratorType):
        prefetched[pathExpression][i] = (name, (time_info, list(values)))

  if not requestContext.get('prefetched'):
    requestContext['prefetched'] = {}

  requestContext['prefetched'][(startTime, endTime, now)] = prefetched

  log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
Example 23
    def _find(self, query):
        context = 'find %s' % query
        jobs = [
            Job(finder.find_nodes, context, query)
            for finder in self.get_finders(query.local)
        ]

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        # Start finds
        start = time.time()
        results = self.wait_jobs(jobs, settings.FIND_TIMEOUT, context)
        for result in results:
            for node in result or []:
                nodes_by_path[node.path].append(node)

        log.debug("Got all find results for %s in %fs" %
                  (str(query), time.time() - start))
        return self._list_nodes(query, nodes_by_path)
Example 24
    def fetch_list(self, startTime, endTime, now=None, requestContext=None):
        t = time.time()
        in_flight = InFlight(self.store, requestContext)

        query_params = [
            ('format', 'pickle'),
            ('local', '1'),
            ('noCache', '1'),
            ('from', str(int(startTime))),
            ('until', str(int(endTime)))
        ]

        if not self.bulk_query:
            return []

        for target in self.bulk_query:
            query_params.append(('target', target))

        if now is not None:
            query_params.append(('now', str(int(now))))

        query_string = urlencode(query_params)
        urlpath = '/render/'
        url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http',
                             self.store.host, urlpath)
        url_full = "%s?%s" % (url, query_string)
        headers = requestContext.get('forwardHeaders') if requestContext else None

        lock = in_flight.get_request_lock(url_full)
        with lock:
            request = in_flight.get_request(url_full)
            if request:
                log.debug("RemoteReader:: Returning cached FetchInProgress %s" % url_full)
                return request

            data = self._fetch_list_locked(url, query_string, query_params, headers)
            in_flight.start_request(url_full, data)

        log.debug(
            "RemoteReader:: Returning %s in %fs" % (url_full, time.time() - t))
        return data
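
A toy illustration of the in-flight deduplication that fetch_list() performs through InFlight: concurrent callers asking for the same url_full share one request instead of each issuing their own. This is a simplified single-lock version written for this example, not graphite-web's InFlight class.

import threading

class InFlightRequests(object):
    def __init__(self):
        self._lock = threading.Lock()
        self._requests = {}

    def get_or_start(self, url_full, start_request):
        # start_request: zero-argument callable that kicks off the fetch
        # and returns a handle (e.g. a FetchInProgress-like object).
        with self._lock:
            if url_full in self._requests:
                return self._requests[url_full]
            handle = start_request()
            self._requests[url_full] = handle
            return handle
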
Example 25
    def fetch_list(self, startTime, endTime, now=None, requestContext=None):
        t = time.time()
        in_flight = InFlight(self.store, requestContext)

        query_params = [
            ('format', 'pickle'),
            ('local', '1'),
            ('noCache', '1'),
            ('from', str(int(startTime))),
            ('until', str(int(endTime)))
        ]

        if not self.bulk_query:
            return []

        for target in self.bulk_query:
            query_params.append(('target', target))

        if now is not None:
            query_params.append(('now', str(int(now))))

        query_string = urlencode(query_params)
        urlpath = '/render/'
        url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http',
                             self.store.host, urlpath)
        url_full = "%s?%s" % (url, query_string)
        headers = requestContext.get('forwardHeaders') if requestContext else None

        lock = in_flight.get_request_lock(url_full)
        with lock:
            request = in_flight.get_request(url_full)
            if request:
                log.debug("RemoteReader:: Returning cached FetchInProgress %s" % url_full)
                return request

            data = self._fetch_list_locked(url, query_string, query_params, headers)
            in_flight.start_request(url_full, data)

        log.debug(
            "RemoteReader:: Returning %s in %fs" % (url_full, time.time() - t))
        return data
Example 26
    def fetch_remote(self, patterns, requestContext):
        if requestContext['localOnly']:
            return

        if patterns is None:
            return

        (startTime, endTime, now) = timebounds(requestContext)
        log.debug('prefetchRemoteData:: Starting fetch_list on all backends')

        results = []
        for finder in self.finders:
            if not hasattr(finder, 'fetch') or finder.local:
                continue
            result = finder.fetch(patterns,
                                  startTime,
                                  endTime,
                                  now=now,
                                  requestContext=requestContext)
            results.append(result)
        return results
Example 27
    def get_index(self, requestContext=None):
        log.debug(
            'graphite.storage.Store.get_index :: Starting get_index on all backends'
        )

        if not requestContext:
            requestContext = {}

        context = 'get_index'
        jobs = [
            Job(finder.get_index, context, requestContext=requestContext)
            for finder in self.get_finders(
                local=requestContext.get('localOnly'))
        ]

        start = time.time()
        results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, context)
        results = [i for l in results if l is not None for i in l]  # flatten

        log.debug("Got all index results in %fs" % (time.time() - start))
        return sorted(list(set(results)))
Example 28
    def _find(self, query):
        context = 'find %s' % query
        jobs = [
            Job(finder.find_nodes, context, query)
            for finder in self.get_finders(query.local)
        ]

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        # Start finds
        start = time.time()
        results = self.wait_jobs(jobs, settings.FIND_TIMEOUT, context)
        for result in results:
            for node in result or []:
                nodes_by_path[node.path].append(node)

        log.debug("Got all find results for %s in %fs" % (
            str(query), time.time() - start)
        )
        return self._list_nodes(query, nodes_by_path)
Example 29
    def _fetch(self, url, query_string, query_params, headers):
        url_full = "%s?%s" % (url, query_string)

        log.debug(
            "RemoteReader:: Starting to execute _fetch %s" % url_full)
        try:
            log.debug("ReadResult:: Requesting %s" % url_full)
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FETCH_TIMEOUT,
            )

            if result.status != 200:
                self.store.fail()
                self.log_error("ReadResult:: Error response %d from %s" % url_full)
                data = []
            else:
                data = unpickle.loads(result.data)
        except Exception as err:
            self.store.fail()
            self.log_error("ReadResult:: Error requesting %s: %s" % (url_full, err))
            data = []

        log.debug("RemoteReader:: Completed _fetch %s" % url_full)
        return data
Example 30
    def _fetch(self, url, query_string, query_params, headers):
        url_full = "%s?%s" % (url, query_string)

        log.debug(
            "RemoteReader:: Starting to execute _fetch %s" % url_full)
        try:
            log.debug("ReadResult:: Requesting %s" % url_full)
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FETCH_TIMEOUT,
            )

            if result.status != 200:
                self.store.fail()
                self.log_error("ReadResult:: Error response %d from %s" % url_full)
                data = []
            else:
                data = unpickle.loads(result.data)
        except Exception as err:
            self.store.fail()
            self.log_error("ReadResult:: Error requesting %s: %s" % (url_full, err))
            data = []

        log.debug("RemoteReader:: Completed _fetch %s" % url_full)
        return data
Example 31
    def deserialize(self, result):
        """
        Based on configuration, either stream-deserialize a response in settings.REMOTE_BUFFER_SIZE chunks,
        or read the entire payload and use inline deserialization.
        :param result: an http response object
        :return: deserialized response payload from cluster server
        """
        start = time.time()
        try:
            should_buffer = settings.REMOTE_BUFFER_SIZE > 0
            measured_reader = MeasuredReader(
                BufferedHTTPReader(result, settings.REMOTE_BUFFER_SIZE))

            if should_buffer:
                log.debug("Using streaming deserializer.")
                reader = BufferedHTTPReader(measured_reader,
                                            settings.REMOTE_BUFFER_SIZE)
                return self._deserialize_stream(
                    reader, result.getheader('content-type'))

            log.debug("Using inline deserializer for small payload")
            return self._deserialize_buffer(measured_reader.read(),
                                            result.getheader('content-type'))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding response from %s: %s" %
                            (result.url_full, err))
        finally:
            log.debug("Processed %d bytes in %f seconds." %
                      (measured_reader.bytes_read, time.time() - start))
            result.release_conn()
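
deserialize() above logs the payload size via MeasuredReader.bytes_read. Below is a minimal stand-in showing the idea of a byte-counting reader wrapper; the class name and behaviour here are assumptions for illustration, not graphite-web's MeasuredReader.

class CountingReader(object):
    def __init__(self, reader):
        self.reader = reader
        self.bytes_read = 0

    def read(self, *args, **kwargs):
        # Delegate to the wrapped file-like object and track how many
        # bytes have passed through, so callers can report payload size.
        data = self.reader.read(*args, **kwargs)
        self.bytes_read += len(data)
        return data
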
Example 32
    def deserialize(self, result):
        """
        Based on configuration, either stream-deserialize a response in settings.REMOTE_BUFFER_SIZE chunks,
        or read the entire payload and use inline deserialization.
        :param result: an http response object
        :return: deserialized response payload from cluster server
        """
        start = time.time()
        try:
            should_buffer = settings.REMOTE_BUFFER_SIZE > 0
            measured_reader = MeasuredReader(BufferedHTTPReader(result, settings.REMOTE_BUFFER_SIZE))

            if should_buffer:
                log.debug("Using streaming deserializer.")
                reader = BufferedHTTPReader(measured_reader, settings.REMOTE_BUFFER_SIZE)
                return self._deserialize_stream(reader, result.getheader('content-type'))

            log.debug("Using inline deserializer for small payload")
            return self._deserialize_buffer(measured_reader.read(), result.getheader('content-type'))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding response from %s: %s" % (result.url_full, err))
        finally:
            log.debug("Processed %d bytes in %f seconds." % (measured_reader.bytes_read, time.time() - start))
            result.release_conn()
Example 33
  def __init__(self, name, start, end, step, values, consolidate='average', tags=None):
    list.__init__(self, values)
    self.name = name
    self.start = start
    self.end = end
    self.step = step
    self.consolidationFunc = consolidate
    self.valuesPerPoint = 1
    self.options = {}
    self.pathExpression = name

    if tags:
      self.tags = tags
    else:
      self.tags = {'name': name}
      # parse for tags if a tagdb is configured and name doesn't look like a function-wrapped name
      if STORE.tagdb and not re.match('^[a-z]+[(].+[)]$', name, re.IGNORECASE):
        try:
          self.tags = STORE.tagdb.parse(name).tags
        except Exception as err:
          # tags couldn't be parsed, just use "name" tag
          log.debug("Couldn't parse tags for %s: %s" % (name, err))
Example 34
    def find_all(self, query):
        start = time.time()
        jobs = []

        # Start local searches
        for finder in self.finders:
            # Support legacy finders by defaulting to 'local = True'
            is_local = not hasattr(finder, 'local') or finder.local
            if query.local and not is_local:
                continue
            if getattr(finder, 'disabled', False):
                continue
            jobs.append((finder.find_nodes, query))

        result_queue = pool_apply(get_pool(), jobs)

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        timeout = settings.REMOTE_FIND_TIMEOUT
        deadline = start + timeout
        done = 0
        total = len(jobs)

        while done < total:
            wait_time = deadline - time.time()
            nodes = []

            try:
                nodes = result_queue.get(True, wait_time)

            # ValueError could happen if due to really unlucky timing wait_time
            # is negative
            except (Queue.Empty, ValueError):
                if time.time() > deadline:
                    log.debug("Timed out in find_nodes after %fs" % timeout)
                    break
                else:
                    continue

            log.debug("Got a find result after %fs" % (time.time() - start))
            done += 1
            for node in nodes or []:
                nodes_by_path[node.path].append(node)

        log.debug("Got all find results in %fs" % (time.time() - start))
        return self._list_nodes(query, nodes_by_path)
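
The while-loop above implements a shared deadline: every queue read waits only for whatever time remains, and an empty or late read ends the loop only once the deadline has truly passed. A self-contained restatement of that pattern, with parameter names chosen for illustration (empty_exc stands in for Queue.Empty):

import time

def drain_with_deadline(result_queue, total, timeout, empty_exc):
    start = time.time()
    deadline = start + timeout
    done = 0
    collected = []
    while done < total:
        wait_time = deadline - time.time()
        try:
            # ValueError covers the unlucky case where wait_time went negative.
            batch = result_queue.get(True, wait_time)
        except (empty_exc, ValueError):
            if time.time() > deadline:
                break
            continue
        done += 1
        collected.extend(batch or [])
    return collected
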
Example 35
    def get_index(self, requestContext=None):
        log.debug(
            'graphite.storage.Store.get_index :: Starting get_index on all backends'
        )

        if not requestContext:
            requestContext = {}

        jobs = [
            Job(finder.get_index, requestContext=requestContext)
            for finder in self.get_finders(
                local=requestContext.get('localOnly'))
        ]

        results = []

        done = 0
        errors = 0

        # Start index lookups
        start = time.time()
        try:
            for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT):
                done += 1

                if job.exception:
                    errors += 1
                    log.info("get_index failed after %fs: %s" %
                             (time.time() - start, str(job.exception)))
                    continue

                log.debug("Got an index result after %fs" %
                          (time.time() - start))
                results.extend(job.result)
        except PoolTimeoutError:
            log.info("Timed out in get_index after %fs" %
                     (time.time() - start))

        if errors == done:
            if errors == 1:
                raise Exception("get_index failed: %s" % (str(job.exception)))
            raise Exception('All index lookups failed')

        log.debug("Got all index results in %fs" % (time.time() - start))
        return sorted(list(set(results)))
Example 36
    def find_all(self, query):
        start = time.time()
        jobs = []

        # Start local searches
        for finder in self.finders:
            # Support legacy finders by defaulting to 'local = True'
            is_local = not hasattr(finder, 'local') or finder.local
            if query.local and not is_local:
                continue
            jobs.append((finder.find_nodes, query))

        result_queue = pool_apply(get_pool(), jobs)

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        timeout = settings.REMOTE_FIND_TIMEOUT
        deadline = start + timeout
        done = 0
        total = len(jobs)

        while done < total:
            wait_time = deadline - time.time()
            nodes = []

            try:
                nodes = result_queue.get(True, wait_time)

            # ValueError could happen if due to really unlucky timing wait_time
            # is negative
            except (Queue.Empty, ValueError):
                if time.time() > deadline:
                    log.debug("Timed out in find_nodes after %fs" % timeout)
                    break
                else:
                    continue

            log.debug("Got a find result after %fs" % (time.time() - start))
            done += 1
            for node in nodes or []:
                nodes_by_path[node.path].append(node)

        log.debug("Got all find results in %fs" % (time.time() - start))
        return self._list_nodes(query, nodes_by_path)
Example 37
    def find_nodes(self, query):
        start = time.time()
        jobs = []
        random.shuffle(self.remote_stores)
        for store in self.remote_stores:
            if store.available:
                jobs.append((store.find, query))

        queue = pool_apply(self.worker_pool(), jobs)

        timeout = settings.REMOTE_FIND_TIMEOUT
        deadline = start + timeout
        done = 0
        total = len(jobs)

        while done < total:
            wait_time = deadline - time.time()
            nodes = []

            try:
                nodes = queue.get(True, wait_time)

            # ValueError could happen if due to really unlucky timing wait_time
            # is negative.
            except (Queue.Empty, ValueError):
                if time.time() > deadline:
                    log.debug("Timed out in find_nodes after %fs" % timeout)
                    break
                else:
                    continue

            log.debug("Got a remote find result after %fs" %
                      (time.time() - start))
            done += 1
            for node in nodes or []:
                yield node

        log.debug("Got all remote find results in %fs" % (time.time() - start))
Example 38
    def find_nodes(self, query):
        start = time.time()
        jobs = []
        random.shuffle(self.remote_stores)
        for store in self.remote_stores:
            if store.available:
                jobs.append((store.find, query))

        queue = pool_apply(self.worker_pool(), jobs)

        timeout = settings.REMOTE_FIND_TIMEOUT
        deadline = start + timeout
        done = 0
        total = len(jobs)

        while done < total:
            wait_time = deadline - time.time()
            nodes = []

            try:
                nodes = queue.get(True, wait_time)

            # ValueError could happen if due to really unlucky timing wait_time
            # is negative.
            except (Queue.Empty, ValueError):
                if time.time() > deadline:
                    log.debug("Timed out in find_nodes after %fs" % timeout)
                    break
                else:
                    continue

            log.debug("Got a remote find result after %fs" % (time.time() - start))
            done += 1
            for node in nodes or []:
                yield node

        log.debug("Got all remote find results in %fs" % (time.time() - start))
Example 39
    def read_locked(self):
        if self.results is not None:
            log.debug(
                'RemoteReader:: retrieve completed (cached) %s' %
                (', '.join([result['path'] for result in self.results])),
            )
            return self.results

        # otherwise we get it from the queue and keep it for later
        results = self.queue.get(block=True)

        for i in range(len(results)):
            results[i]['path'] = results[i]['name']

        if not results:
            log.debug('RemoteReader:: retrieve has received no results')

        self.results = results or []

        log.debug(
            'RemoteReader:: retrieve completed %s' %
            (', '.join([result['path'] for result in results])),
        )
        return self.results
Example 40
    def read_locked(self):
        if self.results is not None:
            log.debug(
                'RemoteReader:: retrieve completed (cached) %s' %
                (', '.join([result['path'] for result in self.results])),
            )
            return self.results

        # otherwise we get it from the queue and keep it for later
        results = self.queue.get(block=True)

        for i in range(len(results)):
            results[i]['path'] = results[i]['name']

        if not results:
            log.debug('RemoteReader:: retrieve has received no results')

        self.results = results or []

        log.debug(
            'RemoteReader:: retrieve completed %s' %
            (', '.join([result['path'] for result in results])),
        )
        return self.results
Example 41
    def fetch(self, patterns, start_time, end_time, now=None, requestContext=None):
        log.debug("IRONdbFinder.fetch called")
        all_names = {}
        for pattern in patterns:
            log.debug("IRONdbFinder.fetch pattern: %s" % pattern)
            names = {}
            name_headers = copy.deepcopy(self.headers)
            name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
            for i in range(0, self.max_retries):
                try:
                    node = urls.names
                    query_start = time.gmtime()
                    data_type = "json"
                    if self.zipkin_enabled == True:
                        traceheader = binascii.hexlify(os.urandom(8))
                        name_headers['X-B3-TraceId'] = traceheader
                        name_headers['X-B3-SpanId'] = traceheader
                        if self.zipkin_event_trace_level == 1:
                            name_headers['X-Mtev-Trace-Event'] = '1'
                        if self.zipkin_event_trace_level == 2:
                            name_headers['X-Mtev-Trace-Event'] = '2'
                    name_params = {'query': pattern}
                    if self.activity_tracking:
                        name_params['activity_start_secs'] = start_time
                        name_params['activity_end_secs'] = end_time
                    r = requests.get(node, params=name_params, headers=name_headers,
                                     timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0)))
                    r.raise_for_status()
                    if r.headers['content-type'] == 'application/json':
                        names = r.json()
                    elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list':
                        names = irondb_flatbuf.metric_find_results(r.content)
                        data_type = "flatbuffer"
                    else:
                        pass
                    result_count = len(names) if names else -1
                    self.query_log(node, query_start, r.elapsed, result_count, pattern, "names", data_type, start_time, end_time)
                    break
                except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
                    # on down nodes, try again on another node until "tries"
                    log.exception("IRONdbFinder.fetch ConnectionError %s" % ex)
                except requests.exceptions.ConnectTimeout as ex:
                    # on down nodes, try again on another node until "tries"
                    log.exception("IRONdbFinder.fetch ConnectTimeout %s" % ex)
                except irondb_flatbuf.FlatBufferError as ex:
                    # flatbuffer error, try again
                    log.exception("IRONdbFinder.fetch FlatBufferError %s" % ex)
                except JSONDecodeError as ex:
                    # json error, try again
                    log.exception("IRONdbFinder.fetch JSONDecodeError %s" % ex)
                except requests.exceptions.ReadTimeout as ex:
                    # on down nodes, try again on another node until "tries"
                    log.exception("IRONdbFinder.fetch ReadTimeout %s" % ex)
                except requests.exceptions.HTTPError as ex:
                    # http status code errors are failures, stop immediately
                    log.exception("IRONdbFinder.fetch HTTPError %s %s" % (ex, r.content))
                    break

            all_names[pattern] = names

        measurement_headers = copy.deepcopy(self.headers)
        measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list'
        in_this_batch = 0
        fset = []
        fetcher = self.newfetcher(fset, measurement_headers)
        for pattern, names in all_names.items():
            for name in names:
                if 'leaf' in name and 'leaf_data' in name:
                    if self.batch_size == 0 or in_this_batch >= self.batch_size:
                        in_this_batch = 0
                        fetcher = self.newfetcher(fset, measurement_headers)
                    fetcher.add_leaf(name['name'], name['leaf_data'])
                    name['fetcher'] = fetcher
                    in_this_batch += 1

        self.dispatchfetches(fset, start_time, end_time)

        results = []
        first_correction = False
        for pattern, names in all_names.items():
            for name in names:
                fetcher = fset[0]
                if 'fetcher' in name:
                    fetcher = name['fetcher']
                res = fetcher.series(name['name'])
                if res is None:
                    continue

                time_info, values = res

                # At least one series needs to have the right start time
                # And to not be complete jerks we cull leading nulls, so on
                # data fetches where everything has leading nulls, the start
                # time in the graph can slide forward.  We need one anchor,
                # it will be whatever series we see first.
                if not first_correction:
                    prepend = []
                    # time_info is immutable, recreate it so we can muck with it
                    time_info = [ time_info[0], time_info[1], time_info[2] ]
                    while time_info[0] > start_time:
                        time_info[0] -= time_info[2]
                        prepend.append(None)
                    if len(prepend) > 0:
                        values = prepend + values
                    first_correction = True
                results.append({
                    'pathExpression': pattern,
                    'path' : name['name'],
                    'name' : name['name'],
                    'time_info' : time_info,
                    'values': values
                })
        return results
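
The anchoring step above is easier to follow in isolation: when a series comes back with its first point later than the requested start, leading Nones are prepended one step at a time until the window lines up again. A minimal standalone sketch, where the helper name pad_to_start and the sample numbers are invented for illustration:

def pad_to_start(time_info, values, start_time):
    # Walk the start of the window back one step at a time, padding with None,
    # mirroring the first_correction branch above.
    start, end, step = time_info
    prepend = []
    while start > start_time:
        start -= step
        prepend.append(None)
    return (start, end, step), prepend + values

# Example: data begins 30s after the requested start with a 10s step.
time_info, values = pad_to_start((130, 160, 10), [1, 2, 3], 100)
assert time_info == (100, 160, 10)
assert values == [None, None, None, 1, 2, 3]
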
Example n. 42
0
    def find_nodes(self, query):
        log.debug("IRONdbFinder.find_nodes, query: %s, max_retries: %d" % (query.pattern, self.max_retries))
        metrics_expand = False
        if query.pattern.endswith('.**'):
            query.pattern = query.pattern[:-1]
            metrics_expand = True
        names = {}
        name_headers = copy.deepcopy(self.headers)
        name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
        for _ in range(self.max_retries):
            try:
                if self.zipkin_enabled:
                    traceheader = binascii.hexlify(os.urandom(8))
                    name_headers['X-B3-TraceId'] = traceheader
                    name_headers['X-B3-SpanId'] = traceheader
                    if self.zipkin_event_trace_level == 1:
                        name_headers['X-Mtev-Trace-Event'] = '1'
                    elif self.zipkin_event_trace_level == 2:
                        name_headers['X-Mtev-Trace-Event'] = '2'
                r = requests.get(urls.names, params={'query': query.pattern}, headers=name_headers,
                                 timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0)))
                r.raise_for_status()
                if r.headers['content-type'] == 'application/json':
                    names = r.json()
                elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list':
                    names = irondb_flatbuf.metric_find_results(r.content)
                else:
                    pass
                break
            except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
                # on down nodes, try again on another node until "tries"
                log.exception("IRONdbFinder.find_nodes ConnectionError %s" % ex)
            except requests.exceptions.ConnectTimeout as ex:
                # on down nodes, try again on another node until "tries"
                log.exception("IRONdbFinder.find_nodes ConnectTimeout %s" % ex)
            except irondb_flatbuf.FlatBufferError as ex:
                # flatbuffer error, try again
                log.exception("IRONdbFinder.find_nodes FlatBufferError %s" % ex)
            except JSONDecodeError as ex:
                # json error, try again
                log.exception("IRONdbFinder.find_nodes JSONDecodeError %s" % ex)
            except requests.exceptions.ReadTimeout as ex:
                # on down nodes, try again on another node until "tries"
                log.exception("IRONdbFinder.find_nodes ReadTimeout %s" % ex)
            except requests.exceptions.HTTPError as ex:
                # http status code errors are failures, stop immediately
                log.exception("IRONdbFinder.find_nodes HTTPError %s %s" % (ex, r.content))
                break
        if settings.DEBUG:
            log.debug("IRONdbFinder.find_nodes, result: %s" % json.dumps(names))

        # for each set of self.batch_size leafnodes, execute an IRONdbMeasurementFetcher
        # so we can do these in batches.
        measurement_headers = copy.deepcopy(self.headers)
        measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list'
        fetcher = IRONdbMeasurementFetcher(measurement_headers, self.timeout, self.connection_timeout, self.database_rollups, self.rollup_window, self.max_retries,
                                           self.zipkin_enabled, self.zipkin_event_trace_level)

        for name in names:
            if 'leaf' in name and 'leaf_data' in name:
                fetcher.add_leaf(name['name'], name['leaf_data'])
                reader = IRONdbReader(name['name'], fetcher)
                yield LeafNode(name['name'], reader)
            else:
                yield BranchNode(name['name'])
                if metrics_expand:
                    query = FindQuery(name['name'] + '.**', None, None)
                    for node in self.find_nodes(query):
                        yield node
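
IRONdbFinder.fetch and IRONdbFinder.find_nodes share the same retry shape: transient failures (connection errors, timeouts, flatbuffer or JSON decode errors) fall through to the next attempt, a successful response breaks out of the loop, and an HTTP status error gives up immediately. A generic sketch of that control flow, with a hypothetical fetch_once callable and placeholder exception classes standing in for requests and the decoders:

class TransientError(Exception):
    pass  # stands in for the connection, timeout, and decode errors above

class PermanentError(Exception):
    pass  # stands in for requests.exceptions.HTTPError above

def fetch_with_retries(fetch_once, max_retries):
    result = None
    for _ in range(max_retries):
        try:
            result = fetch_once()
            break                # success: stop retrying
        except TransientError:
            continue             # down node or bad payload: try the next attempt
        except PermanentError:
            break                # status-code error: give up immediately
    return result

# fetch_with_retries(lambda: 42, 3) returns 42 on the first attempt; a callable
# that always raises TransientError yields None after max_retries attempts.
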
Example n. 43
0
def _merge_results(pathExpr, startTime, endTime, result_queue, seriesList):
  log.debug("render.datalib.fetchData :: starting to merge")
  for path, results in result_queue:
    results = wait_for_result(results)

    if not results:
      log.debug("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (path, startTime, endTime))
      continue

    try:
      (timeInfo, values) = results
    except ValueError as e:
      raise Exception("could not parse timeInfo/values from metric '%s': %s" % (path, e))
    (start, end, step) = timeInfo

    series = TimeSeries(path, start, end, step, values)

    # hack to pass expressions through to render functions
    series.pathExpression = pathExpr

    if series.name in seriesList:
      # This counts the Nones in each series, and is unfortunately O(n) for each
      # series, which may be worth further optimization. The value of doing this
      # at all is to avoid the "flipping" effect of loading a graph multiple times
      # and having inconsistent data returned if one of the backing stores has
      # inconsistent data. This is imperfect as a validity test, but in practice
      # nicely keeps us using the "most complete" dataset available. Think of it
      # as a very weak CRDT resolver.
      candidate_nones = 0
      if not settings.REMOTE_STORE_MERGE_RESULTS:
        candidate_nones = len(
          [val for val in values if val is None])

      known = seriesList[series.name]
      # To avoid repeatedly recounting the 'Nones' in series we've already seen,
      # cache the best known count so far in a dict.
      if known.name in series_best_nones:
        known_nones = series_best_nones[known.name]
      else:
        known_nones = len([val for val in known if val is None])
        series_best_nones[known.name] = known_nones

      if known_nones > candidate_nones and len(series):
        if settings.REMOTE_STORE_MERGE_RESULTS:
          # This series has potential data that might be missing from
          # earlier series.  Attempt to merge in useful data and update
          # the cache count.
          log.debug("Merging multiple TimeSeries for %s" % known.name)
          for i, j in enumerate(known):
            if j is None and series[i] is not None:
              known[i] = series[i]
              known_nones -= 1
          # Store known_nones in our cache
          series_best_nones[known.name] = known_nones
        else:
          # Not merging data -
          # we've found a series better than what we've already seen. Update
          # the count cache and replace the given series in the array.
          series_best_nones[known.name] = candidate_nones
          seriesList[known.name] = series
      else:
        if settings.REMOTE_PREFETCH_DATA:
          # if we're using REMOTE_PREFETCH_DATA we can save some time by skipping
          # find, but that means we don't know how many nodes to expect so we
          # have to iterate over all returned results
          continue

        # In case we are merging data - the existing series has no gaps and
        # there is nothing to merge together.  Save ourselves some work here.
        #
        # OR - if we're picking the best series:
        #
        # We already have this series in the seriesList, and the
        # candidate is 'worse' than what we already have, we don't need
        # to compare anything else. Save ourselves some work here.
        break

    else:
      # If we looked at this series above, and it matched a 'known'
      # series already, then it's already in the series list (or ignored).
      # If not, append it here.
      seriesList[series.name] = series

  # Stabilize the order of the results by ordering the resulting series by name.
  # This returns the result ordering to the behavior observed pre PR#1010.
  return [seriesList[k] for k in sorted(seriesList)]
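
The "weak CRDT resolver" above reduces to two small operations: count the Nones in each copy of a series, then either keep the copy with fewer gaps or fill the gaps element-wise. A self-contained sketch with invented names and sample data:

def count_nones(series):
    return sum(1 for v in series if v is None)

def fill_gaps(known, candidate):
    # Element-wise merge, mirroring the REMOTE_STORE_MERGE_RESULTS branch above.
    return [k if k is not None else c for k, c in zip(known, candidate)]

known = [1, None, 3, None]
candidate = [None, 2, 3, 4]
assert count_nones(known) == 2
assert fill_gaps(known, candidate) == [1, 2, 3, 4]
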
Example n. 44
0
    def find_nodes(self, query, timer=None):
        timer.set_msg(
            'host: {host}, query: {query}'.format(
                host=self.host,
                query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(
                url,
                fields=query_params,
                headers=query.headers,
                timeout=settings.FIND_TIMEOUT)

            try:
                if result.getheader('content-type') == 'application/x-msgpack':
                    results = msgpack.load(BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE), encoding='utf-8')
                else:
                    results = unpickle.load(BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE))
            except Exception as err:
                self.fail()
                log.exception(
                    "RemoteFinder[%s] Error decoding find response from %s: %s" %
                    (self.host, result.url_full, err))
                raise Exception("Error decoding find response from %s: %s" % (result.url_full, err))
            finally:
                result.release_conn()

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
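
Rounding startTime and endTime down to a multiple of FIND_CACHE_DURATION means every request that falls inside the same TTL window produces the same cache key, which is what makes the cache.get above pay off. A small sketch of just that bucketing, where bucket_bounds is an invented name:

def bucket_bounds(start_time, end_time, cache_ttl):
    ttl = cache_ttl or 1  # guard against modulo by zero, as above
    start = start_time - (start_time % ttl) if start_time else ""
    end = end_time - (end_time % ttl) if end_time else ""
    return start, end

# With a 300s TTL, any request between t=1200 and t=1499 maps to the same bucket.
assert bucket_bounds(1234, 1499, 300) == (1200, 1200)
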
Example n. 45
0
    def send(self, headers=None, msg_setter=None):
        log.debug("FindRequest.send(host=%s, query=%s) called" %
                  (self.store.host, self.query))

        if headers is None:
            headers = {}

        results = cache.get(self.cacheKey)
        if results is not None:
            log.debug(
                "FindRequest.send(host=%s, query=%s) using cached result" %
                (self.store.host, self.query))
        else:
            url = "%s://%s/metrics/find/" % (
                'https' if settings.INTRACLUSTER_HTTPS else 'http',
                self.store.host)

            query_params = [
                ('local', '1'),
                ('format', 'pickle'),
                ('query', self.query.pattern),
            ]
            if self.query.startTime:
                query_params.append(('from', self.query.startTime))

            if self.query.endTime:
                query_params.append(('until', self.query.endTime))

            try:
                result = http.request(
                    'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                    url,
                    fields=query_params,
                    headers=headers,
                    timeout=settings.REMOTE_FIND_TIMEOUT)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception during request"
                    % (self.store.host, self.query))
                self.store.fail()
                return

            if result.status != 200:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) error response %d from %s?%s"
                    % (self.store.host, self.query, result.status, url,
                       urlencode(query_params)))
                self.store.fail()
                return

            try:
                results = unpickle.loads(result.data)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception processing response"
                    % (self.store.host, self.query))
                self.store.fail()
                return

            cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

        msg_setter('host: {host}, query: {query}'.format(host=self.store.host,
                                                         query=self.query))

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet([
                    Interval(interval[0], interval[1])
                    for interval in intervals
                ])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self.store,
                                      node_info,
                                      bulk_query=[self.query.pattern])
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
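
The loop over results normalizes two wire formats into one shape: 1.x responses carry path and is_leaf, 0.9.x responses carry metric_path and isLeaf. The normalization on its own, with the IntervalSet conversion left out and normalize_node_info an invented name:

def normalize_node_info(node_info):
    # Accept either 1.x keys (path, is_leaf) or 0.9.x keys (metric_path, isLeaf).
    return {
        'path': node_info.get('path') or node_info.get('metric_path'),
        'is_leaf': node_info.get('is_leaf') or node_info.get('isLeaf'),
        'intervals': node_info.get('intervals') or [],
    }

assert normalize_node_info({'metric_path': 'a.b.c', 'isLeaf': True}) == {
    'path': 'a.b.c', 'is_leaf': True, 'intervals': []}
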
Example n. 46
0
    def find_nodes(self, query, timer=None):
        timer.set_msg('host: {host}, query: {query}'.format(host=self.host,
                                                            query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" %
                  (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(
            query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result"
                % (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(url,
                                  fields=query_params,
                                  headers=query.headers,
                                  timeout=settings.REMOTE_FIND_TIMEOUT)

            try:
                if result.getheader('content-type') == 'application/x-msgpack':
                    results = msgpack.load(BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE),
                                           encoding='utf-8')
                else:
                    results = unpickle.load(
                        BufferedHTTPReader(
                            result, buffer_size=settings.REMOTE_BUFFER_SIZE))
            except Exception as err:
                self.fail()
                log.exception(
                    "RemoteFinder[%s] Error decoding find response from %s: %s"
                    % (self.host, result.url_full, err))
                raise Exception("Error decoding find response from %s: %s" %
                                (result.url_full, err))
            finally:
                result.release_conn()

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet([
                    Interval(interval[0], interval[1])
                    for interval in intervals
                ])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
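
The decode step above dispatches on the response content-type: msgpack when the remote advertises application/x-msgpack, pickle otherwise. The same decision reduced to a pure function; decode_find_response and the stand-in decoders are illustrative only, since graphite-web actually streams the body through BufferedHTTPReader into msgpack.load or unpickle.load:

def decode_find_response(content_type, body, msgpack_loads, pickle_loads):
    if content_type == 'application/x-msgpack':
        return msgpack_loads(body)
    return pickle_loads(body)

# Usage with stand-in decoders:
assert decode_find_response('application/x-msgpack', b'...',
                            lambda b: 'msgpack', lambda b: 'pickle') == 'msgpack'
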
Example n. 47
0
    def tagdb_auto_complete_values(self,
                                   exprs,
                                   tag,
                                   valuePrefix=None,
                                   limit=None,
                                   requestContext=None):
        log.debug(
            'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends'
        )

        if requestContext is None:
            requestContext = {}

        jobs = []
        use_tagdb = False
        for finder in self.get_finders(requestContext.get('localOnly')):
            if getattr(finder, 'tags', False):
                jobs.append(
                    Job(finder.auto_complete_values,
                        exprs,
                        tag,
                        valuePrefix=valuePrefix,
                        limit=limit,
                        requestContext=requestContext))
            else:
                use_tagdb = True

        if not jobs:
            if not use_tagdb:
                return []

            return self.tagdb.auto_complete_values(
                exprs,
                tag,
                valuePrefix=valuePrefix,
                limit=limit,
                requestContext=requestContext)

        # start finder jobs
        jobs = self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT)

        results = set()

        # if we're using the local tagdb then execute it (in the main thread so that LocalDatabaseTagDB will work)
        if use_tagdb:
            results.update(
                self.tagdb.auto_complete_values(exprs,
                                                tag,
                                                valuePrefix=valuePrefix,
                                                limit=limit,
                                                requestContext=requestContext))

        done = 0
        errors = 0

        # Start fetches
        start = time.time()
        try:
            for job in jobs:
                done += 1

                if job.exception:
                    errors += 1
                    log.info(
                        "Autocomplete values for %s %s %s failed after %fs: %s"
                        % (str(exprs), tag, valuePrefix
                           or '', time.time() - start, str(job.exception)))
                    continue

                log.debug(
                    "Got an autocomplete result for %s %s %s after %fs" %
                    (str(exprs), tag, valuePrefix or '', time.time() - start))
                results.update(job.result)
        except PoolTimeoutError:
            raise Exception(
                "Timed out in autocomplete values for %s %s %s after %fs" %
                (str(exprs), tag, valuePrefix or '', time.time() - start))

        if errors == done:
            if errors == 1:
                raise Exception(
                    "Autocomplete values for %s %s %s failed: %s" %
                    (str(exprs), tag, valuePrefix or '', str(job.exception)))
            raise Exception(
                'All autocomplete value requests failed for %s %s %s' %
                (str(exprs), tag, valuePrefix or ''))

        # sort & limit results
        results = sorted(results)
        if limit:
            results = results[:int(limit)]

        log.debug("Got all autocomplete value results for %s %s %s in %fs" %
                  (str(exprs), tag, valuePrefix or '', time.time() - start))
        return results
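
After the fan-out, the result handling is a set union followed by sort-and-limit, so duplicate values returned by several backends (or by the local tagdb) collapse to a single entry. A reduced sketch with invented sample data:

def combine_autocomplete_values(result_sets, limit=None):
    values = set()
    for result in result_sets:
        values.update(result)
    values = sorted(values)
    if limit:
        values = values[:int(limit)]
    return values

assert combine_autocomplete_values([{'prod', 'dev'}, {'dev', 'stage'}],
                                   limit=2) == ['dev', 'prod']
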
Example n. 48
0
    def fetch(self, patterns, startTime, endTime, now, requestContext):
        # deduplicate patterns
        patterns = sorted(set(patterns))

        if not patterns:
            return []

        log.debug(
            'graphite.storage.Store.fetch :: Starting fetch on all backends')

        jobs = []
        tag_patterns = None
        pattern_aliases = defaultdict(list)
        for finder in self.get_finders(requestContext.get('localOnly')):
            # if the finder supports tags, just pass the patterns through
            if getattr(finder, 'tags', False):
                jobs.append(
                    Job(finder.fetch,
                        patterns,
                        startTime,
                        endTime,
                        now=now,
                        requestContext=requestContext))
                continue

            # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table
            if tag_patterns is None:
                tag_patterns, pattern_aliases = self._tag_patterns(
                    patterns, requestContext)

            # dispatch resolved patterns to finder
            jobs.append(
                Job(finder.fetch,
                    tag_patterns,
                    startTime,
                    endTime,
                    now=now,
                    requestContext=requestContext))

        results = []

        done = 0
        errors = 0

        # Start fetches
        start = time.time()
        try:
            for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT):
                done += 1

                if job.exception:
                    errors += 1
                    log.info("Fetch for %s failed after %fs: %s" %
                             (str(patterns), time.time() - start,
                              str(job.exception)))
                    continue

                log.debug("Got a fetch result for %s after %fs" %
                          (str(patterns), time.time() - start))
                results.extend(job.result)
        except PoolTimeoutError:
            log.info("Timed out in fetch after %fs" % (time.time() - start))

        if errors == done:
            if errors == 1:
                raise Exception("Fetch for %s failed: %s" %
                                (str(patterns), str(job.exception)))
            raise Exception('All fetches failed for %s' % (str(patterns)))

        # translate path expressions for responses from resolved seriesByTag patterns
        for result in results:
            if result['name'] == result['pathExpression'] and result[
                    'pathExpression'] in pattern_aliases:
                for pathExpr in pattern_aliases[result['pathExpression']]:
                    newresult = deepcopy(result)
                    newresult['pathExpression'] = pathExpr
                    results.append(newresult)

        log.debug("Got all fetch results for %s in %fs" %
                  (str(patterns), time.time() - start))
        return results
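
When seriesByTag patterns are resolved before dispatch, finders answer with the resolved pattern as the pathExpression, and the final loop copies each such result once per original alias so render functions still see the expression the user wrote. The translation step in isolation, with translate_aliases an invented name; it appends into a copy so the loop does not grow the list it is iterating:

from copy import deepcopy

def translate_aliases(results, pattern_aliases):
    translated = list(results)
    for result in results:
        if result['name'] == result['pathExpression'] and \
                result['pathExpression'] in pattern_aliases:
            for path_expr in pattern_aliases[result['pathExpression']]:
                new_result = deepcopy(result)
                new_result['pathExpression'] = path_expr
                translated.append(new_result)
    return translated

results = [{'name': 'a.b', 'pathExpression': 'a.b', 'values': [1]}]
aliases = {'a.b': ["seriesByTag('name=a.b')"]}
assert translate_aliases(results, aliases)[1]['pathExpression'] == "seriesByTag('name=a.b')"
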
Example n. 49
0
    def send(self, headers=None, msg_setter=None):
        log.debug(
            "FindRequest.send(host=%s, query=%s) called" %
            (self.store.host, self.query))

        if headers is None:
            headers = {}

        results = cache.get(self.cacheKey)
        if results is not None:
            log.debug(
                "FindRequest.send(host=%s, query=%s) using cached result" %
                (self.store.host, self.query))
        else:
            url = "%s://%s/metrics/find/" % (
                'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

            query_params = [
                ('local', '1'),
                ('format', 'pickle'),
                ('query', self.query.pattern),
            ]
            if self.query.startTime:
                query_params.append(('from', self.query.startTime))

            if self.query.endTime:
                query_params.append(('until', self.query.endTime))

            try:
                result = http.request(
                    'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                    url,
                    fields=query_params,
                    headers=headers,
                    timeout=settings.REMOTE_FIND_TIMEOUT)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception during request" %
                    (self.store.host, self.query))
                self.store.fail()
                return

            if result.status != 200:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) error response %d from %s?%s" %
                    (self.store.host, self.query, result.status, url, urlencode(query_params)))
                self.store.fail()
                return

            try:
                results = unpickle.loads(result.data)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception processing response" %
                    (self.store.host, self.query))
                self.store.fail()
                return

            cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

        msg_setter(
            'host: {host}, query: {query}'.format(
                host=self.store.host,
                query=self.query))

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(
                    self.store, node_info, bulk_query=[
                        self.query.pattern])
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
Example n. 50
0
def _merge_results(pathExpr, startTime, endTime, prefetched, seriesList, requestContext):
  log.debug("render.datalib.fetchData :: starting to merge")

  # Used as a cache to avoid recounting series None values below.
  series_best_nones = {}

  for path, results in prefetched:
    if not results:
      log.debug("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (path, startTime, endTime))
      continue

    try:
      (timeInfo, values) = results
    except ValueError as e:
      raise Exception("could not parse timeInfo/values from metric '%s': %s" % (path, e))
    (start, end, step) = timeInfo

    series = TimeSeries(path, start, end, step, values, xFilesFactor=requestContext.get('xFilesFactor'))

    # hack to pass expressions through to render functions
    series.pathExpression = pathExpr

    if series.name in seriesList:
      # This counts the Nones in each series, and is unfortunately O(n) for each
      # series, which may be worth further optimization. The value of doing this
      # at all is to avoid the "flipping" effect of loading a graph multiple times
      # and having inconsistent data returned if one of the backing stores has
      # inconsistent data. This is imperfect as a validity test, but in practice
      # nicely keeps us using the "most complete" dataset available. Think of it
      # as a very weak CRDT resolver.
      candidate_nones = 0
      if not settings.REMOTE_STORE_MERGE_RESULTS:
        candidate_nones = len(
          [val for val in values if val is None])

      known = seriesList[series.name]
      # To avoid repeatedly recounting the 'Nones' in series we've already seen,
      # cache the best known count so far in a dict.
      if known.name in series_best_nones:
        known_nones = series_best_nones[known.name]
      else:
        known_nones = len([val for val in known if val is None])
        series_best_nones[known.name] = known_nones

      if known_nones > candidate_nones and len(series):
        if settings.REMOTE_STORE_MERGE_RESULTS and len(series) == len(known):
          # This series has potential data that might be missing from
          # earlier series.  Attempt to merge in useful data and update
          # the cache count.
          log.debug("Merging multiple TimeSeries for %s" % known.name)
          for i, j in enumerate(known):
            if j is None and series[i] is not None:
              known[i] = series[i]
              known_nones -= 1
          # Store known_nones in our cache
          series_best_nones[known.name] = known_nones
        else:
          # Not merging data -
          # we've found a series better than what we've already seen. Update
          # the count cache and replace the given series in the array.
          series_best_nones[known.name] = candidate_nones
          seriesList[known.name] = series

    else:
      # If we looked at this series above, and it matched a 'known'
      # series already, then it's already in the series list (or ignored).
      # If not, append it here.
      seriesList[series.name] = series

  # Stabilize the order of the results by ordering the resulting series by name.
  # This returns the result ordering to the behavior observed pre PR#1010.
  return [seriesList[k] for k in sorted(seriesList)]
Example n. 51
0
    def find_nodes(self, query, timer=None):
        timer.set_msg(
            'host: {host}, query: {query}'.format(
                host=self.host,
                query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(
                url,
                fields=query_params,
                headers=query.headers,
                timeout=settings.FIND_TIMEOUT)

            results = self.deserialize(result)

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        # We don't use a generator here; this function may be run as a job in a thread pool,
        # and returning a generator has the following risks:
        # 1. Generators are lazy: if the job never iterates the returned generator, the real
        #    work (time-consuming network operations) is likely to run in the calling thread
        #    instead, losing the benefit of the thread pool;
        # 2. Because execution is delayed, the job manager cannot catch runtime exceptions
        #    from the job as expected/designed.
        nodes = []
        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            nodes.append(node)

        return nodes
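
The comment about generators is a general point about submitting work to a thread pool: a generator function returns immediately, so the real work and any exception only happen when somebody iterates the result, often back on the calling thread. A minimal illustration using only the standard library, unrelated to graphite itself:

from concurrent.futures import ThreadPoolExecutor

def lazy_find():
    raise RuntimeError("network failure")
    yield  # the yield makes this a generator function; nothing runs on submit

def eager_find():
    raise RuntimeError("network failure")

with ThreadPoolExecutor(max_workers=1) as pool:
    # Submitting the generator function "succeeds": the worker only builds a
    # generator object and the failure is deferred.
    gen = pool.submit(lazy_find).result()
    # The eager version fails inside the job, where the pool (and a job
    # manager like graphite's) can actually observe the exception.
    try:
        pool.submit(eager_find).result()
    except RuntimeError:
        pass
    # Only consuming the lazy result triggers the deferred failure, possibly
    # far from the pool and on the calling thread.
    try:
        list(gen)
    except RuntimeError:
        pass
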