Example #1
    def _fetch_list_locked(self, url, query_string, query_params, headers):
        url_full = "%s?%s" % (url, query_string)

        jobs = [(self._fetch, url, query_string, query_params, headers)]
        q = pool_apply(self.store.finder.worker_pool(), jobs)

        log.debug('RemoteReader:: Storing FetchInProgress for %s' % url_full)
        return FetchInProgress(_Results(q))
Example #2
    def fetch(self, startTime, endTime, now=None, requestContext=None):
        seriesList = self.fetch_list(startTime, endTime, now, requestContext)

        def _fetch(seriesList):
            if seriesList is None:
                return None

            for series in seriesList:
                if series['name'] == self.metric_path:
                    time_info = (series['start'], series['end'],
                                 series['step'])
                    return (time_info, series['values'])

            return None

        if isinstance(seriesList, FetchInProgress):
            return FetchInProgress(lambda: _fetch(seriesList.waitForResults()))

        return _fetch(seriesList)
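
A note on consuming these readers: fetch() above may hand back either a plain (time_info, values) tuple or a deferred FetchInProgress. Below is a minimal sketch of how a caller might unwrap both cases using the waitForResults() method seen in Example #2; the read_series helper and reader argument are hypothetical, and the import location is an assumption.

from graphite.readers import FetchInProgress  # assumed location of FetchInProgress

def read_series(reader, startTime, endTime):
    result = reader.fetch(startTime, endTime)

    # Deferred results are resolved by waiting on them explicitly.
    if isinstance(result, FetchInProgress):
        result = result.waitForResults()

    if result is None:
        return None

    (start, end, step), values = result
    # Pair each value with its timestamp for convenience.
    return list(zip(range(start, end, step), values))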
Example #3
    def fetch(self, startTime, endTime):
        query_params = [('target', self.query), ('format', 'pickle'),
                        ('local', '1'), ('noCache', '1'),
                        ('from', str(int(startTime))),
                        ('until', str(int(endTime)))]
        query_string = urlencode(query_params)
        urlpath = '/render/?' + query_string
        url = "http://%s%s" % (self.store.host, urlpath)

        fetch_result = self.get_inflight_requests(url, urlpath)

        def extract_my_results():
            series = fetch_result.get().get(self.metric_path, None)
            if not series:
                return None
            time_info = (series['start'], series['end'], series['step'])
            return (time_info, series['values'])

        return FetchInProgress(extract_my_results)
Example #4
    def fetch(self, startTime, endTime):
        def get_data():

            if self.shared_reader.node_count > app_settings.OPENTSDB_METRIC_QUERY_LIMIT:
                data = self.shared_reader.get(
                    self.opentsdb_uri,
                    app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL,
                    self.leaf_data,
                    int(startTime),
                    int(endTime),
                )
            else:
                data = requests.get(
                    "%s/query?tsuid=sum:%ds-avg:%s&start=%d&end=%d" % (
                        self.opentsdb_uri,
                        app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL,
                        self.leaf_data['tsuid'],
                        int(startTime),
                        int(endTime),
                    )).json()

            time_info = (startTime, endTime, self.step)
            number_points = int((endTime - startTime) // self.step)
            datapoints = [None for i in range(number_points)]

            for series in data:
                for timestamp, value in series['dps'].items():
                    timestamp = int(timestamp)
                    interval = timestamp - (
                        timestamp %
                        app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL)
                    index = (interval - int(startTime)) // self.step
                    datapoints[index] = value

            return (time_info, datapoints)

        job = app_settings.OPENTSDB_REQUEST_POOL.apply_async(get_data)

        return FetchInProgress(job.get)
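
Example #4 buckets each OpenTSDB datapoint by snapping its timestamp down to the start of its aggregation interval and converting that into an index in the output array. A small worked sketch of that arithmetic, with a hypothetical 60-second interval and two sample points:

AGGREGATION_INTERVAL = 60  # stand-in for app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL
startTime, endTime, step = 1000020, 1000320, 60

number_points = int((endTime - startTime) // step)  # 5 slots
datapoints = [None] * number_points

for timestamp, value in [(1000071, 1.5), (1000130, 2.0)]:
    # Snap the timestamp down to the start of its aggregation bucket ...
    interval = timestamp - (timestamp % AGGREGATION_INTERVAL)
    # ... then translate that bucket start into an offset in the output array.
    index = (interval - startTime) // step
    if 0 <= index < number_points:  # guard against points outside the requested window
        datapoints[index] = value

# datapoints == [1.5, 2.0, None, None, None]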
Example #5
    def fetch(self, startTime, endTime):
        query_params = [('target', self.query), ('format', 'pickle'),
                        ('local', '1'), ('noCache', '1'),
                        ('from', str(int(startTime))),
                        ('until', str(int(endTime)))]
        query_string = urlencode(query_params)
        urlpath = '/render/?' + query_string
        url = "http://%s%s" % (self.store.host, urlpath)

        # Quick cache check up front
        self.clean_cache()
        cached_results = self.request_cache.get(url)
        if cached_results:
            for series in cached_results:
                if series['name'] == self.metric_path:
                    time_info = (series['start'], series['end'],
                                 series['step'])
                    return (time_info, series['values'])

        # Synchronize with other RemoteReaders using the same bulk query.
        # Despite our use of thread synchronization primitives, the common
        # case is for synchronizing asynchronous fetch operations within
        # a single thread.
        (request_lock, completion_event) = self.get_request_locks(url)

        def request_series():
            if request_lock.acquire(
                    False
            ):  # the FetchInProgress that gets waited on waits for the actual completion
                try:
                    log.info("RemoteReader.request_data :: requesting %s" %
                             url)
                    connection = HTTPConnectionWithTimeout(self.store.host)
                    connection.timeout = settings.REMOTE_FETCH_TIMEOUT
                    connection.request('GET', urlpath)
                    response = connection.getresponse()
                    if response.status != 200:
                        raise Exception(
                            "Error response %d %s from %s" %
                            (response.status, response.reason, url))
                    pickled_response = response.read()
                    results = unpickle.loads(pickled_response)
                    with self.cache_lock:
                        self.request_cache[url] = results
                    completion_event.set()
                    return results
                except:
                    completion_event.set()
                    self.store.fail()
                    log.exception("Error requesting %s" % url)
                    raise

            else:  # otherwise we just wait on the completion_event
                completion_event.wait(settings.REMOTE_FETCH_TIMEOUT)
                cached_results = self.request_cache.get(url)
                if cached_results is None:
                    raise Exception(
                        "Passive remote fetch failed to find cached results")
                else:
                    return cached_results

        def extract_my_results():
            for series in request_series():
                if series['name'] == self.metric_path:
                    time_info = (series['start'], series['end'],
                                 series['step'])
                    return (time_info, series['values'])

        return FetchInProgress(extract_my_results)
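
Example #5 depends on a get_request_locks() helper that is not shown. One plausible shape for it, assuming a per-URL registry protected by the same cache_lock used above; this is an illustrative guess, not the project's actual implementation:

from threading import Lock, Event

def get_request_locks(self, url):
    # Hand out one (Lock, Event) pair per bulk-query URL, so only one reader
    # issues the HTTP request while the others wait on the completion event.
    with self.cache_lock:
        if url not in self.request_locks:
            self.request_locks[url] = (Lock(), Event())
        return self.request_locks[url]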
Example #6
    def fetch_list(self, startTime, endTime, now=None, requestContext=None):
        t = time.time()

        query_params = [('target', self.query), ('format', 'pickle'),
                        ('local', '1'), ('from', str(int(startTime))),
                        ('until', str(int(endTime)))]
        if now is not None:
            query_params.append(('now', str(int(now))))

        query_string = urlencode(query_params)
        urlpath = '/render/'
        url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else
                             'http', self.store.host, urlpath)
        headers = requestContext.get(
            'forwardHeaders') if requestContext else None

        cacheKey = "%s?%s" % (url, query_string)

        if requestContext is not None and 'inflight_requests' in requestContext and cacheKey in requestContext[
                'inflight_requests']:
            self.log_debug(
                "RemoteReader:: Returning cached FetchInProgress %s?%s" %
                (url, query_string))
            return requestContext['inflight_requests'][cacheKey]

        if requestContext is None or 'inflight_locks' not in requestContext or cacheKey not in requestContext[
                'inflight_locks']:
            with self.inflight_lock:
                self.log_debug("RemoteReader:: Got global lock %s?%s" %
                               (url, query_string))
                if requestContext is None:
                    requestContext = {}
                if 'inflight_locks' not in requestContext:
                    requestContext['inflight_locks'] = {}
                if 'inflight_requests' not in requestContext:
                    requestContext['inflight_requests'] = {}
                if cacheKey not in requestContext['inflight_locks']:
                    self.log_debug("RemoteReader:: Creating lock %s?%s" %
                                   (url, query_string))
                    requestContext['inflight_locks'][cacheKey] = Lock()
            self.log_debug("RemoteReader:: Released global lock %s?%s" %
                           (url, query_string))

        cacheLock = requestContext['inflight_locks'][cacheKey]

        with cacheLock:
            self.log_debug("RemoteReader:: got url lock %s?%s" %
                           (url, query_string))

            if cacheKey in requestContext['inflight_requests']:
                self.log_debug(
                    "RemoteReader:: Returning cached FetchInProgress %s?%s" %
                    (url, query_string))
                return requestContext['inflight_requests'][cacheKey]

            q = Queue()
            if settings.USE_WORKER_POOL:
                get_pool().apply_async(
                    func=self._fetch,
                    args=[url, query_string, query_params, headers],
                    callback=lambda x: q.put(x),
                )
            else:
                q.put(self._fetch(url, query_string, query_params, headers))

            def retrieve():
                with retrieve.lock:
                    # if the result is known we return it directly
                    if hasattr(retrieve, '_result'):
                        results = getattr(retrieve, '_result')
                        self.log_debug(
                            'RemoteReader:: retrieve completed (cached) %s' %
                            (', '.join([result['path']
                                        for result in results])), )
                        return results

                    # otherwise we get it from the queue and keep it for later
                    results = q.get(block=True)

                    for i in range(len(results)):
                        results[i]['path'] = results[i]['name']

                    if not results:
                        self.log_debug(
                            'RemoteReader:: retrieve has received no results')

                    setattr(retrieve, '_result', results)
                    self.log_debug(
                        'RemoteReader:: retrieve completed %s' %
                        (', '.join([result['path'] for result in results])), )
                    return results

            self.log_debug(
                'RemoteReader:: Storing FetchInProgress with cacheKey {cacheKey}'
                .format(cacheKey=cacheKey), )
            retrieve.lock = Lock()
            data = FetchInProgress(retrieve)
            requestContext['inflight_requests'][cacheKey] = data

        self.log_debug("RemoteReader:: Returning %s?%s in %fs" %
                       (url, query_string, time.time() - t))
        return data
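
The retrieve() closure in Example #6 is essentially a thread-safe, memoized read of a one-shot queue: the first caller blocks on q.get() and stores the response, later callers reuse the stored value. The same idea stripped of the Graphite-specific renaming and logging, as a simplified sketch:

from threading import Lock
from queue import Queue  # Queue.Queue on Python 2

def make_retrieve(q):
    lock = Lock()
    cache = {}

    def retrieve():
        with lock:
            if 'result' not in cache:
                # The first caller blocks until the worker puts the response on the queue.
                cache['result'] = q.get(block=True)
            return cache['result']

    return retrieve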
Example #7
    def fetch(self, startTime, endTime):
        def get_data(startTime, endTime):
            log.info("time range %d-%d" % (startTime, endTime))
            host, metric = self.metric_name.split("com.")
            host += "com"
            s = self.kudu_table.scanner()
            s.add_predicate(s.range_predicate(0, host, host))
            s.add_predicate(s.range_predicate(1, metric, metric))
            s.add_predicate(s.range_predicate(2, startTime, endTime))
            s.open()
            values = []
            while s.has_more_rows():
                t = s.next_batch().as_tuples()
                log.info("metric batch: %d" % len(t))
                values.extend([(time, value) for (_, _, time, value) in t])
            # TODO: project just the time and value, not host/metric!
            values.sort()
            values_length = len(values)

            if values_length == 0:
                time_info = (startTime, endTime, 1)
                datapoints = []
                return (time_info, datapoints)

            startTime = min(t[0] for t in values)
            endTime = max(t[0] for t in values)
            if values_length == 1:
                time_info = (startTime, endTime, 1)
                datapoints = [values[0][1]]
                return (time_info, datapoints)
            log.info("data: %s" % repr(values))

            # 1. Calculate step (in seconds)
            #    Step will be lowest time delta between values or 1 (in case if delta is smaller)
            step = 1
            minDelta = None

            for i in range(0, values_length - 1):
                (timeI, valueI) = values[i]
                (timeIplus1, valueIplus1) = values[i + 1]
                delta = timeIplus1 - timeI

                if minDelta is None or delta < minDelta:
                    minDelta = delta

            if minDelta > step:
                step = minDelta

            # 2. Fill time info table
            time_info = (startTime, endTime, step)

            # 3. Create array of output points
            number_points = int(math.ceil((endTime - startTime) / step))
            datapoints = [None for i in range(number_points)]

            # 4. Fill array of output points
            cur_index = 0
            cur_value = None
            cur_time_stamp = None
            cur_value_used = None

            for i in range(0, number_points):

                data_point_time_stamp = startTime + i * step

                (cur_time_stamp, cur_value) = values[cur_index]

                while (cur_index + 1 < values_length):
                    (next_time_stamp, next_value) = values[cur_index + 1]
                    if next_time_stamp > data_point_time_stamp:
                        break
                    (cur_value, cur_time_stamp,
                     cur_value_used) = (next_value, next_time_stamp, False)
                    cur_index = cur_index + 1

                data_point_value = None
                if (not cur_value_used
                        and cur_time_stamp <= data_point_time_stamp):
                    cur_value_used = True
                    data_point_value = cur_value

                datapoints[i] = data_point_value

            log.info("data: %s" % repr(datapoints))
            return (time_info, datapoints)

        job = KUDU_REQUEST_POOL.apply_async(get_data, [startTime, endTime])
        return FetchInProgress(job.get)
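
The step heuristic in Example #7 scans consecutive timestamps for the smallest gap and uses that as the series resolution, never going below one second. A tiny worked example of the same computation on hypothetical data:

import math

values = [(100, 1.0), (110, 2.0), (125, 3.0), (130, 4.0)]  # sorted (timestamp, value) pairs

deltas = [values[i + 1][0] - values[i][0] for i in range(len(values) - 1)]
step = max(1, min(deltas))  # gaps are 10, 15, 5 -> step == 5

number_points = int(math.ceil((values[-1][0] - values[0][0]) / float(step)))  # ceil(30 / 5) == 6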