Example #1
def __init__(self, host, port, table_prefix):
    # set up client
    self.metaTable = table_prefix + "META"
    self.dataTable = table_prefix + "DATA"
    socket = TSocket.TSocket(host, port)
    self.transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.client = Client(protocol)
    self.transport.open()
    # ensure both our tables exist
    tables = self.client.getTableNames()
    if self.metaTable not in tables:
        self.client.createTable(self.metaTable, [ColumnDescriptor("cf:")])
        # add counter record
        self.client.atomicIncrement(self.metaTable, "CTR", "cf:CTR", 1)
    if self.dataTable not in tables:
        self.client.createTable(self.dataTable, [ColumnDescriptor("cf:")])
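Both the constructor above and the full class in Example #2 reference names defined elsewhere in the module: the Thrift transport classes, the generated HBase Client, ColumnDescriptor and Mutation types, the KEY_FMT / VAL_FMT struct formats, and project helpers such as the TSDB base class, util.aggregate, match_entries and HbaseReader. A minimal sketch of the module-level setup they presumably assume is shown below; the package path of the Thrift-generated bindings and the exact struct formats are guesses for illustration, not taken from the original project.

# Assumed module-level setup for the snippets on this page (illustrative only).
import json
import struct
import time

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
# paths of the Thrift-generated HBase bindings vary between installs; these are assumptions
from hbase.Hbase import Client
from hbase.ttypes import ColumnDescriptor, Mutation

# assumed struct formats: the data-table row key packs (archiveId, slot) and the
# cf:d cell packs (timestamp, value)
KEY_FMT = ">LL"
VAL_FMT = ">Ld"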
Example #2
class HbaseTSDB(TSDB):
    __slots__ = ("transport", "client", "metaTable", "dataTable")

    def __init__(self, host, port, table_prefix):
        # set up client
        self.metaTable = table_prefix + "META"
        self.dataTable = table_prefix + "DATA"
        socket = TSocket.TSocket(host, port)
        self.transport = TTransport.TBufferedTransport(socket)
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Client(protocol)
        self.transport.open()
        # ensure both our tables exist
        tables = self.client.getTableNames()
        if self.metaTable not in tables:
            self.client.createTable(self.metaTable, [ColumnDescriptor("cf:")])
            # add counter record
            self.client.atomicIncrement(self.metaTable, "CTR", "cf:CTR", 1)
        if self.dataTable not in tables:
            self.client.createTable(self.dataTable, [ColumnDescriptor("cf:")])

    # returns info for the underlying db (including 'aggregationMethod')
    #
    # info is returned in the format
    # info = {
    #   'aggregationMethod' : aggregationMethod,
    #   'maxRetention' : maxRetention,
    #   'xFilesFactor' : xff,
    #   'archives' : archives,
    # }
    # where archives is a list of
    # archiveInfo = {
    #   'archiveId' : unique id allocated from the CTR counter row,
    #   'secondsPerPoint' : secondsPerPoint,
    #   'points' : number of points the archive holds,
    #   'retention' : secondsPerPoint * points,
    # }
    #
    def info(self, metric):
        # info is stored as a serialized JSON map in the meta table under row "m_<metric>", column cf:INFO
        key = "m_" + metric
        result = self.client.get(self.metaTable, key, "cf:INFO", None)
        if len(result) == 0:
            raise Exception("No metric " + metric)
        return json.loads(result[0].value)

    # aggregationMethod specifies the method to use when propagating data (see ``whisper.aggregationMethods``)
    # xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur.  If None, the existing xFilesFactor for the metric is not changed
    def setAggregationMethod(self, metric, aggregationMethod, xFilesFactor=None):
        currInfo = self.info(metric)
        currInfo["aggregationMethod"] = aggregationMethod
        if xFilesFactor is not None:  # None means "leave the existing xFilesFactor unchanged"
            currInfo["xFilesFactor"] = xFilesFactor

        infoJson = json.dumps(currInfo)
        self.client.mutateRow(self.metaTable, "m_" + metric, [Mutation(column="cf:INFO", value=infoJson)], None)

    # archiveList is a list of archives, each of which is of the form (secondsPerPoint,numberOfPoints)
    # xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur
    # aggregationMethod specifies the function to use when propagating data (see ``whisper.aggregationMethods``)
    def create(self, metric, archiveList, xFilesFactor, aggregationMethod, isSparse, doFallocate):

        # for a in archiveList:
        #    a['archiveId'] = (self.client.atomicIncrement(self.metaTable,"CTR","cf:CTR",1))

        archiveMapList = [
            {
                "archiveId": (self.client.atomicIncrement(self.metaTable, "CTR", "cf:CTR", 1)),
                "secondsPerPoint": a[0],
                "points": a[1],
                "retention": a[0] * a[1],
            }
            for a in archiveList
        ]
        # newId = self.client.atomicIncrement(self.metaTable,"CTR","cf:CTR",1)

        oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList])
        # then write the metanode
        info = {
            "aggregationMethod": aggregationMethod,
            "maxRetention": oldest,
            "xFilesFactor": xFilesFactor,
            "archives": archiveMapList,
        }
        self.client.mutateRow(self.metaTable, "m_" + metric, [Mutation(column="cf:INFO", value=json.dumps(info))], None)
        # finally, ensure links exist
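        # e.g. for metric "a.b.c" this writes column cf:c_a on row ROOT pointing at row m_a,
        # cf:c_b on m_a pointing at m_a.b, and cf:c_c on m_a.b pointing at m_a.b.c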
        metric_parts = metric.split(".")
        priorParts = ""
        for part in metric_parts:
            # if parent is empty, special case for root
            if priorParts == "":
                metricParentKey = "ROOT"
                metricKey = "m_" + part
                priorParts = part
            else:
                metricParentKey = "m_" + priorParts
                metricKey = "m_" + priorParts + "." + part
                priorParts += "." + part

            # make sure parent of this node exists and is linked to us
            parentLink = self.client.get(self.metaTable, metricParentKey, "cf:c_" + part, None)
            if len(parentLink) == 0:
                self.client.mutateRow(
                    self.metaTable, metricParentKey, [Mutation(column="cf:c_" + part, value=metricKey)], None
                )

    # points is a list of (timestamp,value) points
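    # the loop below walks archives from finest to coarsest, so points are expected to be
    # ordered newest-first (as whisper's update_many orders them); older points then fall
    # through to the coarser archives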
    def update_many(self, metric, points):
        info = self.info(metric)
        now = int(time.time())
        archives = iter(info["archives"])
        currentArchive = next(archives)  # use the next() builtin so this works on Python 2 and 3
        currentPoints = []

        for point in points:
            age = now - point[0]

            while currentArchive["retention"] < age:  # we can't fit any more points in this archive
                if currentPoints:  # commit all the points we've found that it can fit
                    currentPoints.reverse()  # put points in chronological order
                    self.__archive_update_many(info, currentArchive, currentPoints)
                    currentPoints = []
                try:
                    currentArchive = next(archives)
                except StopIteration:
                    currentArchive = None
                    break

            if not currentArchive:
                break  # drop remaining points that don't fit in the database

            currentPoints.append(point)

        if currentArchive and currentPoints:  # don't forget to commit after we've checked all the archives
            currentPoints.reverse()
            self.__archive_update_many(info, currentArchive, currentPoints)

    def __archive_update_many(self, info, archive, points):
        numPoints = archive["points"]
        step = archive["secondsPerPoint"]
        archiveId = archive["archiveId"]
        alignedPoints = [(timestamp - (timestamp % step), value) for (timestamp, value) in points]
        alignedPoints = dict(alignedPoints).items()  # Take the last val of duplicates

        for timestamp, value in alignedPoints:
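            # each archive is a fixed-size ring in the data table: a point lives at row key
            # (archiveId, slot) where slot = (timestamp / step) % numPoints, so writes for a
            # new cycle overwrite the expired point that previously occupied that slot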
            slot = int((timestamp / step) % numPoints)
            rowkey = struct.pack(KEY_FMT, archiveId, slot)
            rowval = struct.pack(VAL_FMT, timestamp, value)
            self.client.mutateRow(self.dataTable, rowkey, [Mutation(column="cf:d", value=rowval)], None)

        # Now we propagate the updates to lower-precision archives
        higher = archive
        lowerArchives = [arc for arc in info["archives"] if arc["secondsPerPoint"] > archive["secondsPerPoint"]]

        for lower in lowerArchives:
            fit = lambda i: i - (i % lower["secondsPerPoint"])
            lowerIntervals = [fit(p[0]) for p in alignedPoints]
            uniqueLowerIntervals = set(lowerIntervals)
            propagateFurther = False
            for interval in uniqueLowerIntervals:
                if self.__propagate(info, interval, higher, lower):
                    propagateFurther = True

            if not propagateFurther:
                break
            higher = lower

    def __propagate(self, info, timestamp, higher, lower):
        aggregationMethod = info["aggregationMethod"]
        xff = info["xFilesFactor"]

        # we want to update the items from higher between these two
        intervalStart = timestamp - (timestamp % lower["secondsPerPoint"])
        intervalEnd = intervalStart + lower["secondsPerPoint"]

        # __archive_fetch expects the archive dict and returns (timeInfo, values); we only need the values
        (_, higherResData) = self.__archive_fetch(higher, intervalStart, intervalEnd)

        known_datapts = [v for v in higherResData if v is not None]  # strip out "nones"
        if (float(len(known_datapts)) / len(higherResData)) > xff:  # we have enough data, so propagate downwards
            aggregateValue = util.aggregate(aggregationMethod, known_datapts)
            lowerSlot = (timestamp // lower["secondsPerPoint"]) % lower["points"]
            rowkey = struct.pack(KEY_FMT, lower["archiveId"], lowerSlot)
            rowval = struct.pack(VAL_FMT, timestamp, aggregateValue)
            self.client.mutateRow(self.dataTable, rowkey, [Mutation(column="cf:d", value=rowval)], None)
            return True
        return False

    # returns (timeInfo, values) covering the two times, where timeInfo is (startTime, endTime, step)
    # and values has length (endTime - startTime) / secondsPerPoint.
    # startTime and endTime should be aligned with secondsPerPoint for proper results
    def __archive_fetch(self, archive, startTime, endTime):
        step = archive["secondsPerPoint"]
        numPoints = archive["points"]
        startTime = int(startTime - (startTime % step) + step)
        endTime = int(endTime - (endTime % step) + step)
        startSlot = int((startTime / step) % numPoints)
        endSlot = int((endTime / step) % numPoints)
        if startSlot > endSlot:  # we wrapped so make 2 queries
            ranges = [(0, endSlot + 1), (startSlot, numPoints)]
        else:
            ranges = [(startSlot, endSlot + 1)]
        # one result slot per step; results from both key ranges (if we wrapped) land in the same list
        numSlots = (endTime - startTime) // step
        ret = [None] * numSlots
        for t in ranges:
            startkey = struct.pack(KEY_FMT, archive["archiveId"], t[0])
            endkey = struct.pack(KEY_FMT, archive["archiveId"], t[1])
            scannerId = self.client.scannerOpenWithStop(self.dataTable, startkey, endkey, ["cf:d"], None)

            for row in self.client.scannerGetList(scannerId, 100000):
                (timestamp, value) = struct.unpack(VAL_FMT, row.columns["cf:d"].value)
                if startTime <= timestamp <= endTime:
                    returnslot = ((timestamp - startTime) // step) % numSlots
                    ret[returnslot] = value
            self.client.scannerClose(scannerId)
        timeInfo = (startTime, endTime, step)
        return timeInfo, ret

    def exists(self, metric):
        return len(self.client.getRow(self.metaTable, "m_" + metric, None)) > 0

    # fromTime is an epoch time
    # untilTime is also an epoch time, but defaults to now.
    #
    # Returns a tuple of (timeInfo, valueList)
    # where timeInfo is itself a tuple of (fromTime, untilTime, step)
    # Returns None if no data can be returned
    def fetch(self, info, fromTime, untilTime):
        now = int(time.time())
        if untilTime is None:
            untilTime = now
        fromTime = int(fromTime)
        untilTime = int(untilTime)
        if untilTime > now:
            untilTime = now
        if fromTime > untilTime:
            raise Exception("Invalid time interval: from time '%s' is after until time '%s'" % (fromTime, untilTime))

        if fromTime > now:  # from time in the future
            return None
        oldestTime = now - info["maxRetention"]
        if fromTime < oldestTime:
            fromTime = oldestTime
        # pick the finest-resolution archive whose retention still covers the requested range
        diff = now - fromTime
        for archive in info["archives"]:
            if archive["retention"] >= diff:
                break
        return self.__archive_fetch(archive, fromTime, untilTime)

    # returns [ start, end ] where start,end are unixtime ints
    def get_intervals(self, metric):
        start = time.time() - self.info(metric)["maxRetention"]
        end = time.time()
        return [start, end]

    # yields graphite Node objects (LeafNode / BranchNode) matching the query pattern
    def find_nodes(self, query):
        # break query into parts
        clean_pattern = query.pattern.replace("\\", "")
        pattern_parts = clean_pattern.split(".")
        return self._find_paths("ROOT", pattern_parts)

    def _find_paths(self, currNodeRowKey, patterns):
        """Recursively generates absolute paths whose components underneath current_node
        match the corresponding pattern in patterns"""
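        # The metric tree lives in the meta table: every node is a row whose "cf:c_<child>"
        # columns point at the child rows, and leaf rows additionally carry a "cf:INFO"
        # column holding the serialized archive info (see create() above).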

        from graphite.node import BranchNode, LeafNode
        from graphite.intervals import Interval, IntervalSet

        pattern = patterns[0]
        patterns = patterns[1:]

        nodeRow = self.client.getRow(self.metaTable, currNodeRowKey, None)
        if len(nodeRow) == 0:
            return

        subnodes = {}
        for k, v in nodeRow[0].columns.items():
            if k.startswith("cf:c_"):  # branches start with c_
                key = k[len("cf:c_"):]  # pop off the "cf:c_" prefix (keeps names containing "_" intact)
                subnodes[key] = v.value

        matching_subnodes = match_entries(subnodes.keys(), pattern)

        if patterns:  # we've still got more directories to traverse
            for subnode in matching_subnodes:
                rowKey = subnodes[subnode]
                subNodeContents = self.client.getRow(self.metaTable, rowKey, None)

                # leafs have a cf:INFO column describing their data
                # we can't possibly match on a leaf here because we have more components in the pattern,
                # so only recurse on branches
                if "cf:INFO" not in subNodeContents[0].columns:
                    for m in self._find_paths(rowKey, patterns):
                        yield m

        else:  # at the end of the pattern
            for subnode in matching_subnodes:
                rowKey = subnodes[subnode]
                nodeRow = self.client.getRow(self.metaTable, rowKey, None)
                if len(nodeRow) == 0:
                    continue
                metric = rowKey.split("_", 1)[1]  # pop off the "m_" prefix to recover the metric path
                if "cf:INFO" in nodeRow[0].columns:
                    info = json.loads(nodeRow[0].columns["cf:INFO"].value)
                    start = time.time() - info["maxRetention"]
                    end = time.time()
                    intervals = IntervalSet([Interval(start, end)])
                    reader = HbaseReader(metric, intervals, info, self)
                    yield LeafNode(metric, reader)
                else:
                    yield BranchNode(metric)
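
For context, here is a rough usage sketch of the class above. The host, port, table prefix, metric name and retention scheme are invented for illustration (HBase's Thrift gateway conventionally listens on port 9090), and it simply exercises exists(), create(), update_many() and fetch() in the order a caller would.

import time

# hypothetical wiring; point this at a real HBase Thrift gateway
db = HbaseTSDB("localhost", 9090, "graphite_")

metric = "servers.web01.cpu.user"
if not db.exists(metric):
    # two archives: 10s resolution for a day, 60s resolution for a week
    db.create(metric, [(10, 8640), (60, 10080)],
              xFilesFactor=0.5, aggregationMethod="average",
              isSparse=False, doFallocate=False)

now = int(time.time())
# (timestamp, value) pairs, newest first
db.update_many(metric, [(now, 0.42), (now - 10, 0.40)])

# fetch() takes the metric's info dict plus a time range and returns (timeInfo, values)
(timeInfo, values) = db.fetch(db.info(metric), now - 3600, None)
(start, end, step) = timeInfo
print("%d slots at %ds resolution" % (len(values), step))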