Example #1
    def test_histograms(self):
        config = get_config(get_config_path())
        db = CASSANDRA_DB(config)

        ret = db.query_raw_data(path=self.tr.h_ttl_path,
                                ts_min=self.tr.q_start,
                                ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.h_ttl_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_ttl_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_ttl_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_ttl_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_ttl_end_val)

        ret = db.query_raw_data(path=self.tr.h_owd_min_path,
                                ts_min=self.tr.q_start,
                                ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.h_owd_min_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_owd_min_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_owd_min_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_owd_min_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_owd_min_end_val)

        ret = db.query_raw_data(path=self.tr.h_owd_day_path,
                                ts_min=self.tr.q_start,
                                ts_max=self.tr.q_end,
                                freq=self.tr.h_owd_day_freq)

        self.assertEqual(len(ret), self.tr.h_owd_day_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_owd_day_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_owd_day_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_owd_day_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_owd_day_end_val)
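The assertions in Example #1 imply the shape of a query_raw_data() result: a list of dicts ordered oldest to newest, each carrying at least 'ts' and 'val' keys. A minimal sketch of walking such a result (sample values are hypothetical):

    # Result shape implied by the assertions above; sample values are hypothetical.
    ret = [
        {'ts': 1343955600000, 'val': 255},
        {'ts': 1343955630000, 'val': 254},
    ]
    for point in ret:
        print('%d -> %s' % (point['ts'], point['val']))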
Example #2
    def __init__(self, config, qname, persistq):
        PollPersister.__init__(self, config, qname, persistq)
        # The clear-on-testing arg (set in the config file when the testing
        # env var is set) causes the target keyspace and all of its data
        # to be deleted and rebuilt.
        self.log.debug("connecting to cassandra")
        self.db = CASSANDRA_DB(config, qname=qname)
        self.log.debug("connected to cassandra")

        self.ns = "snmp"

        self.oidsets = {}
        self.poller_args = {}
        self.oids = {}

        oidsets = OIDSet.objects.all()

        for oidset in oidsets:
            self.oidsets[oidset.name] = oidset
            d = {}
            if oidset.poller_args:
                for arg in oidset.poller_args.split():
                    (k, v) = arg.split('=')
                    d[k] = v
                self.poller_args[oidset.name] = d

            for oid in oidset.oids.all():
                self.oids[oid.name] = oid
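The constructor in Example #2 turns each OIDSet's space-separated poller_args string into a per-oidset dict. A minimal sketch of that parsing step on its own (the sample string is hypothetical; 'set_name' is the key that Example #18's store() method later reads back):

    # Hypothetical poller_args string; real values come from OIDSet.poller_args.
    poller_args = "period=30 set_name=FastPollHC"

    d = {}
    for arg in poller_args.split():
        (k, v) = arg.split('=')
        d[k] = v

    print(d)  # {'period': '30', 'set_name': 'FastPollHC'}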
Example #3
    def test_histograms(self):
        config = get_config(get_config_path())
        db = CASSANDRA_DB(config)

        ret = db.query_raw_data(
            path=self.tr.h_ttl_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.h_ttl_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_ttl_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_ttl_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_ttl_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_ttl_end_val)

        ret = db.query_raw_data(
            path=self.tr.h_owd_min_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.h_owd_min_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_owd_min_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_owd_min_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_owd_min_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_owd_min_end_val)

        ret = db.query_raw_data(
            path=self.tr.h_owd_day_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end,
            freq=self.tr.h_owd_day_freq
        )

        self.assertEqual(len(ret), self.tr.h_owd_day_len)
        self.assertEqual(ret[0]['ts'], self.tr.h_owd_day_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.h_owd_day_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.h_owd_day_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.h_owd_day_end_val)
Example #4
    def test_a_config_cassandra(self):
        '''
        Clear the database before starting the test run - it takes too long
        to do before each individual test.
        '''
        config = get_config(get_config_path())
        config.db_clear_on_testing = True
        db = CASSANDRA_DB(config)
Example #5
    def test_sys_uptime(self):
        config = get_config(get_config_path())
        q = TestPersistQueue(json.loads(sys_uptime_test_data))
        p = CassandraPollPersister(config, "test", persistq=q)
        p.run()
        p.db.flush()
        p.db.close()

        db = CASSANDRA_DB(config)
        ret = db.query_raw_data(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC', 'sysUpTime'],
            freq=30*1000,
            ts_min=self.ctr.begin*1000,
            ts_max=self.ctr.end*1000)

        ret = ret[0]
        self.assertEqual(ret['ts'], self.ctr.begin * 1000)
        self.assertEqual(ret['val'], 100)
Example #6
    def test_values(self):
        config = get_config(get_config_path())
        db = CASSANDRA_DB(config)
        
        ret = db.query_baserate_timerange(
            path=self.tr.throughput_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.throughput_len)
        self.assertEqual(ret[0]['ts'], self.tr.throughput_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.throughput_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.throughput_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.throughput_end_val)

        ret = db.query_baserate_timerange(
            path=self.tr.packet_dup_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.packet_dup_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_dup_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_dup_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_dup_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_dup_end_val)

        ret = db.query_baserate_timerange(
            path=self.tr.packet_sent_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.packet_sent_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_sent_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_sent_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_sent_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_sent_end_val)

        ret = db.query_baserate_timerange(
            path=self.tr.packet_lost_path,
            ts_min=self.tr.q_start, ts_max=self.tr.q_end
        )

        self.assertEqual(len(ret), self.tr.packet_lost_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_lost_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_lost_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_lost_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_lost_end_val)
Example #7
    def test_a_load_data(self):
        config = get_config(get_config_path())
        config.db_clear_on_testing = True

        db = CASSANDRA_DB(config)

        for dat in hist_data:
            for row in load_test_data(dat):
                db.set_raw_data(RawRateData(**row))

        for dat in rate_data:
            for row in load_test_data(dat):
                db.update_rate_bin(BaseRateBin(**row))

        db.flush()
Example #9
def main():
    usage = '%prog [ -c col_family | -p pattern_to_find (optional) ]'
    parser = OptionParser(usage=usage)
    parser.add_option('-c', '--column', metavar='COLUMN_FAMILY',
            type='string', dest='column_family',  default='raw',
            help='Column family to dump [raw|rate|aggs|stat] (default=%default).')
    parser.add_option('-p', '--pattern', metavar='PATTERN',
            type='string', dest='pattern', default="",
            help='Optional pattern to look for in keys (uses python string.find()).')
    parser.add_option('-l', '--limit', metavar='LIMIT',
            type='int', dest='limit', default=25,
            help='Limit number of keys dumped since a few generally makes the point (default=%default).')
    options, args = parser.parse_args()

    config = get_config(get_config_path())
    # config.cassandra_keyspace = 'test_esmond'

    db = CASSANDRA_DB(config)

    col_fams = {
        'raw': db.raw_data,
        'rate': db.rates,
        'aggs': db.aggs,
        'stat': db.stat_agg
    }

    if options.column_family not in col_fams.keys():
        print '{0} is not a valid column family selection'.format(options.column_family)
        parser.print_help()
        return -1

    count = 0

    for k in col_fams[options.column_family]._column_family.get_range(
        column_count=0, filter_empty=False):
        if count >= options.limit:
            break
        if k[0].find(options.pattern) == -1:
            continue
        print k[0]
        count += 1

    return
Example #10
def main():
    config = get_config(get_config_path())

    db = CASSANDRA_DB(config)

    print 'bogus key, valid time range:',

    path = ['snmp', 'rtr_d', 'FastPollHC', 'ifHCInOctets', 'fxp0.0', 'bogus']

    print check(db, path, begin, end)

    print 'valid key, valid time range:',

    path = ['snmp', 'rtr_d', 'FastPollHC', 'ifHCInOctets', 'fxp0.0']

    print check(db, path, begin, end)

    print 'valid key path, valid AND invalid range keys:',

    print check(db, path, begin, end + 31557600000)
    # print check(db, path, begin-31557600000, end)

    pass
Example #11
    def test_values(self):
        config = get_config(get_config_path())
        db = CASSANDRA_DB(config)

        ret = db.query_baserate_timerange(path=self.tr.throughput_path,
                                          ts_min=self.tr.q_start,
                                          ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.throughput_len)
        self.assertEqual(ret[0]['ts'], self.tr.throughput_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.throughput_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.throughput_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.throughput_end_val)

        ret = db.query_baserate_timerange(path=self.tr.packet_dup_path,
                                          ts_min=self.tr.q_start,
                                          ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.packet_dup_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_dup_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_dup_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_dup_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_dup_end_val)

        ret = db.query_baserate_timerange(path=self.tr.packet_sent_path,
                                          ts_min=self.tr.q_start,
                                          ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.packet_sent_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_sent_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_sent_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_sent_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_sent_end_val)

        ret = db.query_baserate_timerange(path=self.tr.packet_lost_path,
                                          ts_min=self.tr.q_start,
                                          ts_max=self.tr.q_end)

        self.assertEqual(len(ret), self.tr.packet_lost_len)
        self.assertEqual(ret[0]['ts'], self.tr.packet_lost_start_ts)
        self.assertEqual(ret[0]['val'], self.tr.packet_lost_start_val)
        self.assertEqual(ret[-1]['ts'], self.tr.packet_lost_end_ts)
        self.assertEqual(ret[-1]['val'], self.tr.packet_lost_end_val)
Example #12
    def test_range_baserate_query(self):
        """
        Presumed using test data loaded in previous test method.

        Shows the three query methods that return json formatted data.
        """
        config = get_config(get_config_path())
        db = CASSANDRA_DB(config)
        
        start_time = self.ctr.begin*1000
        end_time = self.ctr.end*1000

        ret = db.query_baserate_timerange(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            freq=30*1000,
            ts_min=start_time,
            ts_max=end_time
        )

        self.assertEqual(len(ret), self.ctr.expected_results)
        self.assertEqual(ret[0]['ts'], start_time)
        self.assertEqual(ret[0]['val'], self.ctr.base_rate_val_first)
        self.assertEqual(ret[self.ctr.expected_results-1]['ts'], end_time)
        self.assertEqual(ret[self.ctr.expected_results-1]['val'],
                self.ctr.base_rate_val_last)

        ret = db.query_raw_data(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            freq=30*1000,
            ts_min=start_time,
            ts_max=end_time
        )

        self.assertEqual(len(ret), self.ctr.expected_results - 1)
        self.assertEqual(ret[0]['ts'], self.ctr.raw_ts_first*1000)
        self.assertEqual(ret[0]['val'], self.ctr.raw_val_first)
        self.assertEqual(ret[len(ret)-1]['ts'], self.ctr.raw_ts_last*1000)
        self.assertEqual(ret[len(ret)-1]['val'], self.ctr.raw_val_last)

        ret = db.query_aggregation_timerange(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            ts_min=start_time - 3600*1000,
            ts_max=end_time,
            freq=self.ctr.agg_freq*1000, # required!
            cf='average',  # min | max | average - also required!
        )
        
        self.assertEqual(ret[0]['cf'], 'average')
        self.assertEqual(ret[0]['val'], self.ctr.agg_avg)
        self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)

        ret = db.query_aggregation_timerange(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            ts_min=start_time - 3600*1000,
            ts_max=end_time,
            freq=self.ctr.agg_freq*1000, # required!
            cf='raw',  # raw - rarely used
        )

        self.assertEqual(ret[0]['cf'], 'raw')
        self.assertEqual(ret[0]['val'], self.ctr.agg_raw)
        self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)

        return
        
        ret = db.query_aggregation_timerange(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            ts_min=start_time - 3600*1000,
            ts_max=end_time,
            freq=self.ctr.agg_freq*1000, # required!
            cf='min',  # min | max | average - also required!
        )

        self.assertEqual(ret[0]['cf'], 'min')
        self.assertEqual(ret[0]['val'], self.ctr.agg_min)
        self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)

        ret = db.query_aggregation_timerange(
            path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
            ts_min=start_time - 3600*1000,
            ts_max=end_time,
            freq=self.ctr.agg_freq*1000, # required!
            cf='max',  # min | max | average - also required!
        )
        
        self.assertEqual(ret[0]['cf'], 'max')
        self.assertEqual(ret[0]['val'], self.ctr.agg_max)
        self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)

        db.close()
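Grounded in Example #12, a compact summary of the three read paths and the arguments each requires; db is a CASSANDRA_DB handle as in the test, and the path, timestamps, and aggregation frequency below are placeholders:

    # Summary of the three query methods shown above; values are placeholders.
    path = ['snmp', 'rtr_d', 'FastPollHC', 'ifHCInOctets', 'fxp0.0']
    start_time, end_time = 1343955600 * 1000, 1343957400 * 1000  # ms
    agg_freq = 3600  # seconds

    db.query_baserate_timerange(path=path, freq=30 * 1000,
                                ts_min=start_time, ts_max=end_time)

    db.query_raw_data(path=path, freq=30 * 1000,
                      ts_min=start_time, ts_max=end_time)

    # freq and cf ('average' | 'min' | 'max' | 'raw') are both required here.
    db.query_aggregation_timerange(path=path, cf='average',
                                   freq=agg_freq * 1000,
                                   ts_min=start_time, ts_max=end_time)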
Example #13
    def test_persister_long(self):
        """Make sure the tsdb and cassandra data match"""
        config = get_config(get_config_path())
        test_data = load_test_data("rtr_d_ifhcin_long.json")
        # return
        config.db_clear_on_testing = True
        config.db_profile_on_testing = True

        q = TestPersistQueue(test_data)
        p = CassandraPollPersister(config, "test", persistq=q)
        p.run()
        p.db.flush()
        p.db.close()
        p.db.stats.report('all')
        return
        test_data = load_test_data("rtr_d_ifhcin_long.json")
        q = TestPersistQueue(test_data)
        p = TSDBPollPersister(config, "test", persistq=q)
        p.run()

        path_levels = []

        rtr_d_path = os.path.join(settings.ESMOND_ROOT, "tsdb-data", "rtr_d")
        for (path, dirs, files) in os.walk(rtr_d_path):
            if dirs[0] == 'TSDBAggregates':
                break
            path_levels.append(dirs)

        oidsets = path_levels[0]
        oids    = path_levels[1]
        paths   = path_levels[2]

        full_paths = {}

        for oidset in oidsets:
            for oid in oids:
                for path in paths:
                    full_path = 'rtr_d/%s/%s/%s/TSDBAggregates/30'  % \
                        (oidset, oid, path)
                    if not full_paths.has_key(full_path):
                        full_paths[full_path] = 1

        ts_db = tsdb.TSDB(config.tsdb_root)
        
        config.db_clear_on_testing = False
        db = CASSANDRA_DB(config)

        rates = ColumnFamily(db.pool, db.rate_cf)

        count_bad = 0
        tsdb_aggs = 0

        for p in full_paths.keys():
            v = ts_db.get_var(p)
            device,oidset,oid,path,tmp1,tmp2 = p.split('/')
            path = path.replace("_", "/")
            for d in v.select():
                tsdb_aggs += 1
                key = '%s:%s:%s:%s:%s:%s:%s'  % \
                    (SNMP_NAMESPACE, device,oidset,oid,path,int(tmp2)*1000,
                    datetime.datetime.utcfromtimestamp(d.timestamp).year)

                val = rates.get(key, [d.timestamp*1000])[d.timestamp*1000]
                if d.flags != ROW_VALID:
                    self.assertLess(val['is_valid'], 2)
                else:
                    self.assertLessEqual(abs(val['val'] - d.delta), 1.0)
                    self.assertGreater(val['is_valid'], 0)

        db.close()
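The comparison loop in Example #13 builds Cassandra row keys by hand; based on its format string, a base-rate row key looks like this (sample values are hypothetical, and 'snmp' is the namespace the persister examples use):

    # Row-key layout used by the comparison above; sample values are hypothetical.
    key = '%s:%s:%s:%s:%s:%s:%s' % (
        'snmp',            # SNMP_NAMESPACE
        'rtr_d',           # device
        'FastPollHC',      # oidset
        'ifHCInOctets',    # oid
        'fxp0.0',          # interface path ('_' in the TSDB path becomes '/')
        30 * 1000,         # frequency in ms
        2012,              # year derived from the data point's timestamp
    )
    # -> 'snmp:rtr_d:FastPollHC:ifHCInOctets:fxp0.0:30000:2012'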
Example #14
class CassandraTester:
    def __init__(self, keyspace_name, savedb=True):
        config = get_config(get_config_path())
        if not savedb:
            config.db_clear_on_testing = True

        self.db = CASSANDRA_DB(config)

    def generate_int_data(self, key_prefix, metatdata_key, num_rows, start_ts,
                          end_ts, summary_type, time_int, min_val, max_val):
        row_keys = []
        # data = []
        for n in range(num_rows):
            if metatdata_key is None:
                metatdata_key = uuid.uuid4().hex
            path = [PERFSONAR_NAMESPACE, key_prefix, metatdata_key]
            if summary_type and summary_type != 'base':
                path = path + [summary_type, str(time_int)]
            row_keys.append(
                BaseRateBin(path=path, ts=1).get_meta_key().lower())
            for ts in range(start_ts, end_ts, time_int):
                br = BaseRateBin(path=path,
                                 ts=ts * 1000,
                                 val=random.randint(min_val, max_val),
                                 is_valid=1)
                # data.append({'path': path, 'ts':ts*1000, 'val':random.randint(min_val, max_val), 'is_valid':1})
                self.db.update_rate_bin(br)
        self.db.flush()
        return row_keys

    def generate_histogram_data(self, key_prefix, metatdata_key, num_rows,
                                start_ts, end_ts, summary_type, summ_window,
                                sample_size, bucket_min, bucket_max):
        row_keys = []
        data = []
        for n in range(num_rows):
            if metatdata_key is None:
                metatdata_key = uuid.uuid4().hex
            path = [PERFSONAR_NAMESPACE, key_prefix, metatdata_key]
            if summary_type and summary_type != 'base':
                path = path + [summary_type, str(summ_window)]
            row_keys.append(
                RawRateData(path=path, ts=1).get_meta_key().lower())
            for ts in range(start_ts, end_ts, summ_window):
                histogram = {}
                sample = sample_size
                while (sample > 0):
                    bucket = random.randint(bucket_min, bucket_max)
                    val = random.randint(1, sample)
                    if not histogram.has_key(str(bucket)):
                        histogram[str(bucket)] = val
                    else:
                        histogram[str(bucket)] += val
                    sample -= val
                rr = RawRateData(path=path,
                                 ts=ts * 1000,
                                 val=json.dumps(histogram))
                # data.append({'path':path, 'ts':ts*1000, 'val':json.dumps(histogram)})
                self.db.set_raw_data(rr)
        self.db.flush()
        return row_keys

    def get_data(self, cf_name, key, start_time, end_time, output_json=False):
        cf = ColumnFamily(self.db.pool, cf_name)  # the pool lives on the CASSANDRA_DB instance
        try:
            result = cf.multiget(self.gen_key_range(key, start_time, end_time),
                                 column_start=start_time * 1000,
                                 column_finish=end_time * 1000,
                                 column_count=10000000)
            if output_json:
                self.dump_json(result)
        except NotFoundException:
            pass

    def dump_json(self, db_result):
        time_series = []
        for row in db_result.keys():
            for ts in db_result[row].keys():
                time_series.append({'time': ts, 'value': db_result[row][ts]})
        print json.dumps(time_series)

    def gen_key(self, key, ts):
        year = datetime.datetime.utcfromtimestamp(ts).year
        key = "%s:%d" % (key, year)
        return key.lower()

    def gen_key_range(self, key, start_time, end_time):
        key_range = []
        start_year = datetime.datetime.utcfromtimestamp(start_time).year
        end_year = datetime.datetime.utcfromtimestamp(end_time).year
        year_range = range(start_year, end_year + 1)
        for year in year_range:
            key_range.append("%s:%d" % (key, year))
        return key_range
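A hypothetical driver for the CassandraTester helper above; the keyspace name, key prefix, timestamps, and value range are made up, and the call simply follows the generate_int_data() signature as defined above:

    # Hypothetical usage of the CassandraTester helper defined above.
    tester = CassandraTester('test_keyspace', savedb=False)
    row_keys = tester.generate_int_data(
        key_prefix='throughput',
        metatdata_key=None,   # None -> a random uuid4 hex key is generated
        num_rows=1,
        start_ts=1380000000,
        end_ts=1380003600,
        summary_type='base',
        time_int=30,
        min_val=1000,
        max_val=9000,
    )
    print(row_keys)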
Example #15
def generate_or_update_gap_inventory(limit=0, threshold=0, verbose=False):

    db = CASSANDRA_DB(get_config(get_config_path()))

    gap_duration_lower_bound = datetime.timedelta(seconds=threshold)

    if limit:
        row_inventory = Inventory.objects.filter(
            scan_complete=False).order_by('row_key')[:limit]
    else:
        row_inventory = Inventory.objects.filter(
            scan_complete=False).order_by('row_key')

    count = 1

    inv_count = len(row_inventory)

    for entry in row_inventory:
        print entry
        if verbose:
            print '  *', entry.start_time, ts_epoch(entry.start_time)
            print '  *', entry.end_time, ts_epoch(entry.end_time)
            print '  * inventory item # {0}/{1}'.format(count, inv_count)

        count += 1

        # Check for valid timestamps
        if entry.start_time > entry.end_time:
            print '   * Bad start/end times!'
            entry.issues = 'ifref start_time/end_time mismatch'
            entry.save()
            continue

        ts_start = ts_epoch(entry.start_time)
        ts_end = ts_epoch(entry.end_time)

        # If end_time of current row is in the
        # future (ie: probably when run on the row of the
        # current year), adjust end time arg to an hour ago.
        # Use this in both the query and when setting up fill
        # boundaries.
        #
        # Will also be setting last_scan_point to that
        # value in the main inventory table.

        future_end_time = False
        if ts_end > int(time.time()):
            future_end_time = True
            # fit it to a bin
            ts_end = (int(time.time() - 3600) /
                      entry.frequency) * entry.frequency

        # if last scan point is set, adjust the start time to that
        if entry.last_scan_point != None:
            print '  * setting start to last scan point'
            ts_start = ts_epoch(entry.last_scan_point)

        path = _split_rowkey(entry.row_key)[0:5]

        if sig_handler.interrupted:
            print 'shutting down'
            break

        if entry.get_column_family_display() == 'base_rates':
            data = db.query_baserate_timerange(path=path,
                                               freq=entry.frequency * 1000,
                                               ts_min=ts_start * 1000,
                                               ts_max=ts_end * 1000)

        else:
            # XXX(mmg): figure out what data is being stored
            # in the raw data cf and process accordingly.
            print '  * not processing'
            continue

        if data:
            entry.data_found = True
            print '  * data found'

        # Format the data payload (transform ms timestamps back
        # to seconds and set is_valid = 0 values to None) and
        # build a filled series over the query range out of
        # the returned data.
        formatted_data = QueryUtil.format_data_payload(data)
        filled_data = Fill.verify_fill(ts_start, ts_end, entry.frequency,
                                       formatted_data)

        gaps = find_gaps_in_series(filled_data)

        # Lots of data being passed around, so explicitly clear
        # the lists.
        del filled_data[:]
        del formatted_data[:]
        del data[:]

        if sig_handler.interrupted:
            print 'shutting down'
            break

        for gap in gaps:
            g_start = make_aware(datetime.datetime.utcfromtimestamp(gap[0]),
                                 utc)
            g_end = make_aware(datetime.datetime.utcfromtimestamp(gap[1]), utc)

            # Skip gaps too small to be considered gaps
            if g_end - g_start < gap_duration_lower_bound:
                continue

            if verbose:
                print '  * gap'
                print '   *', g_start
                print '   *', g_end
                print '   * dur: ', g_end - g_start

            # See if there is already an existing gap ending on the
            # current last_scan_point.  If so just "extend" the existing
            # gap (as long as it hasn't been processed) with up to date
            # information rather than creating a new gap entry.
            #
            # This prevents subsequent scans during the current year
            # from creating a bunch of gap_inventory entries for
            # a prolonged gap/inactive interface.
            g = None

            try:
                g = GapInventory.objects.get(row=entry,
                                             end_time=entry.last_scan_point,
                                             processed=False)
            except ObjectDoesNotExist:
                pass

            if g:
                if verbose: print '   * update gap'
                g.end_time = g_end
            else:
                if verbose: print '   * new gap'
                g = GapInventory(row=entry, start_time=g_start, end_time=g_end)

            g.save()
            if verbose: print '   * +++'

        if future_end_time:
            # Current year, keep our spot
            entry.last_scan_point = make_aware(
                datetime.datetime.utcfromtimestamp(ts_end), utc)
        else:
            # Previous year, mark the row as processed
            entry.last_scan_point = entry.end_time
            entry.scan_complete = True

        entry.save()
        # explicitly clear the gaps list just in case and issue
        # the django reset so as to not leak memory in the
        # form of saved queries.
        del gaps[:]
        django_db.reset_queries()

        if verbose: print '======='
        if sig_handler.interrupted:
            print 'shutting down'
            break

    pass
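find_gaps_in_series() and Fill.verify_fill() are imported from elsewhere and are not shown above. A minimal, hypothetical sketch of the gap detection this code relies on, assuming the filled series is a list of dicts with 'ts' in seconds and 'val' set to None wherever no valid data exists:

    # Hypothetical sketch only; the real find_gaps_in_series() is not shown above.
    def find_gaps_sketch(filled_data):
        gaps = []
        gap_start = gap_end = None
        for point in filled_data:
            if point['val'] is None:
                if gap_start is None:
                    gap_start = point['ts']
                gap_end = point['ts']
            elif gap_start is not None:
                gaps.append((gap_start, gap_end))
                gap_start = None
        if gap_start is not None:
            gaps.append((gap_start, gap_end))
        return gaps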
Example #16
    def __init__(self, keyspace_name, savedb=True):
        config = get_config(get_config_path())
        if not savedb:
            config.db_clear_on_testing = True

        self.db = CASSANDRA_DB(config)
Example #18
class CassandraPollPersister(PollPersister):
    """Given a ``PollResult`` write the data to a Cassandra backend.

    The ``data`` member of the PollResult must be a list of (name,value)
    pairs.  The ``metadata`` member of PollResult must contain the following
    keys::

        ``tsdb_flags``
            TSDB flags to be used

    """

    def __init__(self, config, qname, persistq):
        PollPersister.__init__(self, config, qname, persistq)
        # The clear-on-testing arg (set in the config file when the testing
        # env var is set) causes the target keyspace and all of its data
        # to be deleted and rebuilt.
        self.log.debug("connecting to cassandra")
        self.db = CASSANDRA_DB(config, qname=qname)
        self.log.debug("connected to cassandra")

        self.ns = "snmp"

        self.oidsets = {}
        self.poller_args = {}
        self.oids = {}

        oidsets = OIDSet.objects.all()

        for oidset in oidsets:
            self.oidsets[oidset.name] = oidset
            d = {}
            if oidset.poller_args:
                for arg in oidset.poller_args.split():
                    (k, v) = arg.split('=')
                    d[k] = v
                self.poller_args[oidset.name] = d

            for oid in oidset.oids.all():
                self.oids[oid.name] = oid

    def flush(self):
        self.log.debug('flush state called.')
        try:
            self.db.flush()
        except MaximumRetryException:
            self.log.warn("flush failed. MaximumRetryException")

    def store(self, result):
        oidset = self.oidsets[result.oidset_name]
        set_name = self.poller_args[oidset.name].get('set_name', oidset.name)
        basepath = [self.ns, result.device_name, set_name]
        oid = self.oids[result.oid_name]
        
        t0 = time.time()
        nvar = 0

        for var, val in result.data:
            if set_name == "SparkySet": # This is pure hack. A new row type should be created for floats
                val = float(val) * 100
            nvar += 1
            
            var_path = basepath + var

            # This shouldn't happen.
            if val is None:
                self.log.error('Got a None value for %s' % (":".join(var_path)))
                continue
                
            # Create data encapsulation object (defined in cassandra.py 
            # module) and store the raw input.

            raw_data = RawRateData(path=var_path, ts=result.timestamp * 1000,
                    val=val, freq=oidset.frequency_ms)

            self.db.set_raw_data(raw_data, ttl=oidset.ttl)

            # Generate aggregations if apropos.
            if oid.aggregate:
                delta_v = self.aggregate_base_rate(raw_data)
                # XXX: not implemented
                #uptime_name = os.path.join(basename, 'sysUpTime')
                
                if delta_v != None: # a value of zero is ok
                    # We got a good delta back - generate rollups.
                    # Just swap the delta into the raw data object.
                    raw_data.val = delta_v
                    self.generate_aggregations(raw_data, oidset.aggregates)
            else:
                pass

        self.log.debug("stored %d vars in %f seconds: %s" % (nvar,
            time.time() - t0, result))

    def aggregate_base_rate(self, data):
        """
        Given incoming data that is meant for aggregation, generate and 
        store the base rate deltas, update the metadata cache, and if a valid 
        delta (delta_v) is generated, return to calling code to generate
        higher-level rollup aggregations.
        
        The data arg passed in is a RawData encapsulation object as
        defined in the cassandra.py module.  
        
        All of this logic is copied/adapted from the TSDB aggregator.py
        module.
        """

        metadata = self.db.get_metadata(data)
        last_update = metadata.ts_to_jstime('last_update')

        if data.min_last_update and data.min_last_update > last_update:
            last_update = data.min_last_update

        min_ts = metadata.ts_to_jstime('min_ts')

        if min_ts > last_update:
            last_update = min_ts
            metadata.last_update = last_update

        # This mimics logic in the tsdb persister - skip any further 
        # processing of the rate aggregate if this is the first value

        if data.val == metadata.last_val and \
            data.ts == metadata.last_update:
            return

        last_data_ts = metadata.ts_to_jstime('last_update')
        curr_data_ts = data.ts_to_jstime()

        # We've retrieved valid previous vals/ts from metadata, so calculate
        # the value and time delta, and the fractional slots that the data
        # will (usually) be split between.
        delta_t = curr_data_ts - last_data_ts
        delta_v = data.val - metadata.last_val

        rate = float(delta_v) / float(delta_t)
        # XXX(jdugan): should compare to ifHighSpeed?  this is BAD:
        max_rate = int(110e9)

        # Reality check the current rate and make sure the delta is
        # equal to or greater than zero.  Log errors but still update
        # the metadata cache with the most recently seen raw value/ts 
        # then stop processing.
        if rate > max_rate:
            self.log.error('max_rate_exceeded - %s - %s - %s' \
                % (rate, metadata.last_val, data.val))
            metadata.refresh_from_raw(data)
            return

        if delta_v < 0:
            self.log.error('delta_v < 0: %s vals: %s - %s path: %s' % \
                (delta_v,data.val,metadata.last_val,data.get_meta_key()))
            metadata.refresh_from_raw(data)
            self.db.update_metadata(data.get_meta_key(), metadata)
            return
            
        # This re-implements the old "heartbeat" logic.  If the current time
        # delta is greater than HEARTBEAT_FREQ_MULTIPLIER (3), write
        # zero-value non-valid bins in the gap.  These MAY be updated
        # later with valid values or backfill.  Then update only
        # the current bin, update metadata with current slot info
        # and return the delta.
        if delta_t > data.freq * HEARTBEAT_FREQ_MULTIPLIER:
            prev_slot = last_data_ts - (last_data_ts % data.freq)
            curr_slot = curr_data_ts - (curr_data_ts % data.freq)

            self.log.warning(
              'gap exceeds heartbeat for {0} from {1}({2}) to {3}({4})'.format(
                    data.path,
                    time.ctime(last_data_ts/1000),
                    last_data_ts,
                    time.ctime(curr_data_ts/1000),
                    curr_data_ts)
            )

            curr_frac = int(delta_v * ((curr_data_ts - curr_slot)/float(delta_t)))
            # Update only the "current" bin and return.
            curr_bin = BaseRateBin(ts=curr_slot, freq=data.freq, val=curr_frac,
                path=data.path)
            self.db.update_rate_bin(curr_bin)
            
            metadata.refresh_from_raw(data)
            self.db.update_metadata(data.get_meta_key(), metadata)

            return


        updates = fit_to_bins(data.freq, last_data_ts, metadata.last_val,
                curr_data_ts, data.val)
        # Now, write the new valid data between the appropriate bins.

        for bin_name, val in updates.iteritems():
            update_bin = BaseRateBin(ts=bin_name, freq=data.freq, val=val,
                path=data.path)
            self.db.update_rate_bin(update_bin)

        # Gotten to the final success condition, so update the metadata
        # cache with values from the current data input and return the 
        # valid delta to the calling code.
        metadata.refresh_from_raw(data)
        self.db.update_metadata(data.get_meta_key(), metadata)
        
        return delta_v

    def _agg_timestamp(self, data, freq):
        """
        Utility method to generate the 'compressed' timestamp for a higher-level
        aggregation bin.  
        
        The data arg is a data encapsulation object.
        
        The freq arg is the frequency of the desired aggregation to be written 
        to (ie: 5 mins, hourly, etc) in seconds.
        """
        return datetime.datetime.utcfromtimestamp((data.ts_to_unixtime() / freq) * freq)

    def generate_aggregations(self, data, aggregate_freqs):
        """
        Given a data encapsulation object that has been updated with the 
        current delta, iterate through the frequencies in oidset.aggregates
        and generate the appropriate higher level aggregations.
        
        The 'rate aggregations' are the summed deltas and the associated 
        counts.  The 'stat aggregations' are the min/max values.  These 
        are being written to two different column families due to schema
        constraints.
        
        Since the stat aggregations are read from/not just written to, 
        track if a new value has been generated (min/max will only be updated
        periodically), and if so, explicitly flush the stat_agg batch.
        """
        stat_updated = False

        for freq in aggregate_freqs:
            self.db.update_rate_aggregation(data, self._agg_timestamp(data, freq), freq*1000)
            updated = self.db.update_stat_aggregation(data, 
                                        self._agg_timestamp(data, freq), freq*1000)
            if updated: stat_updated = True
                                
        if stat_updated:
            self.db.stat_agg.send()

    def stop(self, x, y):
        self.log.debug("flushing and stopping cassandra poll persister")
        self.db.flush()
        self.running = False
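fit_to_bins() is imported from elsewhere and not shown in Example #18; aggregate_base_rate() expects it to return a dict mapping bin timestamp to the portion of the delta that falls in that bin. A hypothetical, hand-worked illustration of that proportional split (all numbers made up; timestamps and frequency in ms as in the code above):

    # Hypothetical illustration of the proportional split aggregate_base_rate()
    # expects back from fit_to_bins(); the real function is not shown above.
    freq = 30 * 1000                   # 30 second bins, in ms
    last_ts, last_val = 60000, 100     # previous raw sample
    curr_ts, curr_val = 105000, 190    # current raw sample

    delta_t = curr_ts - last_ts        # 45000 ms, spanning the 60000 and 90000 bins
    delta_v = curr_val - last_val      # 90

    # 30000 ms of the interval lies in bin 60000 and 15000 ms in bin 90000,
    # so a proportional split allocates 60 and 30 respectively:
    updates = {60000: 60, 90000: 30}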
Example #19
def generate_or_update_gap_inventory(limit=0, threshold=0, verbose=False):

    db = CASSANDRA_DB(get_config(get_config_path()))

    gap_duration_lower_bound = datetime.timedelta(seconds=threshold)

    if limit:
        row_inventory = Inventory.objects.filter(scan_complete=False).order_by('row_key')[:limit]
    else:
        row_inventory = Inventory.objects.filter(scan_complete=False).order_by('row_key')

    count = 1

    inv_count = len(row_inventory)

    for entry in row_inventory:
        print entry
        if verbose:
            print '  *', entry.start_time, ts_epoch(entry.start_time)
            print '  *', entry.end_time, ts_epoch(entry.end_time)
            print '  * inventory item # {0}/{1}'.format(count, inv_count)

        count += 1

        # Check for valid timestamps
        if entry.start_time > entry.end_time:
            print '   * Bad start/end times!'
            entry.issues = 'ifref start_time/end_time mismatch'
            entry.save()
            continue

        ts_start = ts_epoch(entry.start_time)
        ts_end = ts_epoch(entry.end_time)

        # If end_time of current row is in the
        # future (ie: probably when run on the row of the
        # current year), adjust end time arg to an hour ago.  
        # Use this in both the query and when setting up fill 
        # boundaries.
        #
        # Will also be setting last_scan_point to that
        # value in the main inventory table.

        future_end_time = False
        if ts_end > int(time.time()):
            future_end_time = True
            # fit it to a bin
            ts_end = (int(time.time()-3600)/entry.frequency)*entry.frequency

        # if last scan point is set, adjust the start time to that
        if entry.last_scan_point != None:
            print '  * setting start to last scan point'
            ts_start = ts_epoch(entry.last_scan_point)

        path = _split_rowkey(entry.row_key)[0:5]

        if sig_handler.interrupted:
            print 'shutting down'
            break

        if entry.get_column_family_display() == 'base_rates':
            data = db.query_baserate_timerange(path=path, 
                    freq=entry.frequency*1000,
                    ts_min=ts_start*1000,
                    ts_max=ts_end*1000)


        else:
            # XXX(mmg): figure out what data is being stored
            # in the raw data cf and process accordingly.
            print '  * not processing'
            continue

        if data:
            entry.data_found = True
            print '  * data found'

        # Format the data payload (transform ms timestamps back
        # to seconds and set is_valid = 0 values to None) and 
        # build a filled series over the query range out of 
        # the returned data.
        formatted_data = QueryUtil.format_data_payload(data)
        filled_data = Fill.verify_fill(ts_start, ts_end, entry.frequency, formatted_data)

        gaps = find_gaps_in_series(filled_data)

        # Lots of data being passed around, so explicitly clear
        # the lists.
        del filled_data[:]
        del formatted_data[:]
        del data[:]

        if sig_handler.interrupted:
            print 'shutting down'
            break

        for gap in gaps:
            g_start = make_aware(datetime.datetime.utcfromtimestamp(gap[0]), utc)
            g_end = make_aware(datetime.datetime.utcfromtimestamp(gap[1]), utc)

            # Skip gaps too small to be considered gaps
            if g_end - g_start < gap_duration_lower_bound:
                continue

            if verbose:
                print '  * gap'
                print '   *', g_start
                print '   *', g_end
                print '   * dur: ', g_end - g_start
            
            # See if there is already an existing gap ending on the 
            # current last_scan_point.  If so just "extend" the existing
            # gap (as long as it hasn't been processed) with up to date 
            # information rather than creating a new gap entry.
            #
            # This prevents subsequent scans during the current year
            # from creating a bunch of gap_inventory entries for 
            # a prolonged gap/inactive interface.
            g = None

            try:
                g = GapInventory.objects.get(row=entry, 
                        end_time=entry.last_scan_point,
                        processed=False)
            except ObjectDoesNotExist:
                pass

            if g:
                if verbose: print '   * update gap'
                g.end_time = g_end
            else:
                if verbose: print '   * new gap'
                g = GapInventory(row=entry, start_time=g_start, end_time=g_end)

            g.save()
            if verbose: print '   * +++'

        if future_end_time:
            # Current year, keep our spot
            entry.last_scan_point = make_aware(datetime.datetime.utcfromtimestamp(ts_end), utc)
        else:
            # Previous year, mark the row as processed
            entry.last_scan_point = entry.end_time
            entry.scan_complete = True

        entry.save()
        # explicitly clear the gaps list just in case and issue
        # the django reset so as to not leak memory in the
        # form of saved queries.
        del gaps[:]
        django_db.reset_queries()

        if verbose: print '======='
        if sig_handler.interrupted:
            print 'shutting down'
            break
                    
    pass
Example #20
class CassandraTester:
    def __init__(self, keyspace_name, savedb=True):
        config = get_config(get_config_path())
        if not savedb:
            config.db_clear_on_testing = True

        self.db = CASSANDRA_DB(config)
    
    def generate_int_data(self, key_prefix, metatdata_key, num_rows, start_ts, end_ts, summary_type, time_int, min_val, max_val):    
        row_keys = []
        # data = []
        for n in range(num_rows):
            if metatdata_key is None:
                metatdata_key = uuid.uuid4().hex
            path = [ PERFSONAR_NAMESPACE, key_prefix, metatdata_key ]
            if summary_type and summary_type != 'base':
                path = path + [ summary_type, str(time_int) ]
            row_keys.append(BaseRateBin(path=path, ts=1).get_meta_key().lower())
            for ts in range(start_ts, end_ts, time_int):
                br = BaseRateBin(path=path, ts=ts*1000, val=random.randint(min_val, max_val), is_valid=1)
                # data.append({'path': path, 'ts':ts*1000, 'val':random.randint(min_val, max_val), 'is_valid':1})
                self.db.update_rate_bin(br)
        self.db.flush()
        return row_keys
    
    def generate_histogram_data(self, key_prefix, metatdata_key, num_rows, start_ts, end_ts, summary_type, summ_window, sample_size, bucket_min, bucket_max):    
        row_keys = []
        data = []
        for n in range(num_rows):
            if metatdata_key is None:
                metatdata_key = uuid.uuid4().hex
            path = [ PERFSONAR_NAMESPACE, key_prefix, metatdata_key ]
            if summary_type and summary_type != 'base':
                path = path + [ summary_type, str(summ_window) ]
            row_keys.append(RawRateData(path=path, ts=1).get_meta_key().lower())
            for ts in range(start_ts, end_ts, summ_window):
                histogram = {}
                sample = sample_size
                while(sample > 0):
                    bucket = random.randint(bucket_min, bucket_max)
                    val = random.randint(1,sample)
                    if not histogram.has_key(str(bucket)):
                        histogram[str(bucket)] = val
                    else:
                        histogram[str(bucket)] += val
                    sample -= val
                rr = RawRateData(path=path, ts=ts*1000, val=json.dumps(histogram))
                # data.append({'path':path, 'ts':ts*1000, 'val':json.dumps(histogram)})
                self.db.set_raw_data(rr)
        self.db.flush()
        return row_keys
    
    def get_data(self, cf_name, key, start_time, end_time, output_json=False):
        cf = ColumnFamily(self.db.pool, cf_name)  # the pool lives on the CASSANDRA_DB instance
        try:
            result = cf.multiget(self.gen_key_range(key, start_time, end_time), column_start=start_time*1000, column_finish=end_time*1000, column_count=10000000)
            if output_json:
                self.dump_json(result)
        except NotFoundException:
            pass
    
    def dump_json(self, db_result):
        time_series = []
        for row in db_result.keys():
            for ts in db_result[row].keys():
                time_series.append({'time': ts, 'value': db_result[row][ts]})
        print json.dumps(time_series)
    
    def gen_key(self, key, ts):
        year = datetime.datetime.utcfromtimestamp(ts).year
        key = "%s:%d" % (key,year)
        return key.lower()
    
    def gen_key_range(self, key, start_time, end_time):
        key_range = []
        start_year = datetime.datetime.utcfromtimestamp(start_time).year
        end_year = datetime.datetime.utcfromtimestamp(end_time).year
        year_range = range(start_year, end_year+1)
        for year in year_range:
            key_range.append("%s:%d" % (key,year))
        return key_range
Example #21
    def handle(self, *args, **options):
        print 'Dropping and re-initializing cassandra esmond keyspace'
        config = get_config(get_config_path())
        config.db_clear_on_testing = True
        db = CASSANDRA_DB(config)
Example #22
def main():
    #Parse command-line opts
    parser = argparse.ArgumentParser(
        description="Remove old data and metadata based on a configuration file"
    )
    parser.add_argument('-c',
                        '--config',
                        metavar='CONFIG',
                        nargs=1,
                        dest='config',
                        default=[DEFAULT_CONFIG_FILE],
                        help='Configuration file location (default=%(default)s).')
    parser.add_argument(
        '-s',
        '--start',
        metavar='START',
        nargs=1,
        dest='start',
        default=None,
        help=
        'Start looking for expired record at given time as unix timestamp. Default is current time.'
    )
    parser.add_argument(
        '-t',
        '--time-chunk',
        metavar='TIME_CHUNK',
        nargs=1,
        dest='time_chunk',
        default=[DEFAULT_MAX_TIME_CHUNK],
        type=int,
        help=
        'The amount of data to look at each query in seconds. Defaults to {0}'.
        format(DEFAULT_MAX_TIME_CHUNK))
    parser.add_argument(
        '-m',
        '--max-misses',
        metavar='MAX_MISSES',
        nargs=1,
        dest='max_misses',
        default=[DEFAULT_MAX_MISSES],
        type=int,
        help=
        'The maximum number of time chunks with no data before giving up. Defaults to {0}'
        .format(DEFAULT_MAX_MISSES))
    args = parser.parse_args()

    #parse args
    expire_start = None
    if args.start:
        expire_start = dateutil.parser.parse(args.start[0])

    #init django
    django.setup()

    #Connect to DB
    db = CASSANDRA_DB(get_config(get_config_path()), timeout=60)

    #read config file
    policies = {}
    json_file_data = open(args.config[0])
    config = json.load(json_file_data)
    if 'policies' not in config:
        raise RuntimeError(
            "Invalid JSON config. Missing required top-level 'policies' object"
        )
    for p in config['policies']:
        i = 0
        policies_builder = policies
        for req in POLICY_MATCH_FIELD_DEFS:
            i += 1
            if req['name'] not in p:
                raise RuntimeError(
                    "Invalid policy in polcies list at position %d. Missing required field %s."
                    % (i, req['name']))
            val = p[req['name']]
            if val not in req['special_vals']:
                req['type'](val)
                if (req['valid_vals']
                        is not None) and (val not in req['valid_vals']):
                    raise RuntimeError(
                        "Invalid policy in polcies list at position %d. Invalid value %s for %s. Allowed values are %s."
                        % (i, val, req['name'], req['valid_vals']))
            if val not in policies_builder: policies_builder[val] = {}
            policies_builder = policies_builder[val]
        build_policy_action(p, policies_builder)

    #Clean out data from cassandra
    metadata_counts = {}
    for et in PSEventTypes.objects.all():
        #determine policy
        policy = get_policy(et, policies,
                            [v['name'] for v in POLICY_MATCH_FIELD_DEFS], 0)
        if policy is None:
            print "Unable to find matching policy for %s:%s\n" % (et.metadata,
                                                                  et)
            continue

        #determine expire time
        if str(policy['expire']).lower() == 'never':
            continue
        expire_time = datetime_to_ts(datetime.utcnow() -
                                     timedelta(days=int(policy['expire'])))

        #handle command-line option
        if expire_start is not None:
            expire_start_ts = datetime_to_ts(expire_start)
            if expire_start_ts <= expire_time:
                expire_time = expire_start_ts
            else:
                #non-binding expire so skip
                continue

        #check metadata
        md_key = et.metadata.metadata_key
        if md_key not in metadata_counts:
            metadata_counts[md_key] = {
                "expired": 0,
                "total": 0,
                "obj": et.metadata
            }
        metadata_counts[md_key]['total'] += 1
        if et.time_updated is None:
            metadata_counts[md_key]['expired'] += 1
        elif datetime_to_ts(et.time_updated) <= expire_time:
            metadata_counts[md_key]['expired'] += 1
            expire_time = datetime_to_ts(et.time_updated)

        #Some datasets timeout if dataset is too large. in this case grab chunks
        begin_time = expire_time - args.time_chunk[0]
        end_time = expire_time

        misses = 0
        while misses < args.max_misses[0]:
            if begin_time == 0:
                #only run one time after seeing begin_time of 0
                misses = args.max_misses[0]
            elif begin_time < 0:
                #make sure begin_time is not below 0
                begin_time = 0
                misses = args.max_misses[0]

            #query data to delete
            try:
                (expired_data, cf, datapath) = query_data(
                    db, et.metadata.metadata_key, et.event_type,
                    et.summary_type, et.summary_window, begin_time, end_time)
            except Exception as e:
                print "Query error for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s, begin_time=%s, end_time=%s, error=%s" % (
                    md_key, et.event_type, et.summary_type, et.summary_window,
                    begin_time, end_time, e)
                break

            #adjust begin_time
            end_time = begin_time
            begin_time = begin_time - args.time_chunk[0]

            #check if we got any data
            if len(expired_data) == 0:
                misses += 1
                continue

            #delete data
            for expired_col in expired_data:
                year = datetime.utcfromtimestamp(
                    float(expired_col['ts']) / 1000.0).year
                row_key = get_rowkey(datapath, et.summary_window, year)
                try:
                    cf.remove(row_key, [expired_col['ts']])
                except Exception as e:
                    sys.stderr.write("Error deleting {0}: {1}\n".format(
                        row_key, e))

            print "Sending request to delete %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % (
                len(expired_data), md_key, et.event_type, et.summary_type,
                et.summary_window)
            try:
                cf.send()
            except Exception as e:
                sys.stderr.write("Error sending delete: {0}".format(e))
            print "Deleted %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % (
                len(expired_data), md_key, et.event_type, et.summary_type,
                et.summary_window)

    #Clean out metadata from relational database
    for md_key in metadata_counts:
        if metadata_counts[md_key]['total'] == metadata_counts[md_key][
                'expired']:
            metadata_counts[md_key]['obj'].delete()
            print "Deleted metadata %s" % md_key