def store_mfc_config():
    from analyticsengine.dbmanager.mfc.schema import MFC_CONFIG_TABLE_NAME
    from analyticsengine import dbmanager
    from datetime import datetime

    db_connection = dbmanager.connect_cassandra()
    CONFIG_TABLE_INSERT = "INSERT INTO " + MFC_CONFIG_TABLE_NAME + \
                          """ (mfcid, hostname, ip, ts, type, value)
                          VALUES (%(mfcid)s, %(hostname)s, %(ip)s, %(ts)s, %(type)s, %(value)s)
                          """
    # Epoch milliseconds from a datetime.
    timestamp = lambda dt: long((dt - datetime.fromtimestamp(0)).total_seconds() * 1000)

    while True:
        data = r.blpop(config.get('constants', 'REDIS_CONFIG_STORE_QUEUE_KEY'))
        conf_data = json.loads(data[1])

        # Primary key columns shared by both rows.
        pk = dict()
        pk['mfcid'] = str(conf_data['device_id'])
        pk['hostname'] = conf_data['name']
        pk['ip'] = conf_data['ip']
        pk['ts'] = timestamp(datetime.now())

        # Parsed configuration, stored under type 'config'.
        conf = dict(pk)
        conf['type'] = 'config'
        conf['value'] = {
            'host_id': conf_data['data']['config']['host_id'],
            'version': conf_data['data']['config']['version'],
            'licenses': json.dumps(conf_data['data']['config']['licenses']),
            'network': json.dumps(conf_data['data']['config']['network']),
            'ifcfg': json.dumps(conf_data['data']['config']['ifcfg']),
            'namespaces': json.dumps(conf_data['data']['config']['namespaces']),
        }
        db_connection.execute(CONFIG_TABLE_INSERT, conf)

        # Raw configuration dump, stored under type 'raw'.
        raw = dict(pk)
        raw['type'] = 'raw'
        raw['value'] = {'dump': json.dumps(conf_data['data']['config']['dump'])}
        db_connection.execute(CONFIG_TABLE_INSERT, raw)
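
# Illustrative only: a minimal sketch of the decoded JSON payload shape store_mfc_config()
# expects to pop from REDIS_CONFIG_STORE_QUEUE_KEY, inferred from the keys read above.
# All values are hypothetical placeholders; the real collector fills these in.
_EXAMPLE_CONFIG_PAYLOAD = {
    'device_id': 'example-device-uuid',   # hypothetical UUID string
    'name': 'mfc-host-01',                # hypothetical hostname
    'ip': '10.0.0.1',
    'data': {
        'config': {
            'host_id': 'host-01',
            'version': '7.0',
            'licenses': [],               # serialized with json.dumps before insert
            'network': {},
            'ifcfg': {},
            'namespaces': [],
            'dump': {},                   # stored separately under type='raw'
        }
    }
}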
def create_daily_tables(date_str=None):
    db_connection = dbmanager.connect_cassandra()
    daily_tables = dict()
    if date_str is None:
        date_str = date.today().strftime('%m%d%Y')

    """mfc_stats stores counters from different MFCs per day,
    indexed with mfcid (a UUID) as the row key.
    Composite key: (mfcid, type, name, ts).
    """
    daily_tables['mfc_stats'] = """
        CREATE TABLE %s%s (mfcid varchar, hostname varchar, ip varchar, type varchar,
        name varchar, ts timestamp, value map<text, bigint>,
        PRIMARY KEY (mfcid, type, name, ts))
        WITH CLUSTERING ORDER BY (type ASC, name ASC, ts DESC)
        """ % (MFC_STATS_TABLE_NAME, date_str)

    daily_tables['mfc_summary'] = """
        CREATE TABLE %s%s (mfcid varchar, hostname varchar, ip varchar, ts timestamp,
        sample_id varchar, value map<text, text>,
        PRIMARY KEY (mfcid))
        """ % (MFC_SUMMARY_TABLE_NAME, date_str)

    daily_tables['cluster_stats'] = """
        CREATE TABLE %s%s (name varchar, ts timestamp, value map<text, bigint>, sample_id varchar,
        PRIMARY KEY (name, ts))
        WITH CLUSTERING ORDER BY (ts DESC)
        """ % (CLUSTER_STATS_TABLE_NAME, date_str)

    daily_tables['cluster_summary'] = """
        CREATE TABLE %s%s (name varchar, ts timestamp, value map<text, bigint>, sample_id varchar,
        PRIMARY KEY (name))
        """ % (CLUSTER_SUMMARY_TABLE_NAME, date_str)

    daily_tables['cluster_sample_map'] = """
        CREATE TABLE %s%s (sample_id varchar, ts timestamp, ip_list list<text>,
        PRIMARY KEY (sample_id))
        """ % (CLUSTER_SAMPLE_MAP_TABLE_NAME, date_str)

    for t_name, create_t in daily_tables.items():
        try:
            LOG.info("Creating Table: %s" % t_name)
            db_connection.execute(create_t)
        except AlreadyExists:
            LOG.info("Table already exists for %s" % t_name)
    db_connection.shutdown()
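
# A minimal sketch (not part of the production flow) of how the daily table names are composed
# and how the (mfcid, type, name, ts) composite key of the per-day mfc_stats table is meant to
# be read: one partition per MFC, clustered by type, name and descending timestamp. The mfcid
# bind value and this helper's name are hypothetical.
def _example_daily_stats_select(date_str):
    from analyticsengine.dbmanager.mfc.schema import MFC_STATS_TABLE_NAME
    # date_str follows the '%m%d%Y' suffix used by create_daily_tables(), e.g. '01312015'.
    return ("SELECT ts, value FROM " + MFC_STATS_TABLE_NAME + date_str +
            " WHERE mfcid = %(mfcid)s AND type = 'global' AND name = 'requests'")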
def create_cluster_tables():
    db_connection = dbmanager.connect_cassandra()
    main_tables = dict()

    """These tables store values for the lifetime of the cluster,
    indexed with mfcid as the row key.
    """
    main_tables['mfc_config'] = """
        CREATE TABLE %s (mfcid varchar, hostname varchar, ip varchar, type varchar, ts timestamp,
        value map<text, text>,
        PRIMARY KEY (mfcid, ts, type))
        """ % MFC_CONFIG_TABLE_NAME

    for t_name, create_t in main_tables.items():
        try:
            LOG.info("Creating Table: %s" % t_name)
            db_connection.execute(create_t)
        except AlreadyExists:
            LOG.info("Table already exists for %s" % t_name)
    db_connection.shutdown()
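
# Usage sketch, assuming the schema is bootstrapped by a daily scheduler: the lifetime table is
# created once and the per-day column families are (re)created for the current date. AlreadyExists
# is logged and ignored by both helpers, so re-running is safe. The wrapper name is hypothetical;
# the real entry point may differ.
def _example_bootstrap_schema():
    create_cluster_tables()   # lifetime mfc_config table
    create_daily_tables()     # today's stats/summary/sample-map tables with the MMDDYYYY suffix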
def store_mfc_stats():
    from datetime import date
    from analyticsengine import dbmanager
    from analyticsengine.dbmanager.mfc.schema import MFC_STATS_TABLE_NAME, MFC_SUMMARY_TABLE_NAME

    db_connection = dbmanager.connect_cassandra()
    date_strf = lambda dt: dt.strftime('%m%d%Y')
    req_interval = int(config.get('collector', 'MFC_REQUEST_FREQUENCY'))

    while True:
        data = r.blpop(config.get('constants', 'REDIS_MFC_STORE_QUEUE_KEY'))
        counters = json.loads(data[1])

        """CF date suffix.

        The CF date suffix is calculated from the timestamp in the response. Data can sit
        buffered in the queue and may belong to a different date, so instead of using the
        current date we stick to the timestamp in the payload; the sample then gets stored
        in the right CF. This covers the case where the queue holds buffered data from a
        different date and lets the app recover after a crash.
        """
        date_str = date_strf(date.fromtimestamp(counters['data']['timestamp']))
        DAILY_TABLE_INSERT = "INSERT INTO " + MFC_STATS_TABLE_NAME + date_str + \
                             """ (mfcid, hostname, ip, ts, type, name, value)
                             VALUES (%(mfcid)s, %(hostname)s, %(ip)s, %(ts)s, %(type)s, %(name)s, %(value)s)
                             """
        DAILY_SUMMARY_INSERT = "INSERT INTO " + MFC_SUMMARY_TABLE_NAME + date_str + \
                               """ (mfcid, hostname, ip, sample_id, ts, value)
                               VALUES (%(mfcid)s, %(hostname)s, %(ip)s, %(sample_id)s, %(ts)s, %(value)s)
                               """

        """Global stats."""
        # Primary key columns shared by all stat rows.
        pk = dict()
        pk['mfcid'] = str(counters['device_id'])
        pk['hostname'] = counters['name']
        pk['ip'] = counters['ip']
        pk['ts'] = counters['data']['timestamp'] * 1000

        # Global bytes.
        glbl_bytes = dict(pk)
        glbl_bytes['type'] = 'global'
        glbl_bytes['name'] = 'bytes'
        glbl_bytes['value'] = counters['data']['glbl']['bytes']
        #ingest_to_db.apply_async(args=[DailyCounters, session], kwargs=glbl_bytes, queue='store',
        #                         routing_key='store.stats')
        #DailyMFCCounters.create(**glbl_bytes)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_bytes)

        # Global disk space.
        glbl_ds = dict(pk)
        glbl_ds['type'] = 'global'
        glbl_ds['name'] = 'disk_space'
        # Disk space is reported in MB; convert to bytes.
        for k, v in counters['data']['glbl']['disk_space'].items():
            counters['data']['glbl']['disk_space'][k] = v * 1024 * 1024
        glbl_ds['value'] = counters['data']['glbl']['disk_space']
        #ingest_to_db.apply_async(args=[DailyCounters, session], kwargs=glbl_ds, queue='store',
        #                         routing_key='store.stats')
        #DailyMFCCounters.create(**glbl_ds)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_ds)

        # Global RAM cache.
        glbl_ram = dict(pk)
        glbl_ram['type'] = 'global'
        glbl_ram['name'] = 'ram_cache'
        glbl_ram['value'] = counters['data']['glbl']['ram_cache']
        #ingest_to_db.apply_async(args=[DailyCounters, session], kwargs=glbl_ram, queue='store',
        #                         routing_key='store.stats')
        #DailyMFCCounters.create(**glbl_ram)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_ram)

        # Global requests.
        glbl_req = dict(pk)
        glbl_req['type'] = 'global'
        glbl_req['name'] = 'requests'
        glbl_req['value'] = counters['data']['glbl']['requests']
        #ingest_to_db.apply_async(args=[DailyCounters, session], kwargs=glbl_req, queue='store',
        #                         routing_key='store.stats')
        #DailyMFCCounters.create(**glbl_req)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_req)

        # Global tiers: one row per provider.
        glbl_tier = dict(pk)
        glbl_tier['type'] = 'global'
        for tier in counters['data']['glbl']['tiers']:
            glbl_tier['name'] = tier['provider']
            tier.pop('provider')
            glbl_tier['value'] = tier
            db_connection.execute(DAILY_TABLE_INSERT, glbl_tier)

        """Namespace stats."""
        http_ns = dict(pk)
        http_ns['type'] = 'http_ns'
        for ns in counters['data']['services']['http']['namespaces']:
            http_ns['name'] = ns['name'] + ':requests'
            http_ns['value'] = ns['requests']
            db_connection.execute(DAILY_TABLE_INSERT, http_ns)
            http_ns['name'] = ns['name'] + ':bytes'
            http_ns['value'] = ns['bytes']
            db_connection.execute(DAILY_TABLE_INSERT, http_ns)

        """System stats."""
        sys_stat = dict(pk)
        sys_stat['type'] = 'system'
        sys_stat['name'] = 'cpu'
        sys_stat['value'] = counters['data']['system']['cpu']
        db_connection.execute(DAILY_TABLE_INSERT, sys_stat)

        sys_stat['name'] = 'memory'
        # System memory is reported in KB; convert to bytes.
        for k, v in counters['data']['system']['memory'].items():
            counters['data']['system']['memory'][k] = v * 1024
        sys_stat['value'] = counters['data']['system']['memory']
        db_connection.execute(DAILY_TABLE_INSERT, sys_stat)

        """MFC summary stats."""
        sum_stats = dict()
        sum_stats['mfcid'] = str(counters['device_id'])
        sum_stats['hostname'] = counters['name']
        sum_stats['ip'] = counters['ip']
        sum_stats['sample_id'] = counters['sample_id']
        sum_stats['ts'] = counters['data']['timestamp'] * 1000
        sum_stats['value'] = {}
        for k, v in counters['data']['glbl']['requests'].items():
            sum_stats['value'].update({'req_' + k: str(v)})
        for k, v in counters['data']['cur_thrpt'].items():
            sum_stats['value'].update({'cur_' + k: str(v / req_interval)})
        sum_stats['value'].update({'chr': str(counters['data']['chr'])})
        db_connection.execute(DAILY_SUMMARY_INSERT, sum_stats)
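
# A small sketch of the CF date-suffix rule described in store_mfc_stats(): the target table is
# derived from the payload's own timestamp, not from "today", so samples buffered across midnight
# (or replayed after a crash) still land in the column family for the day they were collected.
# The helper name and the epoch value in the usage note are hypothetical examples.
def _example_stats_table_for(payload_ts):
    from datetime import date
    from analyticsengine.dbmanager.mfc.schema import MFC_STATS_TABLE_NAME
    return MFC_STATS_TABLE_NAME + date.fromtimestamp(payload_ts).strftime('%m%d%Y')

# e.g. _example_stats_table_for(1420070400) yields MFC_STATS_TABLE_NAME plus a suffix such as
# '12312014' or '01012015', depending on the local timezone (mirroring date.fromtimestamp above).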
def store_cluster_stats():
    from datetime import date
    from analyticsengine.dbmanager.mfc.schema import (CLUSTER_STATS_TABLE_NAME, CLUSTER_SUMMARY_TABLE_NAME,
                                                      CLUSTER_SAMPLE_MAP_TABLE_NAME)
    from analyticsengine import dbmanager
    from collections import Counter  # Counter may appear in the eval'd payload below.

    db_connection = dbmanager.connect_cassandra()
    date_strf = lambda dt: dt.strftime('%m%d%Y')

    while True:
        data = r.blpop(config.get('constants', 'REDIS_CLUSTER_STORE_QUEUE_KEY'))
        sample_id, counters = eval(data[1])

        # CF date suffix is calculated from the timestamp in the payload; see the note in
        # store_mfc_stats above.
        date_str = date_strf(date.fromtimestamp(counters['timestamp']))
        DAILY_TABLE_INSERT = "INSERT INTO " + CLUSTER_STATS_TABLE_NAME + date_str + \
                             """ (name, ts, sample_id, value)
                             VALUES (%(name)s, %(ts)s, %(sample_id)s, %(value)s)
                             """
        DAILY_SUMMARY_TABLE_INSERT = "INSERT INTO " + CLUSTER_SUMMARY_TABLE_NAME + date_str + \
                                     """ (name, ts, sample_id, value)
                                     VALUES (%(name)s, %(ts)s, %(sample_id)s, %(value)s)
                                     """
        SAMPLE_MAP_INSERT = "INSERT INTO " + CLUSTER_SAMPLE_MAP_TABLE_NAME + date_str + \
                            """ (sample_id, ts, ip_list)
                            VALUES (%(sample_id)s, %(ts)s, %(ip_list)s)
                            """

        # Primary key columns shared by the stat rows.
        pk = dict()
        pk['ts'] = counters['timestamp'] * 1000

        glbl_req = dict(pk)
        glbl_req['name'] = 'gl_requests'
        glbl_req['value'] = dict(counters['requests'])
        glbl_req['sample_id'] = sample_id
        #DailyClusterCounters.create(**glbl_req)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_req)

        glbl_bytes = dict(pk)
        glbl_bytes['name'] = 'gl_bytes'
        glbl_bytes['value'] = dict(counters['bytes'])
        glbl_bytes['sample_id'] = sample_id
        #DailyClusterCounters.create(**glbl_bytes)
        db_connection.execute(DAILY_TABLE_INSERT, glbl_bytes)

        sample_map = dict(pk)
        sample_map['sample_id'] = sample_id
        sample_map['ip_list'] = counters['ip_list']
        db_connection.execute(SAMPLE_MAP_INSERT, sample_map)

        # Update cluster-wide summary.
        cluster_sum = dict()
        cluster_sum['name'] = 'cur_thrpt'
        cluster_sum['ts'] = pk['ts']
        cluster_sum['sample_id'] = sample_id
        cluster_sum['value'] = dict(counters['cur_thrpt'])
        db_connection.execute(DAILY_SUMMARY_TABLE_INSERT, cluster_sum)

        cluster_sum['name'] = 'cum_bytes'
        cluster_sum['value'] = dict(counters['bytes'])
        db_connection.execute(DAILY_SUMMARY_TABLE_INSERT, cluster_sum)

        cluster_sum['name'] = 'requests'
        cluster_sum['value'] = dict(counters['requests'])
        db_connection.execute(DAILY_SUMMARY_TABLE_INSERT, cluster_sum)
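
# Illustrative only: the tuple shape store_cluster_stats() expects to eval() from
# REDIS_CLUSTER_STORE_QUEUE_KEY, inferred from the keys read above. The producer is assumed
# to push repr((sample_id, counters)); every value here is a hypothetical placeholder.
_EXAMPLE_CLUSTER_PAYLOAD = (
    'sample-0001',                       # sample_id
    {
        'timestamp': 1420070400,         # epoch seconds; also selects the daily CF suffix
        'requests': {'total': 0},        # may arrive as a collections.Counter repr
        'bytes': {'total': 0},
        'cur_thrpt': {'total': 0},
        'ip_list': ['10.0.0.1', '10.0.0.2'],
    },
)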