def _get_df_for_quic_data(coll_name):
    """Load the QUIC records of one collection into a date-sorted DataFrame.

    Every document returned for ``coll_name`` contributes one row built from
    ``doc['meta']['ts']`` (``year``/``month``/``day``) and from
    ``doc['items']['pkts_percentage']`` / ``doc['items']['bytes_percentage']``.

    Args:
        coll_name: name of the collection to read from the analysis database.

    Returns:
        A ``(status, df)`` tuple. ``status`` is whatever
        ``dbif.db_collection_find_records`` reported. ``df`` has the columns
        ``Date``, ``pkts_percentage`` and ``bytes_percentage`` sorted by
        ``Date`` when ``status == dbif.DBIF_OK``; otherwise it is an empty
        DataFrame.

    Raises:
        KeyError: if a document lacks one of the fields listed above.
    """
    logger.info('converting ' + coll_name + ' to dataframe for easy analysis')
    status, _error, cursor = dbif.db_collection_find_records(globals.ANALYSIS_DB_NAME, coll_name)

    if status != dbif.DBIF_OK:
        # The original message referenced an undefined name here, which
        # turned every lookup failure into a NameError.
        logger.error('error while converting collection ' + coll_name + ' to dataframe')
        return status, pd.DataFrame()

    columns = ['Date', 'pkts_percentage', 'bytes_percentage']

    # Collect plain dicts first and build the frame once; growing a DataFrame
    # cell by cell reallocates it on every new row.
    rows = []
    for doc in cursor:
        ts = doc['meta']['ts']
        items = doc['items']
        rows.append({
            'Date': str(ts['year']) + '-' + str(ts['month']) + '-' + str(ts['day']),
            'pkts_percentage': float(items['pkts_percentage']),
            'bytes_percentage': float(items['bytes_percentage']),
        })

    # Passing the columns explicitly fixes their order and keeps the 'Date'
    # column present even when the cursor yields no documents.
    df = pd.DataFrame(rows, columns=columns)

    # sort on the Date column
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values(by='Date')

    # replace NaNs with 0.0, this makes sense for the percentage columns
    df = df.fillna(0.0)
    logger.info('created dataframe')
    return status, df
def _get_df_for_protocol_data(coll_name):
    """Load per-protocol packet percentages into a date-sorted DataFrame.

    Every document returned for ``coll_name`` contributes one row. The row's
    ``Date`` comes from ``doc['meta']['ts']`` (``year``/``month``/``day``) and
    each entry of ``doc['items']`` adds its ``pkts_percentage`` under a column
    named after ``str(item['protocol'])``. Entries whose protocol is ``nan``,
    ``unassigned`` or the misspelt ``unassgined`` are summed into a single
    ``unknown`` column.

    Args:
        coll_name: name of the collection to read from the analysis database.

    Returns:
        A ``(status, df)`` tuple. ``status`` is whatever
        ``dbif.db_collection_find_records`` reported. ``df`` is sorted by
        ``Date`` with missing protocol values filled with ``0.0`` when
        ``status == dbif.DBIF_OK``; otherwise it is an empty DataFrame.

    Raises:
        KeyError: if a document or item lacks one of the fields listed above.
    """
    logger.info('converting ' + coll_name + ' to dataframe for easy analysis')
    status, _error, cursor = dbif.db_collection_find_records(globals.ANALYSIS_DB_NAME, coll_name)

    if status != dbif.DBIF_OK:
        # The original message referenced an undefined name here, which
        # turned every lookup failure into a NameError.
        logger.error('error while converting collection ' + coll_name + ' to dataframe')
        return status, pd.DataFrame()

    # Protocol names that are all reported under the 'unknown' column
    # (the misspelling is handled on purpose).
    unknown_names = ('nan', 'unassgined', 'unassigned')

    # Column order is tracked by first appearance so the resulting frame does
    # not depend on how the DataFrame constructor orders dict keys.
    columns = ['Date']
    seen = set(columns)

    # Collect plain dicts first and build the frame once; growing a DataFrame
    # cell by cell reallocates it on every new row/column.
    rows = []
    for doc in cursor:
        ts = doc['meta']['ts']
        row = {'Date': str(ts['year']) + '-' + str(ts['month']) + '-' + str(ts['day'])}
        nan_or_unassigned = 0.0
        for item in doc['items']:
            proto = str(item['protocol'])
            if proto in unknown_names:
                nan_or_unassigned += float(item['pkts_percentage'])
                column, value = 'unknown', nan_or_unassigned
            else:
                column, value = proto, float(item['pkts_percentage'])
            row[column] = value
            if column not in seen:
                seen.add(column)
                columns.append(column)
        rows.append(row)

    # Explicit columns also keep 'Date' present when the cursor is empty.
    df = pd.DataFrame(rows, columns=columns)

    # sort on the Date column
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values(by='Date')

    # not all documents had all the protocols; a missing protocol means no
    # packets were seen for it, so replace NaNs with 0.0
    df = df.fillna(0.0)
    logger.info('created dataframe')
    return status, df