def main():
    ''' Stream records from Kafka into Cassandra.

    Builds a Kafka consumer for STREAM_NAME and a Cassandra helper for
    CASS_HOSTNAME, selects CASS_KEYSPACE on the session and then hands
    both objects to kafka_to_cass().
    '''
    # Kafka side first, then the Cassandra helper
    consumer = kafka_consumer(STREAM_NAME)
    cassandra = cass_utils.cassandra_utils([CASS_HOSTNAME])

    # Called so the helper object is initialised with all keyspaces
    cassandra.get_keyspaces()

    # Point the session at the keyspace we write to
    cassandra.set_session_keyspace(CASS_KEYSPACE)

    # Read from Kafka and write into Cassandra
    kafka_to_cass(consumer, cassandra)
def cass_query(start_time, end_time, fetch_size=5000):
    ''' Query Cassandra for a time window and return only the 'person' rows.

    Args:
        start_time: Start of the query window. Passed to
            check_table_exists() and cass_query_to_dict().
        end_time: End of the query window. Passed to cass_query_to_dict().
        fetch_size: Page size set on the session as default_fetch_size.
            Defaults to 5000, the value that was previously hard-coded
            (per the original note, queries time out without it).

    Returns:
        A pandas DataFrame holding the rows of every result page whose
        `found` column equals 'person'. If check_table_exists() reports
        that the table is missing, the result of make_empty_dataframe()
        is returned instead.
    '''
    # Connect and select the keyspace
    cass = cu.cassandra_utils([HOSTNAME])
    cass.set_session_keyspace(KEYSPACE)

    # Let the driver build result pages through pandas_factory
    cass.session.row_factory = pandas_factory
    cass.session.default_fetch_size = fetch_size

    # Without the table there is nothing to query: return an empty frame
    # TBD: This should be handled better or more elegantly
    if not check_table_exists(cass, start_time):
        return make_empty_dataframe()

    result_set = cass_query_to_dict(cass, STREAM_GROUP_NAME, STREAM_NAME,
                                    start_time, end_time)

    # Walk the result page by page, keeping only the rows of interest.
    # The filtered pages are collected in a list and concatenated once:
    # DataFrame.append() no longer exists in pandas >= 2.0, and it copied
    # the whole accumulated frame on every page.
    # NOTE(review): assumes pandas_factory makes every page a DataFrame
    # with a `found` column - confirm against pandas_factory.
    person_frames = []
    while True:
        page_df = result_set._current_rows
        person_frames.append(page_df[page_df.found == 'person'])
        # The last page must be processed too, so only stop after it has
        # been added (otherwise the result would always be a multiple of
        # the fetch size).
        if not result_set.has_more_pages:
            break
        result_set.fetch_next_page()

    # NOTE(review): the connection opened above is never released here
    # (the original had a commented-out cleanup call) - confirm whether
    # cass.cleanup() should be called before returning.
    return pd.concat(person_frames)
def main():
    ''' Run one query for START_TIME..END_TIME and print what comes back.

    Prints the object returned by cass_query_to_dict() and then each
    item obtained by iterating over it, one per line.
    '''
    # Connect to Cassandra and select the keyspace
    cass = cu.cassandra_utils([HOSTNAME])
    cass.set_session_keyspace(KEYSPACE)

    # Run the query and dump the result
    rows = cass_query_to_dict(cass, START_TIME, END_TIME)
    print('Ret dict: {}'.format(rows))
    for row in rows:
        print(row)

    # Release the connection
    cass.cleanup()
def main():
    ''' Add or delete a Cassandra keyspace as requested on the command line.

    The 'add_ks' option wins over 'delete_ks' when both are set. If
    neither is set an error is logged. The connection is cleaned up in
    every case.
    '''
    options = parse_args()

    # Connect to the Cassandra DB
    cass = cu.cassandra_utils([HOSTNAME])

    if options['add_ks']:
        keyspace = options['add_ks']
        logging.info('Adding Keyspace {} to Cassandra DB'.format(keyspace))
        create_keyspace(cass, keyspace)
    elif options['delete_ks']:
        keyspace = options['delete_ks']
        logging.info(
            'Deleting Keyspace {} from Cassandra DB'.format(keyspace))
        delete_keyspace(cass, keyspace)
    else:
        logging.error('Need to specify either add or delete option')

    cass.cleanup()
def main():
    ''' Fetch the person rows for START_TIME..END_TIME and show a preview.

    Prints the column index and the head() of the DataFrame returned by
    cass_query().
    '''
    # Connect to Cassandra and select the keyspace
    cass = cu.cassandra_utils([HOSTNAME])
    cass.set_session_keyspace(KEYSPACE)

    # Run the query; cass_query() only receives the time window
    persons = cass_query(START_TIME, END_TIME)

    # Show which columns came back and the first few rows
    print(persons.columns)
    print(persons.head())

    # Release the connection opened above
    cass.cleanup()
def main():
    ''' Add, delete or list tables in a Cassandra keyspace.

    The action is taken from the parsed command-line options, checked in
    this order: 'add_table', 'delete_table', 'list_tables'. All actions
    operate on the keyspace given by 'ks_name'. If no action is set an
    error is logged. The connection is cleaned up in every case.
    '''
    args = parse_args()

    # Connect to cassandra db
    cass = cu.cassandra_utils([HOSTNAME])

    if args['add_table']:
        logging.info('Adding Table {} to Keyspace {}'.format(
            args['add_table'], args['ks_name']))
        cass.create_table(args['ks_name'], args['add_table'],
                          TABLE_COLUMNS_SV2)
    elif args['delete_table']:
        logging.info('Deleting Table {} from Keyspace {}'.format(
            args['delete_table'], args['ks_name']))
        cass.delete_table(args['ks_name'], args['delete_table'])
    elif args['list_tables']:
        logging.info('Listing tables from Keyspace {}'.format(
            args['ks_name']))
        # The return value is not used; the call itself is kept.
        # NOTE(review): presumably it loads the keyspace information the
        # table lookup below relies on - confirm in cassandra_utils.
        cass.get_keyspaces()
        tables_list = cass.get_tables_in_keyspace(args['ks_name'])
        for t in sorted(tables_list):
            print(t)
    else:
        # Listing is a valid action too, so name all three options
        logging.error('Need to specify either add, delete or list option')

    cass.cleanup()