Exemple #1
0
def main():
    ''' Wire a Kafka consumer to a Cassandra session and start the transfer.

    Builds a consumer for STREAM_NAME and a Cassandra helper for
    CASS_HOSTNAME, selects CASS_KEYSPACE on the session, then hands both
    objects to kafka_to_cass().
    '''
    consumer = kafka_consumer(STREAM_NAME)
    cassandra = cass_utils.cassandra_utils([CASS_HOSTNAME])

    # get_keyspaces() is called only for its side effect: it initialises the
    # helper object with all keyspaces before one is selected.
    cassandra.get_keyspaces()
    cassandra.set_session_keyspace(CASS_KEYSPACE)

    # Read from Kafka and write to Cassandra
    kafka_to_cass(consumer, cassandra)
Exemple #2
0
def cass_query(start_time, end_time):
    ''' Query Cassandra and return only the rows whose ``found`` is 'person'.

    Args:
        start_time: passed to check_table_exists() and, together with
            end_time, through to cass_query_to_dict().
        end_time: passed through to cass_query_to_dict().

    Returns:
        A pandas DataFrame holding the matching rows from every result page,
        or whatever make_empty_dataframe() returns when the table does not
        exist.
    '''
    # init the cass obj with all keyspaces and tables
    cass = cu.cassandra_utils([HOSTNAME])
    cass.set_session_keyspace(KEYSPACE)

    # Have the session build each page of rows through pandas_factory
    cass.session.row_factory = pandas_factory

    # Set query fetch size to 5000. Otherwise things will timeout
    # TBD: Magic number alert
    cass.session.default_fetch_size = 5000

    # Before making the query, check that the table exists. If it does not,
    # return an empty dataframe instead of querying.
    # TBD: This should be handled better or more elegantly
    if not check_table_exists(cass, start_time):
        return make_empty_dataframe()

    result_pages = cass_query_to_dict(cass, STREAM_GROUP_NAME, STREAM_NAME,
                                      start_time, end_time)

    # Walk the result page by page, keeping only the rows of interest.
    # The filtered pages are collected in a list and concatenated once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every page.
    person_frames = []
    while True:
        page_df = result_pages._current_rows
        person_frames.append(page_df[page_df.found == 'person'])
        # The final page is processed above before leaving the loop;
        # otherwise the result would always be a multiple of
        # cass.session.default_fetch_size.
        if not result_pages.has_more_pages:
            break
        result_pages.fetch_next_page()

    # NOTE(review): the connection opened above is never cleaned up here (the
    # original had the cleanup call commented out) - confirm whether that is
    # intentional.
    return pd.concat(person_frames)
def main():
    ''' Run the query for START_TIME..END_TIME and print what it returns.

    Prints the object returned by cass_query_to_dict() and then each item
    obtained by iterating over it. The Cassandra helper is cleaned up even
    if the query or the printing raises.
    '''
    # init the cass obj with all keyspaces and tables
    cass = cu.cassandra_utils([HOSTNAME])
    try:
        # Set the keyspace
        cass.set_session_keyspace(KEYSPACE)

        # Now do the query
        ret_dict = cass_query_to_dict(cass, START_TIME, END_TIME)
        print('Ret dict: {}'.format(ret_dict))
        for d in ret_dict:
            print(d)
    finally:
        # Always release the connection, including on error paths
        cass.cleanup()
Exemple #4
0
def main():
    ''' Add or delete a Cassandra keyspace according to the parsed arguments.

    Reads args['add_ks'] and args['delete_ks'] from parse_args(); the first
    truthy one selects the action. If neither is set an error is logged.
    The Cassandra helper is cleaned up even if the action raises.
    '''
    args = parse_args()
    # Connect to cassandra db
    cass = cu.cassandra_utils([HOSTNAME])
    try:
        if args['add_ks']:
            logging.info('Adding Keyspace {} to Cassandra DB'.format(
                args['add_ks']))
            create_keyspace(cass, args['add_ks'])
        elif args['delete_ks']:
            logging.info('Deleting Keyspace {} from Cassandra DB'.format(
                args['delete_ks']))
            delete_keyspace(cass, args['delete_ks'])
        else:
            logging.error('Need to specify either add or delete option')
    finally:
        # Always release the connection, including on error paths
        cass.cleanup()
Exemple #5
0
def main():
    ''' Fetch the 'person' rows for START_TIME..END_TIME and print a preview.

    Prints the columns and the head() of the DataFrame returned by
    cass_query(). The Cassandra helper created here is cleaned up even if
    the query or the printing raises.
    '''
    # init the cass obj with all keyspaces and tables
    # NOTE(review): this handle is not passed to cass_query(); it appears to
    # be used only for the keyspace setup and cleanup below - confirm it is
    # still needed.
    cass = cu.cassandra_utils([HOSTNAME])
    try:
        # Set the keyspace
        cass.set_session_keyspace(KEYSPACE)

        # Now do the query
        person_df = cass_query(START_TIME, END_TIME)
        print(person_df.columns)
        print(person_df.head())
    finally:
        # Always release the connection, including on error paths
        cass.cleanup()
def main():
    ''' Add, delete or list tables in a keyspace per the parsed arguments.

    Reads args['add_table'], args['delete_table'] and args['list_tables']
    from parse_args(); the first truthy one selects the action, applied to
    the keyspace named by args['ks_name']. If none is set an error is
    logged. The Cassandra helper is cleaned up even if the action raises.
    '''
    args = parse_args()
    # Connect to cassandra db
    cass = cu.cassandra_utils([HOSTNAME])
    try:
        if args['add_table']:
            logging.info('Adding Table {} to Keyspace {}'.format(
                args['add_table'], args['ks_name']))
            cass.create_table(args['ks_name'], args['add_table'],
                              TABLE_COLUMNS_SV2)
        elif args['delete_table']:
            logging.info('Deleting Table {} from Keyspace {}'.format(
                args['delete_table'], args['ks_name']))
            cass.delete_table(args['ks_name'], args['delete_table'])
        elif args['list_tables']:
            logging.info(
                'Listing tables from Keyspace {}'.format(args['ks_name']))
            # The return value is not needed; the call is kept because the
            # table lookup below presumably relies on the keyspace metadata
            # it loads - TODO confirm.
            cass.get_keyspaces()
            tables_list = cass.get_tables_in_keyspace(args['ks_name'])
            for t in sorted(tables_list):
                print(t)
        else:
            # The list option is also accepted above, so mention it here
            logging.error('Need to specify either add, delete or list option')
    finally:
        # Always release the connection, including on error paths
        cass.cleanup()