def main(): try: if db_user == 'no_cred' or db_pass == 'no_cred': db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str) else: db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str, username=db_user, password=db_pass) if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']: print('Table exists {0:s}'.format(TBL_NAME_toll_stream)) else: status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream, name=TBL_NAME_toll_stream, db=db, options={'collection_name': 'traffic'}) print(status) print('\tTable created {0:s}'.format('Table created')) test_insert() except gpudb.gpudb.GPUdbException as e: print(e)
def get_conn_db(db_conn_str, db_user, db_pass): # Prepare DB Communications logger.info( f"Attempting to connect to DB at {db_conn_str} to push to {tbl_out_audit}" ) if db_user == 'no_cred' or db_pass == 'no_cred': cn_db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str) else: cn_db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str, username=db_user, password=db_pass) return cn_db
def main(): logger = logging.getLogger('Price consumer') logger.setLevel(logging.DEBUG) #logging.basicConfig(filename='myapp.log', level=logging.INFO) consumer = KafkaConsumer('prices') db = gpudb.GPUdb(encoding='BINARY', host='kinetica-saif') columns = [ ["TIMESTAMP", "long", "timestamp"], ["tickid", "long", "primary_key"], ["TICKER", "string", "char32"], ["DESC", "string", "char32"], ["PX_LAST", "float"], ] px_table = gpudb.GPUdbTable(columns, 'PRICES', db=db) print("Eating off queue on topic %s " % TOPIC) for msg in consumer: quote = json.loads(msg.value) theTimestamp = int(round(time.time() * 1000)) logger.critical(quote) print( px_table.insert_records(theTimestamp, quote['tickid'], quote['ticker'], quote['desc'], quote['pxlast'])) print("DONE Eating off queue on topic %s " % TOPIC)
def main(): """ Prepares database for storing Meetup.com RSVPs by creating all required tables. """ db = gpudb.GPUdb(host=config.GPUDB_HOST, port=config.GPUDB_PORT) create_event_rsvp_table(db) add_city_column(db) add_prediction_columns(db)
def get_conn_db(db_conn_str, db_user, db_pass): """Prepare DB Communications.""" logger.info(f"Attempting to connect to database at {db_conn_str}. " f"Pushing to table {tbl_out_audit}") if db_user == 'no_cred' or db_pass == 'no_cred': cn_db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str, primary_host=db_conn_str) else: cn_db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str, primary_host=db_conn_str, username=db_user, password=db_pass) return cn_db
def test_insert(): test_insert_a = [(3100, 3100, "E", 0.00, 1.87, 46, 33.9, 1.34, "medium"), (3100, 3100, "E", 1.42, 1.87, 46, 33.9, 1.34, "heavy")] test_insert_b = [(3200, 3210, "E", 0.00, 1.87, 146, 33.9, 1.34, "medium"), (3200, 3330, "E", 1.42, 1.87, 146, 33.9, 1.34, "heavy")] try: if db_user == 'no_cred' or db_pass == 'no_cred': db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str) else: db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str, username=db_user, password=db_pass) insertables = [] for (sz, ez, direction, tp1, tp2, humd, tempr, wnd, wthr) in test_insert_a: insertable = collections.OrderedDict() insertable["startzoneid"] = sz insertable["endzoneid"] = ez insertable["direction"] = direction insertable["toll_prev_1"] = tp1 insertable["toll_prev_2"] = tp2 insertable["humidity"] = humd insertable["temperature"] = tempr insertable["windspeed"] = wnd insertable["weather"] = wthr insertables.append(insertable) sink_table = gpudb.GPUdbTable(name=TBL_NAME_toll_stream, db=db) insert_status = sink_table.insert_records(insertables) print(insert_status) print('\tSample records inserted ') except gpudb.gpudb.GPUdbException as e: print(e)
def main(): db = gpudb.GPUdb('159.69.39.8') logger = kmllogger.attach_log() in_map = { 'city': 'New York', 'country': 'us', 'day_of_week': 6, 'hour': 18, 'group_events': 100, 'group_members': 4000 } out_map = bb_module_default.predict(in_map, db, logger) print(out_map)
def multithread(): h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191') sharded_columns = [ ["city", "string", "char16"], ["state_province", "string", "char2", "shard_key"], # shard key column [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ], ["airport", "string", "nullable"], # a nullable column ["x", "double"], ["y", "double"], ["avg_temp", "double"], ["time_zone", "string", "char8", "shard_key"] # shard key column ] sharded_table = gpudb.GPUdbTable(sharded_columns, db=h_db, use_multihead_ingest=True, multihead_ingest_batch_size=33) num_records = 500 null_likelihood = 10 alphanum = (string.ascii_letters + string.digits) for i in range(0, num_records): record = collections.OrderedDict() record["city"] = ''.join( [random.choice(alphanum) for n in range(0, random.randint(5, 16))]) record["state_province"] = ''.join( [random.choice(alphanum) for n in range(0, random.randint(0, 2))]) record["country"] = ''.join( [random.choice(alphanum) for n in range(0, random.randint(5, 16))]) record[ "airport" ] = None if (random.random() < null_likelihood) \ else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] ) record["x"] = random.uniform(-180, 180) record["y"] = random.uniform(-90, 90) record["avg_temp"] = random.uniform(-40, 110) record["time_zone"] = "UTC-{}".format(random.randint(-11, 14)) sharded_table.insert_records(record) # end loop sharded_table.flush_data_to_server()
def main(): """ Start listening to the Meetup.com RSVP stream (uses WebSockets). If a RSVP is received store it in the database. """ db = gpudb.GPUdb(host=config.GPUDB_HOST, port=config.GPUDB_PORT) rsvp_queue = multiprocessing.Queue() event_info_provider = apiutils.EventInfoProvider(db) storer = threading.Thread(target=store_rsvps, kwargs={ 'db': db, 'queue': rsvp_queue, 'event_info_provider': event_info_provider }) storer.start() websocket.enableTrace(False) on_message = functools.partial(add_to_storing_queue, queue=rsvp_queue) ws = websocket.WebSocketApp(config.MEETUP_API_RSVP_ENDPOINT, on_message=on_message) ws.run_forever()
COLLECTION = "RISK" NEW_TABLE = "bs_stream" HOST = "<ipaddress>" ENCODING = "binary" PORT = "9191" DATA_PACK = 1 INGEST_FREQ = 3 "Execute python scripts on Kinetica servers using" "/opt/gpudb/bin/gpudb_python" # Establish connection to database with necessary credentials # Pull data from Kinetica and put it directly into a Pandas df h_db = gpudb.GPUdb(encoding=ENCODING, host=HOST, port=PORT, username="******", password="******") if h_db.has_table(table_name=TABLE)['table_exists']: print("Table successfully reached.") else: print("Table not found.") # Pull data from Kinetica and put it directly into a Pandas df data = h_db.get_records(table_name=TABLE, offset=0, limit=gpudb.GPUdb.END_OF_SET, encoding=ENCODING) df = pd.DataFrame( gpudb.GPUdbRecord.decode_binary_data(data["type_schema"],
import collections import sys from datetime import datetime """ All constants and functions provided here are to be used in examples and unit test implementations for the UDF API. It involves two features: test data ingestion (and deletion) to a table in an actual Kinetica instance and initialization of a udf-simulator (and cleansing its artifacts) to avoid having to deploy actual UDFs just for the purpose of examples or unit tests. Note that the udf-simulator requires the test data to be ingested, which means the ingestion has to happen first. """ # Adjust these constants to reflect your Kinetica installation that you can use for running examples and unit tests. HOST = 'http://localhost' PORT = '9191' USER = '******' PASSWORD = '******' DB_HANDLE = gpudb.GPUdb(encoding='BINARY', host=HOST, port=PORT) # This is the test data that examples and unit tests will use. All data and tables will be deleted after the # examples or unit tests have run. TEST_DATA_TABLE_NAME_1 = 'unittest_toy_data' TEST_DATA_TYPE_1 = [['id', gpudb.GPUdbRecordColumn._ColumnType.INT], ['value_long', gpudb.GPUdbRecordColumn._ColumnType.LONG], ['value_float', gpudb.GPUdbRecordColumn._ColumnType.FLOAT]] TEST_DATA_RECORDS_1 = [[1, 123, 1.2], [2, 1234, 1.3], [3, 12345, 1.4], [4, 123456, 1.5], [5, 1234567, 1.6]] TEST_DATA_TABLE_NAME_2 = 'unittest_toy_data2' TEST_DATA_TYPE_2 = [['id', gpudb.GPUdbRecordColumn._ColumnType.INT], ['value_float', gpudb.GPUdbRecordColumn._ColumnType.FLOAT]] TEST_DATA_RECORDS_2 = [[6, 2.2], [7, 2.3], [8, 2.4], [9, 2.5], [10, 2.6]] TEST_OUTPUT_TABLE_NAME = 'unittest_df_output'
output_table_names=[OUTPUT_TABLE], options={} ) print("Proc executed successfully:") print(response) print("Check the system log or 'gpudb-proc.log' for execution information") print("") # end python_tc_udf_exec() if __name__ == '__main__': # Set up args parser = argparse.ArgumentParser( description='Execute the table copy Python UDF example.') parser.add_argument('--host', default='127.0.0.1', help='Kinetica host to run example against') parser.add_argument('--username', default='', help='Username of user to run example with') parser.add_argument('--password', default='', help='Password of user') args = parser.parse_args() # Establish connection with a locally-running instance of Kinetica kinetica = gpudb.GPUdb(host=['http://' + args.host + ':9191'], username=args.username, password=args.password) # Execute defined functions python_tc_udf_exec()
from io import StringIO import collections from avro import schema, io import os from PIL import Image # from io import StringIO # Init table_name = "imageTableInference" collection = "MASTER" tiffFolder = "./imagesTIF/" jpegFolder = "./imagesJPEG/" # init gpudb_host = "172.31.33.26" h_db = gpudb.GPUdb(encoding='BINARY', host=gpudb_host, port='9191') my_type = """ { "type": "record", "name": "image", "fields": [ {"name":"image","type":"bytes"} ] }""".replace('\n', '').replace(' ', '') def creatTable(type_def=my_type, table_name="imageTableInference", collection="MASTER"): response = h_db.create_type(type_definition=type_def, label='image') type_id = response['type_id']
def output(): if "KINETICA_PCF" not in os.environ: raise RuntimeError("No control file specified") icf = os.environ["KINETICA_PCF"] if not os.path.exists(icf): raise RuntimeError("Specified control file does not exist") icf = open(icf, "rb") if read_uint64(icf) != 1: raise RuntimeError("Unrecognized control file version") read_dict(icf) read_dict(icf) read_dict(icf) read_dict(icf) for io in range(0, 2): for i in range(0, read_uint64(icf)): read_string(icf) for j in range(0, read_uint64(icf)): read_string(icf) read_uint64(icf) read_string(icf) read_string(icf) read_string(icf) if io == 0: output_pos = icf.tell() ocf = read_string(icf) if os.path.getsize(ocf) == 0: raise RuntimeError("No output detected") ocf = open(ocf, "rb") if read_uint64(ocf) != 1: raise RuntimeError("Unrecognized output control file version") results = read_dict(ocf) if results: print("Results:") print("") for key, value in results.items(): print(key + ": " + value) print("") else: print("No results") icf.seek(output_pos) table_count = read_uint64(icf) if table_count > 0: print("Output:") print("") db = gpudb.GPUdb(encoding="BINARY", host=args.url, username=args.username, password=args.password) for i in range(0, table_count): read_table(icf, db) else: print("No output")
def execute(): if args.distributed and args.nondistributed: parser.error("-d/--distributed and -n/--nondistributed are mutually exclusive") if args.nondistributed and args.input: parser.error("-n/--nondistribtued and -i/--input are mutually exclusive") if args.nondistributed and args.output: parser.error("-n/--nondistributed and -o/--output are mutually exclusive") if args.input or args.output: args.distributed = True elif not args.distributed: args.nondistributed = True icf = tempfile.NamedTemporaryFile(prefix="kinetica-udf-sim-icf-", dir=args.path, delete=False) write_uint64(icf, 1) icf_info = {} icf_info["run_id"] = "0" icf_info["proc_name"] = "proc" if args.distributed: icf_info["rank_number"] = "1" icf_info["tom_number"] = "0" else: icf_info["rank_number"] = "0" icf_info["data_segment_id"] = "0" icf_info["data_segment_number"] = "0" icf_info["data_segment_count"] = "1" icf_info["head_url"] = args.url icf_info["username"] = args.username icf_info["password"] = args.password write_dict(icf, icf_info) write_dict(icf, {}) icf_params = {} if args.param: for param in args.param: icf_params[param[0]] = param[1] write_dict(icf, icf_params) write_dict(icf, {}) if args.input or args.output: db = gpudb.GPUdb(encoding="BINARY", host=args.url, username=args.username, password=args.password) if args.input: write_uint64(icf, len(args.input)) for table in args.input: write_table(icf, db, table, True) else: write_uint64(icf, 0) if args.output: write_uint64(icf, len(args.output)) for table in args.output: write_table(icf, db, [table], False) else: write_uint64(icf, 0) write_string(icf, tempfile.NamedTemporaryFile(prefix="kinetica-udf-sim-", dir=args.path, delete=False).name) print("export KINETICA_PCF=" + icf.name)
def gpudb_example(): print ( "TUTORIAL OUTPUT") print ( "===============\n") # all tables/views used in examples below weather_table_name = "weather" weather_w_view = "weather_west" weather_nw_view = "weather_northwest" weather_country_view = "weather_country" weather_e_view = "weather_east" weather_se_view = "weather_southeast" weather_h_view = "weather_histogram" """ Establish connection with a locally-running instance of Kinetica, using binary encoding to save memory """ h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191') print () print ( "CREATING A TYPE & TABLE") print ( "-----------------------") print () """ Create columns; column arguments consist of a list of the name, then type, and then optional additional properties. E.g., [ "column_name", column_type, column_property1, column_property2 ]. Note that any number of column properties can be listed as long as they are not mutually exclusive within themselves or with the primitive type. Also note that raw string can be used for both the primitive type and the properties; but the user is also able to use string constants as illustrated in the example below. """ columns = [ [ "city", "string", "char16" ], [ "state_province", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR32 ], [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ], [ "x", "double" ], [ "y", "double" ], [ "avg_temp", "double" ], [ "time_zone", "string", "char8" ] ] # Clear any existing table with the same name (otherwise we won't be able to # create the table) if h_db.has_table( table_name = weather_table_name )['table_exists']: h_db.clear_table( weather_table_name ) # Create the table from the type try: weather_table = gpudb.GPUdbTable( columns, weather_table_name, db = h_db ) print ( "Table successfully created.") except gpudb.GPUdbException as e: print ( "Table creation failure: {}".format( str(e) ) ) # We can also create a GPUdbTable object for a table that already exists in # the database. All we need is the table name (and a GPUdb object). Note how # we pass None for the type argument weather_table_duplicate = gpudb.GPUdbTable( None, weather_table_name, db = h_db ) print ( "\n") print ( "INSERTING DATA") print ( "--------------") print () # Insert single record example # Create ordered dictionary for keys & values of record datum = collections.OrderedDict() datum["city"] = "Washington, D.C." datum["state_province"] = "--" datum["country"] = "USA" datum["x"] = -77.016389 datum["y"] = 38.904722 datum["avg_temp"] = 58.5 datum["time_zone"] = "UTC-5" # Insert the record into the table (through the GPUdbTable interface) weather_table.insert_records( datum ) # Create another record datum2 = collections.OrderedDict() datum2["city"] = "Washington, D.C." datum2["state_province"] = "--" datum2["country"] = "USA" datum2["x"] = -77.016389 datum2["y"] = 38.904722 datum2["avg_temp"] = 58.5 datum2["time_zone"] = "UTC-5" # Insert the second record through the basic GPUdb interface # Encode record and put into a single element list weather_record_type = weather_table.get_table_type() single_record = [ gpudb.GPUdbRecord( weather_record_type, datum ).binary_data ] # Insert the record into the table response = h_db.insert_records(table_name = weather_table_name, data = single_record, list_encoding = "binary") print ( "Number of single records inserted: {}".format(response["count_inserted"])) # Insert multiple records example # =============================== records = [] # Create a list of in-line records records.append( ["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"] ) records.append( ["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"] ) records.append( ["Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"] ) records.append( ["La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77, "UTC-7"] ) records.append( ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"] ) records.append( ["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"] ) records.append( ["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"] ) records.append( ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"] ) records.append( ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"] ) records.append( ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"] ) records.append( ["La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44, "UTC-4"] ) records.append( ["Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"] ) records.append( ["Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62, "UTC-4"] ) records.append( ["Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"] ) records.append( ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"] ) records.append( ["Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5, "UTC+10"] ) records.append( ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"] ) records.append( ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"] ) records.append( ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"] ) records.append( ["Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68, "UTC+8"] ) # Insert the records into the table weather_table.insert_records( records ) print ( "Number of batch records inserted: {}".format( weather_table.size() )) print ( "\n") print ( "RETRIEVING DATA") print ( "---------------") print () """ Retrieve the second set of ten records from weather_table. Note that records can be iterated over directly. """ print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone")) print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", "")) for weatherLoc in weather_table.get_records( offset = 10, limit = 10 ): print ( "{city:<20s} {state:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f} {time_zone}" "".format( city = weatherLoc["city"], state = weatherLoc["state_province"], country = weatherLoc["country"], y = weatherLoc["y"], x = weatherLoc["x"], avg_temp = weatherLoc["avg_temp"], time_zone = weatherLoc["time_zone"] ) ) """ Retrieve no more than 10 records as JSON from weather_table through the GPUdb interface. Note that records are stringified and have to be parsed if using the 'json' encoding. """ weatherLocs = h_db.get_records( table_name = weather_table_name, offset = 0, limit = 10, encoding = "json", options = {"sort_by":"city"} )['records_json'] print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone")) print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", "")) for weatherLoc in weatherLocs: print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f} {time_zone}".format(**json.loads(weatherLoc))) """ Retrieve no more than 25 of the remaining records as binary from weather table. Note that records are binary and have to be decoded. """ response = h_db.get_records( table_name = weather_table_name, offset = 10, limit = 25, encoding = "binary", options = {"sort_by":"city"}) weatherLocs = gpudb.GPUdbRecord.decode_binary_data(response["type_schema"], response["records_binary"]) for weatherLoc in weatherLocs: print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f} {time_zone}".format(**weatherLoc)) """ Note that total_number_of_records does not reflect offset/limit; it's the count of all records or those which match the given expression """ print ( "\nNumber of records in new table: {:d}".format(response["total_number_of_records"])) print ( "\n") print ( "FILTERING") print ( "---------") print () ### Filter Example 1 """ Filter records where column x is less than 0, i.e., cities in the western hemisphere, and store the filter in a view. Note that the GPUdbTable creates a random view name if one is not supplied. """ view1 = weather_table.filter( expression = "x < 0" ) print ( "Number of records in the western hemisphere: {}".format( view1.size() )) ### Filter Example 2 """ Filter records where column x is less than 0 and column y is greater than 0, i.e., cities in the northwestern semi-hemisphere, and store the filter in a view. This filter operation is done through the base GPUdb interface. """ response = h_db.filter(table_name = weather_table_name, view_name = weather_nw_view, expression = "x < 0 and y > 0" ) print ( "Number of records in the northwestern semi-hemisphere: {}".format( response["count"] )) ### Filter Example 3 """ Filter records using the same expressions as Example 2, but using query chaining this time (note that we're using the view created by the first filter. """ nw_view = view1.filter( expression = "y > 0" ) print ( "Number of records in the northwestern semi-hemisphere (with query chaining): {}" "".format( nw_view.size() )) ### Filter Example 4 """ Filter by list where country name is USA, Brazil, or Australia. Here we use the duplicate GPUdbTable object (but it points to the same DB table). """ country_map = {"country": ["USA", "Brazil", "Australia"]} view3 = weather_table_duplicate.filter_by_list( column_values_map = country_map ) print ( "Number of records where country name is USA, Brazil, or Australia: {}" "".format( view3.size() )) ### Filter Example 5 """ Filter by range cities that are east of GMT (the Prime Meridian) """ view4 = weather_table.filter_by_range( column_name = "x", lower_bound = 0, upper_bound = 180 ) print ( "Number of records that are east of the Prime Meridian (x > 0): {}" "".format( view4.size() )) print ( "\n") print ( "AGGREGATING, GROUPING, and HISTOGRAMS") print ( "-------------------------------------") print () ### Aggregate Example 1 """ Aggregate count, min, mean, and max on the average temperature. Note that unlike the filter functions, the aggregate functions of GPUdbTable return the response from the database. """ stat_results = weather_table.aggregate_statistics( column_name = "avg_temp", stats = "count,min,max,mean" ) print ( "Statistics of values in the average temperature column:") print ( "\tCount: {count:.0f}\n\tMin: {min:4.2f}\n\tMean: {mean:4.2f}\n\tMax: {max:4.2f}" "\n".format( **stat_results["stats"] )) ### Aggregate Example 2 """ Find unique city names. """ results = weather_table.aggregate_unique( column_name = "city", offset = 0, limit = 25 ) print ( "Unique city names:") for weatherLoc in results.data["city"]: print ( "\t* {}".format( weatherLoc )) print () """ Same operation, but through the base GPUdb interface. Note that the results have to parsed specially using GPUdb.parse_dynamic_response(). Also, we're using the 'json' encoding in this case (the 'binary' encoding can also be used). Also note how the data is accessed differently. """ response = h_db.aggregate_unique( table_name = weather_table_name, column_name = "city", offset = 0, limit = 25, encoding = "json") print ( "Unique city names (using the GPUdb class):") weatherLocs = h_db.parse_dynamic_response(response)['response']['city'] for weatherLoc in weatherLocs: print ( "\t* {}".format(weatherLoc)) print () ### Aggregate Example 3 """ Find number of weather locations per country in the northwestern semi-hemisphere. Note that the data is automatically decoded. """ results = nw_view.aggregate_group_by( column_names = ["country", "count(country)"], offset = 0, limit = 25 ) print ( "Weather locations per country in the northwest semi-hemisphere:") for country in zip(results.data["country"], results.data["count(country)"]): print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1])) print () """ Find number of weather locations per country in the northwestern semi-hemisphere; use binary decoding explicitly since we're using the GPUdb class. """ response = h_db.aggregate_group_by(table_name=weather_nw_view, column_names=["country", "count(country)"], offset=0, limit=25, encoding="binary") countries = gpudb.GPUdbRecord.decode_binary_data(response["response_schema_str"], response["binary_encoded_response"]) print ( "Weather locations per country in the northwest semi-hemisphere:") for country in zip(countries["column_1"], countries["column_2"]): print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1])) print () ### Aggregate Example 4 """ Filter table to southeastern semi-hemisphere records, group by country, and aggregate min, max, and mean on the average temperature; using the default binary decoding and the GPUdbTable interface. """ # Do a filter first se_view = weather_table.filter( expression="x > 0 and y < 0" ) # Then do the aggregation operation (note how we use the 'data' property to get # the data) data = se_view.aggregate_group_by( column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"], offset = 0, limit = 25 ).data print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max")) print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", "")) for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]): print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather)) print () """ Filter table to southeastern semi-hemisphere records, group by country, and aggregate min, max, and mean on the average temperature; using the default binary decoding and the base GPUdb interface. """ h_db.filter(table_name = weather_table_name, view_name = weather_se_view, expression="x > 0 and y < 0") response = h_db.aggregate_group_by( table_name = weather_se_view, column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"], offset = 0, limit = 25 ) data = h_db.parse_dynamic_response(response)['response'] print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max")) print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", "")) for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]): print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather)) print () ### Aggregate Example 5 """ Filter for southern hemisphere cities and create a histogram for the average temperature of those cities (divided into every 10 degrees, e.g., 40s, 50s, 60s, etc.) """ s_view = weather_table.filter( expression = "y < 0" ) histogram_result = s_view.aggregate_histogram( column_name = "avg_temp", start = 40, end = 90, interval = 10 ) print ( "Number of southern hemisphere cities with average temps in the given ranges:") for histogroup in zip([40, 50, 60, 70, 80], histogram_result['counts']): print ( "\t{}s: {:2.0f}".format(*histogroup)) print() ### Aggregate Example 6 """ Aggregate group by has an option 'result_table' which creates a result table and does not return the data. Very useful when the data is large and we want to fetch records from it in batches. """ # Create another table with the same type, and generate a lot of random data for it. # Note that we're allowing GPUdbTable to come up with a random name for the table. weather_table2 = gpudb.GPUdbTable( columns, db = h_db ) # Create random data (but specify a range for the average temperature column) weather_table2.insert_records_random( count = 10000, options = { "avg_temp": {"min": -20, "max": 105 } } ) print() print ( "Second weather table size: ", weather_table2.size() ) # Create a view on the south-western quadrant of the planet sw_view = weather_table2.filter( expression="x < 0 and y < 0" ) # Then do the aggregation operation . Note that the column names need # aliases to utilize th 'result_table' option. agg_result_table = sw_view.aggregate_group_by( column_names = ["country", "min(avg_temp) as min_avg_temp", "max(avg_temp) as max_avg_temp", "mean(avg_temp) as mean_avg_temp"], offset = 0, limit = 25, options = { "result_table": gpudb.GPUdbTable.prefix_name("agg_") } ) print ( "Size of records in the SW quadrant of the planet: ", agg_result_table.size() ) print ( "{:<20s} {:^7s} {:^7s} {:^5s}".format("SW Semi-Hemi Country", "Min", "Mean", "Max")) print ( "{:=<20s} {:=<6s} {:=<6s} {:=<6s}".format("", "", "", "")) # Note that we can slice GPUdbTable objects to fetch the data inside for record in agg_result_table[ 10 : 50 ]: print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format( record["country"], record["min_avg_temp"], record["mean_avg_temp"], record["max_avg_temp"] )) print () print ( "\n") print ( "DELETING DATA") print ( "-------------") print () """ Filter for cities that are either south of latitude -50 or west of longitude -50 to determine how many records will be deleted; delete the records, then confirm the deletion by refiltering. """ deleteExpression = "x < -50 or y < -50" num_records_to_delete = weather_table.filter( expression = deleteExpression ).count print ( "Number of records that meet deletion criteria before deleting: {}" "".format( num_records_to_delete ) ) weather_table.delete_records( expressions = [ deleteExpression ] ) # Note that we're using the duplicate GPUdbTable object which points to the # same table in the DB num_records_post_delete = weather_table_duplicate.filter( expression = deleteExpression ).count print ( "Number of records that meet deletion criteria after deleting (expect 0): {}".format( num_records_post_delete )) print () print ( "\n") print ( "Using Multi-head Ingestion") print ( "--------------------------") print () """For tables with primary or shard key columns, it might be useful to use the multi-head ingestion procedure for inserting records into a table for heavy ingestion loads. There are benefits and drawbacks of using multi-head ingestion: the benefit is that if the database is configured for multi-head ingestion and there is a tremendous ingestion load, then the ingestion will be faster over all. However, the drawback is that the client has to do some calculation PER record to find out which worker rank of the database server to send the record to. So, unless the following parameters are met, it is unwise to use multi-head ingestion as it will unncessarily slow ingestion down: * The server is configured to use multi-head ingestion * The table type has at least one primary or shard key column * There is a heavy stream of data to be inserted """ # Create a type that has some shard keys sharded_columns = [ [ "city", "string", "char16" ], [ "state_province", "string", "char2", "shard_key" ], # shard key column [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ], [ "airport", "string", "nullable" ], # a nullable column [ "x", "double" ], [ "y", "double" ], [ "avg_temp", "double" ], [ "time_zone", "string", "char8", "shard_key" ] # shard key column ] # Create a table with the multi-head ingestion options # (the default batch size is 10k) sharded_table = gpudb.GPUdbTable( sharded_columns, db = h_db, use_multihead_ingest = True, multihead_ingest_batch_size = 33 ) # Generate some random data to be inserted num_records = 100 null_likelihood = 10 alphanum = (string.ascii_letters + string.digits) for i in range(0, num_records): record = collections.OrderedDict() record[ "city" ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] ) record[ "state_province"] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 0, 2 ) )] ) record[ "country" ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] ) record[ "airport" ] = None if (random.random() < null_likelihood) \ else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] ) record[ "x" ] = random.uniform( -180, 180 ) record[ "y" ] = random.uniform( -90, 90 ) record[ "avg_temp" ] = random.uniform( -40, 110 ) record[ "time_zone" ] = "UTC-{}".format( random.randint( -11, 14 ) ) sharded_table.insert_records( record ) # end loop print ( "Size of sharded table (expect less than 100 as the batch size is 33 and \n100 is not a multiple of 33): ", sharded_table.size() ) print () print ( "Flushing the records remaining in the ingestor queue...") sharded_table.flush_data_to_server() print ( "Size of sharded table post forced flush (expect 100): ", sharded_table.size() ) print ()
["humidity", "float"], ["temperature", "float"], ["windspeed", "float"], ["weather", "string"], ["timestamp", "string", "datetime"], ] db_conn_str="http://172.30.50.78:9191" db_user = "******" db_pass = "******" try: if db_user == 'no_cred' or db_pass == 'no_cred': db=gpudb.GPUdb(encoding='BINARY', host=db_conn_str) else: db=gpudb.GPUdb(encoding='BINARY', host=db_conn_str, username=db_user, password=db_pass) if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']: print('Table exists {0:s}'.format(TBL_NAME_toll_stream)) else: status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream, name=TBL_NAME_toll_stream, db=db, options={'collection_name': 'traffic'}) print(status)
def aggregate(): weather_table_name = "weather" weather_w_view = "weather_west" weather_nw_view = "weather_northwest" weather_country_view = "weather_country" weather_e_view = "weather_east" weather_se_view = "weather_southeast" weather_h_view = "weather_histogram" h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191') columns = [["city", "string", "char16"], [ "state_province", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR32 ], [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ], ["x", "double"], ["y", "double"], ["avg_temp", "double"], ["time_zone", "string", "char8"]] if h_db.has_table(table_name=weather_table_name)['table_exists']: h_db.clear_table(weather_table_name) try: weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db) print("Table successfully created.") except gpudb.GPUdbException as e: print("Table creation failure: {}".format(str(e))) weather_table_duplicate = gpudb.GPUdbTable(None, weather_table_name, db=h_db) print("\n") print("INSERTING DATA") print("--------------") print() datum = collections.OrderedDict() datum["city"] = "Washington, D.C." datum["state_province"] = "--" datum["country"] = "USA" datum["x"] = -77.016389 datum["y"] = 38.904722 datum["avg_temp"] = 58.5 datum["time_zone"] = "UTC-5" weather_table.insert_records(datum) datum2 = collections.OrderedDict() datum2["city"] = "Washington, D.C." datum2["state_province"] = "--" datum2["country"] = "USA" datum2["x"] = -77.016389 datum2["y"] = 38.904722 datum2["avg_temp"] = 58.5 datum2["time_zone"] = "UTC-5" weather_record_type = weather_table.get_table_type() single_record = [gpudb.GPUdbRecord(weather_record_type, datum).binary_data] response = h_db.insert_records(table_name=weather_table_name, data=single_record, list_encoding="binary") print("Number of single records inserted: {}".format( response["count_inserted"])) records = [] records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"]) records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"]) records.append([ "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4" ]) records.append([ "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77, "UTC-7" ]) records.append( ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"]) records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"]) records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"]) records.append( ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"]) records.append( ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"]) records.append( ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"]) records.append([ "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44, "UTC-4" ]) records.append([ "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3" ]) records.append([ "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62, "UTC-4" ]) records.append([ "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3" ]) records.append( ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"]) records.append([ "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5, "UTC+10" ]) records.append( ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"]) records.append( ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"]) records.append( ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"]) records.append([ "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68, "UTC+8" ]) #get histogram of cities divides by intervals of 20, from 40 to 80 (in terms of temperature) s_view = weather_table.filter(expression="y < 0") histogram_result = s_view.aggregate_histogram(column_name="avg_temp", start=40, end=80, interval=20) print( "Number of southern hemisphere cities with average temps in the given ranges:" ) for histogroup in zip([40, 60, 80], histogram_result['counts']): print("\t{}s: {:2.0f}".format(*histogroup)) print()
def gpudb_example(): """An example of how to use the GPUdb python API. """ new_line = '\n' # handle to database h_db = gpudb.GPUdb(encoding = 'BINARY', host = '127.0.0.1', port = '9191') my_table='my_table_1' # Data type for the table my_type = """ { "type": "record", "name": "my_type_1", "fields": [ {"name":"col1","type":"double"}, {"name":"col2","type":"string"}, {"name":"group_id","type":"string"} ] } """.replace(' ','').replace('\n','') # Create the data type in the DB response = h_db.create_type( type_definition = my_type, label = 'my_type_lb_1' ) type_id_1 = response['type_id'] print 'GPUdb generated type id for the new type - ', type_id_1, new_line # Create a table with the given data type response = h_db.create_table( table_name = my_table, type_id = type_id_1 ) # Generate data to be inserted into the table encoded_obj_list = [] for val in range(1,10): datum = collections.OrderedDict() datum["col1"] = val+0.1 datum["col2"] = 'string '+str(val) datum["group_id"] = 'Group 1' # Encode the data appropriately to prepare for insertion encoded_obj_list.append(h_db.encode_datum(my_type, datum)) # Optional parameter that enables returning IDs for the # newly inserted records options = {'return_record_ids':'true'} # Insert the records into the table response = h_db.insert_records( table_name = my_table, objects = encoded_obj_list, list_encoding = 'binary', options = options ) print "Record Ids for %d new records - %s" % (response['count_inserted'], response['record_ids']), new_line # Retrieve records from a table. Note that the records are stringified and have to be parsed response = h_db.get_records(table_name=my_table, offset=0, limit=100, encoding='json', options={}) print "Returned records ", response['records_json'], new_line # Filter records into a view. Response contains the count only response = h_db.filter(table_name=my_table, view_name='my_table_view', expression='col1 = 1.1') print "Number of records returned by filter expresion ", response['count'], new_line # Read the filtered records from the view (exactly as reading from table) response = h_db.get_records( table_name = 'my_table_view', offset = 0, limit = 100, encoding = 'json', options = {} ) print "Filtered records ", response['records_json'], new_line # Drop the view h_db.clear_table('my_table_view') # Filter expression with two columns on the original table response = h_db.filter(my_table,'my_table_view','col1 <= 9 and group_id="Group 1"') print "Number of records returned by second filter expresion ", response['count'], new_line # Fetch the records from the view response = h_db.get_records( table_name = 'my_table_view', offset = 0, limit = 100, encoding = 'json', options = {} ) print "Returned records ", response['records_json'], new_line # Filter by a list. query is executed on resultset from previous query (query chaining) response = h_db.filter_by_list( table_name = 'my_table_view', view_name = 'my_table_view_2', column_values_map = {'col1': ['1.1', '2.1', '5.1' ] } ) print "Number of records returned by filter expresion ", response['count'], new_line # Fetch the records response = h_db.get_records( table_name = 'my_table_view_2', offset = 0, limit = 100, encoding = 'json', options = {} ) print "Returned records filtered by list: ", response['records_json'], new_line # filter a range of values (numeric values only) response = h_db.filter_by_range( table_name = my_table, view_name = 'my_table_view_3', column_name = 'col1', lower_bound = 1, upper_bound = 5 ) print "Number of records returned by filter expresion ", response['count'], new_line response = h_db.get_records(table_name='my_table_view_3', offset=0, limit=100, encoding='binary', options={}) # Decoding the binary encoded response print "Returned records filtered by range: " parsed_schema = schema.parse( response['type_schema'] ) reader = io.DatumReader( parsed_schema ) for bin_record in response['records_binary']: str_IO = cStringIO.StringIO( bin_record ) bin_decoder = io.BinaryDecoder( str_IO ) decoded_response = reader.read( bin_decoder ) print decoded_response, new_line response = h_db.aggregate_statistics(table_name=my_table, column_name='col1', stats='count,sum,mean') print "Statistics of values in col1 ", response['stats'], new_line encoded_obj_list=[] for val in range(1,8): datum = collections.OrderedDict() datum["col1"] = val+10.1 datum["col2"] = 'string '+str(val) datum["group_id"] = 'Group 2' encoded_obj_list.append(h_db.encode_datum(my_type, datum)) h_db.insert_records(my_table,encoded_obj_list,'binary',{}) # find unique values in a column response = h_db.aggregate_unique( table_name = my_table, column_name = 'group_id', offset = 0, limit = 20, encoding = 'json') print 'Unique values in group_id column ', response['json_encoded_response'], new_line # Group by groupby_col_names = ['col2'] retval = h_db.aggregate_group_by(table_name=my_table, column_names=groupby_col_names, offset=0, limit=1000,encoding='json') print "Group by results ", retval['json_encoded_response'], new_line # aggregate values groupby_col_names = ['group_id', "count(*)", 'sum(col1)', 'avg(col1)'] retval = h_db.aggregate_group_by(table_name=my_table, column_names=groupby_col_names, offset=0, limit=1000,encoding='json') print "Group by results ", retval['json_encoded_response'], new_line # Do another aggregate group by operation groupby_col_names = ['group_id', 'sum(col1*10)'] retval = h_db.aggregate_group_by( table_name = my_table, column_names = groupby_col_names, offset = 0, limit = 1000, encoding = 'json') print "Group by results ", retval['json_encoded_response'], new_line # Add more data for val in range(4,10): datum = collections.OrderedDict() datum["col1"] = val+0.6 datum["col2"] = 'string 2'+str(val) datum["group_id"] = 'Group 1' encoded_obj_list.append(h_db.encode_datum(my_type, datum)) h_db.insert_records( table_name = my_table, objects = encoded_obj_list, list_encoding = 'binary', options = {} ) histogram_result = h_db.aggregate_histogram( table_name = my_table, column_name = 'col1', start = 1.1, end = 2, interval = 1 ) print "histogram result:", histogram_result, new_line # Drop the table (will automatically drop all views on the table) h_db.clear_table(my_table) # Check that clearing a table automatically drops all the dependent views response = h_db.get_records( table_name = 'my_table_view', offset = 0, limit = 100, encoding = 'json', options = {}) assert (response['status_info']['status'] == "ERROR"), \ "Problem: View on deleted table found!!!" print ("Response status and message : ", response['status_info']['status'], ' - ', response['status_info']['message'], new_line)
HOST_IP = "127.0.0.1" def main(db_handle): proc_name = 'H2o_rf_train_test' file_paths = ["h2o_rf_train_test.py", "../../kinetica_proc.py" ] # put the main python script in the first place # Read proc code in as bytes and add to a file data array files = {} for script_path in file_paths: script_name = os.path.basename(script_path) with open(script_path, 'rb') as f: files[script_name] = f.read() # Remove proc if it exists from a prior registration if db_handle.has_proc(proc_name)['proc_exists']: db_handle.delete_proc(proc_name) print("Registering proc...") response = db_handle.create_proc(proc_name, 'distributed', files, 'python', [file_paths[0]], {}) print(response) print("Executing proc...") response = db_handle.execute_proc(proc_name, {}, {}, [te.LOAN_TRAIN_DATA_TABLE_NAME], {}, [], {}) print(response) if __name__ == "__main__": main(gpudb.GPUdb(encoding='BINARY', host=HOST_IP, port='9191'))
def __init__(self, bb_module, bb_method, schema_inbound, schema_outbound, zmq_dealer_host, zmq_dealer_port, db_table_audit, db_table_results, db_conn_str, db_user = "", db_pass = "", be_quiet = False ): """Construct a new KineticaBlackBox object. :param bb_module: :type bb_module: dict :param bb_method: :type bb_method: dict :param schema_inbound: :type schema_inbound: str :param schema_outbound: :type schema_outbound: str :param zmq_dealer_host: :type zmq_dealer_host: str :param zmq_dealer_port: :type zmq_dealer_port: str :param db_table_audit: :type db_table_audit: str :param db_table_results: :type db_table_results: str :param db_host: Default "127.0.0.1". :type db_host: str :param db_port: Default "9191". :type db_port: str :param db_user: optional :type db_user: str :param db_pw: optional :type db_pw: str """ logger.info("Initializing KineticaBlackBox") logger.info(f"zmq_dealer_host: {zmq_dealer_host}") logger.info(f"zmq_dealer_port: {zmq_dealer_port}") logger.info(f"db_table a: {db_table_audit}") logger.info(f"db_table r: {db_table_results}") logger.info(f"db_conn_str: {db_conn_str}") logger.info(f"db_user: {db_user}") logger.info(f"db_pass: *******") logger.info(f"schema_inbound: {schema_inbound}") logger.info(f"schema_outbound: {schema_outbound}") logger.info(f"bb_module: {bb_module}") logger.info(f"bb_method: {bb_method}") if be_quiet: import logging logger.setLevel(logging.INFO) self.be_quiet = be_quiet self.schema_inbound = schema_inbound self.schema_outbound = schema_outbound self.bb_module = bb_module self.bb_method = bb_method # Prepare DB Communications logger.info(f"Attempting to connect to DB at {db_conn_str} to push to {db_table_audit}") if db_user == 'no_cred' or db_pass == 'no_cred': db=gpudb.GPUdb(encoding='BINARY', host=db_conn_str) else: db=gpudb.GPUdb(encoding='BINARY', host=db_conn_str, username=db_user, password=db_pass) self.sink_table_audit = gpudb.GPUdbTable(name = db_table_audit, db = db) self.db = db logger.info(f"DB Results Table {db_table_results}") if db_table_results == "NOT_APPLICABLE": logger.info(f"All results will be persisted to Audit DB Table {db_table_audit}") self.sink_table_results = None else: self.sink_table_results = gpudb.GPUdbTable(name = db_table_results, db = db) logger.info(f"Established connection to sink table") logger.info(self.sink_table_results) logger.info(self.sink_table_results) if self.sink_table_results is None: logger.info(f"All results will be persisted to Audit DB Table only") else: logger.info(f"All results will be persisted to both Audit and output DB Tables {db_table_results}") logger.info("Prepping response with with schema") logger.info(json.dumps(json.loads(schema_outbound))) # Prepare ZMQ Communications zmq_dealer_uri = f"tcp://{zmq_dealer_host}:{zmq_dealer_port}" context = zmq.Context() self.socket = context.socket(zmq.PULL) #logger.info("Listening for incoming requests on topic: %s via %s" % (topicfilter,topic_source)) self.socket.connect(zmq_dealer_uri)
def retrieve(): weather_table_name = "weather" weather_w_view = "weather_west" weather_nw_view = "weather_northwest" weather_country_view = "weather_country" weather_e_view = "weather_east" weather_se_view = "weather_southeast" weather_h_view = "weather_histogram" h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191') columns = [["city", "string", "char16"], [ "state_province", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR32 ], [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ], ["x", "double"], ["y", "double"], ["avg_temp", "double"], ["time_zone", "string", "char8"]] if h_db.has_table(table_name=weather_table_name)['table_exists']: h_db.clear_table(weather_table_name) try: weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db) print("Table successfully created.") except gpudb.GPUdbException as e: print("Table creation failure: {}".format(str(e))) weather_table_duplicate = gpudb.GPUdbTable(None, weather_table_name, db=h_db) print("\n") print("INSERTING DATA") print("--------------") print() datum = collections.OrderedDict() datum["city"] = "Washington, D.C." datum["state_province"] = "--" datum["country"] = "USA" datum["x"] = -77.016389 datum["y"] = 38.904722 datum["avg_temp"] = 58.5 datum["time_zone"] = "UTC-5" weather_table.insert_records(datum) datum2 = collections.OrderedDict() datum2["city"] = "Washington, D.C." datum2["state_province"] = "--" datum2["country"] = "USA" datum2["x"] = -77.016389 datum2["y"] = 38.904722 datum2["avg_temp"] = 58.5 datum2["time_zone"] = "UTC-5" weather_record_type = weather_table.get_table_type() single_record = [gpudb.GPUdbRecord(weather_record_type, datum).binary_data] response = h_db.insert_records(table_name=weather_table_name, data=single_record, list_encoding="binary") print("Number of single records inserted: {}".format( response["count_inserted"])) records = [] records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"]) records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"]) records.append([ "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4" ]) records.append([ "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77, "UTC-7" ]) records.append( ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"]) records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"]) records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"]) records.append( ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"]) records.append( ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"]) records.append( ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"]) records.append([ "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44, "UTC-4" ]) records.append([ "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3" ]) records.append([ "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62, "UTC-4" ]) records.append([ "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3" ]) records.append( ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"]) records.append([ "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5, "UTC+10" ]) records.append( ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"]) records.append( ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"]) records.append( ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"]) records.append([ "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68, "UTC+8" ]) weather_table.insert_records(records) weatherLocs = h_db.get_records(table_name=weather_table_name, offset=0, limit=10, encoding="json", options={"sort_by": "city"})['records_json'] for weatherLoc in weatherLocs: print( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f} {time_zone}" .format(**json.loads(weatherLoc))) response = h_db.get_records(table_name=weather_table_name, offset=10, limit=25, encoding="binary", options={"sort_by": "city"}) weatherLocs = gpudb.GPUdbRecord.decode_binary_data( response["type_schema"], response["records_binary"]) for weatherLoc in weatherLocs: print( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f} {time_zone}" .format(**weatherLoc))
import gpudb a = gpudb.GPUdb() a.execute_sql_and_decode
# import sys # sys.path.append("/Users/zhewu/PycharmProjects/learning/pandasKinetica") # from kdf import kdf # from kdfconn import kdf # from kdfconn import kdf import pandas as pd import gpudb conn = gpudb.GPUdb(encoding='BINARY', host="k.yamei.info", port="9191") # df = kDataFrame(pd.read_csv("./pandasKinetica/template.csv"), conn) df = kdf(conn) df.from_pandas(pd.read_csv("./tmp/tmp.test3.csv")) # df.read_table("est6") df.to_table("est17", charN_On=True, timeStampColumn="ValidDateTime") # df.to_table("est9",charN_On=False, timeStampColumn="ValidDateTime") ############################################################################################### from kdfconn import kdf import pandas as pd import numpy as np import gpudb conn = gpudb.GPUdb(encoding='BINARY', host="k.yamei.info", port="9191") # df = kDataFrame(pd.read_csv("./pandasKinetica/template.csv"), conn)
# File: kapi_io.py # Purpose: I/O of between dataframes and Kinetica with native API. # Author: Chad Juliano # Date: 07/20/2018 ############################################################################### import numpy as np import pandas as pd import gpudb import sys KDBC = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191') KAPI_TYPE_MAP = { 'int64': gpudb.GPUdbRecordColumn._ColumnType.LONG, 'int32': gpudb.GPUdbRecordColumn._ColumnType.INT, 'int16': gpudb.GPUdbRecordColumn._ColumnType.INT, 'float64': gpudb.GPUdbRecordColumn._ColumnType.DOUBLE, 'float32': gpudb.GPUdbRecordColumn._ColumnType.FLOAT, 'object': gpudb.GPUdbRecordColumn._ColumnType.STRING } def get_coldef(_col_name, _np_dtype, _col_props): """Convert a Numpy type to Kinetica type.""" if (str(_np_dtype) not in KAPI_TYPE_MAP): raise Exception('Type not supported: {}'.format(_np_dtype)) _k_type = KAPI_TYPE_MAP[str(_np_dtype)] _k_properties = []
import gpudb import os, glob import pandas as pd import numpy as np import datetime import random # Establish connection with a locally-running instance of Kinetica using # binary encoding to save memory h_db = gpudb.GPUdb( encoding="BINARY", host="172.31.33.26", port="9191", username="******", password="******") # Confirm target table exists if h_db.has_table(table_name="stores")['table_exists']: print("Table successfully reached.") else: print("Table not found.") # Pull data from target and store as variable data = h_db.get_records( table_name="stores", offset=0, limit=gpudb.GPUdb.END_OF_SET, encoding="binary") # Hydrate pandas df, adding a datetime column as a start date for the desired month store_df = pd.DataFrame(gpudb.GPUdbRecord.decode_binary_data(data["type_schema"], data["records_binary"]))