def main():
    try:
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
        else:
            db = gpudb.GPUdb(encoding='BINARY',
                             host=db_conn_str,
                             username=db_user,
                             password=db_pass)

        if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']:
            print('Table exists {0:s}'.format(TBL_NAME_toll_stream))

        else:
            status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream,
                                      name=TBL_NAME_toll_stream,
                                      db=db,
                                      options={'collection_name': 'traffic'})
            print(status)
            print('\tTable created {0:s}'.format(TBL_NAME_toll_stream))

        test_insert()

    except gpudb.gpudb.GPUdbException as e:
        print(e)
Example #2
def get_conn_db(db_conn_str, db_user, db_pass):
    # Prepare DB Communications
    logger.info(
        f"Attempting to connect to DB at {db_conn_str} to push to {tbl_out_audit}"
    )
    if db_user == 'no_cred' or db_pass == 'no_cred':
        cn_db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
    else:
        cn_db = gpudb.GPUdb(encoding='BINARY',
                            host=db_conn_str,
                            username=db_user,
                            password=db_pass)
    return cn_db
Example #3
def main():
    logger = logging.getLogger('Price consumer')
    logger.setLevel(logging.DEBUG)
    #logging.basicConfig(filename='myapp.log', level=logging.INFO)
    consumer = KafkaConsumer('prices')

    db = gpudb.GPUdb(encoding='BINARY', host='kinetica-saif')
    columns = [
        ["TIMESTAMP", "long", "timestamp"],
        ["tickid", "long", "primary_key"],
        ["TICKER", "string", "char32"],
        ["DESC", "string", "char32"],
        ["PX_LAST", "float"],
    ]
    px_table = gpudb.GPUdbTable(columns, 'PRICES', db=db)

    print("Eating off queue on topic %s " % TOPIC)

    for msg in consumer:
        quote = json.loads(msg.value)
        theTimestamp = int(round(time.time() * 1000))
        logger.critical(quote)
        print(
            px_table.insert_records(theTimestamp, quote['tickid'],
                                    quote['ticker'], quote['desc'],
                                    quote['pxlast']))

    print("DONE Eating off queue on topic %s " % TOPIC)
Example #4
def main():
    """
    Prepares database for storing Meetup.com RSVPs by creating all required tables.
    """
    db = gpudb.GPUdb(host=config.GPUDB_HOST, port=config.GPUDB_PORT)
    create_event_rsvp_table(db)
    add_city_column(db)
    add_prediction_columns(db)
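# create_event_rsvp_table, add_city_column, and add_prediction_columns are defined
# elsewhere in the project. As a rough, hedged sketch of the pattern the first
# helper likely follows (the table name and column layout below are assumptions,
# not the project's actual schema):
import gpudb

def create_event_rsvp_table(db, table_name="event_rsvp"):
    """Sketch only: create the RSVP table if it does not already exist."""
    columns = [
        ["event_id", "string", "char64"],
        ["rsvp_id", "long", "primary_key"],
        ["response", "string", "char8"],
        ["rsvp_time", "long", "timestamp"],
    ]
    if db.has_table(table_name=table_name)['table_exists']:
        print('Table exists {0:s}'.format(table_name))
        return
    gpudb.GPUdbTable(columns, table_name, db=db)
    print('Table created {0:s}'.format(table_name))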
Example #5
def get_conn_db(db_conn_str, db_user, db_pass):
    """Prepare DB Communications."""

    logger.info(f"Attempting to connect to database at {db_conn_str}. "
                f"Pushing to table {tbl_out_audit}")

    if db_user == 'no_cred' or db_pass == 'no_cred':
        cn_db = gpudb.GPUdb(encoding='BINARY',
                            host=db_conn_str,
                            primary_host=db_conn_str)
    else:
        cn_db = gpudb.GPUdb(encoding='BINARY',
                            host=db_conn_str,
                            primary_host=db_conn_str,
                            username=db_user,
                            password=db_pass)

    return cn_db
def test_insert():

    test_insert_a = [(3100, 3100, "E", 0.00, 1.87, 46, 33.9, 1.34, "medium"),
                     (3100, 3100, "E", 1.42, 1.87, 46, 33.9, 1.34, "heavy")]
    test_insert_b = [(3200, 3210, "E", 0.00, 1.87, 146, 33.9, 1.34, "medium"),
                     (3200, 3330, "E", 1.42, 1.87, 146, 33.9, 1.34, "heavy")]

    try:
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
        else:
            db = gpudb.GPUdb(encoding='BINARY',
                             host=db_conn_str,
                             username=db_user,
                             password=db_pass)

        insertables = []
        for (sz, ez, direction, tp1, tp2, humd, tempr, wnd,
             wthr) in test_insert_a:
            insertable = collections.OrderedDict()
            insertable["startzoneid"] = sz
            insertable["endzoneid"] = ez
            insertable["direction"] = direction
            insertable["toll_prev_1"] = tp1
            insertable["toll_prev_2"] = tp2
            insertable["humidity"] = humd
            insertable["temperature"] = tempr
            insertable["windspeed"] = wnd
            insertable["weather"] = wthr
            insertables.append(insertable)

        sink_table = gpudb.GPUdbTable(name=TBL_NAME_toll_stream, db=db)
        insert_status = sink_table.insert_records(insertables)

        print(insert_status)
        print('\tSample records inserted ')

    except gpudb.gpudb.GPUdbException as e:

        print(e)
Example #7
def main():
    db = gpudb.GPUdb('159.69.39.8')
    logger = kmllogger.attach_log()
    in_map = {
        'city': 'New York',
        'country': 'us',
        'day_of_week': 6,
        'hour': 18,
        'group_events': 100,
        'group_members': 4000
    }
    out_map = bb_module_default.predict(in_map, db, logger)
    print(out_map)
Example #8
def multithread():
    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    sharded_columns = [
        ["city", "string", "char16"],
        ["state_province", "string", "char2", "shard_key"],  # shard key column
        [
            "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
            gpudb.GPUdbColumnProperty.CHAR16
        ],
        ["airport", "string", "nullable"],  # a nullable column
        ["x", "double"],
        ["y", "double"],
        ["avg_temp", "double"],
        ["time_zone", "string", "char8", "shard_key"]  # shard key column
    ]

    sharded_table = gpudb.GPUdbTable(sharded_columns,
                                     db=h_db,
                                     use_multihead_ingest=True,
                                     multihead_ingest_batch_size=33)

    num_records = 500
    null_likelihood = 10
    alphanum = (string.ascii_letters + string.digits)
    for i in range(0, num_records):
        record = collections.OrderedDict()
        record["city"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(5, 16))])
        record["state_province"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(0, 2))])
        record["country"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(5, 16))])
        record[ "airport"       ] = None if (random.random() < null_likelihood) \
                                    else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] )
        record["x"] = random.uniform(-180, 180)
        record["y"] = random.uniform(-90, 90)
        record["avg_temp"] = random.uniform(-40, 110)
        record["time_zone"] = "UTC-{}".format(random.randint(-11, 14))
        sharded_table.insert_records(record)
    # end loop

    sharded_table.flush_data_to_server()
Example #9
def main():
    """
    Start listening to the Meetup.com RSVP stream (uses WebSockets).
    If an RSVP is received, store it in the database.
    """
    db = gpudb.GPUdb(host=config.GPUDB_HOST, port=config.GPUDB_PORT)
    rsvp_queue = multiprocessing.Queue()
    event_info_provider = apiutils.EventInfoProvider(db)

    storer = threading.Thread(target=store_rsvps,
                              kwargs={
                                  'db': db,
                                  'queue': rsvp_queue,
                                  'event_info_provider': event_info_provider
                              })
    storer.start()

    websocket.enableTrace(False)
    on_message = functools.partial(add_to_storing_queue, queue=rsvp_queue)
    ws = websocket.WebSocketApp(config.MEETUP_API_RSVP_ENDPOINT,
                                on_message=on_message)
    ws.run_forever()
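# add_to_storing_queue and store_rsvps are defined elsewhere in the project. A
# minimal, hedged sketch of what such helpers could look like given how they are
# wired up above; the table name and the enrichment step are assumptions:
import json
import gpudb

def add_to_storing_queue(ws, message, queue):
    """Sketch only: push each raw RSVP JSON message onto the multiprocessing queue."""
    queue.put(json.loads(message))

def store_rsvps(db, queue, event_info_provider):
    """Sketch only: drain the queue and insert each RSVP into Kinetica."""
    # "event_rsvp" is a placeholder; the real table name comes from the setup script.
    rsvp_table = gpudb.GPUdbTable(None, "event_rsvp", db=db)
    while True:
        rsvp = queue.get()
        # A real implementation would use event_info_provider to enrich the RSVP
        # (e.g. resolving the event's city) and map its fields onto the table's
        # columns before inserting; that mapping is omitted in this sketch.
        rsvp_table.insert_records(rsvp)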
Example #10
COLLECTION = "RISK"
NEW_TABLE = "bs_stream"
HOST = "<ipaddress>"
ENCODING = "binary"
PORT = "9191"
DATA_PACK = 1
INGEST_FREQ = 3

"Execute python scripts on Kinetica servers using"
"/opt/gpudb/bin/gpudb_python"

# Establish connection to database with necessary credentials
# Pull data from Kinetica and put it directly into a Pandas df
h_db = gpudb.GPUdb(encoding=ENCODING,
                   host=HOST,
                   port=PORT,
                   username="******",
                   password="******")

if h_db.has_table(table_name=TABLE)['table_exists']:
    print("Table successfully reached.")
else:
    print("Table not found.")

# Pull data from Kinetica and put it directly into a Pandas df
data = h_db.get_records(table_name=TABLE,
                        offset=0,
                        limit=gpudb.GPUdb.END_OF_SET,
                        encoding=ENCODING)
df = pd.DataFrame(
    gpudb.GPUdbRecord.decode_binary_data(data["type_schema"],
Example #11
import collections
import sys
from datetime import datetime

import gpudb
""" The constants and functions provided here are used in examples and unit test implementations for the UDF API.
    They cover two features: ingesting (and deleting) test data in a table on an actual Kinetica instance, and
    initializing a UDF simulator (and cleaning up its artifacts) so that actual UDFs do not have to be deployed
    just to run examples or unit tests. Note that the UDF simulator requires the test data to be ingested,
    which means the ingestion has to happen first.
"""

# Adjust these constants to reflect your Kinetica installation that you can use for running examples and unit tests.
HOST = 'http://localhost'
PORT = '9191'
USER = '******'
PASSWORD = '******'
DB_HANDLE = gpudb.GPUdb(encoding='BINARY', host=HOST, port=PORT)

# This is the test data that examples and unit tests will use. All data and tables will be deleted after the
# examples or unit tests have run.
TEST_DATA_TABLE_NAME_1 = 'unittest_toy_data'
TEST_DATA_TYPE_1 = [['id', gpudb.GPUdbRecordColumn._ColumnType.INT],
                    ['value_long', gpudb.GPUdbRecordColumn._ColumnType.LONG],
                    ['value_float', gpudb.GPUdbRecordColumn._ColumnType.FLOAT]]
TEST_DATA_RECORDS_1 = [[1, 123, 1.2], [2, 1234, 1.3], [3, 12345, 1.4],
                       [4, 123456, 1.5], [5, 1234567, 1.6]]
TEST_DATA_TABLE_NAME_2 = 'unittest_toy_data2'
TEST_DATA_TYPE_2 = [['id', gpudb.GPUdbRecordColumn._ColumnType.INT],
                    ['value_float', gpudb.GPUdbRecordColumn._ColumnType.FLOAT]]
TEST_DATA_RECORDS_2 = [[6, 2.2], [7, 2.3], [8, 2.4], [9, 2.5], [10, 2.6]]
TEST_OUTPUT_TABLE_NAME = 'unittest_df_output'
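# The docstring above mentions that the test data has to be ingested before the
# UDF simulator is used. A minimal, hedged sketch of such an ingestion helper
# using the constants defined above (the helper name and the drop/recreate
# behaviour are assumptions):
def ingest_test_data():
    """Sketch only: (re)create the two toy tables and load the records above."""
    for name, col_type, rows in [
            (TEST_DATA_TABLE_NAME_1, TEST_DATA_TYPE_1, TEST_DATA_RECORDS_1),
            (TEST_DATA_TABLE_NAME_2, TEST_DATA_TYPE_2, TEST_DATA_RECORDS_2)]:
        if DB_HANDLE.has_table(table_name=name)['table_exists']:
            DB_HANDLE.clear_table(name)
        table = gpudb.GPUdbTable(col_type, name, db=DB_HANDLE)
        table.insert_records(rows)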
        output_table_names=[OUTPUT_TABLE],
        options={}
    )
    print("Proc executed successfully:")
    print(response)
    print("Check the system log or 'gpudb-proc.log' for execution information")
    print("")

# end python_tc_udf_exec()
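# Only the tail of python_tc_udf_exec() survives above. As a rough, hedged sketch
# of the register-and-execute flow it appears to follow (the proc name, file
# names, and table names below are placeholders, not the original's values):
def python_tc_udf_exec_sketch():
    """Sketch only: register a distributed table-copy proc and run it."""
    proc_name = 'python_tc_udf'                                # placeholder
    files = {}
    for fname in ['table_copy_proc.py', 'kinetica_proc.py']:  # placeholder file names
        with open(fname, 'rb') as f:
            files[fname] = f.read()

    # Remove any prior registration, then register and execute the proc
    if kinetica.has_proc(proc_name)['proc_exists']:
        kinetica.delete_proc(proc_name)

    kinetica.create_proc(proc_name, 'distributed', files, 'python',
                         ['table_copy_proc.py'], {})
    response = kinetica.execute_proc(proc_name, {}, {},
                                     ['INPUT_TABLE_PLACEHOLDER'], {},
                                     ['OUTPUT_TABLE_PLACEHOLDER'], {})
    print(response)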


if __name__ == '__main__':

    # Set up args
    parser = argparse.ArgumentParser(
        description='Execute the table copy Python UDF example.')
    parser.add_argument('--host', default='127.0.0.1',
                        help='Kinetica host to run example against')
    parser.add_argument('--username', default='',
                        help='Username of user to run example with')
    parser.add_argument('--password', default='', help='Password of user')

    args = parser.parse_args()

    # Establish connection with a locally-running instance of Kinetica
    kinetica = gpudb.GPUdb(host=['http://' + args.host + ':9191'],
                           username=args.username, password=args.password)

    # Execute defined functions
    python_tc_udf_exec()
Example #13
from io import StringIO
import collections
from avro import schema, io
import os
from PIL import Image

import gpudb

# Init
table_name = "imageTableInference"
collection = "MASTER"
tiffFolder = "./imagesTIF/"
jpegFolder = "./imagesJPEG/"
gpudb_host = "172.31.33.26"
h_db = gpudb.GPUdb(encoding='BINARY', host=gpudb_host, port='9191')
my_type = """
{
   "type": "record",
   "name": "image",
   "fields": [
   {"name":"image","type":"bytes"}
   ]
}""".replace('\n', '').replace(' ', '')


def creatTable(type_def=my_type,
               table_name="imageTableInference",
               collection="MASTER"):
    response = h_db.create_type(type_definition=type_def, label='image')
    type_id = response['type_id']
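# The snippet above cuts off before the table is created. As a rough, hedged
# sketch of the ingestion step it appears to lead up to, images from jpegFolder
# could be inserted as raw-bytes records (this assumes the table already exists
# with the single 'image' bytes column defined in my_type):
def insert_images(folder=jpegFolder, table=table_name):
    """Sketch only: insert every file in `folder` as a raw-bytes record."""
    image_table = gpudb.GPUdbTable(None, table, db=h_db)
    for file_name in os.listdir(folder):
        with open(os.path.join(folder, file_name), 'rb') as f:
            image_table.insert_records(collections.OrderedDict(image=f.read()))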
Example #14
def output():
    if "KINETICA_PCF" not in os.environ:
        raise RuntimeError("No control file specified")

    icf = os.environ["KINETICA_PCF"]

    if not os.path.exists(icf):
        raise RuntimeError("Specified control file does not exist")

    icf = open(icf, "rb")

    if read_uint64(icf) != 1:
        raise RuntimeError("Unrecognized control file version")

    read_dict(icf)
    read_dict(icf)
    read_dict(icf)
    read_dict(icf)

    for io in range(0, 2):
        for i in range(0, read_uint64(icf)):
            read_string(icf)

            for j in range(0, read_uint64(icf)):
                read_string(icf)
                read_uint64(icf)
                read_string(icf)
                read_string(icf)
                read_string(icf)

        if io == 0:
            output_pos = icf.tell()

    ocf = read_string(icf)

    if os.path.getsize(ocf) == 0:
        raise RuntimeError("No output detected")

    ocf = open(ocf, "rb")

    if read_uint64(ocf) != 1:
        raise RuntimeError("Unrecognized output control file version")

    results = read_dict(ocf)

    if results:
        print("Results:")
        print("")

        for key, value in results.items():
            print(key + ": " + value)

        print("")
    else:
        print("No results")

    icf.seek(output_pos)

    table_count = read_uint64(icf)

    if table_count > 0:
        print("Output:")
        print("")

        db = gpudb.GPUdb(encoding="BINARY", host=args.url, username=args.username, password=args.password)

        for i in range(0, table_count):
            read_table(icf, db)
    else:
        print("No output")
Example #15
def execute():
    if args.distributed and args.nondistributed:
        parser.error("-d/--distributed and -n/--nondistributed are mutually exclusive")

    if args.nondistributed and args.input:
        parser.error("-n/--nondistribtued and -i/--input are mutually exclusive")

    if args.nondistributed and args.output:
        parser.error("-n/--nondistributed and -o/--output are mutually exclusive")

    if args.input or args.output:
        args.distributed = True
    elif not args.distributed:
        args.nondistributed = True

    icf = tempfile.NamedTemporaryFile(prefix="kinetica-udf-sim-icf-", dir=args.path, delete=False)

    write_uint64(icf, 1)

    icf_info = {}
    icf_info["run_id"] = "0"
    icf_info["proc_name"] = "proc"

    if args.distributed:
        icf_info["rank_number"] = "1"
        icf_info["tom_number"] = "0"
    else:
        icf_info["rank_number"] = "0"

    icf_info["data_segment_id"] = "0"
    icf_info["data_segment_number"] = "0"
    icf_info["data_segment_count"] = "1"
    icf_info["head_url"] = args.url
    icf_info["username"] = args.username
    icf_info["password"] = args.password
    write_dict(icf, icf_info)

    write_dict(icf, {})

    icf_params = {}

    if args.param:
        for param in args.param:
            icf_params[param[0]] = param[1]

    write_dict(icf, icf_params)

    write_dict(icf, {})

    if args.input or args.output:
        db = gpudb.GPUdb(encoding="BINARY", host=args.url, username=args.username, password=args.password)

    if args.input:
        write_uint64(icf, len(args.input))

        for table in args.input:
            write_table(icf, db, table, True)
    else:
        write_uint64(icf, 0)

    if args.output:
        write_uint64(icf, len(args.output))

        for table in args.output:
            write_table(icf, db, [table], False)
    else:
        write_uint64(icf, 0)

    write_string(icf, tempfile.NamedTemporaryFile(prefix="kinetica-udf-sim-", dir=args.path, delete=False).name)

    print("export KINETICA_PCF=" + icf.name)
Example #16
def gpudb_example():
    
    print ( "TUTORIAL OUTPUT")
    print ( "===============\n")

    # all tables/views used in examples below
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    """ Establish connection with a locally-running instance of Kinetica,
        using binary encoding to save memory """
    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    print ()
    print ( "CREATING A TYPE & TABLE")
    print ( "-----------------------")
    print ()

    """ Create columns; column arguments consist of a list of the name, then type, and then
        optional additional properties.  E.g., [ "column_name", column_type, column_property1,
        column_property2 ].  Note that any number of column properties can be listed as long as
        they are not mutually exclusive within themselves or with the primitive type.  Also note
        that raw string can be used for both the primitive type and the properties; but the user is
        also able to use string constants as illustrated in the example below.
    """
    columns = [
        [ "city", "string", "char16" ],
        [ "state_province", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR32 ],
        [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ],
        [ "x", "double" ],
        [ "y", "double" ],
        [ "avg_temp", "double" ],
        [ "time_zone", "string", "char8" ]
    ]

    # Clear any existing table with the same name (otherwise we won't be able to
    # create the table)
    if h_db.has_table( table_name = weather_table_name )['table_exists']:
        h_db.clear_table( weather_table_name )

    # Create the table from the type
    try:
        weather_table = gpudb.GPUdbTable( columns, weather_table_name, db = h_db )
        print ( "Table successfully created.")
    except gpudb.GPUdbException as e:
        print ( "Table creation failure: {}".format( str(e) ) )


    # We can also create a GPUdbTable object for a table that already exists in
    # the database.  All we need is the table name (and a GPUdb object).  Note how
    # we pass None for the type argument
    weather_table_duplicate = gpudb.GPUdbTable( None, weather_table_name, db = h_db )

    print ( "\n")
    print ( "INSERTING DATA")
    print ( "--------------")
    print ()

    # Insert single record example

    # Create ordered dictionary for keys & values of record
    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    # Insert the record into the table (through the GPUdbTable interface)
    weather_table.insert_records( datum )

    # Create another record
    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    # Insert the second record through the basic GPUdb interface
    # Encode record and put into a single element list
    weather_record_type = weather_table.get_table_type()
    single_record = [ gpudb.GPUdbRecord( weather_record_type, datum ).binary_data ]

    # Insert the record into the table
    response = h_db.insert_records(table_name = weather_table_name, data = single_record, list_encoding = "binary")
    print ( "Number of single records inserted:  {}".format(response["count_inserted"]))


    # Insert multiple records example
    # ===============================
    records = []
    # Create a list of in-line records
    records.append( ["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"] )
    records.append( ["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"] )
    records.append( ["Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"] )
    records.append( ["La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77, "UTC-7"] )
    records.append( ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"] )
    records.append( ["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"] )
    records.append( ["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"] )
    records.append( ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"] )
    records.append( ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"] )
    records.append( ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"] )
    records.append( ["La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44, "UTC-4"] )
    records.append( ["Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"] )
    records.append( ["Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62, "UTC-4"] )
    records.append( ["Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"] )
    records.append( ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"] )
    records.append( ["Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5, "UTC+10"] )
    records.append( ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"] )
    records.append( ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"] )
    records.append( ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"] )
    records.append( ["Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68, "UTC+8"] )

    # Insert the records into the table
    weather_table.insert_records( records )
    print ( "Number of batch records inserted:  {}".format( weather_table.size() ))

    print ( "\n")
    print ( "RETRIEVING DATA")
    print ( "---------------")
    print ()

    """ Retrieve the second set of ten records from weather_table. Note that
        records can be iterated over directly. """
    print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone"))
    print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", ""))
    for weatherLoc in weather_table.get_records( offset = 10, limit = 10 ):
        print ( "{city:<20s} {state:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
                "".format( city = weatherLoc["city"], state = weatherLoc["state_province"], country = weatherLoc["country"],
                           y = weatherLoc["y"], x = weatherLoc["x"], avg_temp = weatherLoc["avg_temp"], time_zone = weatherLoc["time_zone"] ) )

    
    """ Retrieve no more than 10 records as JSON from weather_table through the GPUdb interface.
        Note that records are stringified and have to be parsed if using the 'json' encoding. """
    weatherLocs = h_db.get_records( table_name = weather_table_name, offset = 0, limit = 10,
                                    encoding = "json", options = {"sort_by":"city"} )['records_json']

    print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone"))
    print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", ""))
    for weatherLoc in weatherLocs:
        print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}".format(**json.loads(weatherLoc)))

    """ Retrieve no more than 25 of the remaining records as binary from weather
        table. Note that records are binary and have to be decoded. """
    response = h_db.get_records( table_name = weather_table_name, offset = 10, limit = 25,
                                 encoding = "binary", options = {"sort_by":"city"})
    weatherLocs = gpudb.GPUdbRecord.decode_binary_data(response["type_schema"], response["records_binary"])

    for weatherLoc in weatherLocs:
        print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}".format(**weatherLoc))

    """ Note that total_number_of_records does not reflect offset/limit; it's
        the count of all records or those which match the given expression """
    print ( "\nNumber of records in new table:  {:d}".format(response["total_number_of_records"]))

    print ( "\n")
    print ( "FILTERING")
    print ( "---------")
    print ()

    ### Filter Example 1
    
    """ Filter records where column x is less than 0, i.e., cities in the
        western hemisphere, and store the filter in a view.  Note that the GPUdbTable
        creates a random view name if one is not supplied. """
    view1 = weather_table.filter( expression = "x < 0" )
    print ( "Number of records in the western hemisphere:  {}".format( view1.size() ))

    ### Filter Example 2
    
    """ Filter records where column x is less than 0 and column y is greater
        than 0, i.e., cities in the northwestern semi-hemisphere, and store
        the filter in a view.  This filter operation is done through the base
        GPUdb interface. """
    response = h_db.filter(table_name = weather_table_name, view_name = weather_nw_view,
                           expression = "x < 0 and y > 0" )
    print ( "Number of records in the northwestern semi-hemisphere:  {}".format( response["count"] ))

    ### Filter Example 3
    
    """ Filter records using the same expressions as Example 2, but using
        query chaining this time (note that we're using the view created by the
        first filter. """

    nw_view = view1.filter( expression = "y > 0" )
    print ( "Number of records in the northwestern semi-hemisphere (with query chaining):  {}"
            "".format( nw_view.size() ))

    ### Filter Example 4
    
    """ Filter by list where country name is USA, Brazil, or Australia.  Here we
        use the duplicate GPUdbTable object (but it points to the same DB table). """
    country_map = {"country": ["USA", "Brazil", "Australia"]}
    view3 = weather_table_duplicate.filter_by_list( column_values_map = country_map )
    print ( "Number of records where country name is USA, Brazil, or Australia:  {}"
            "".format( view3.size() ))

    ### Filter Example 5
    
    """ Filter by range cities that are east of GMT (the Prime Meridian) """
    view4 = weather_table.filter_by_range( column_name = "x", lower_bound = 0,
                                           upper_bound = 180 )
    print ( "Number of records that are east of the Prime Meridian (x > 0):  {}"
            "".format( view4.size() ))


    print ( "\n")
    print ( "AGGREGATING, GROUPING, and HISTOGRAMS")
    print ( "-------------------------------------")
    print ()

    ### Aggregate Example 1
    
    """ Aggregate count, min, mean, and max on the average temperature.  Note
        that unlike the filter functions, the aggregate functions of GPUdbTable
        return the response from the database. """
    stat_results = weather_table.aggregate_statistics( column_name = "avg_temp",
                                                       stats = "count,min,max,mean" )
    print ( "Statistics of values in the average temperature column:")
    print ( "\tCount: {count:.0f}\n\tMin:  {min:4.2f}\n\tMean: {mean:4.2f}\n\tMax:  {max:4.2f}"
            "\n".format( **stat_results["stats"] ))

    ### Aggregate Example 2
    
    """ Find unique city names. """
    results = weather_table.aggregate_unique( column_name = "city", offset = 0,
                                              limit = 25 )
    print ( "Unique city names:")
    for weatherLoc in results.data["city"]:
        print ( "\t* {}".format( weatherLoc ))
    print ()

    """ Same operation, but through the base GPUdb interface.  Note that the
        results have to be parsed specially using GPUdb.parse_dynamic_response().
        Also, we're using the 'json' encoding in this case (the 'binary' encoding
        can also be used).  Also note how the data is accessed differently. """
    response = h_db.aggregate_unique( table_name = weather_table_name,
                                      column_name = "city", offset = 0,
                                      limit = 25, encoding = "json")
    print ( "Unique city names (using the GPUdb class):")
    weatherLocs = h_db.parse_dynamic_response(response)['response']['city']
    for weatherLoc in weatherLocs:
        print ( "\t* {}".format(weatherLoc))
    print ()

    ### Aggregate Example 3
    
    """ Find number of weather locations per country in the northwestern
        semi-hemisphere.  Note that the data is automatically decoded. """
    results = nw_view.aggregate_group_by( column_names = ["country", "count(country)"], offset = 0,
                                          limit = 25 )
    print ( "Weather locations per country in the northwest semi-hemisphere:")
    for country in zip(results.data["country"], results.data["count(country)"]):
        print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1]))
    print ()

    """ Find number of weather locations per country in the northwestern
        semi-hemisphere; use binary decoding explicitly since we're using
        the GPUdb class. """
    response = h_db.aggregate_group_by(table_name=weather_nw_view, column_names=["country", "count(country)"], offset=0, limit=25, encoding="binary")
    countries = gpudb.GPUdbRecord.decode_binary_data(response["response_schema_str"], response["binary_encoded_response"])
    print ( "Weather locations per country in the northwest semi-hemisphere:")
    for country in zip(countries["column_1"], countries["column_2"]):
        print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1]))
    print ()

    ### Aggregate Example 4
    
    """ Filter table to southeastern semi-hemisphere records, group by country,
        and aggregate min, max, and mean on the average temperature; using the default
        binary decoding and the GPUdbTable interface. """
    # Do a filter first
    se_view = weather_table.filter( expression="x > 0 and y < 0" )
    # Then do the aggregation operation (note how we use the 'data' property to get
    # the data)
    data = se_view.aggregate_group_by( column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"],
                                       offset = 0, limit = 25 ).data
    print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", ""))
    for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]):
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather))
    print ()

    """ Filter table to southeastern semi-hemisphere records, group by country,
        and aggregate min, max, and mean on the average temperature; using the default
        binary decoding and the base GPUdb interface. """
    h_db.filter(table_name = weather_table_name, view_name = weather_se_view, expression="x > 0 and y < 0")

    response = h_db.aggregate_group_by( table_name = weather_se_view,
                                        column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"],
                                        offset = 0, limit = 25 )
    data = h_db.parse_dynamic_response(response)['response']
    print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", ""))
    for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]):
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather))
    print ()

    ### Aggregate Example 5
    
    """ Filter for southern hemisphere cities and create a histogram for the
        average temperature of those cities (divided into every 10 degrees,
        e.g., 40s, 50s, 60s, etc.) """
    s_view = weather_table.filter( expression = "y < 0" )

    histogram_result = s_view.aggregate_histogram( column_name = "avg_temp",
                                                   start = 40, end = 90,
                                                   interval = 10 )
    print ( "Number of southern hemisphere cities with average temps in the given ranges:")
    for histogroup in zip([40, 50, 60, 70, 80], histogram_result['counts']):
        print ( "\t{}s: {:2.0f}".format(*histogroup))
    print()


    ### Aggregate Example 6

    """ Aggregate group by has an option 'result_table' which creates a result table and does not
        return the data.  Very useful when the data is large and we want to fetch records from it
        in batches.
    """
    # Create another table with the same type, and generate a lot of random data for it.
    # Note that we're allowing GPUdbTable to come up with a random name for the table.
    weather_table2 = gpudb.GPUdbTable( columns, db = h_db )
    # Create random data (but specify a range for the average temperature column)
    weather_table2.insert_records_random( count = 10000,
                                          options = { "avg_temp": {"min": -20, "max": 105 } } )
    print()
    print ( "Second weather table size: ", weather_table2.size() )

    # Create a view on the south-western quadrant of the planet
    sw_view = weather_table2.filter( expression="x < 0 and y < 0" )
    # Then do the aggregation operation.  Note that the column names need
    # aliases to utilize the 'result_table' option.
    agg_result_table = sw_view.aggregate_group_by( column_names = ["country",
                                                                   "min(avg_temp) as min_avg_temp",
                                                                   "max(avg_temp) as max_avg_temp",
                                                                   "mean(avg_temp) as mean_avg_temp"],
                                                   offset = 0, limit = 25,
                                                   options = { "result_table": gpudb.GPUdbTable.prefix_name("agg_") } )
    print ( "Size of records in the SW quadrant of the planet: ", agg_result_table.size() )
    print ( "{:<20s} {:^7s} {:^7s} {:^5s}".format("SW Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<6s} {:=<6s} {:=<6s}".format("", "", "", ""))

    # Note that we can slice GPUdbTable objects to fetch the data inside
    for record in agg_result_table[ 10 : 50 ]:
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format( record["country"], record["min_avg_temp"], record["mean_avg_temp"], record["max_avg_temp"] ))
    print ()
    

    print ( "\n")
    print ( "DELETING DATA")
    print ( "-------------")
    print ()

    """ Filter for cities that are either south of latitude -50 or west of
        longitude -50 to determine how many records will be deleted; delete
        the records, then confirm the deletion by refiltering. """

    deleteExpression = "x < -50 or y < -50"
    num_records_to_delete = weather_table.filter( expression = deleteExpression ).count
    print ( "Number of records that meet deletion criteria before deleting:  {}"
            "".format( num_records_to_delete ) )

    weather_table.delete_records( expressions = [ deleteExpression ] )

    # Note that we're using the duplicate GPUdbTable object which points to the
    # same table in the DB
    num_records_post_delete = weather_table_duplicate.filter( expression = deleteExpression ).count
    print ( "Number of records that meet deletion criteria after deleting (expect 0):  {}".format( num_records_post_delete ))
    print ()



    print ( "\n")
    print ( "Using Multi-head Ingestion")
    print ( "--------------------------")
    print ()

    """For tables with primary or shard key columns, it might be useful to use
       the multi-head ingestion procedure for inserting records into a table for
       heavy ingestion loads.  There are benefits and drawbacks of using multi-head
       ingestion: the benefit is that if the database is configured for multi-head
       ingestion and there is a tremendous ingestion load, then the ingestion will
       be faster overall.  However, the drawback is that the client has to do some
       calculation PER record to find out which worker rank of the database server to
       send the record to.  So, unless the following conditions are met, it is unwise
       to use multi-head ingestion as it will unnecessarily slow ingestion down:

       * The server is configured to use multi-head ingestion
       * The table type has at least one primary or shard key column
       * There is a heavy stream of data to be inserted
    """
    # Create a type that has some shard keys
    sharded_columns = [
        [ "city", "string", "char16" ],
        [ "state_province", "string", "char2", "shard_key" ],  # shard key column
        [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ],
        [ "airport", "string", "nullable" ], # a nullable column
        [ "x", "double" ],
        [ "y", "double" ],
        [ "avg_temp", "double" ],
        [ "time_zone", "string", "char8", "shard_key" ] # shard key column
    ]

    # Create a table with the multi-head ingestion options
    # (the default batch size is 10k)
    sharded_table = gpudb.GPUdbTable( sharded_columns, db = h_db,
                                      use_multihead_ingest = True,
                                      multihead_ingest_batch_size = 33 )

    # Generate some random data to be inserted
    num_records = 100
    null_likelihood = 10
    alphanum = (string.ascii_letters + string.digits)
    for i in range(0, num_records):
        record = collections.OrderedDict()
        record[ "city"          ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] )
        record[ "state_province"] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 0, 2 ) )] )
        record[ "country"       ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] )
        record[ "airport"       ] = None if (random.random() < null_likelihood) \
                                    else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] )
        record[ "x"             ] = random.uniform( -180, 180 )
        record[ "y"             ] = random.uniform(  -90,  90 )
        record[ "avg_temp"      ] = random.uniform(  -40, 110 )
        record[ "time_zone"     ] = "UTC-{}".format( random.randint( -11, 14 ) )
        sharded_table.insert_records( record )
    # end loop

    print ( "Size of sharded table (expect less than 100 as the batch size is 33 and \n100 is not a multiple of 33): ", sharded_table.size() )
    print ()
    print ( "Flushing the records remaining in the ingestor queue...")
    sharded_table.flush_data_to_server()
    print ( "Size of sharded table post forced flush (expect 100): ", sharded_table.size() )
    print ()
Example #17
    ["humidity", "float"],
    ["temperature", "float"],
    ["windspeed", "float"],
    ["weather", "string"],

    ["timestamp", "string", "datetime"],
    ]

db_conn_str="http://172.30.50.78:9191"
db_user = "******"
db_pass = "******"    

try:
    if db_user == 'no_cred' or db_pass == 'no_cred':
        db=gpudb.GPUdb(encoding='BINARY',
                       host=db_conn_str)
    else:
        db=gpudb.GPUdb(encoding='BINARY',
                       host=db_conn_str,
                       username=db_user,
                       password=db_pass)

    if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']:
        print('Table exists {0:s}'.format(TBL_NAME_toll_stream))

    else:
        status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream,
                                  name=TBL_NAME_toll_stream,
                                  db=db,
                                  options={'collection_name': 'traffic'})
        print(status)
Example #18
def aggregate():
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    columns = [["city", "string", "char16"],
               [
                   "state_province",
                   gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR32
               ],
               [
                   "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR16
               ], ["x", "double"], ["y", "double"], ["avg_temp", "double"],
               ["time_zone", "string", "char8"]]

    if h_db.has_table(table_name=weather_table_name)['table_exists']:
        h_db.clear_table(weather_table_name)

    try:
        weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db)
        print("Table successfully created.")
    except gpudb.GPUdbException as e:
        print("Table creation failure: {}".format(str(e)))

    weather_table_duplicate = gpudb.GPUdbTable(None,
                                               weather_table_name,
                                               db=h_db)

    print("\n")
    print("INSERTING DATA")
    print("--------------")
    print()

    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    weather_table.insert_records(datum)

    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    weather_record_type = weather_table.get_table_type()
    single_record = [gpudb.GPUdbRecord(weather_record_type, datum).binary_data]

    response = h_db.insert_records(table_name=weather_table_name,
                                   data=single_record,
                                   list_encoding="binary")
    print("Number of single records inserted:  {}".format(
        response["count_inserted"]))

    records = []
    records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"])
    records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"])
    records.append([
        "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"
    ])
    records.append([
        "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77,
        "UTC-7"
    ])
    records.append(
        ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"])
    records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"])
    records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"])
    records.append(
        ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"])
    records.append(
        ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"])
    records.append(
        ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"])
    records.append([
        "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44,
        "UTC-4"
    ])
    records.append([
        "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"
    ])
    records.append([
        "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62,
        "UTC-4"
    ])
    records.append([
        "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"
    ])
    records.append(
        ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"])
    records.append([
        "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5,
        "UTC+10"
    ])
    records.append(
        ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"])
    records.append(
        ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"])
    records.append(
        ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"])
    records.append([
        "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68,
        "UTC+8"
    ])

    # Insert the batch of records into the table
    weather_table.insert_records(records)

    # Get a histogram of southern-hemisphere cities' average temperatures,
    # in intervals of 20 degrees from 40 to 80
    s_view = weather_table.filter(expression="y < 0")

    histogram_result = s_view.aggregate_histogram(column_name="avg_temp",
                                                  start=40,
                                                  end=80,
                                                  interval=20)
    print(
        "Number of southern hemisphere cities with average temps in the given ranges:"
    )
    for histogroup in zip([40, 60, 80], histogram_result['counts']):
        print("\t{}s: {:2.0f}".format(*histogroup))
    print()
Example #19
def gpudb_example():
    """An example of how to use the GPUdb python API.
    """

    new_line = '\n'

    # handle to database
    h_db = gpudb.GPUdb(encoding = 'BINARY', host = '127.0.0.1', port = '9191')

    my_table='my_table_1'

    # Data type for the table
    my_type = """
    {
        "type": "record",
        "name": "my_type_1",
        "fields": [
            {"name":"col1","type":"double"},
            {"name":"col2","type":"string"},
            {"name":"group_id","type":"string"}
        ]
    }  """.replace(' ','').replace('\n','')

    # Create the data type in the DB
    response = h_db.create_type( type_definition = my_type, label = 'my_type_lb_1' )
    type_id_1 = response['type_id']
    print 'GPUdb generated type id for the new type - ', type_id_1, new_line

    # Create a table with the given data type
    response = h_db.create_table( table_name = my_table, type_id = type_id_1 )

    # Generate data to be inserted into the table
    encoded_obj_list = []

    for val in range(1,10):
        datum = collections.OrderedDict()
        datum["col1"] = val+0.1
        datum["col2"] = 'string '+str(val)
        datum["group_id"] = 'Group 1'
        # Encode the data appropriately to prepare for insertion
        encoded_obj_list.append(h_db.encode_datum(my_type, datum))

    # Optional parameter that enables returning IDs for the
    # newly inserted records
    options = {'return_record_ids':'true'}

    # Insert the records into the table
    response = h_db.insert_records( table_name = my_table,
                                    objects = encoded_obj_list,
                                    list_encoding = 'binary',
                                    options = options )
    print "Record Ids for %d new records - %s" % (response['count_inserted'], response['record_ids']), new_line

    # Retrieve records from a table. Note that the records are stringified and have to be parsed
    response = h_db.get_records(table_name=my_table, offset=0, limit=100, encoding='json', options={})
    print "Returned records ", response['records_json'], new_line

    # Filter records into a view.  Response contains the count only
    response = h_db.filter(table_name=my_table, view_name='my_table_view', expression='col1 = 1.1')
    print "Number of records returned by filter expresion ", response['count'], new_line

    # Read the filtered records from the view (exactly as reading from table)
    response = h_db.get_records( table_name = 'my_table_view',
                                 offset = 0, limit = 100,
                                 encoding = 'json', options = {} )
    print "Filtered records ", response['records_json'], new_line

    # Drop the view
    h_db.clear_table('my_table_view')

    # Filter expression with two columns on the original table
    response = h_db.filter(my_table,'my_table_view','col1 <= 9 and group_id="Group 1"')
    print "Number of records returned by second filter expresion ", response['count'], new_line

    # Fetch the records from the view
    response = h_db.get_records( table_name = 'my_table_view',
                                 offset = 0, limit = 100, encoding = 'json',
                                 options = {} )
    print "Returned records ", response['records_json'], new_line

    # Filter by a list.  query is executed on resultset from previous query (query chaining)
    response = h_db.filter_by_list( table_name = 'my_table_view',
                                    view_name = 'my_table_view_2',
                                    column_values_map = {'col1': ['1.1', '2.1', '5.1' ] } )
    print "Number of records returned by filter expresion ", response['count'], new_line

    # Fetch the records
    response = h_db.get_records( table_name = 'my_table_view_2',
                                 offset = 0, limit = 100,
                                 encoding = 'json', options = {} )
    print "Returned records filtered by list: ", response['records_json'], new_line

    # filter a range of values (numeric values only)
    response = h_db.filter_by_range( table_name = my_table,
                                     view_name = 'my_table_view_3',
                                     column_name = 'col1',
                                     lower_bound = 1,
                                     upper_bound = 5 )
    print "Number of records returned by filter expression ", response['count'], new_line

    response = h_db.get_records(table_name='my_table_view_3', offset=0, limit=100, encoding='binary', options={})

    # Decoding the binary encoded response
    print "Returned records filtered by range: "
    parsed_schema = schema.parse( response['type_schema'] )
    reader = io.DatumReader( parsed_schema )
    for bin_record in response['records_binary']:
        str_IO = cStringIO.StringIO( bin_record )
        bin_decoder = io.BinaryDecoder( str_IO )
        decoded_response = reader.read( bin_decoder )
        print decoded_response, new_line




    response = h_db.aggregate_statistics(table_name=my_table, column_name='col1', stats='count,sum,mean')
    print "Statistics of values in col1 ", response['stats'], new_line

    encoded_obj_list=[]
    for val in range(1,8):
        datum = collections.OrderedDict()
        datum["col1"] = val+10.1
        datum["col2"] = 'string '+str(val)
        datum["group_id"] = 'Group 2'
        encoded_obj_list.append(h_db.encode_datum(my_type, datum))

    h_db.insert_records(my_table,encoded_obj_list,'binary',{})

    # find unique values in a column
    response = h_db.aggregate_unique( table_name = my_table,
                                      column_name = 'group_id',
                                      offset = 0, limit = 20,
                                      encoding = 'json')
    print 'Unique values in group_id column ', response['json_encoded_response'], new_line

    # Group by
    groupby_col_names = ['col2']
    retval = h_db.aggregate_group_by(table_name=my_table, column_names=groupby_col_names, offset=0, limit=1000,encoding='json')
    print "Group by results ", retval['json_encoded_response'], new_line

    # aggregate values
    groupby_col_names = ['group_id', "count(*)", 'sum(col1)', 'avg(col1)']
    retval = h_db.aggregate_group_by(table_name=my_table, column_names=groupby_col_names, offset=0, limit=1000,encoding='json')
    print "Group by results ", retval['json_encoded_response'], new_line

    # Do another aggregate group by operation
    groupby_col_names = ['group_id', 'sum(col1*10)']
    retval = h_db.aggregate_group_by( table_name = my_table,
                                      column_names = groupby_col_names,
                                      offset = 0, limit = 1000,
                                      encoding = 'json')
    print "Group by results ", retval['json_encoded_response'], new_line

    # Add more data
    for val in range(4,10):
        datum = collections.OrderedDict()
        datum["col1"] = val+0.6
        datum["col2"] = 'string 2'+str(val)
        datum["group_id"] = 'Group 1'
        encoded_obj_list.append(h_db.encode_datum(my_type, datum))
    h_db.insert_records( table_name = my_table,
                         objects = encoded_obj_list,
                         list_encoding = 'binary',
                         options = {} )

    histogram_result = h_db.aggregate_histogram( table_name = my_table,
                                                 column_name = 'col1',
                                                 start = 1.1, end = 2,
                                                 interval = 1 )
    print "histogram result:", histogram_result, new_line



    # Drop the table (will automatically drop all views on the table)
    h_db.clear_table(my_table)

    # Check that clearing a table automatically drops all the dependent views
    response = h_db.get_records( table_name = 'my_table_view',
                                 offset = 0, limit = 100,
                                 encoding = 'json', options = {})
    assert (response['status_info']['status'] == "ERROR"), \
        "Problem: View on deleted table found!!!"
    print ("Response status and message : ", response['status_info']['status'], 
           ' - ', response['status_info']['message'], new_line)
Example #20
HOST_IP = "127.0.0.1"


def main(db_handle):
    proc_name = 'H2o_rf_train_test'
    file_paths = ["h2o_rf_train_test.py", "../../kinetica_proc.py"
                  ]  # put the main python script in the first place
    # Read proc code in as bytes and add to a file data array
    files = {}
    for script_path in file_paths:
        script_name = os.path.basename(script_path)
        with open(script_path, 'rb') as f:
            files[script_name] = f.read()
    # Remove proc if it exists from a prior registration
    if db_handle.has_proc(proc_name)['proc_exists']:
        db_handle.delete_proc(proc_name)
    print("Registering proc...")
    response = db_handle.create_proc(proc_name, 'distributed', files, 'python',
                                     [file_paths[0]], {})
    print(response)
    print("Executing proc...")
    response = db_handle.execute_proc(proc_name, {}, {},
                                      [te.LOAN_TRAIN_DATA_TABLE_NAME], {}, [],
                                      {})
    print(response)


if __name__ == "__main__":
    main(gpudb.GPUdb(encoding='BINARY', host=HOST_IP, port='9191'))
Example #21
    def __init__(self, bb_module, bb_method,
                 schema_inbound, schema_outbound,
                 zmq_dealer_host, zmq_dealer_port,
                 db_table_audit, db_table_results, db_conn_str,
                 db_user = "", db_pass = "", be_quiet = False ):
        """Construct a new KineticaBlackBox object.

        :param bb_module:
        :type bb_module: dict
        :param bb_method:
        :type bb_method: dict
        :param schema_inbound:
        :type schema_inbound: str
        :param schema_outbound:
        :type schema_outbound: str
        :param zmq_dealer_host:
        :type zmq_dealer_host: str
        :param zmq_dealer_port:
        :type zmq_dealer_port: str
        :param db_table_audit:
        :type db_table_audit: str
        :param db_table_results:
        :type db_table_results: str
        :param db_host: Default "127.0.0.1".
        :type db_host: str
        :param db_port: Default "9191".
        :type db_port: str
        :param db_user: optional
        :type db_user: str
        :param db_pw: optional
        :type db_pw: str

        """

        logger.info("Initializing KineticaBlackBox")
        logger.info(f"zmq_dealer_host: {zmq_dealer_host}")
        logger.info(f"zmq_dealer_port: {zmq_dealer_port}")
        logger.info(f"db_table a: {db_table_audit}")
        logger.info(f"db_table r: {db_table_results}")
        logger.info(f"db_conn_str: {db_conn_str}")
        logger.info(f"db_user: {db_user}")
        logger.info(f"db_pass: *******")
        logger.info(f"schema_inbound: {schema_inbound}")
        logger.info(f"schema_outbound: {schema_outbound}")
        logger.info(f"bb_module: {bb_module}")
        logger.info(f"bb_method: {bb_method}")

        if be_quiet:
            import logging
            logger.setLevel(logging.INFO)

        self.be_quiet = be_quiet
        self.schema_inbound = schema_inbound
        self.schema_outbound = schema_outbound

        self.bb_module = bb_module
        self.bb_method = bb_method

        # Prepare DB Communications
        logger.info(f"Attempting to connect to DB at {db_conn_str} to push to {db_table_audit}")
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db=gpudb.GPUdb(encoding='BINARY',
                           host=db_conn_str)
        else:
            db=gpudb.GPUdb(encoding='BINARY',
                           host=db_conn_str,
                           username=db_user,
                           password=db_pass)

        self.sink_table_audit = gpudb.GPUdbTable(name = db_table_audit, db = db)
        self.db = db

        logger.info(f"DB Results Table {db_table_results}")
        if db_table_results == "NOT_APPLICABLE":            
            logger.info(f"All results will be persisted to Audit DB Table {db_table_audit}")
            self.sink_table_results = None
        else:
            self.sink_table_results = gpudb.GPUdbTable(name = db_table_results, db = db)
            logger.info(f"Established connection to sink table")
            logger.info(self.sink_table_results)

        logger.info(self.sink_table_results)
        if self.sink_table_results is None:
            logger.info(f"All results will be persisted to Audit DB Table only")            
        else:
            logger.info(f"All results will be persisted to both Audit and output DB Tables {db_table_results}")


        logger.info("Prepping response with with schema")
        logger.info(json.dumps(json.loads(schema_outbound)))

        # Prepare ZMQ Communications
        zmq_dealer_uri = f"tcp://{zmq_dealer_host}:{zmq_dealer_port}"
        context = zmq.Context()
        self.socket = context.socket(zmq.PULL)
        #logger.info("Listening for incoming requests on topic: %s via %s" % (topicfilter,topic_source))
        self.socket.connect(zmq_dealer_uri)
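
# A minimal usage sketch (not from the original source): the module, method,
# schema, host, and table values below are illustrative placeholders only.
#
# bb = KineticaBlackBox(bb_module="my_bb_module",
#                       bb_method="score",
#                       schema_inbound=inbound_schema_json,
#                       schema_outbound=outbound_schema_json,
#                       zmq_dealer_host="127.0.0.1",
#                       zmq_dealer_port="5555",
#                       db_table_audit="bb_audit",
#                       db_table_results="NOT_APPLICABLE",
#                       db_conn_str="127.0.0.1")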
Example #22
import collections
import json

import gpudb


def retrieve():
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    columns = [["city", "string", "char16"],
               [
                   "state_province",
                   gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR32
               ],
               [
                   "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR16
               ], ["x", "double"], ["y", "double"], ["avg_temp", "double"],
               ["time_zone", "string", "char8"]]

    if h_db.has_table(table_name=weather_table_name)['table_exists']:
        h_db.clear_table(weather_table_name)

    try:
        weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db)
        print("Table successfully created.")
    except gpudb.GPUdbException as e:
        print("Table creation failure: {}".format(str(e)))

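    # A second handle to the same table; passing None for the column list tells
    # the library to fetch the existing table's type from the database.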
    weather_table_duplicate = gpudb.GPUdbTable(None,
                                               weather_table_name,
                                               db=h_db)

    print("\n")
    print("INSERTING DATA")
    print("--------------")
    print()

    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    weather_table.insert_records(datum)

    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    weather_record_type = weather_table.get_table_type()
    single_record = [gpudb.GPUdbRecord(weather_record_type, datum2).binary_data]

    response = h_db.insert_records(table_name=weather_table_name,
                                   data=single_record,
                                   list_encoding="binary")
    print("Number of single records inserted:  {}".format(
        response["count_inserted"]))

    records = []
    records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"])
    records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"])
    records.append([
        "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"
    ])
    records.append([
        "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77,
        "UTC-7"
    ])
    records.append(
        ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"])
    records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"])
    records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"])
    records.append(
        ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"])
    records.append(
        ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"])
    records.append(
        ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"])
    records.append([
        "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44,
        "UTC-4"
    ])
    records.append([
        "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"
    ])
    records.append([
        "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62,
        "UTC-4"
    ])
    records.append([
        "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"
    ])
    records.append(
        ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"])
    records.append([
        "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5,
        "UTC+10"
    ])
    records.append(
        ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"])
    records.append(
        ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"])
    records.append(
        ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"])
    records.append([
        "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68,
        "UTC+8"
    ])

    weather_table.insert_records(records)

    weatherLocs = h_db.get_records(table_name=weather_table_name,
                                   offset=0,
                                   limit=10,
                                   encoding="json",
                                   options={"sort_by": "city"})['records_json']
    for weatherLoc in weatherLocs:
        print(
            "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
            .format(**json.loads(weatherLoc)))

    response = h_db.get_records(table_name=weather_table_name,
                                offset=10,
                                limit=25,
                                encoding="binary",
                                options={"sort_by": "city"})
    weatherLocs = gpudb.GPUdbRecord.decode_binary_data(
        response["type_schema"], response["records_binary"])

    for weatherLoc in weatherLocs:
        print(
            "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
            .format(**weatherLoc))
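
    # The view names defined above are unused in this truncated excerpt; the full
    # tutorial builds them with filters. A minimal sketch (the filter expression
    # is illustrative, not from the source):
    h_db.filter(table_name=weather_table_name,
                view_name=weather_w_view,
                expression="x < -100")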
Example #23
File: t.py  Project: rewreu/learning
import gpudb

a = gpudb.GPUdb()
a.execute_sql_and_decode
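
# The bare attribute reference above does not call anything. A minimal sketch of
# an actual call (table name is illustrative, not from the source):
#
# response = a.execute_sql_and_decode("SELECT * FROM weather LIMIT 10")
# for record in response["records"]:
#     print(record)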
Example #24
# import sys
# sys.path.append("/Users/zhewu/PycharmProjects/learning/pandasKinetica")
# from kdf import kdf

from kdfconn import kdf
import pandas as pd
import gpudb

conn = gpudb.GPUdb(encoding='BINARY', host="k.yamei.info", port="9191")

# df = kDataFrame(pd.read_csv("./pandasKinetica/template.csv"), conn)

df = kdf(conn)

df.from_pandas(pd.read_csv("./tmp/tmp.test3.csv"))
# df.read_table("est6")

df.to_table("est17", charN_On=True, timeStampColumn="ValidDateTime")
# df.to_table("est9",charN_On=False, timeStampColumn="ValidDateTime")

###############################################################################################
from kdfconn import kdf
import pandas as pd
import numpy as np
import gpudb

conn = gpudb.GPUdb(encoding='BINARY', host="k.yamei.info", port="9191")

# df = kDataFrame(pd.read_csv("./pandasKinetica/template.csv"), conn)
Example #25
# File: kapi_io.py
# Purpose: I/O between dataframes and Kinetica with the native API.
# Author: Chad Juliano
# Date: 07/20/2018
###############################################################################

import numpy as np
import pandas as pd
import gpudb
import sys

KDBC = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

KAPI_TYPE_MAP = {
    'int64': gpudb.GPUdbRecordColumn._ColumnType.LONG,
    'int32': gpudb.GPUdbRecordColumn._ColumnType.INT,
    'int16': gpudb.GPUdbRecordColumn._ColumnType.INT,
    'float64': gpudb.GPUdbRecordColumn._ColumnType.DOUBLE,
    'float32': gpudb.GPUdbRecordColumn._ColumnType.FLOAT,
    'object': gpudb.GPUdbRecordColumn._ColumnType.STRING
}


def get_coldef(_col_name, _np_dtype, _col_props):
    """Convert a Numpy type to Kinetica type."""

    if (str(_np_dtype) not in KAPI_TYPE_MAP):
        raise Exception('Type not supported: {}'.format(_np_dtype))

    _k_type = KAPI_TYPE_MAP[str(_np_dtype)]
    _k_properties = []
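    # The source is truncated here; a plausible completion (an assumption, not
    # from the source) would derive char properties from _col_props and return
    # the column definition:
    # if _col_props and _col_name in _col_props:
    #     _k_properties = _col_props[_col_name]
    # return [_col_name, _k_type] + _k_properties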
Example #26
import gpudb
import os, glob
import pandas as pd
import numpy as np
import datetime
import random

# Establish a connection with the Kinetica instance using
# binary encoding to save memory
h_db = gpudb.GPUdb(
    encoding="BINARY",
    host="172.31.33.26",
    port="9191",
    username="******",
    password="******")

# Confirm target table exists
if h_db.has_table(table_name="stores")['table_exists']:
    print("Table successfully reached.")
else:
    print("Table not found.")

# Pull data from target and store as variable
data = h_db.get_records(
    table_name="stores",
    offset=0,
    limit=gpudb.GPUdb.END_OF_SET,
    encoding="binary")

# Hydrate pandas df, adding a datetime column as a start date for the desired month
store_df = pd.DataFrame(gpudb.GPUdbRecord.decode_binary_data(data["type_schema"], data["records_binary"]))
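
# The source is truncated here. The comment above mentions adding a datetime
# column as the month's start date; a minimal sketch (column name and month are
# illustrative assumptions):
month_start = datetime.datetime(2018, 1, 1)
store_df["month_start_date"] = month_start
print(store_df.head())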