Code Example #1
def get_tbl_handle(tbl_name, db, schema=None):
    """Create a DB table handle."""

    tbl_ref = tbl_name
    if USE_MULTIHEAD_IO == 'TRUE':
        return gpudb.GPUdbTable(name=tbl_ref,
                                db=db,
                                use_multihead_io=True,
                                flush_multi_head_ingest_per_insertion=True)
    else:
        return gpudb.GPUdbTable(name=tbl_ref, db=db)
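
A minimal usage sketch for the helper above (hedged: the connection host, the USE_MULTIHEAD_IO setting, and the table name 'my_table' are illustrative assumptions, and the table is assumed to already exist in the database):

import collections
import gpudb

USE_MULTIHEAD_IO = 'TRUE'  # assumed module-level setting read by get_tbl_handle()

db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1')  # hypothetical host
tbl = get_tbl_handle('my_table', db=db)  # 'my_table' must already exist with matching columns

# insert a single record through the handle; column names here are illustrative
rec = collections.OrderedDict()
rec['id'] = 1
rec['value'] = 3.14
tbl.insert_records(rec)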
Code Example #2
def main():
    try:
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
        else:
            db = gpudb.GPUdb(encoding='BINARY',
                             host=db_conn_str,
                             username=db_user,
                             password=db_pass)

        if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']:
            print('Table exists {0:s}'.format(TBL_NAME_toll_stream))

        else:
            status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream,
                                      name=TBL_NAME_toll_stream,
                                      db=db,
                                      options={'collection_name': 'traffic'})
            print(status)
            print('\tTable created {0:s}'.format(TBL_NAME_toll_stream))

        test_insert()

    except gpudb.gpudb.GPUdbException as e:
        print(e)
Code Example #3
File: kconsumer.py  Project: saifrahmed/kafka-esque
def main():
    logger = logging.getLogger('Price consumer')
    logger.setLevel(logging.DEBUG)
    #logging.basicConfig(filename='myapp.log', level=logging.INFO)
    consumer = KafkaConsumer('prices')

    db = gpudb.GPUdb(encoding='BINARY', host='kinetica-saif')
    columns = [
        ["TIMESTAMP", "long", "timestamp"],
        ["tickid", "long", "primary_key"],
        ["TICKER", "string", "char32"],
        ["DESC", "string", "char32"],
        ["PX_LAST", "float"],
    ]
    px_table = gpudb.GPUdbTable(columns, 'PRICES', db=db)

    print("Eating off queue on topic %s " % TOPIC)

    for msg in consumer:
        quote = json.loads(msg.value)
        theTimestamp = int(round(time.time() * 1000))
        logger.critical(quote)
        print(
            px_table.insert_records(theTimestamp, quote['tickid'],
                                    quote['ticker'], quote['desc'],
                                    quote['pxlast']))

    print("DONE Eating off queue on topic %s " % TOPIC)
Code Example #4
def python_tc_udf_init():

    print("")
    print("PYTHON UDF TABLE COPY INITIALIZATION")
    print("====================================")
    print("")

    # Create the Python UDF tutorial schema, if it doesn't exist
    kinetica.create_schema(SCHEMA, options=OPTION_NO_CREATE_ERROR)

    # Create input data table
    columns = [["id", "int", "int16", "primary_key"], ["x", "float"],
               ["y", "float"]]

    if kinetica.has_table(table_name=INPUT_TABLE)['table_exists']:
        kinetica.clear_table(table_name=INPUT_TABLE)

    input_table_obj = gpudb.GPUdbTable(_type=columns,
                                       name=INPUT_TABLE,
                                       db=kinetica)

    print("Input table successfully created: ")
    print(input_table_obj)

    records = []
    for val in range(1, MAX_RECORDS + 1):
        records.append([val, random.gauss(1, 1), random.gauss(1, 2)])
    input_table_obj.insert_records(records)

    print("Number of records inserted into the input table: {}".format(
        input_table_obj.size()))

    # Create output data table
    columns = [["id", "int", "int16", "primary_key"], ["a", "float"],
               ["b", "float"]]

    if kinetica.has_table(table_name=OUTPUT_TABLE)['table_exists']:
        kinetica.clear_table(table_name=OUTPUT_TABLE)

    output_table_obj = gpudb.GPUdbTable(_type=columns,
                                        name=OUTPUT_TABLE,
                                        db=kinetica)

    print("")
    print("Output table successfully created: ")
    print(output_table_obj)
    print("")
Code Example #5
File: kdf.py  Project: rewreu/kdfconn
    def to_table(self,
                 tableName='tmp',
                 appendExistTable=False,
                 clearTableIfExist=False,
                 charN_On=False,
                 timeStampColumn=None):
        """
        If appendExistTable is True, clearTableIfExist is disabled. 
        timeStampColumn can be string of a column name, or a list of column names
        """
        if isinstance(timeStampColumn, str) or timeStampColumn is None:
            timeStampColumn = [timeStampColumn]
        types = []
        if not appendExistTable:
            if clearTableIfExist:
                self.__conn.clear_table(
                    tableName, options={'no_error_if_not_exists': 'true'})
                clearTableIfExist = False
            else:
                clearTableIfExist = self.__conn.has_table(
                    tableName)['table_exists']
            assert not clearTableIfExist, "table {} already exists in database, stopping ingestion".format(
                tableName)

            self._tableTypes = [
                self._getcoltype(column=column,
                                 charN_On=charN_On,
                                 timeStampColumn=timeStampColumn)
                for column in self.columns
            ]
        else:
            self._tableTypes = None  # use the existing table in database
        try:
            table = gpudb.GPUdbTable(_type=self._tableTypes,
                                     name=tableName,
                                     options={"is_replicated": "false"},
                                     db=self.__conn)
            if self._tableTypes is None:
                print("Table successfully connected")
            else:
                print("Table successfully created")
        except gpudb.GPUdbException as e:
            if self._tableTypes is None:
                print("Table connection failure: {}".format(str(e)))
            else:
                print("Table creation failure: {}".format(str(e)))

        i = 0
        while True:
            tdf = self[i:i + 20000]
            # only replace nan here, if replaced earlier, the float/double column becomes object
            tdf.replace({pd.np.nan: None}, inplace=True)
            insert_records = tdf.to_records(index=False)
            insert_rows = [list(x.item()) for x in insert_records]
            if len(insert_rows) == 0: break
            table.insert_records(insert_rows)
            i += 20000
            print("{} rows of data inserted.".format(tdf.shape[0]))
Code Example #6
File: kapi_io.py  Project: mmmika/kinetica-jupyterlab
def save_df(_df,
            _table_name,
            _schema,
            _kdbc=KDBC,
            _col_props={},
            _is_replicated=False):
    """Save a Dataframe to a Kinetica table."""

    # Should index be used to create a column?
    _use_index = (_df.index.name is not None)

    # Construct the type to use for creating the table.
    _result_type = []

    if (_use_index):
        _idx_type = get_coldef(_df.index.name, _df.index.dtype, _col_props)
        _idx_type.column_properties.append('shard_key')
        _result_type.append(_idx_type)

    for _idx in range(_df.columns.size):
        _col_name = _df.columns[_idx]
        _dtype = _df.dtypes[_idx]
        _result_type.append(get_coldef(_col_name, _dtype, _col_props))

    print('Dropping table: <{}>'.format(_table_name))
    _kdbc.clear_table(_table_name, options={'no_error_if_not_exists': 'true'})

    _print_replicated = ''
    if (_is_replicated):
        _print_replicated = 'replicated '

    print('Creating {} table: <{}>'.format(_print_replicated, _table_name))
    for _idx, _coldef in enumerate(_result_type):
        print('Column {}: <{}> ({}) {}'.format(_idx, _coldef.name,
                                               _coldef.column_type,
                                               _coldef.column_properties))

    #_is_replicated = 'false'
    _type_obj = gpudb.GPUdbRecordType(columns=_result_type, label=_table_name)
    _result_table = gpudb.GPUdbTable(db=_kdbc,
                                     _type=_type_obj,
                                     name=_table_name,
                                     options={
                                         'collection_name': _schema,
                                         # pass the option value as a string ('true'/'false')
                                         'is_replicated': 'true' if _is_replicated else 'false'
                                     })

    # Convert to records so we can preserve the column dtypes
    _insert_records = _df.to_records(index=_use_index)

    # Call item() so the types are converted to python native types
    _insert_rows = [list(x.item()) for x in _insert_records]

    if (len(_insert_rows) > 0):
        _result_table.insert_records(_insert_rows)

    print('Inserted rows into <{}.{}>: {}'.format(_schema, _table_name,
                                                  len(_insert_rows)))
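
A brief usage sketch for save_df() (hedged: the connection, schema name, and table name are illustrative assumptions; get_coldef() and KDBC come from the same module as the snippet above):

import pandas as pd
import gpudb

kdbc = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1')  # hypothetical host
df = pd.DataFrame({'x': [1.0, 2.0, 3.0], 'label': ['a', 'b', 'c']})

# drops and recreates 'demo_table' in the 'demo_schema' collection, then inserts the rows
save_df(df, 'demo_table', 'demo_schema', _kdbc=kdbc)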
Code Example #7
def _save_model(db, model_id, model, test_r2, train_r2, sample_cnt):
    model_record = collections.OrderedDict()
    model_record['model_id'] = model_id
    model_record['dump'] = pickle.dumps(model)
    model_record['created_on'] = int(time.time() * 1000)
    model_record['sample_cnt'] = sample_cnt
    model_record['test_r2'] = test_r2
    model_record['train_r2'] = train_r2

    table = gpudb.GPUdbTable(name='prediction_model', db=db)
    table.insert_records(model_record)
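
A possible counterpart for reading a model back out of the same table (a sketch only, not part of the original code; it relies on the 'created_on' and 'dump' columns defined for 'prediction_model' and on the /get/records 'sort_by'/'sort_order' options):

def _load_latest_model(db):
    """Fetch the most recently saved model from 'prediction_model' and unpickle it."""
    table = gpudb.GPUdbTable(name='prediction_model', db=db)
    records = table.get_records(
        offset=0, limit=1,
        options={'sort_by': 'created_on', 'sort_order': 'descending'})
    if not records:
        return None
    return pickle.loads(records[0]['dump'])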
Code Example #8
def create_event_rsvp_table(db):
    """
    Creates an empty table for storing RSVPs from Meetup.com streaming API.
    Nothing happens if the table already exists

    :param gpudb.GPUdb db: Connection to Kinetica DB
    """
    try:
        gpudb.GPUdbTable(
            _type=config.EVENT_RSVP_TYPE,
            name=config.EVENT_RSVP_TABLE_NAME,
            options={'collection_name': config.EVENT_RSVP_COLLECTION},
            db=db)
    except gpudb.GPUdbException as e:
        if "Table '%s' exists;" % config.EVENT_RSVP_TABLE_NAME in str(e):
            print('Table for Meetup events already exists')
        else:
            raise e
Code Example #9
def multithread():
    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    sharded_columns = [
        ["city", "string", "char16"],
        ["state_province", "string", "char2", "shard_key"],  # shard key column
        [
            "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
            gpudb.GPUdbColumnProperty.CHAR16
        ],
        ["airport", "string", "nullable"],  # a nullable column
        ["x", "double"],
        ["y", "double"],
        ["avg_temp", "double"],
        ["time_zone", "string", "char8", "shard_key"]  # shard key column
    ]

    sharded_table = gpudb.GPUdbTable(sharded_columns,
                                     db=h_db,
                                     use_multihead_ingest=True,
                                     multihead_ingest_batch_size=33)

    num_records = 500
    null_likelihood = 10
    alphanum = (string.ascii_letters + string.digits)
    for i in range(0, num_records):
        record = collections.OrderedDict()
        record["city"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(5, 16))])
        record["state_province"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(0, 2))])
        record["country"] = ''.join(
            [random.choice(alphanum) for n in range(0, random.randint(5, 16))])
        record[ "airport"       ] = None if (random.random() < null_likelihood) \
                                    else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] )
        record["x"] = random.uniform(-180, 180)
        record["y"] = random.uniform(-90, 90)
        record["avg_temp"] = random.uniform(-40, 110)
        record["time_zone"] = "UTC-{}".format(random.randint(-11, 14))
        sharded_table.insert_records(record)
    # end loop

    sharded_table.flush_data_to_server()
Code Example #10
def test_insert():

    test_insert_a = [(3100, 3100, "E", 0.00, 1.87, 46, 33.9, 1.34, "medium"),
                     (3100, 3100, "E", 1.42, 1.87, 46, 33.9, 1.34, "heavy")]
    test_insert_b = [(3200, 3210, "E", 0.00, 1.87, 146, 33.9, 1.34, "medium"),
                     (3200, 3330, "E", 1.42, 1.87, 146, 33.9, 1.34, "heavy")]

    try:
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
        else:
            db = gpudb.GPUdb(encoding='BINARY',
                             host=db_conn_str,
                             username=db_user,
                             password=db_pass)

        insertables = []
        for (sz, ez, direction, tp1, tp2, humd, tempr, wnd,
             wthr) in test_insert_a:
            insertable = collections.OrderedDict()
            insertable["startzoneid"] = sz
            insertable["endzoneid"] = ez
            insertable["direction"] = direction
            insertable["toll_prev_1"] = tp1
            insertable["toll_prev_2"] = tp2
            insertable["humidity"] = humd
            insertable["temperature"] = tempr
            insertable["windspeed"] = wnd
            insertable["weather"] = wthr
            insertables.append(insertable)

        sink_table = gpudb.GPUdbTable(name=TBL_NAME_toll_stream, db=db)
        insert_status = sink_table.insert_records(insertables)

        print(insert_status)
        print('\tSample records inserted ')

    except gpudb.gpudb.GPUdbException as e:

        print(e)
Code Example #11
File: kapi_io.py  Project: mmmika/kinetica-jupyterlab
def load_df(_input_table, _kdbc=KDBC):
    """Load a dataframe from a Kinetica table."""

    _table = gpudb.GPUdbTable(_type=None, name=_input_table, db=_kdbc)
    _type = _table.get_table_type()
    _columns = [_col.name for _col in _type.columns]

    #print('Getting records from <{}>'.format(_input_table), end='', flush=True)
    sys.stdout.write('Getting {} records from <{}>'.format(
        _table.count, _input_table))

    BATCH_SIZE = 10000
    _offset = 0
    _table_df = pd.DataFrame()

    while True:
        _response = _kdbc.get_records(table_name=_input_table,
                                      offset=_offset,
                                      limit=BATCH_SIZE)
        check_response(_response)

        _res_decoded = gpudb.GPUdbRecord.decode_binary_data(
            _response['type_schema'], _response['records_binary'])

        # print something to show we are working
        #print('.', end='', flush=True)
        sys.stdout.write('.')

        _offset += len(_res_decoded)
        _table_df = _table_df.append(_res_decoded)
        if not _response['has_more_records']:
            break

    # reorder dataframe columns
    _table_df = _table_df[_columns]

    print('')
    print('Records Retrieved: {}'.format(_table_df.shape))
    return _table_df
Code Example #12
def _ensure_models_table(db, logger):
    """
    Make sure that there is a table for storing trained prediction models.
    The table is created if it doesn't exist.

    :param gpudb.GPUdb db: Kinetica DB connection
    """
    table_name = 'prediction_model'
    table_check = db.has_table(table_name=table_name)
    table_structure = [
        ['model_id', 'int', 'primary_key', 'shard_key'],
        ['dump', 'bytes'],
        ['created_on', 'long', 'timestamp'],
        ['sample_cnt', 'long'],
        ['test_r2', 'double'],
        ['train_r2', 'double']
    ]

    if not table_check['table_exists']:
        logger.info('Table %s for storing trained prediction models does not exist' % table_name)
        gpudb.GPUdbTable(_type=table_structure, name=table_name, db=db)
        logger.info('Table %s created' % table_name)
    else:
        logger.info('Table %s for storing trained prediction models already exists' % table_name)
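
A small orchestration sketch tying _ensure_models_table() together with _save_model() from Code Example #7 (hedged: the connection host, the stand-in model object, and the metric values are illustrative assumptions):

import logging
import gpudb

logger = logging.getLogger('model_store')
db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1')  # hypothetical host

# make sure the table exists, then persist a (stand-in) model with its metrics
_ensure_models_table(db, logger)
model = {'coef': [0.5, 1.2]}  # stand-in for a trained, picklable model object
_save_model(db, model_id=1, model=model, test_r2=0.87, train_r2=0.91, sample_cnt=1000)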
Code Example #13
    socket.connect(ZMQ_CONN_STR)
    register_event_lifecycle(api_base=KML_API_BASE,
                             credentials=credentials,
                             event_sub_type="ZMQ_CONNECTED")

    # Prepare DB Connection
    register_event_lifecycle(api_base=KML_API_BASE,
                             credentials=credentials,
                             event_sub_type="DB_CONNECTING")
    cn_db = get_conn_db(DB_CONN_STR, DB_USER, DB_PASS)
    register_event_lifecycle(api_base=KML_API_BASE,
                             credentials=credentials,
                             event_sub_type="DB_CONNECTED")

    # [Re]Establish table handles
    h_tbl_out_audit = gpudb.GPUdbTable(name=tbl_out_audit, db=cn_db)
    h_tbl_out_results = None
    logger.info(f"DB Results Table {tbl_out_results}")
    if tbl_out_results and tbl_out_results != "NOT_APPLICABLE":
        h_tbl_out_results = gpudb.GPUdbTable(name=tbl_out_results, db=cn_db)
        logger.info(f"Established connection to sink table")
        logger.info(
            f"All results will be persisted to both Audit {tbl_out_audit} and output DB Tables {tbl_out_results}"
        )
    else:
        logger.info(
            f"All results will be persisted to Audit DB Table {tbl_out_audit} only"
        )

    register_event_lifecycle(api_base=KML_API_BASE,
                             credentials=credentials,
Code Example #14
    def __init__(self, bb_module, bb_method,
                 schema_inbound, schema_outbound,
                 zmq_dealer_host, zmq_dealer_port,
                 db_table_audit, db_table_results, db_conn_str,
                 db_user = "", db_pass = "", be_quiet = False ):
        """Construct a new KineticaBlackBox object.

        :param bb_module:
        :type bb_module: dict
        :param bb_method:
        :type bb_method: dict
        :param schema_inbound:
        :type schema_inbound: str
        :param schema_outbound:
        :type schema_outbound: str
        :param zmq_dealer_host:
        :type zmq_dealer_host: str
        :param zmq_dealer_port:
        :type zmq_dealer_port: str
        :param db_table_audit:
        :type db_table_audit: str
        :param db_table_results:
        :type db_table_results: str
        :param db_conn_str: DB connection string, passed as the GPUdb host
        :type db_conn_str: str
        :param db_user: optional
        :type db_user: str
        :param db_pass: optional
        :type db_pass: str
        :param be_quiet: optional; when True, raise the log level to INFO
        :type be_quiet: bool

        """

        logger.info("Initializing KineticaBlackBox")
        logger.info(f"zmq_dealer_host: {zmq_dealer_host}")
        logger.info(f"zmq_dealer_port: {zmq_dealer_port}")
        logger.info(f"db_table a: {db_table_audit}")
        logger.info(f"db_table r: {db_table_results}")
        logger.info(f"db_conn_str: {db_conn_str}")
        logger.info(f"db_user: {db_user}")
        logger.info(f"db_pass: *******")
        logger.info(f"schema_inbound: {schema_inbound}")
        logger.info(f"schema_outbound: {schema_outbound}")
        logger.info(f"bb_module: {bb_module}")
        logger.info(f"bb_method: {bb_method}")

        if be_quiet:
            import logging
            logger.setLevel(logging.INFO)

        self.be_quiet = be_quiet
        self.schema_inbound = schema_inbound
        self.schema_outbound = schema_outbound

        self.bb_module = bb_module
        self.bb_method = bb_method

        # Prepare DB Communications
        logger.info(f"Attempting to connect to DB at {db_conn_str} to push to {db_table_audit}")
        if db_user == 'no_cred' or db_pass == 'no_cred':
            db=gpudb.GPUdb(encoding='BINARY',
                           host=db_conn_str)
        else:
            db=gpudb.GPUdb(encoding='BINARY',
                           host=db_conn_str,
                           username=db_user,
                           password=db_pass)

        self.sink_table_audit = gpudb.GPUdbTable(name = db_table_audit, db = db)
        self.db = db

        logger.info(f"DB Results Table {db_table_results}")
        if db_table_results == "NOT_APPLICABLE":            
            logger.info(f"All results will be persisted to Audit DB Table {db_table_audit}")
            self.sink_table_results = None
        else:
            self.sink_table_results = gpudb.GPUdbTable(name = db_table_results, db = db)
            logger.info(f"Established connection to sink table")
            logger.info(self.sink_table_results)

        logger.info(self.sink_table_results)
        if self.sink_table_results is None:
            logger.info(f"All results will be persisted to Audit DB Table only")            
        else:
            logger.info(f"All results will be persisted to both Audit and output DB Tables {db_table_results}")


        logger.info("Prepping response with with schema")
        logger.info(json.dumps(json.loads(schema_outbound)))

        # Prepare ZMQ Communications
        zmq_dealer_uri = f"tcp://{zmq_dealer_host}:{zmq_dealer_port}"
        context = zmq.Context()
        self.socket = context.socket(zmq.PULL)
        #logger.info("Listening for incoming requests on topic: %s via %s" % (topicfilter,topic_source))
        self.socket.connect(zmq_dealer_uri)
Code Example #15
File: aggregate.py  Project: paullee73/Kinetica
def aggregate():
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    columns = [["city", "string", "char16"],
               [
                   "state_province",
                   gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR32
               ],
               [
                   "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR16
               ], ["x", "double"], ["y", "double"], ["avg_temp", "double"],
               ["time_zone", "string", "char8"]]

    if h_db.has_table(table_name=weather_table_name)['table_exists']:
        h_db.clear_table(weather_table_name)

    try:
        weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db)
        print("Table successfully created.")
    except gpudb.GPUdbException as e:
        print("Table creation failure: {}".format(str(e)))

    weather_table_duplicate = gpudb.GPUdbTable(None,
                                               weather_table_name,
                                               db=h_db)

    print("\n")
    print("INSERTING DATA")
    print("--------------")
    print()

    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    weather_table.insert_records(datum)

    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    weather_record_type = weather_table.get_table_type()
    single_record = [gpudb.GPUdbRecord(weather_record_type, datum).binary_data]

    response = h_db.insert_records(table_name=weather_table_name,
                                   data=single_record,
                                   list_encoding="binary")
    print("Number of single records inserted:  {}".format(
        response["count_inserted"]))

    records = []
    records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"])
    records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"])
    records.append([
        "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"
    ])
    records.append([
        "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77,
        "UTC-7"
    ])
    records.append(
        ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"])
    records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"])
    records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"])
    records.append(
        ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"])
    records.append(
        ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"])
    records.append(
        ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"])
    records.append([
        "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44,
        "UTC-4"
    ])
    records.append([
        "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"
    ])
    records.append([
        "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62,
        "UTC-4"
    ])
    records.append([
        "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"
    ])
    records.append(
        ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"])
    records.append([
        "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5,
        "UTC+10"
    ])
    records.append(
        ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"])
    records.append(
        ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"])
    records.append(
        ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"])
    records.append([
        "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68,
        "UTC+8"
    ])

    # insert the batch of records built above so the aggregation has data to work on
    weather_table.insert_records(records)

    # get a histogram of cities, divided into intervals of 20, from 40 to 80 (in terms of temperature)
    s_view = weather_table.filter(expression="y < 0")

    histogram_result = s_view.aggregate_histogram(column_name="avg_temp",
                                                  start=40,
                                                  end=80,
                                                  interval=20)
    print(
        "Number of southern hemisphere cities with average temps in the given ranges:"
    )
    for histogroup in zip([40, 60, 80], histogram_result['counts']):
        print("\t{}s: {:2.0f}".format(*histogroup))
    print()
Code Example #16
def ingest_test_data(table_name, table_type, table_records):
    """Ingest some test data to a table in Kinetica, to have it available for the examples or unit tests."""
    if DB_HANDLE.has_table(table_name=table_name)['table_exists']:
        DB_HANDLE.clear_table(table_name)
    test_data_table = gpudb.GPUdbTable(table_type, table_name, db=DB_HANDLE)
    test_data_table.insert_records(table_records)
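
A hypothetical call to ingest_test_data() (the table name, type, and records below are illustrative; DB_HANDLE is assumed to be the module-level gpudb.GPUdb connection used by the snippet above):

example_type = [['id', 'int'], ['name', 'string', 'char16']]
example_records = [[1, 'alpha'], [2, 'beta']]
ingest_test_data('example_table', example_type, example_records)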
Code Example #17
def create_model_table():
    """Create table to be able to store serialized models."""
    if DB_HANDLE.has_table(table_name=MODEL_TABLE_NAME)['table_exists']:
        DB_HANDLE.clear_table(MODEL_TABLE_NAME)
    gpudb.GPUdbTable(MODEL_TABLE_TYPE, MODEL_TABLE_NAME, db=DB_HANDLE)
Code Example #18
def create_test_output_table(table_name, table_type):
    if DB_HANDLE.has_table(table_name=table_name)['table_exists']:
        DB_HANDLE.clear_table(table_name)
    test_data_table = gpudb.GPUdbTable(table_type, table_name, db=DB_HANDLE)
Code Example #19
try:
    if db_user == 'no_cred' or db_pass == 'no_cred':
        db=gpudb.GPUdb(encoding='BINARY',
                       host=db_conn_str)
    else:
        db=gpudb.GPUdb(encoding='BINARY',
                       host=db_conn_str,
                       username=db_user,
                       password=db_pass)

    if db.has_table(table_name=TBL_NAME_toll_stream)['table_exists']:
        print('Table exists {0:s}'.format(TBL_NAME_toll_stream))

    else:
        status = gpudb.GPUdbTable(_type=TBL_SCHEMA_toll_stream,
                                  name=TBL_NAME_toll_stream,
                                  db=db,
                                  options={'collection_name': 'traffic'})
        print(status)
        print('\tTable created {0:s}'.format(TBL_NAME_toll_stream))

    sink_table = gpudb.GPUdbTable( name = TBL_NAME_toll_stream, db = db)

except gpudb.gpudb.GPUdbException as e:
    print(e)

#Retrieve one hot encodings
#encoder = pickle.load(open('toll_encoder.sav', 'rb'))

API_KEY_TOLLING = "rroAyo8V7jUoB4R7og4o9zBkagSL2JBQmbrrdKt4j1qIFzKBUBFMaZhHFm1FeI0b"
API_KEY_WEATHER = "62c667044ed34c21941755b53b286186"
Code Example #20
def gpudb_example():
    
    print ( "TUTORIAL OUTPUT")
    print ( "===============\n")

    # all tables/views used in examples below
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    """ Establish connection with a locally-running instance of Kinetica,
        using binary encoding to save memory """
    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    print ()
    print ( "CREATING A TYPE & TABLE")
    print ( "-----------------------")
    print ()

    """ Create columns; column arguments consist of a list of the name, then type, and then
        optional additional properties.  E.g., [ "column_name", column_type, column_property1,
        column_property2 ].  Note that any number of column properties can be listed as long as
        they are not mutually exclusive within themselves or with the primitive type.  Also note
        that raw string can be used for both the primitive type and the properties; but the user is
        also able to use string constants as illustrated in the example below.
    """
    columns = [
        [ "city", "string", "char16" ],
        [ "state_province", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR32 ],
        [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ],
        [ "x", "double" ],
        [ "y", "double" ],
        [ "avg_temp", "double" ],
        [ "time_zone", "string", "char8" ]
    ]

    # Clear any existing table with the same name (otherwise we won't be able to
    # create the table)
    if h_db.has_table( table_name = weather_table_name )['table_exists']:
        h_db.clear_table( weather_table_name )

    # Create the table from the type
    try:
        weather_table = gpudb.GPUdbTable( columns, weather_table_name, db = h_db )
        print ( "Table successfully created.")
    except gpudb.GPUdbException as e:
        print ( "Table creation failure: {}".format( str(e) ) )


    # We can also create a GPUdbTable object for a table that already exists in
    # the database.  All we need is the table name (and a GPUdb object).  Note how
    # we pass None for the type argument
    weather_table_duplicate = gpudb.GPUdbTable( None, weather_table_name, db = h_db )

    print ( "\n")
    print ( "INSERTING DATA")
    print ( "--------------")
    print ()

    # Insert single record example

    # Create ordered dictionary for keys & values of record
    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    # Insert the record into the table (through the GPUdbTable interface)
    weather_table.insert_records( datum )

    # Create another record
    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    # Insert the second record through the basic GPUdb interface
    # Encode record and put into a single element list
    weather_record_type = weather_table.get_table_type()
    single_record = [ gpudb.GPUdbRecord( weather_record_type, datum ).binary_data ]

    # Insert the record into the table
    response = h_db.insert_records(table_name = weather_table_name, data = single_record, list_encoding = "binary")
    print ( "Number of single records inserted:  {}".format(response["count_inserted"]))


    # Insert multiple records example
    # ===============================
    records = []
    # Create a list of in-line records
    records.append( ["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"] )
    records.append( ["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"] )
    records.append( ["Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"] )
    records.append( ["La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77, "UTC-7"] )
    records.append( ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"] )
    records.append( ["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"] )
    records.append( ["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"] )
    records.append( ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"] )
    records.append( ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"] )
    records.append( ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"] )
    records.append( ["La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44, "UTC-4"] )
    records.append( ["Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"] )
    records.append( ["Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62, "UTC-4"] )
    records.append( ["Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"] )
    records.append( ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"] )
    records.append( ["Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5, "UTC+10"] )
    records.append( ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"] )
    records.append( ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"] )
    records.append( ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"] )
    records.append( ["Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68, "UTC+8"] )

    # Insert the records into the table
    weather_table.insert_records( records )
    print ( "Number of batch records inserted:  {}".format( weather_table.size() ))

    print ( "\n")
    print ( "RETRIEVING DATA")
    print ( "---------------")
    print ()

    """ Retrieve the second set of ten records from weather_table. Note that
        records can be iterated over directly. """
    print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone"))
    print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", ""))
    for weatherLoc in weather_table.get_records( offset = 10, limit = 10 ):
        print ( "{city:<20s} {state:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
                "".format( city = weatherLoc["city"], state = weatherLoc["state_province"], country = weatherLoc["country"],
                           y = weatherLoc["y"], x = weatherLoc["x"], avg_temp = weatherLoc["avg_temp"], time_zone = weatherLoc["time_zone"] ) )

    
    """ Retrieve no more than 10 records as JSON from weather_table through the GPUdb interface.
        Note that records are stringified and have to be parsed if using the 'json' encoding. """
    weatherLocs = h_db.get_records( table_name = weather_table_name, offset = 0, limit = 10,
                                    encoding = "json", options = {"sort_by":"city"} )['records_json']

    print ( "{:<20s} {:<25s} {:<15s} {:<10s} {:<11s} {:<9s} {:<8s}".format("City","State/Province","Country","Latitude","Longitude","Avg. Temp","Time Zone"))
    print ( "{:=<20s} {:=<25s} {:=<15s} {:=<10s} {:=<11s} {:=<9s} {:=<9s}".format("", "", "", "", "", "", ""))
    for weatherLoc in weatherLocs:
        print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}".format(**json.loads(weatherLoc)))

    """ Retrieve no more than 25 of the remaining records as binary from weather
        table. Note that records are binary and have to be decoded. """
    response = h_db.get_records( table_name = weather_table_name, offset = 10, limit = 25,
                                 encoding = "binary", options = {"sort_by":"city"})
    weatherLocs = gpudb.GPUdbRecord.decode_binary_data(response["type_schema"], response["records_binary"])

    for weatherLoc in weatherLocs:
        print ( "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}".format(**weatherLoc))

    """ Note that total_number_of_records does not reflect offset/limit; it's
        the count of all records or those which match the given expression """
    print ( "\nNumber of records in new table:  {:d}".format(response["total_number_of_records"]))

    print ( "\n")
    print ( "FILTERING")
    print ( "---------")
    print ()

    ### Filter Example 1
    
    """ Filter records where column x is less than 0, i.e., cities in the
        western hemisphere, and store the filter in a view.  Note that the GPUdbTable
        creates a random view name if one is not supplied. """
    view1 = weather_table.filter( expression = "x < 0" )
    print ( "Number of records in the western hemisphere:  {}".format( view1.size() ))

    ### Filter Example 2
    
    """ Filter records where column x is less than 0 and column y is greater
        than 0, i.e., cities in the northwestern semi-hemisphere, and store
        the filter in a view.  This filter operation is done through the base
        GPUdb interface. """
    response = h_db.filter(table_name = weather_table_name, view_name = weather_nw_view,
                           expression = "x < 0 and y > 0" )
    print ( "Number of records in the northwestern semi-hemisphere:  {}".format( response["count"] ))

    ### Filter Example 3
    
    """ Filter records using the same expressions as Example 2, but using
        query chaining this time (note that we're using the view created by the
        first filter. """

    nw_view = view1.filter( expression = "y > 0" )
    print ( "Number of records in the northwestern semi-hemisphere (with query chaining):  {}"
            "".format( nw_view.size() ))

    ### Filter Example 4
    
    """ Filter by list where country name is USA, Brazil, or Australia.  Here we
        use the duplicate GPUdbTable object (but it points to the same DB table). """
    country_map = {"country": ["USA", "Brazil", "Australia"]}
    view3 = weather_table_duplicate.filter_by_list( column_values_map = country_map )
    print ( "Number of records where country name is USA, Brazil, or Australia:  {}"
            "".format( view3.size() ))

    ### Filter Example 5
    
    """ Filter by range cities that are east of GMT (the Prime Meridian) """
    view4 = weather_table.filter_by_range( column_name = "x", lower_bound = 0,
                                           upper_bound = 180 )
    print ( "Number of records that are east of the Prime Meridian (x > 0):  {}"
            "".format( view4.size() ))


    print ( "\n")
    print ( "AGGREGATING, GROUPING, and HISTOGRAMS")
    print ( "-------------------------------------")
    print ()

    ### Aggregate Example 1
    
    """ Aggregate count, min, mean, and max on the average temperature.  Note
        that unlike the filter functions, the aggregate functions of GPUdbTable
        return the response from the database. """
    stat_results = weather_table.aggregate_statistics( column_name = "avg_temp",
                                                       stats = "count,min,max,mean" )
    print ( "Statistics of values in the average temperature column:")
    print ( "\tCount: {count:.0f}\n\tMin:  {min:4.2f}\n\tMean: {mean:4.2f}\n\tMax:  {max:4.2f}"
            "\n".format( **stat_results["stats"] ))

    ### Aggregate Example 2
    
    """ Find unique city names. """
    results = weather_table.aggregate_unique( column_name = "city", offset = 0,
                                              limit = 25 )
    print ( "Unique city names:")
    for weatherLoc in results.data["city"]:
        print ( "\t* {}".format( weatherLoc ))
    print ()

    """ Same operation, but through the base GPUdb interface.  Note that the
        results have to parsed specially using GPUdb.parse_dynamic_response().
        Also, we're using the 'json' encoding in this case (the 'binary' encoding
        can also be used).  Also note how the data is accessed differently. """
    response = h_db.aggregate_unique( table_name = weather_table_name,
                                      column_name = "city", offset = 0,
                                      limit = 25, encoding = "json")
    print ( "Unique city names (using the GPUdb class):")
    weatherLocs = h_db.parse_dynamic_response(response)['response']['city']
    for weatherLoc in weatherLocs:
        print ( "\t* {}".format(weatherLoc))
    print ()

    ### Aggregate Example 3
    
    """ Find number of weather locations per country in the northwestern
        semi-hemisphere.  Note that the data is automatically decoded. """
    results = nw_view.aggregate_group_by( column_names = ["country", "count(country)"], offset = 0,
                                          limit = 25 )
    print ( "Weather locations per country in the northwest semi-hemisphere:")
    for country in zip(results.data["country"], results.data["count(country)"]):
        print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1]))
    print ()

    """ Find number of weather locations per country in the northwestern
        semi-hemisphere; use binary decoding explicitly since we're using
        the GPUdb class. """
    response = h_db.aggregate_group_by(table_name=weather_nw_view, column_names=["country", "count(country)"], offset=0, limit=25, encoding="binary")
    countries = gpudb.GPUdbRecord.decode_binary_data(response["response_schema_str"], response["binary_encoded_response"])
    print ( "Weather locations per country in the northwest semi-hemisphere:")
    for country in zip(countries["column_1"], countries["column_2"]):
        print ( "\t{:<10s}{:2d}".format(country[0] + ":", country[1]))
    print ()

    ### Aggregate Example 4
    
    """ Filter table to southeastern semi-hemisphere records, group by country,
        and aggregate min, max, and mean on the average temperature; using the default
        binary decoding and the GPUdbTable interface. """
    # Do a filter first
    se_view = weather_table.filter( expression="x > 0 and y < 0" )
    # Then do the aggregation operation (note how we use the 'data' property to get
    # the data)
    data = se_view.aggregate_group_by( column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"],
                                       offset = 0, limit = 25 ).data
    print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", ""))
    for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]):
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather))
    print ()

    """ Filter table to southeastern semi-hemisphere records, group by country,
        and aggregate min, max, and mean on the average temperature; using the default
        binary decoding and the base GPUdb interface. """
    h_db.filter(table_name = weather_table_name, view_name = weather_se_view, expression="x > 0 and y < 0")

    response = h_db.aggregate_group_by( table_name = weather_se_view,
                                        column_names = ["country", "min(avg_temp)", "max(avg_temp)", "mean(avg_temp)"],
                                        offset = 0, limit = 25 )
    data = h_db.parse_dynamic_response(response)['response']
    print ( "{:<20s} {:^5s} {:^5s} {:^5s}".format("SE Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<5s} {:=<5s} {:=<5s}".format("", "", "", ""))
    for countryWeather in zip(data["country"], data["min(avg_temp)"], data["mean(avg_temp)"], data["max(avg_temp)"]):
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format(*countryWeather))
    print ()

    ### Aggregate Example 5
    
    """ Filter for southern hemisphere cities and create a histogram for the
        average temperature of those cities (divided into every 10 degrees,
        e.g., 40s, 50s, 60s, etc.) """
    s_view = weather_table.filter( expression = "y < 0" )

    histogram_result = s_view.aggregate_histogram( column_name = "avg_temp",
                                                   start = 40, end = 90,
                                                   interval = 10 )
    print ( "Number of southern hemisphere cities with average temps in the given ranges:")
    for histogroup in zip([40, 50, 60, 70, 80], histogram_result['counts']):
        print ( "\t{}s: {:2.0f}".format(*histogroup))
    print()


    ### Aggregate Example 6

    """ Aggregate group by has an option 'result_table' which creates a result table and does not
        return the data.  Very useful when the data is large and we want to fetch records from it
        in batches.
    """
    # Create another table with the same type, and generate a lot of random data for it.
    # Note that we're allowing GPUdbTable to come up with a random name for the table.
    weather_table2 = gpudb.GPUdbTable( columns, db = h_db )
    # Create random data (but specify a range for the average temperature column)
    weather_table2.insert_records_random( count = 10000,
                                          options = { "avg_temp": {"min": -20, "max": 105 } } )
    print()
    print ( "Second weather table size: ", weather_table2.size() )

    # Create a view on the south-western quadrant of the planet
    sw_view = weather_table2.filter( expression="x < 0 and y < 0" )
    # Then do the aggregation operation.  Note that the column names need
    # aliases to utilize the 'result_table' option.
    agg_result_table = sw_view.aggregate_group_by( column_names = ["country",
                                                                   "min(avg_temp) as min_avg_temp",
                                                                   "max(avg_temp) as max_avg_temp",
                                                                   "mean(avg_temp) as mean_avg_temp"],
                                                   offset = 0, limit = 25,
                                                   options = { "result_table": gpudb.GPUdbTable.prefix_name("agg_") } )
    print ( "Size of records in the SW quadrant of the planet: ", agg_result_table.size() )
    print ( "{:<20s} {:^7s} {:^7s} {:^5s}".format("SW Semi-Hemi Country", "Min", "Mean", "Max"))
    print ( "{:=<20s} {:=<6s} {:=<6s} {:=<6s}".format("", "", "", ""))

    # Note that we can slice GPUdbTable objects to fetch the data inside
    for record in agg_result_table[ 10 : 50 ]:
        print ( "{:<20s} {:5.2f} {:5.2f} {:5.2f}".format( record["country"], record["min_avg_temp"], record["mean_avg_temp"], record["max_avg_temp"] ))
    print ()
    

    print ( "\n")
    print ( "DELETING DATA")
    print ( "-------------")
    print ()

    """ Filter for cities that are either south of latitude -50 or west of
        longitude -50 to determine how many records will be deleted; delete
        the records, then confirm the deletion by refiltering. """

    deleteExpression = "x < -50 or y < -50"
    num_records_to_delete = weather_table.filter( expression = deleteExpression ).count
    print ( "Number of records that meet deletion criteria before deleting:  {}"
            "".format( num_records_to_delete ) )

    weather_table.delete_records( expressions = [ deleteExpression ] )

    # Note that we're using the duplicate GPUdbTable object which points to the
    # same table in the DB
    num_records_post_delete = weather_table_duplicate.filter( expression = deleteExpression ).count
    print ( "Number of records that meet deletion criteria after deleting (expect 0):  {}".format( num_records_post_delete ))
    print ()



    print ( "\n")
    print ( "Using Multi-head Ingestion")
    print ( "--------------------------")
    print ()

    """For tables with primary or shard key columns, it might be useful to use
       the multi-head ingestion procedure for inserting records into a table for
       heavy ingestion loads.  There are benefits and drawbacks of using multi-head
       ingestion: the benefit is that if the database is configured for multi-head
       ingestion and there is a tremendous ingestion load, then the ingestion will
       be faster over all.  However, the drawback is that the client has to do some
       calculation PER record to find out which worker rank of the database server to
       send the record to.  So, unless the following parameters are met, it is unwise
       to use multi-head ingestion as it will unnecessarily slow ingestion down:

       * The server is configured to use multi-head ingestion
       * The table type has at least one primary or shard key column
       * There is a heavy stream of data to be inserted
    """
    # Create a type that has some shard keys
    sharded_columns = [
        [ "city", "string", "char16" ],
        [ "state_province", "string", "char2", "shard_key" ],  # shard key column
        [ "country", gpudb.GPUdbRecordColumn._ColumnType.STRING, gpudb.GPUdbColumnProperty.CHAR16 ],
        [ "airport", "string", "nullable" ], # a nullable column
        [ "x", "double" ],
        [ "y", "double" ],
        [ "avg_temp", "double" ],
        [ "time_zone", "string", "char8", "shard_key" ] # shard key column
    ]

    # Create a table with the multi-head ingestion options
    # (the default batch size is 10k)
    sharded_table = gpudb.GPUdbTable( sharded_columns, db = h_db,
                                      use_multihead_ingest = True,
                                      multihead_ingest_batch_size = 33 )

    # Generate some random data to be inserted
    num_records = 100
    null_likelihood = 10
    alphanum = (string.ascii_letters + string.digits)
    for i in range(0, num_records):
        record = collections.OrderedDict()
        record[ "city"          ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] )
        record[ "state_province"] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 0, 2 ) )] )
        record[ "country"       ] = ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 5, 16 ) )] )
        record[ "airport"       ] = None if (random.random() < null_likelihood) \
                                    else ''.join( [random.choice( alphanum ) for n in range( 0, random.randint( 2, 25 ) )] )
        record[ "x"             ] = random.uniform( -180, 180 )
        record[ "y"             ] = random.uniform(  -90,  90 )
        record[ "avg_temp"      ] = random.uniform(  -40, 110 )
        record[ "time_zone"     ] = "UTC-{}".format( random.randint( -11, 14 ) )
        sharded_table.insert_records( record )
    # end loop

    print ( "Size of sharded table (expect less than 100 as the batch size is 33 and \n100 is not a multiple of 33): ", sharded_table.size() )
    print ()
    print ( "Flushing the records remaining in the ingestor queue...")
    sharded_table.flush_data_to_server()
    print ( "Size of sharded table post forced flush (expect 100): ", sharded_table.size() )
    print ()
Code Example #21
    register_event_lifecycle(api_base=KML_API_BASE,
                             credentials=credentials,
                             event_sub_type='DB_CONNECTED')

    # [Re]Establish table handles

    h_tbl_out_audit = get_tbl_handle(tbl_out_audit,
                                     db=cn_db,
                                     schema=SCHEMA_AUDIT)
    h_tbl_out_results = None
    logger.info(f"DB Results Table {tbl_out_results}")
    if tbl_out_results and tbl_out_results != 'NOT_APPLICABLE':
        if USE_MULTIHEAD_IO == 'TRUE':
            h_tbl_out_results = gpudb.GPUdbTable(
                name=tbl_out_results,
                db=cn_db,
                use_multihead_io=True,
                flush_multi_head_ingest_per_insertion=True)
        else:
            h_tbl_out_results = gpudb.GPUdbTable(name=tbl_out_results,
                                                 db=cn_db)
        logger.info(f"Established connection to sink table")
        logger.info(
            f"All results will be persisted to both Audit {tbl_out_audit} "
            f"and output DB Tables {tbl_out_results}")
    else:
        logger.info(f"All results will be persisted to Audit DB "
                    f"Table {tbl_out_audit} only")

    record_type = gpudb.RecordType.from_type_schema('', schema_decoder, {})
Code Example #22
           ["strike_price", "float"], ["maturity_y", "int"],
           ["maturity_m", "int"], ["maturity_d", "int"],
           ["calendar", "string"], ["day_count", "string"],
           ["risk_free_rate", "float"], ["dividend_rate", "float"],
           ["calc_dt_y", "int"], ["calc_dt_m", "int"], ["calc_dt_d", "int"],
           ["volatility", "float"]]

# Clear the table at run time, then create the table
no_error_option = {"no_error_if_not_exists": "true"}
h_db.clear_table(table_name=NEW_TABLE, options=no_error_option)
collection_option_object = gpudb.GPUdbTableOptions.default().collection_name(
    COLLECTION)
print("Table cleared")

try:
    table_gps_obj = gpudb.GPUdbTable(columns, NEW_TABLE,
                                     collection_option_object, h_db)
    print("Table created succesfully")
except gpudb.GPUdbException as e:
    print("Table creation failure: {}".format(str(e)))
print(df.head(5))

index = 0
h_db = gpudb.GPUdb(encoding=ENCODING, host=HOST, port=PORT)
# Implement the GpuDB table class instead of manual JSON
my_type = """
{
  "type": "record",
  "name": "type_name",
  "fields": [
             {"name": "symbol","type": "string"},
             {"name": "spot_price","type": "float"},
Code Example #23
#Retrieve model and one hot encodings
model = pickle.load(open('toll_model.sav', 'rb'))

#Retrieve one hot encodings
encoder = pickle.load(open('toll_encoder.sav', 'rb'))

try:
    if db_user == 'no_cred' or db_pass == 'no_cred':
        db = gpudb.GPUdb(encoding='BINARY', host=db_conn_str)
    else:
        db = gpudb.GPUdb(encoding='BINARY',
                         host=db_conn_str,
                         username=db_user,
                         password=db_pass)

    toll_stream_tbl = gpudb.GPUdbTable(name=TBL_NAME_toll_stream, db=db)

except gpudb.gpudb.GPUdbException as e:
    print(e)


def get_weather_dict():
    url = 'https://api.weatherbit.io/v2.0/forecast/hourly?city=Arlington,VA&key=62c667044ed34c21941755b53b286186'
    weather_json = requests.get(url=url)
    weather_dict = json.loads(weather_json.text)
    return weather_dict


def get_future_weather(dfNew, weather_dict, hour):
    #Get just the data from the necessary hour
    weather_data = weather_dict['data'][hour]
Code Example #24
File: retrieve.py  Project: paullee73/Kinetica
def retrieve():
    weather_table_name = "weather"
    weather_w_view = "weather_west"
    weather_nw_view = "weather_northwest"
    weather_country_view = "weather_country"
    weather_e_view = "weather_east"
    weather_se_view = "weather_southeast"
    weather_h_view = "weather_histogram"

    h_db = gpudb.GPUdb(encoding='BINARY', host='127.0.0.1', port='9191')

    columns = [["city", "string", "char16"],
               [
                   "state_province",
                   gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR32
               ],
               [
                   "country", gpudb.GPUdbRecordColumn._ColumnType.STRING,
                   gpudb.GPUdbColumnProperty.CHAR16
               ], ["x", "double"], ["y", "double"], ["avg_temp", "double"],
               ["time_zone", "string", "char8"]]

    if h_db.has_table(table_name=weather_table_name)['table_exists']:
        h_db.clear_table(weather_table_name)

    try:
        weather_table = gpudb.GPUdbTable(columns, weather_table_name, db=h_db)
        print("Table successfully created.")
    except gpudb.GPUdbException as e:
        print("Table creation failure: {}".format(str(e)))

    weather_table_duplicate = gpudb.GPUdbTable(None,
                                               weather_table_name,
                                               db=h_db)

    print("\n")
    print("INSERTING DATA")
    print("--------------")
    print()

    datum = collections.OrderedDict()
    datum["city"] = "Washington, D.C."
    datum["state_province"] = "--"
    datum["country"] = "USA"
    datum["x"] = -77.016389
    datum["y"] = 38.904722
    datum["avg_temp"] = 58.5
    datum["time_zone"] = "UTC-5"

    weather_table.insert_records(datum)

    datum2 = collections.OrderedDict()
    datum2["city"] = "Washington, D.C."
    datum2["state_province"] = "--"
    datum2["country"] = "USA"
    datum2["x"] = -77.016389
    datum2["y"] = 38.904722
    datum2["avg_temp"] = 58.5
    datum2["time_zone"] = "UTC-5"

    weather_record_type = weather_table.get_table_type()
    single_record = [gpudb.GPUdbRecord(weather_record_type, datum).binary_data]

    response = h_db.insert_records(table_name=weather_table_name,
                                   data=single_record,
                                   list_encoding="binary")
    print("Number of single records inserted:  {}".format(
        response["count_inserted"]))

    records = []
    records.append(["Paris", "TX", "USA", -95.547778, 33.6625, 64.6, "UTC-6"])
    records.append(["Memphis", "TN", "USA", -89.971111, 35.1175, 63, "UTC-6"])
    records.append([
        "Sydney", "Nova Scotia", "Canada", -60.19551, 46.13631, 44.5, "UTC-4"
    ])
    records.append([
        "La Paz", "Baja California Sur", "Mexico", -110.310833, 24.142222, 77,
        "UTC-7"
    ])
    records.append(
        ["St. Petersburg", "FL", "USA", -82.64, 27.773056, 74.5, "UTC-5"])
    records.append(["Oslo", "--", "Norway", 10.75, 59.95, 45.5, "UTC+1"])
    records.append(["Paris", "--", "France", 2.3508, 48.8567, 56.5, "UTC+1"])
    records.append(
        ["Memphis", "--", "Egypt", 31.250833, 29.844722, 73, "UTC+2"])
    records.append(
        ["St. Petersburg", "--", "Russia", 30.3, 59.95, 43.5, "UTC+3"])
    records.append(
        ["Lagos", "Lagos", "Nigeria", 3.384082, 6.455027, 83, "UTC+1"])
    records.append([
        "La Paz", "Pedro Domingo Murillo", "Bolivia", -68.15, -16.5, 44,
        "UTC-4"
    ])
    records.append([
        "Sao Paulo", "Sao Paulo", "Brazil", -46.633333, -23.55, 69.5, "UTC-3"
    ])
    records.append([
        "Santiago", "Santiago Province", "Chile", -70.666667, -33.45, 62,
        "UTC-4"
    ])
    records.append([
        "Buenos Aires", "--", "Argentina", -58.381667, -34.603333, 65, "UTC-3"
    ])
    records.append(
        ["Manaus", "Amazonas", "Brazil", -60.016667, -3.1, 83.5, "UTC-4"])
    records.append([
        "Sydney", "New South Wales", "Australia", 151.209444, -33.865, 63.5,
        "UTC+10"
    ])
    records.append(
        ["Auckland", "--", "New Zealand", 174.74, -36.840556, 60.5, "UTC+12"])
    records.append(
        ["Jakarta", "--", "Indonesia", 106.816667, -6.2, 83, "UTC+7"])
    records.append(
        ["Hobart", "--", "Tasmania", 147.325, -42.880556, 56, "UTC+10"])
    records.append([
        "Perth", "Western Australia", "Australia", 115.858889, -31.952222, 68,
        "UTC+8"
    ])

    weather_table.insert_records(records)

    weatherLocs = h_db.get_records(table_name=weather_table_name,
                                   offset=0,
                                   limit=10,
                                   encoding="json",
                                   options={"sort_by": "city"})['records_json']
    for weatherLoc in weatherLocs:
        print(
            "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
            .format(**json.loads(weatherLoc)))

    response = h_db.get_records(table_name=weather_table_name,
                                offset=10,
                                limit=25,
                                encoding="binary",
                                options={"sort_by": "city"})
    weatherLocs = gpudb.GPUdbRecord.decode_binary_data(
        response["type_schema"], response["records_binary"])

    for weatherLoc in weatherLocs:
        print(
            "{city:<20s} {state_province:<25s} {country:<15s} {y:10.6f} {x:11.6f} {avg_temp:9.1f}   {time_zone}"
            .format(**weatherLoc))