Esempio n. 1
0
def test_all_types():
    """Bulk-insert three rows covering every column type and verify them.

    Creates the ``pymcsapi_all_types`` columnstore table, writes one row of
    simple values, one of "min" values and one of "max" values through a
    ``pymcsapi`` bulk insert, then checks each stored row with
    ``all_types_validate``. The table is dropped once validation has passed.

    A ``RuntimeError`` raised while inserting rolls the bulk insert back and
    fails the test. The database connection is closed on every exit path so
    a failing run does not leak it.
    """
    # create connection for table creation / query
    conn = create_conn()
    try:
        # create all_types table
        tablename = 'pymcsapi_all_types'
        drop_table(conn, tablename)
        create_all_types = """create table %s (
    uint64 bigint unsigned,
    int64 bigint,
    uint32 int unsigned,
    int32 int,
    uint16 smallint unsigned,
    int16 smallint,
    uint8 tinyint unsigned,
    `int8` tinyint,
    f float,
    d double,
    ch4 char(5),
    vch30 varchar(30),
    dt date,
    dtm datetime,
    dc decimal(18),
    tx text
    ) engine=columnstore""" % (tablename, )
        exec_stmt(conn, create_all_types)

        # load rows into all_types table
        driver = pymcsapi.ColumnStoreDriver()
        bulk = driver.createBulkInsert(DB_NAME, tablename, 0, 0)
        try:
            # One tuple per table row; the position inside a tuple is the
            # column index handed to setColumn(). The long() wrappers are
            # kept for the 64-bit columns so those values are passed with
            # the same Python type as before.
            rows = (
                # simple values
                (
                    long(1), long(2),
                    3, 4, 5, 6, 7, 8,
                    1.234, 2.34567,
                    'ABCD', 'Hello World',
                    pymcsapi.ColumnStoreDateTime(2017, 9, 8),
                    pymcsapi.ColumnStoreDateTime(2017, 9, 8, 13, 58, 23),
                    pymcsapi.ColumnStoreDecimal(123),
                    'Hello World Longer',
                ),
                # min values
                (
                    long(0), long(-9223372036854775806),
                    0, -2147483646, 0, -32766, 0, -126,
                    1.234, 2.34567,
                    'A', 'B',
                    pymcsapi.ColumnStoreDateTime(1000, 1, 1),
                    pymcsapi.ColumnStoreDateTime(1000, 1, 1, 0, 0, 0),
                    pymcsapi.ColumnStoreDecimal(-123),
                    'C',
                ),
                # max values
                (
                    # python long is signed, so the unsigned 64-bit column
                    # gets the same value as the signed one
                    long(9223372036854775807), long(9223372036854775807),
                    4294967293, 2147483647, 65533, 32767, 253, 127,
                    1.234, 2.34567,
                    'ZYXW', '012345678901234567890123456789',
                    pymcsapi.ColumnStoreDateTime(9999, 12, 31),
                    pymcsapi.ColumnStoreDateTime(9999, 12, 31, 23, 59, 59),
                    pymcsapi.ColumnStoreDecimal(123),
                    '012345678901234567890123456789',
                ),
            )
            for values in rows:
                for column, value in enumerate(values):
                    bulk.setColumn(column, value)
                bulk.writeRow()

            bulk.commit()
        except RuntimeError as err:
            bulk.rollback()
            pytest.fail("Error executing bulk insert: %s" % (err, ))

        # verify data
        all_types_validate(
            conn, 1,
            "1, 2, 3, 4, 5, 6, 7, 8, 1.234, 2.34567, ABCD, Hello World, 2017-09-08, 2017-09-08 13:58:23, 123, Hello World Longer"
        )
        all_types_validate(
            conn, 0,
            "0, -9223372036854775806, 0, -2147483646, 0, -32766, 0, -126, 1.234, 2.34567, A, B, 1000-01-01, 1000-01-01 00:00:00, -123, C"
        )
        all_types_validate(
            conn, 9223372036854775807,
            "9223372036854775807, 9223372036854775807, 4294967293, 2147483647, 65533, 32767, 253, 127, 1.234, 2.34567, ZYXW, 012345678901234567890123456789, 9999-12-31, 9999-12-31 23:59:59, 123, 012345678901234567890123456789"
        )

        # Only reached when every check passed; after a failure the table is
        # left in place (the next run drops it before re-creating it).
        drop_table(conn, tablename)
    finally:
        # Release the connection even when the insert or a validation failed.
        conn.close()
def export(database, table, df, configuration=None):
    """Bulk-insert every row returned by ``df.collect()`` into a ColumnStore table.

    Args:
        database: Name of the target database.
        table: Name of the target table.
        df: Object whose ``collect()`` returns a sequence of indexable rows
            (presumably a Spark DataFrame; verify against callers).
        configuration: Optional argument forwarded to
            ``pymcsapi.ColumnStoreDriver``. When ``None`` the driver is
            constructed without arguments.

    Per-value handling:
        * ``None`` becomes NULL for nullable columns, otherwise the column's
          default value is written and a warning is printed.
        * ``bool`` is written as 1 / 0.
        * ``datetime.date`` (and therefore ``datetime.datetime``) is written
          as a ``'%Y-%m-%d %H:%M:%S'`` string.
        * ``decimal.Decimal`` is written as ``ColumnStoreDecimal`` for
          decimal/float/double columns and as an integer for any other
          column type.
        * Python 2 ``unicode`` is UTF-8 encoded.
        * Anything else is passed through unchanged.

    Row values beyond the table's column count are ignored.

    Any exception raised while inserting is caught: the bulk insert is rolled
    back and the error is printed, not re-raised. The insert summary is
    printed in either case. Nothing is returned.

    Side effect: on Python 3 the module-level name ``long`` is bound to
    ``int``.
    """
    global long
    python2 = True

    if sys.version_info[0] == 3:
        # Python 3 has no ``long``; alias it so the integer conversion of
        # Decimal values below works on both major versions.
        long = int
        python2 = False

    rows = df.collect()
    if configuration is None:
        driver = pymcsapi.ColumnStoreDriver()
    else:
        driver = pymcsapi.ColumnStoreDriver(configuration)
    bulkInsert = driver.createBulkInsert(database, table, 0, 0)

    # get the column count of table
    dbCatalog = driver.getSystemCatalog()
    dbTable = dbCatalog.getTable(database, table)
    dbTableColumnCount = dbTable.getColumnCount()

    # Column type codes that receive a ColumnStoreDecimal, in order:
    # DATA_TYPE_DECIMAL, DATA_TYPE_UDECIMAL, DATA_TYPE_FLOAT,
    # DATA_TYPE_UFLOAT, DATA_TYPE_DOUBLE, DATA_TYPE_UDOUBLE
    decimalCapableTypes = (4, 18, 7, 21, 10, 23)

    # Pre-bind the names used by the error report so the handler cannot hit
    # an unbound variable (e.g. no rows at all and commit() raises).
    row = None
    value = None

    # insert row by row into table
    try:
        for row in rows:
            value = None
            # Only the first dbTableColumnCount values of a row are written.
            for columnId in range(min(len(row), dbTableColumnCount)):
                value = row[columnId]

                if value is None:
                    dbColumn = dbTable.getColumn(columnId)
                    if dbColumn.isNullable():
                        bulkInsert.setNull(columnId)
                    else:
                        print("warning: column %d is not nullable. Using default value instead." % (columnId,))
                        bulkInsert.setColumn(columnId, dbColumn.getDefaultValue())

                # bool must be tested before any numeric handling because
                # bool is a subclass of int
                elif isinstance(value, bool):
                    bulkInsert.setColumn(columnId, 1 if value else 0)

                elif isinstance(value, datetime.date):
                    bulkInsert.setColumn(columnId, value.strftime('%Y-%m-%d %H:%M:%S'))

                elif isinstance(value, decimal.Decimal):
                    if dbTable.getColumn(columnId).getType() in decimalCapableTypes:
                        # fixed-point text avoids exponent notation
                        bulkInsert.setColumn(columnId, pymcsapi.ColumnStoreDecimal('{0:f}'.format(value)))
                    else:
                        # any other column type gets the integer part
                        bulkInsert.setColumn(columnId, long(value))

                # handle python2 unicode strings
                elif python2 and isinstance(value, unicode):
                    bulkInsert.setColumn(columnId, value.encode('utf-8'))

                # any other datatype is inserted without parsing
                else:
                    bulkInsert.setColumn(columnId, value)
            bulkInsert.writeRow()
        bulkInsert.commit()
    except Exception as e:
        # Best-effort export: report the failure instead of propagating it.
        bulkInsert.rollback()
        print("An exception occured. The bulk insert was rolled back.")
        if row is not None:
            # ``value`` is the last value read from ``row`` before the failure.
            print("row: %s, ingest type: %s, ingest value: %s" % (row, type(value), value))
        print(type(e), str(e))

    # print a short summary of the insertion process
    summary = bulkInsert.getSummary()
    print("Execution time: %s" % (summary.getExecutionTime(),))
    print("Rows inserted: %s" % (summary.getRowsInsertedCount(),))
    print("Truncation count: %s" %(summary.getTruncationCount(),))
    print("Saturated count: %s" %(summary.getSaturatedCount(),))
    print("Invalid count: %s" %(summary.getInvalidCount(),))
Esempio n. 3
0
## The table from advanced_bulk_insert.sql needs to be created in the test database
##
## NOTE: if you edit this file please update the line numbers in
## advanced_bulk_insert.rst

import pymcsapi

try:  # bulk-insert two rows into table "t2" of database "test", then print the insert summary
    driver = pymcsapi.ColumnStoreDriver()
    bulk = driver.createBulkInsert("test", "t2", 0, 0)
    bulk.setColumn(0, 1)  # row 1: one setColumn() call per column, plain int/str values
    bulk.setColumn(1, "Andrew")
    bulk.setColumn(2, "1936-12-24")
    bulk.setColumn(3, "2017-07-07 15:14:12")
    bulk.setColumn(4, "15239.45")
    bulk.writeRow()  # finish row 1
    bulk.setColumn(0, 2)[0].setColumn(1, "David")[0].setColumn(  # row 2: calls chained on element [0] of each setColumn() result
        2, pymcsapi.ColumnStoreDateTime(1972, 5, 23))[0].setColumn(
            3,
            pymcsapi.ColumnStoreDateTime(2017, 7, 7, 15, 20, 18))[0].setColumn(
                4, pymcsapi.ColumnStoreDecimal(2347623, 2))[0].writeRow()  # NOTE(review): (2347623, 2) presumably means 23476.23 - confirm
    bulk.commit()
    summary = bulk.getSummary()  # counters printed below are read from this summary object
    print("Execution time: %s" % (summary.getExecutionTime(), ))
    print("Rows inserted: %s" % (summary.getRowsInsertedCount(), ))
    print("Truncation count: %s" % (summary.getTruncationCount(), ))
    print("Saturated count: %s" % (summary.getSaturatedCount(), ))
    print("Invalid count: %s" % (summary.getInvalidCount(), ))
except RuntimeError as err:  # only the error is printed; no explicit rollback() is issued here
    print("Error caught: %s" % (err, ))