def test_all_types():
    """Bulk-insert simple, minimum and maximum values for every column type.

    The test (re-)creates the ``pymcsapi_all_types`` ColumnStore table, writes
    three rows through a pymcsapi bulk insert, checks each row with
    ``all_types_validate`` and finally drops the table again.

    A ``RuntimeError`` raised while building or writing the rows rolls the
    bulk insert back and fails the test via ``pytest.fail``.

    The connection is closed in a ``finally`` clause so that a failed insert
    or a failed validation no longer leaks it.  The table is only dropped
    after a fully successful run; it is dropped before being re-created at
    the start of every run anyway.
    """
    # create connection for table creation / query
    conn = create_conn()
    try:
        # create all_types table
        tablename = 'pymcsapi_all_types'
        drop_table(conn, tablename)
        create_all_types = (
            "create table %s ( "
            "uint64 bigint unsigned, "
            "int64 bigint, "
            "uint32 int unsigned, "
            "int32 int, "
            "uint16 smallint unsigned, "
            "int16 smallint, "
            "uint8 tinyint unsigned, "
            "`int8` tinyint, "
            "f float, "
            "d double, "
            "ch4 char(5), "
            "vch30 varchar(30), "
            "dt date, "
            "dtm datetime, "
            "dc decimal(18), "
            "tx text "
            ") engine=columnstore"
        ) % (tablename, )
        exec_stmt(conn, create_all_types)

        # load rows into all_types table
        d = pymcsapi.ColumnStoreDriver()
        b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
        try:
            # The rows are built inside the try block on purpose: a
            # RuntimeError raised while constructing a pymcsapi value is
            # handled exactly like one raised by setColumn()/writeRow().
            # Each tuple lists the values in table column order (0..15).
            rows = (
                # simple values
                (
                    long(1), long(2), int(3), int(4), int(5), int(6),
                    int(7), int(8), float(1.234), float(2.34567),
                    'ABCD', 'Hello World',
                    pymcsapi.ColumnStoreDateTime(2017, 9, 8),
                    pymcsapi.ColumnStoreDateTime(2017, 9, 8, 13, 58, 23),
                    pymcsapi.ColumnStoreDecimal(123),
                    'Hello World Longer',
                ),
                # min values
                (
                    long(0), long(-9223372036854775806), int(0),
                    int(-2147483646), int(0), int(-32766), int(0),
                    int(-126), float(1.234), float(2.34567),
                    'A', 'B',
                    pymcsapi.ColumnStoreDateTime(1000, 1, 1),
                    pymcsapi.ColumnStoreDateTime(1000, 1, 1, 0, 0, 0),
                    pymcsapi.ColumnStoreDecimal(-123),
                    'C',
                ),
                # max values
                (
                    long(9223372036854775807),
                    long(9223372036854775807),  # python long is signed
                    int(4294967293), int(2147483647), int(65533),
                    int(32767), int(253), int(127), float(1.234),
                    float(2.34567),
                    'ZYXW', '012345678901234567890123456789',
                    pymcsapi.ColumnStoreDateTime(9999, 12, 31),
                    pymcsapi.ColumnStoreDateTime(9999, 12, 31, 23, 59, 59),
                    pymcsapi.ColumnStoreDecimal(123),
                    '012345678901234567890123456789',
                ),
            )
            for values in rows:
                for column, value in enumerate(values):
                    b.setColumn(column, value)
                b.writeRow()
            b.commit()
        except RuntimeError as err:
            b.rollback()
            pytest.fail("Error executing bulk insert: %s" % (err, ))

        # verify data
        # NOTE(review): the second argument is presumably the uint64 value
        # all_types_validate uses to select the row -- confirm against its
        # definition.
        expectations = (
            (
                1,
                "1, 2, 3, 4, 5, 6, 7, 8, 1.234, 2.34567, ABCD, Hello World, 2017-09-08, 2017-09-08 13:58:23, 123, Hello World Longer",
            ),
            (
                0,
                "0, -9223372036854775806, 0, -2147483646, 0, -32766, 0, -126, 1.234, 2.34567, A, B, 1000-01-01, 1000-01-01 00:00:00, -123, C",
            ),
            (
                9223372036854775807,
                "9223372036854775807, 9223372036854775807, 4294967293, 2147483647, 65533, 32767, 253, 127, 1.234, 2.34567, ZYXW, 012345678901234567890123456789, 9999-12-31, 9999-12-31 23:59:59, 123, 012345678901234567890123456789",
            ),
        )
        for key, expected in expectations:
            all_types_validate(conn, key, expected)

        drop_table(conn, tablename)
    finally:
        # Always release the connection, even when the insert or one of the
        # validations above failed.
        conn.close()
def export(database, table, df, configuration=None):
    """Write every row of ``df`` into a ColumnStore table via bulk insert.

    Args:
        database: name of the target database.
        table: name of the target table.
        df: object whose ``collect()`` returns the rows to insert
            (presumably a Spark DataFrame -- confirm against callers).  The
            values of each row are written to the table columns by position;
            values beyond the table's column count are ignored.
        configuration: optional argument handed to
            ``pymcsapi.ColumnStoreDriver``; when ``None`` the driver is
            created without arguments.

    Value handling per column:
        * ``None`` -> NULL if the column is nullable, otherwise the column's
          default value (a warning is printed).
        * ``bool`` -> ``1`` / ``0``.
        * ``datetime.date`` (including ``datetime.datetime``) -> string
          formatted as ``'%Y-%m-%d %H:%M:%S'``.
        * ``decimal.Decimal`` -> ``pymcsapi.ColumnStoreDecimal`` for
          decimal/float/double columns, otherwise converted with ``long``.
        * Python 2 ``unicode`` -> UTF-8 encoded bytes.
        * anything else is passed to ``setColumn`` unchanged.

    Any exception raised while inserting is caught: the bulk insert is rolled
    back and diagnostics are printed instead of propagating the error.  A
    short summary of the bulk insert is printed in every case.

    Side effect: on Python 3 the module-level name ``long`` is bound to
    ``int`` (kept for backward compatibility with code relying on it).
    """
    global long
    python2 = True
    if sys.version_info[0] == 3:
        long = int
        python2 = False

    rows = df.collect()
    if configuration is None:
        driver = pymcsapi.ColumnStoreDriver()
    else:
        driver = pymcsapi.ColumnStoreDriver(configuration)
    bulkInsert = driver.createBulkInsert(database, table, 0, 0)

    # get the column count of table
    dbCatalog = driver.getSystemCatalog()
    dbTable = dbCatalog.getTable(database, table)
    dbTableColumnCount = dbTable.getColumnCount()

    # DATA_TYPE_DECIMAL, DATA_TYPE_UDECIMAL, DATA_TYPE_FLOAT, DATA_TYPE_UFLOAT,
    # DATA_TYPE_DOUBLE, DATA_TYPE_UDOUBLE
    decimalLikeTypes = (4, 18, 7, 21, 10, 23)

    # Pre-bind the loop variables: the exception handler below reports the
    # value being ingested, and must not raise a NameError of its own when
    # the failure happens before any value was visited (e.g. commit() fails
    # for an empty row set).
    row = None
    columnId = None

    # insert row by row into table
    try:
        for row in rows:
            for columnId, value in enumerate(row):
                if columnId >= dbTableColumnCount:
                    # the row has more values than the table has columns
                    continue
                if value is None:
                    dbColumn = dbTable.getColumn(columnId)
                    if dbColumn.isNullable():
                        bulkInsert.setNull(columnId)
                    else:
                        print("warning: column %d is not nullable. Using default value instead." % (columnId,))
                        bulkInsert.setColumn(columnId, dbColumn.getDefaultValue())
                elif isinstance(value, bool):
                    # must be tested before any numeric handling: bool is
                    # inserted as 1 / 0
                    bulkInsert.setColumn(columnId, 1 if value else 0)
                elif isinstance(value, datetime.date):
                    bulkInsert.setColumn(columnId, value.strftime('%Y-%m-%d %H:%M:%S'))
                elif isinstance(value, decimal.Decimal):
                    if dbTable.getColumn(columnId).getType() in decimalLikeTypes:
                        # fixed-point notation avoids exponent formatting
                        s = '{0:f}'.format(value)
                        bulkInsert.setColumn(columnId, pymcsapi.ColumnStoreDecimal(s))
                    else:
                        # ANY OTHER DATA TYPE
                        bulkInsert.setColumn(columnId, long(value))
                elif python2 and isinstance(value, unicode):
                    # handle python2 unicode strings
                    bulkInsert.setColumn(columnId, value.encode('utf-8'))
                else:
                    # any other datatype is inserted without parsing
                    bulkInsert.setColumn(columnId, value)
            bulkInsert.writeRow()
        bulkInsert.commit()
    except Exception as e:
        bulkInsert.rollback()
        print("An exception occured. The bulk insert was rolled back.")
        if row is not None and columnId is not None:
            # last value that was visited before the failure
            failed = row[columnId]
            print("row: %s, ingest type: %s, ingest value: %s" % (failed, type(failed), failed))
        print(type(e), str(e))

    # print a short summary of the insertion process
    summary = bulkInsert.getSummary()
    print("Execution time: %s" % (summary.getExecutionTime(),))
    print("Rows inserted: %s" % (summary.getRowsInsertedCount(),))
    print("Truncation count: %s" %(summary.getTruncationCount(),))
    print("Saturated count: %s" %(summary.getSaturatedCount(),))
    print("Invalid count: %s" %(summary.getInvalidCount(),))
## This example expects advanced_bulk_insert.sql to be created in the test
## database
##
## NOTE: if you edit this file please update the line numbers in
## advanced_bulk_insert.rst

import pymcsapi

try:
    driver = pymcsapi.ColumnStoreDriver()
    bulk = driver.createBulkInsert("test", "t2", 0, 0)

    # First row: one independent setColumn() call per column, every value
    # given as a plain Python int or string.
    for column, value in enumerate(
            (1, "Andrew", "1936-12-24", "2017-07-07 15:14:12", "15239.45")):
        bulk.setColumn(column, value)
    bulk.writeRow()

    # Second row: each call is issued on element [0] of the previous
    # setColumn() result, and the date/time and decimal values are passed
    # as pymcsapi objects.
    step = bulk.setColumn(0, 2)[0]
    step = step.setColumn(1, "David")[0]
    step = step.setColumn(2, pymcsapi.ColumnStoreDateTime(1972, 5, 23))[0]
    step = step.setColumn(
        3, pymcsapi.ColumnStoreDateTime(2017, 7, 7, 15, 20, 18))[0]
    step = step.setColumn(4, pymcsapi.ColumnStoreDecimal(2347623, 2))[0]
    step.writeRow()

    bulk.commit()

    # Report the statistics of the bulk insert, one line per counter.
    summary = bulk.getSummary()
    for template, getter in (
            ("Execution time: %s", summary.getExecutionTime),
            ("Rows inserted: %s", summary.getRowsInsertedCount),
            ("Truncation count: %s", summary.getTruncationCount),
            ("Saturated count: %s", summary.getSaturatedCount),
            ("Invalid count: %s", summary.getInvalidCount),
    ):
        print(template % (getter(), ))
except RuntimeError as err:
    print("Error caught: %s" % (err, ))