def new_tde(self, data):
    """Create a new .tde extract named ``self.tde_name`` from a DataFrame.

    Every column is written as UNICODE_STRING and rows are inserted one
    by one.

    @param data: pandas DataFrame; cells are passed to Row.setString, so
                 values are assumed to be strings -- TODO confirm upstream.
    """
    # Step 1: Create the extract file (the Extract constructor opens an
    # existing file with this name if one is present).
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        # A .tde holds exactly one table named 'Extract'; refuse to
        # clobber an existing one.  (print returns None.)
        return print("tde already exist use another name")

    # Step 2: Build the table definition -- one string column per
    # DataFrame column.  (The original recomputed this list twice.)
    cols = data.columns.tolist()
    dataSchema = TableDefinition()
    for col in cols:
        dataSchema.addColumn(col, Type.UNICODE_STRING)

    # Step 3: Create the table in the image of the table definition.
    table = dataExtract.addTable('Extract', dataSchema)

    # Step 4: Insert rows one by one.  enumerate() replaces the original
    # O(n^2) cols.index(col) lookup inside the inner loop; the single Row
    # object is reused across inserts, as the Extract API allows.
    newRow = Row(dataSchema)
    for i in range(len(data)):
        for col_index, col in enumerate(cols):
            newRow.setString(col_index, data[col][i])
        table.insert(newRow)

    # Step 5: Close the extract exactly once (the original called
    # close() twice).
    dataExtract.close()
def createOrOpenExtract(filename, useSpatial): try: # Create Extract Object # (NOTE: The Extract constructor opens an existing extract with the # given filename if one exists or creates a new extract with the given # filename if one does not) extract = Extract(filename) # Define Table Schema (If we are creating a new extract) # (NOTE: In Tableau Data Engine, all tables must be named 'Extract') if (not extract.hasTable('Extract')): schema = TableDefinition() schema.setDefaultCollation(Collation.EN_GB) schema.addColumn('Purchased', Type.DATETIME) schema.addColumn('Product', Type.CHAR_STRING) schema.addColumn('uProduct', Type.UNICODE_STRING) schema.addColumn('Price', Type.DOUBLE) schema.addColumn('Quantity', Type.INTEGER) schema.addColumn('Taxed', Type.BOOLEAN) schema.addColumn('Expiration Date', Type.DATE) schema.addColumnWithCollation('Produkt', Type.CHAR_STRING, Collation.DE) if (useSpatial): schema.addColumn('Destination', Type.SPATIAL) table = extract.addTable('Extract', schema) if (table == None): print 'A fatal error occurred while creating the table:\nExiting now\n.' exit(-1) except TableauException, e: print 'A fatal error occurred while creating the new extract:\n', e, '\nExiting now.' exit(-1)
class TDEExport(object):
    """Writes rows into a Tableau .tde extract, counting conversion errors."""

    @staticmethod
    def make_table_definition(schema):
        """Build a TableDefinition from a schema (list of dicts with 'name'
        and 'type'), defaulting unknown types to UNICODE_STRING."""
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"], Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        self.errors = 0   # per-field conversion failures seen so far
        self.nrows = 0    # rows successfully inserted
        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        # A .tde may only contain one table, always named 'Extract'.
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Flush and close the extract, release the API, report errors."""
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print("Encountered %d errors" % self.errors)

    def insert_array_row(self, input_row):
        """Convert one input row and insert it into the extract table.

        Field-level conversion failures are counted and logged (with
        progressive throttling) but do not abort the row.
        """
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:  # 'as' is Python 2.6+/3 compatible
                self.errors += 1
                # Log the first 100 errors, then every 100th up to 10000,
                # then every 1000th.
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 == 0):
                    try:
                        print("[err #%s] Failed setting: col=%s type=%s val=%s err=%s" % (self.errors, col["name"], col["type"], data, e))
                    except Exception as e2:
                        # The value itself may be unprintable (encoding issues).
                        print("[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED err=%s" % (self.errors, col["name"], col["type"], e))
        # BUG FIX: the original built and populated the row but never
        # inserted it (compare the sibling TDEExport variant, which does),
        # and never advanced the nrows counter it initializes.
        self.table.insert(output_row)
        self.nrows += 1
class TDEExport(object):
    """Writes rows from a DSS-style schema/dataset into a Tableau .tde extract."""

    @staticmethod
    def convert_type(type):
        """Map a schema type name to a Tableau Type (UNICODE_STRING fallback)."""
        return typeMap.get(type, Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        """Build a TableDefinition from a schema (list of {'name','type'} dicts)."""
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        # A .tde may only contain one table, always named 'Extract'.
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Stray trailing semicolon removed; exit just delegates to close().
        self.close()

    def close(self):
        """Flush and close the extract, then release the Extract API."""
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        """Convert one input row and insert it into the extract table.

        A field that fails conversion is logged and left unset rather
        than aborting the whole row.
        """
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:  # 'as' syntax; redundant 'pass' removed
                print("Failed setting field %s to value %s: %s" % (col["name"], data, e))
        self.table.insert(output_row)
def writeTableToTDE(pgFields, pgData, extractLocation):
    """Write a Postgres result set into a Tableau extract at ``extractLocation``.

    @param pgFields: sequence of (column_name, tableau_type) pairs.
    @param pgData:   sequence of rows; pgData[i][j] is row i, column j.
    @param extractLocation: path of the .tde file to create or open.
    """
    # BUG FIX: the original wrote print("...{0}").format(x), which calls
    # .format() on print's return value (None) and raises AttributeError.
    print("writing table to {0}".format(extractLocation))

    # 1. Initialize the Extract API.
    ExtractAPI.initialize()

    # 2. Create (or open) the extract.
    new_extract = Extract(extractLocation)

    # 3. Build the table definition from the field list.
    table_definition = TableDefinition()
    for field_name, field_type in pgFields:
        table_definition.addColumn(field_name, field_type)

    # 4. Only add the table if it doesn't already exist.
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. One Row object is reused for every insert.
    new_row = Row(table_definition)

    # 6. Populate each row.  (Doesn't cover all possible TDE data types.)
    for i in range(len(pgData)):
        for j in range(len(pgFields)):
            col_type = pgFields[j][1]
            value = pgData[i][j]
            if col_type == Type.INTEGER:
                new_row.setInteger(j, value)
            elif col_type == Type.UNICODE_STRING:
                new_row.setString(j, value)
            elif col_type == Type.SPATIAL:
                new_row.setSpatial(j, value)
            # BUG FIX: the original tested pgFields[j][i] (row index used as
            # tuple index) in the next two branches, so BOOLEAN and DOUBLE
            # columns were effectively never matched...
            elif col_type == Type.BOOLEAN:
                new_row.setBoolean(j, value)
            # BUG FIX: ...and read pgData[j][i] (transposed) for doubles.
            elif col_type == Type.DOUBLE:
                new_row.setDouble(j, value)
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract.
    new_extract.close()

    # 8. Release the extract API.
    ExtractAPI.cleanup()
    return
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file

    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns
    """
    # Remove leftover dataextract logs and hyper_db temp files from
    # previous runs before touching the extract.
    leftovers = glob('DataExtract*.log') + glob('hyper_db_*')
    leftovers += [
        os.path.dirname(path) + '/debug.log',
        './DataExtract.log',
        './debug.log',
    ]
    for leftover in leftovers:
        if os.path.isfile(leftover):
            os.remove(leftover)

    # Open (or create) the extract and make sure the target table exists.
    ExtractAPI.initialize()
    extract = Extract(path)
    if not extract.hasTable(table_name):
        print(
            'Table \'{}\' does not exist in extract {}, creating.'.format(
                table_name, path))
        self.set_table_structure(extract, table_name, add_index)

    # Write the dataframe's values into the (possibly new) table.
    target_table = extract.openTable(table_name)
    definition = target_table.getTableDefinition()
    self.set_column_values(target_table, definition, add_index)

    # Persist everything and release the API.
    extract.close()
    ExtractAPI.cleanup()
###Setting Tableau recognized data types### for i in column_types: if i is numpy.int64: column_headers_types.append(Type.INTEGER) elif i is numpy.float64: column_headers_types.append(Type.DOUBLE) elif i is pd.Timestamp: column_headers_types.append(Type.DATETIME) else: column_headers_types.append(Type.UNICODE_STRING) ###Getting the existing table schema for incremental refresh### if incremental_refresh == 1: dataExtract = Extract(extract_name) if dataExtract.hasTable('Extract'): table = dataExtract.openTable('Extract') dataSchema = table.getTableDefinition() else: ###Removing extract if it already exists (for full refresh)### try: cwd = os.getcwd() for f in os.listdir(cwd): if re.search(extract_name, f): os.remove(os.path.join(cwd, f)) except OSError, e: pass ###Initializng the Extract API, and applying the schema to the table### ExtractAPI.initialize()
createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde") for i in combined_data.columns: if i in ['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release']: combined_data[i] = pd.to_datetime(combined_data[i],coerce=True) #combined_data = combined_data.drop(['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release'],1) if createExtract.hasTable('Extract'): table = createExtract.openTable('Extract') tabledef = table.getTableDefinition() else: tabledef = TableDefinition() for i in combined_data.columns : #print combined_data[i].dtype if str(combined_data[i].dtype) == "int64": tabledef.addColumn(i,Types.Type.INTEGER) if str(combined_data[i].dtype) == "object": tabledef.addColumn(i,Types.Type.CHAR_STRING) if str(combined_data[i].dtype) == "datetime64[ns]": tabledef.addColumn(i,Types.Type.DATE) if str(combined_data[i].dtype) == "float64":
# 3. Add column definitions to the table definition table_definition = TableDefinition() table_definition.addColumn('route', Type.UNICODE_STRING) # column 0 table_definition.addColumn('latitude', Type.DOUBLE) table_definition.addColumn('longitude', Type.DOUBLE) table_definition.addColumn('point_number', Type.INTEGER) table_definition.addColumn('distance_km', Type.DOUBLE) table_definition.addColumn('FlightNumber', Type.UNICODE_STRING) table_definition.addColumn('DepartureTime', Type.UNICODE_STRING) table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING) table_definition.addColumn('SegMinFromMidnight', Type.INTEGER) table_definition.addColumn('FlightTime', Type.INTEGER) table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING) # 4. Initialize a new table in the extract if (new_extract.hasTable('Extract') == False): new_table = new_extract.addTable('Extract', table_definition) else: new_table = new_extract.openTable('Extract') # 5. Create a new row new_row = Row(table_definition) # Pass the table definition to the constructor # 6. walk through the origin/destination data from CSV, write each path to TDE with open(csvLocation, 'rb') as csvfile: csvreader = csv.reader(csvfile, delimiter='|') next(csvreader) # skip header for row in csvreader:
def _insert_rows(table, schema, dataset):
    """Append every row of ``dataset`` to ``table`` using ``schema``.

    Expects the columns Station, Time, Date, all, n7, n8, y_serries in
    that order, matching the extract's table definition.  One Row object
    is reused for every insert, as the Extract API allows.
    """
    newRow = Row(schema)
    for i in range(len(dataset)):
        print(i)  # progress indicator, one line per row
        newRow.setString(0, dataset['Station'][i])
        newRow.setString(1, dataset['Time'][i])
        newRow.setDate(2, dataset['Date'][i].year,
                          dataset['Date'][i].month,
                          dataset['Date'][i].day)
        newRow.setDouble(3, dataset['all'][i])
        newRow.setDouble(4, dataset['n7'][i])
        newRow.setDouble(5, dataset['n8'][i])
        newRow.setDouble(6, dataset['y_serries'][i])
        table.insert(newRow)


def to_tde(dataset, tde_mode, tde_name):
    """Write ``dataset`` to the Tableau extract ``tde_name``.

    The row-insertion code, previously duplicated verbatim in both
    branches, is factored into ``_insert_rows``.

    @param tde_mode: 'new' creates a fresh extract; 'append' adds rows to
                     an existing one; any other value is a no-op.
    @param tde_name: path of the .tde file.
    """
    if tde_mode == 'new':
        # Step 1: Create the extract file.
        dataExtract = Extract(tde_name)
        if dataExtract.hasTable('Extract'):
            # Refuse to overwrite an existing extract.  (print returns None.)
            return print("tde already exist use another name")
        # Step 2: Define the table schema.
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)
        # Step 3: Create the table and insert every row.
        table = dataExtract.addTable('Extract', dataSchema)
        _insert_rows(table, dataSchema, dataset)
        # Step 4: Close the tde.
        dataExtract.close()
    elif tde_mode == 'append':
        # Open the existing extract, reuse its table definition, append.
        preExtract = Extract(tde_name)
        table = preExtract.openTable('Extract')
        tableDef = table.getTableDefinition()
        _insert_rows(table, tableDef, dataset)
        preExtract.close()
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
    """Write a 2-D heatmap as one extract row per cell, with a WKT polygon.

    @param heatmap: 2-D array-like; heatmap[i][j] is the cell value.
    @param extent: 4 bounds (x0, x1, y0, y1); each axis is order-insensitive
                   (min/abs are taken below).
    @param smoothing: smoothing factor recorded verbatim in every row.
    @param extractLocation: path of the .tde file to create or open.
    """
    xLen = len(heatmap)
    yLen = len(heatmap[0])
    xRange = abs(extent[0] - extent[1])
    yRange = abs(extent[2] - extent[3])
    xMin = min(extent[0], extent[1])
    yMin = min(extent[2], extent[3])
    # NOTE(review): the x step divides by the INNER length (yLen) and the y
    # step by the OUTER length (xLen) -- consistent with row index i mapping
    # to y below, but confirm against the producer of `heatmap`.
    xIncrement = xRange / yLen
    yIncrement = yRange / xLen
    # 1. initialize a new extract
    ExtractAPI.initialize()
    # 2. Create a table definition
    new_extract = Extract(extractLocation)
    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
    table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
    table_definition.addColumn('VALUE', Type.DOUBLE)        # column 2
    table_definition.addColumn('ID', Type.UNICODE_STRING)
    table_definition.addColumn('CellCount', Type.INTEGER)
    table_definition.addColumn('Smoothing', Type.DOUBLE)
    table_definition.addColumn('GEOM', Type.SPATIAL)
    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add table if it doesn't already exist
    if (new_extract.hasTable('Extract') == False):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')
    # 5. Create a new row (one Row object is reused for every insert)
    new_row = Row(table_definition)  # Pass the table definition to the constructor
    # 6. Populate each new row
    # NOTE(review): both coordinates are incremented BEFORE first use, so the
    # first cell is anchored at (xMin + xIncrement, yMin + yIncrement) rather
    # than (xMin, yMin) -- confirm this offset is intended.
    yCoord = yMin
    for i in range(0, xLen):
        yCoord += yIncrement
        xCoord = xMin
        for j in range(0, yLen):
            xCoord += xIncrement
            # WKT polygon for the cell anchored at (xCoord, yCoord).
            cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)
            new_row.setString(0, str(i))                 # ROW
            new_row.setString(1, str(j))                 # COL
            new_row.setDouble(2, heatmap[i][j])          # VAL
            new_row.setString(3, str(i) + '-' + str(j))  # id
            new_row.setInteger(4, len(heatmap[0]))       # cell count
            new_row.setDouble(5, smoothing)              # smoothing
            new_row.setSpatial(6, cellWkt)               # WKT spatial
            new_table.insert(new_row)  # Add the new row to the table
    # 7. Save the table and extract
    new_extract.close()
    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
def new_tde(self):
    """Create (or open) the extract at ``self.tde_name`` and return it.

    @return: the open Extract, or None if the file already contains a
             table named 'Extract' (a .tde holds exactly one table).
    """
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        print("tde already exist use another name")
        # BUG FIX: release the handle we just opened instead of leaking it
        # (the original returned print(...)'s None with the file still open).
        dataExtract.close()
        return None
    return dataExtract