def to_tableau(self, path, table_name='Extract', add_index=False): """ Converts a Pandas DataFrame to a Tableau .tde file @param path = path to write file @param table_name = name of the table in the extract @param add_index = adds incrementing integer index before dataframe columns """ # Delete dataextract log and hyper_db temp files if already exists files = (glob('DataExtract*.log') + glob('hyper_db_*') + [ os.path.dirname(path) + '/debug.log', './DataExtract.log', './debug.log' ]) for file in files: if os.path.isfile(file): os.remove(file) # Create Extract and Table ExtractAPI.initialize() new_extract = Extract(path) if not new_extract.hasTable(table_name): print( 'Table \'{}\' does not exist in extract {}, creating.'.format( table_name, path)) self.set_table_structure(new_extract, table_name, add_index) new_table = new_extract.openTable(table_name) table_def = new_table.getTableDefinition() # Set Column values self.set_column_values(new_table, table_def, add_index) # Close extract new_extract.close() ExtractAPI.cleanup()
###Setting Tableau recognized data types### for i in column_types: if i is numpy.int64: column_headers_types.append(Type.INTEGER) elif i is numpy.float64: column_headers_types.append(Type.DOUBLE) elif i is pd.Timestamp: column_headers_types.append(Type.DATETIME) else: column_headers_types.append(Type.UNICODE_STRING) ###Getting the existing table schema for incremental refresh### if incremental_refresh == 1: dataExtract = Extract(extract_name) if dataExtract.hasTable('Extract'): table = dataExtract.openTable('Extract') dataSchema = table.getTableDefinition() else: ###Removing extract if it already exists (for full refresh)### try: cwd = os.getcwd() for f in os.listdir(cwd): if re.search(extract_name, f): os.remove(os.path.join(cwd, f)) except OSError, e: pass ###Initializng the Extract API, and applying the schema to the table### ExtractAPI.initialize() dataExtract = Extract(extract_name)
createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde") for i in combined_data.columns: if i in ['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release']: combined_data[i] = pd.to_datetime(combined_data[i],coerce=True) #combined_data = combined_data.drop(['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release'],1) if createExtract.hasTable('Extract'): table = createExtract.openTable('Extract') tabledef = table.getTableDefinition() else: tabledef = TableDefinition() for i in combined_data.columns : #print combined_data[i].dtype if str(combined_data[i].dtype) == "int64": tabledef.addColumn(i,Types.Type.INTEGER) if str(combined_data[i].dtype) == "object": tabledef.addColumn(i,Types.Type.CHAR_STRING) if str(combined_data[i].dtype) == "datetime64[ns]": tabledef.addColumn(i,Types.Type.DATE) if str(combined_data[i].dtype) == "float64": tabledef.addColumn(i,Types.Type.DOUBLE)
table_definition.addColumn('latitude', Type.DOUBLE) table_definition.addColumn('longitude', Type.DOUBLE) table_definition.addColumn('point_number', Type.INTEGER) table_definition.addColumn('distance_km', Type.DOUBLE) table_definition.addColumn('FlightNumber', Type.UNICODE_STRING) table_definition.addColumn('DepartureTime', Type.UNICODE_STRING) table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING) table_definition.addColumn('SegMinFromMidnight', Type.INTEGER) table_definition.addColumn('FlightTime', Type.INTEGER) table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING) # 4. Initialize a new table in the extract if (new_extract.hasTable('Extract') == False): new_table = new_extract.addTable('Extract', table_definition) else: new_table = new_extract.openTable('Extract') # 5. Create a new row new_row = Row(table_definition) # Pass the table definition to the constructor # 6. walk through the origin/destination data from CSV, write each path to TDE with open(csvLocation, 'rb') as csvfile: csvreader = csv.reader(csvfile, delimiter='|') next(csvreader) # skip header for row in csvreader: # print row olat = float(row[ORIG_LAT]) olon = float(row[ORIG_LON])
def to_tde(dataset, tde_mode, tde_name): if tde_mode == 'new': # Step 1: Create the Extract File dataExtract = Extract(tde_name) if dataExtract.hasTable('Extract'): return print("tde already exist use another name") # Step 2: Create the table definition dataSchema = TableDefinition() dataSchema.addColumn('Station', Type.UNICODE_STRING) dataSchema.addColumn('Time', Type.UNICODE_STRING) dataSchema.addColumn('Date', Type.DATE) dataSchema.addColumn('all', Type.DOUBLE) dataSchema.addColumn('n7', Type.DOUBLE) dataSchema.addColumn('n8', Type.DOUBLE) dataSchema.addColumn('y_serries', Type.DOUBLE) # Step 3: Create a table in the image of the table definition table = dataExtract.addTable('Extract', dataSchema) # Step 4: Create rows and insert them one by one newRow = Row(dataSchema) for i in range(0, len(dataset)): print(i) newRow.setString (0, dataset['Station'][i]) newRow.setString (1, dataset['Time'][i]) newRow.setDate (2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day) newRow.setDouble (3, dataset['all'][i]) newRow.setDouble (4, dataset['n7'][i]) newRow.setDouble (5, dataset['n8'][i]) newRow.setDouble (6, dataset['y_serries'][i]) table.insert(newRow) # Step 5: Close the tde dataExtract.close() elif tde_mode == 'append': # Step 1: Import the Extract File preExtract = Extract(tde_name) # Step 2: Open existing table table = preExtract.openTable('Extract') # Step 3: Import the table definition tableDef = table.getTableDefinition() # Step 4: Create rows and insert them one by one newRow = Row(tableDef) for i in range(0, len(dataset)): print(i) newRow.setString (0, dataset['Station'][i]) newRow.setString (1, dataset['Time'][i]) newRow.setDate (2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day) newRow.setDouble (3, dataset['all'][i]) newRow.setDouble (4, dataset['n7'][i]) newRow.setDouble (5, dataset['n8'][i]) newRow.setDouble (6, dataset['y_serries'][i]) table.insert(newRow) # Step 5: Close the extract preExtract.close()
def heatmapToTDE(heatmap, extent, smoothing, extractLocation): xLen = len(heatmap) yLen = len(heatmap[0]) xRange = abs(extent[0] - extent[1]) yRange = abs(extent[2] - extent[3]) xMin = min(extent[0], extent[1]) yMin = min(extent[2], extent[3]) xIncrement = xRange / yLen yIncrement = yRange / xLen # 1. initialize a new extract ExtractAPI.initialize() # 2. Create a table definition new_extract = Extract(extractLocation) # 3. Add column definitions to the table definition table_definition = TableDefinition() table_definition.addColumn('ROW', Type.UNICODE_STRING) # column 0 table_definition.addColumn('COL', Type.UNICODE_STRING) # column 1 table_definition.addColumn('VALUE', Type.DOUBLE) # column 2 table_definition.addColumn('ID', Type.UNICODE_STRING) table_definition.addColumn('CellCount', Type.INTEGER) table_definition.addColumn('Smoothing', Type.DOUBLE) table_definition.addColumn('GEOM', Type.SPATIAL) # 4. Initialize a new table in the extract # a. check if the table already exists # Only add table if it doesn't already exist if (new_extract.hasTable('Extract') == False): new_table = new_extract.addTable('Extract', table_definition) else: new_table = new_extract.openTable('Extract') # 5. Create a new row new_row = Row(table_definition) # Pass the table definition to the constructor # 6. Populate each new row yCoord = yMin for i in range(0, xLen): yCoord += yIncrement xCoord = xMin for j in range(0, yLen): xCoord += xIncrement cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement) new_row.setString(0, str(i)) # ROW new_row.setString(1, str(j)) # COL new_row.setDouble(2, heatmap[i][j]) # VAL new_row.setString(3, str(i) + '-' + str(j)) # id new_row.setInteger(4, len(heatmap[0])) # cell count new_row.setDouble(5, smoothing) # smoothing new_row.setSpatial(6, cellWkt) # WKT spatial new_table.insert(new_row) # Add the new row to the table # 7. Save the table and extract new_extract.close() # 8. Release the extract API ExtractAPI.cleanup() return
# add the time period column table_definition.addColumn("timeperiod", Type.UNICODE_STRING) new_colnames_to_idx["timeperiod"] = len(new_colnames_to_idx) # add the src column table_definition.addColumn("src", Type.UNICODE_STRING) new_colnames_to_idx["src"] = len(new_colnames_to_idx) table_definition.addColumn("Scenario", Type.UNICODE_STRING) new_colnames_to_idx["Scenario"] = len(new_colnames_to_idx) # print "old_colnames = %s" % str(old_colnames) # print "new colnames = %s" % str(new_colnames_to_idx) # print colnames_to_types # Step 3: Creat the table in the image of the tableDef if arg_append: new_table = new_extract.openTable('Extract') else: new_table = new_extract.addTable('Extract', table_definition) # Step 4: Loop through the csv grab all the data, put it into rows # and insert the rows in the table new_row = Row(table_definition) for csv_dirpath in csv_dirpaths: csv_fullpath = os.path.join(csv_dirpath, csv_filename) # add the new column `src` src = os.path.split(csv_fullpath)[0] # remove the filename part of the path tail = "" while tail in ["", "trn", "iter3", "hwy", "OUTPUT"]: (src,tail) = os.path.split(src) # remove one more dir from path (e.g. core_summaries) print "tail=[%s] src=[%s]" % (tail,src)