def createOrOpenExtract(filename, useSpatial): try: # Create Extract Object # (NOTE: The Extract constructor opens an existing extract with the # given filename if one exists or creates a new extract with the given # filename if one does not) extract = Extract(filename) # Define Table Schema (If we are creating a new extract) # (NOTE: In Tableau Data Engine, all tables must be named 'Extract') if (not extract.hasTable('Extract')): schema = TableDefinition() schema.setDefaultCollation(Collation.EN_GB) schema.addColumn('Purchased', Type.DATETIME) schema.addColumn('Product', Type.CHAR_STRING) schema.addColumn('uProduct', Type.UNICODE_STRING) schema.addColumn('Price', Type.DOUBLE) schema.addColumn('Quantity', Type.INTEGER) schema.addColumn('Taxed', Type.BOOLEAN) schema.addColumn('Expiration Date', Type.DATE) schema.addColumnWithCollation('Produkt', Type.CHAR_STRING, Collation.DE) if (useSpatial): schema.addColumn('Destination', Type.SPATIAL) table = extract.addTable('Extract', schema) if (table == None): print 'A fatal error occurred while creating the table:\nExiting now\n.' exit(-1) except TableauException, e: print 'A fatal error occurred while creating the new extract:\n', e, '\nExiting now.' exit(-1)
def to_tableau( self, path, add_index=False ):
    '''
    Converts a Pandas DataFrame to a Tableau .tde file
    @param path = path to write file
    @param add_index = when True, prepend an incrementing integer
        'index' column before the dataframe columns
    (fixed: the old docstring documented a nonexistent tableName parameter)
    '''
    # Delete extract and debug logs if they already exist
    # (loop variable renamed: 'file' shadowed the builtin)
    for stale in [path, os.path.dirname(path) + '/debug.log',
                  './DataExtract.log', './debug.log']:
        if os.path.isfile(stale):
            os.remove(stale)

    # Create Extract and Table
    ExtractAPI.initialize( )
    new_extract = Extract( path )
    table_def = TableDefinition()

    # Set columns in Tableau: optional leading index column, then one column
    # per dataframe column using the precomputed static types
    if add_index:
        table_def.addColumn( 'index', Type.INTEGER )
    for col_index, col_name in enumerate(self._dataframe):
        table_def.addColumn( col_name, self._column_static_type[col_index] )

    # Create table (Tableau Data Engine tables must be named 'Extract')
    new_table = new_extract.addTable( "Extract", table_def )

    # Set Column values
    self.set_column_values( new_table, table_def, add_index )

    # Close extract
    new_extract.close()
    ExtractAPI.cleanup()
def new_tde(self, data):
    """Create a new .tde at self.tde_name and load `data` (a DataFrame),
    storing every column as a UNICODE_STRING.

    Prints a warning and returns None when the extract already contains an
    'Extract' table (i.e. the file was created before).
    """
    # Step 1: Create the Extract File
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        return print("tde already exist use another name")
    # Step 2: Create the table definition -- every column is a unicode string
    cols = data.columns.tolist()
    dataSchema = TableDefinition()
    for col in cols:
        dataSchema.addColumn(col, Type.UNICODE_STRING)
    # Step 3: Create a table in the image of the table definition
    table = dataExtract.addTable('Extract', dataSchema)
    # Step 4: Create rows and insert them one by one.
    # enumerate() replaces the original's cols.index(col) call, which did an
    # O(n) list scan for every single cell.
    newRow = Row(dataSchema)
    for i in range(0, len(data)):
        for col_index, col in enumerate(cols):
            newRow.setString(col_index, data[col][i])
        table.insert(newRow)
    # Step 5: Close the tde exactly once (the original called close() twice,
    # which can fail on an already-closed extract)
    dataExtract.close()
def tde_export(): print "Start export to TDE" input_name = get_input_names_for_role('input')[0] input_dataset = dataiku.Dataset(input_name) input_schema = input_dataset.read_schema() partitions = input_dataset.list_partitions(raise_if_empty=False) if partitions not in [[], [u'NP']]: raise Exception( "Due to the current APIs, this plugin cannot support partitioned input " "(and it seems the input dataset " + input_name + " is partitioned). " "A workaround is to first run a sync recipe " "from " + input_name + " into a non partitioned dataset, " "then take the latter as input for tde export.") output_name = get_output_names_for_role('output_folder')[0] output_folder = dataiku.Folder(output_name) output_path = output_folder.get_path() os.chdir(output_path) # Clean output dir. We assume there is no subfolder. # (because this recipe never creates one. If there is, better fail than remove someone else's data) for file in os.listdir(output_path): os.remove(file) ExtractAPI.initialize() with Extract(output_filename()) as extract: assert (not extract.hasTable('Extract')) tableDef = makeTableDefinition(input_schema) table = extract.addTable('Extract', tableDef) insertData(input_dataset, table) extract.close() ExtractAPI.cleanup() print "End export to TDE"
def __init__(self, tde_file_path, input_schema): self.tde_file_path = tde_file_path self.output_path = os.path.dirname(self.tde_file_path) self.input_schema = input_schema self.errors = 0 self.nrows = 0 print "Writing TDE file: %s" % self.tde_file_path ExtractAPI.initialize() self.extract = Extract(self.tde_file_path) assert(not self.extract.hasTable('Extract')) self.table_def = TDEExport.make_table_definition(self.input_schema) self.table = self.extract.addTable('Extract', self.table_def)
def writeTableToTDE(pgFields, pgData, extractLocation):
    """Write rows from a Postgres result set into a TDE extract.

    pgFields -- list of (column_name, tableau_Type) pairs
    pgData   -- list of rows; pgData[i][j] corresponds to pgFields[j]
    extractLocation -- path of the .tde file to create or append to
    """
    # Fixed: the original called .format() on the return value of print()
    print("writing table to {0}".format(extractLocation))

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. open/create the extract
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for i in range(0, len(pgFields)):
        table_definition.addColumn(pgFields[i][0], pgFields[i][1])

    # 4. Initialize a new table in the extract.
    # Only add the table if it doesn't already exist.
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row (reused for every insert)
    new_row = Row(table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row.
    # Note that this doesn't cover all possible TDE data types.
    numberRecords = len(pgData)
    for i in range(0, numberRecords):
        for j in range(0, len(pgFields)):
            # Fixed: the BOOLEAN/DOUBLE branches read pgFields[j][i] (row
            # index instead of the type slot 1), and the DOUBLE branch read
            # pgData[j][i] with transposed indices.
            col_type = pgFields[j][1]
            if col_type == Type.INTEGER:
                new_row.setInteger(j, pgData[i][j])
            elif col_type == Type.UNICODE_STRING:
                new_row.setString(j, pgData[i][j])
            elif col_type == Type.SPATIAL:
                new_row.setSpatial(j, pgData[i][j])
            elif col_type == Type.BOOLEAN:
                new_row.setBoolean(j, pgData[i][j])
            elif col_type == Type.DOUBLE:
                new_row.setDouble(j, pgData[i][j])
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
def create_tde(self, tde_filename, append=False):
    """Create the 'Extract' table in `tde_filename` (or open it for appending
    when append=True) and store the table handle on self.tde_object.

    Exits the process via sys.exit() when the file already has an 'Extract'
    table and append mode is off; logs and re-raises any TableauException.
    Row insertion itself is not implemented yet (see commented block below).
    """
    try:
        # Using "with" handles closing the TDE correctly
        with Extract("{}".format(tde_filename)) as extract:
            self.tde_object = None
            row_count = 0
            # Create the Extract object (or set it for updating) if there are actually results
            if not extract.hasTable('Extract'):
                # Table does not exist; create it
                self.log(u'Creating Extract with table definition')
                self.tde_object = extract.addTable('Extract', self.table_definition)
            else:
                # Open an existing table to add more rows
                if append is True:
                    self.tde_object = extract.openTable('Extract')
                else:
                    # Refuse to clobber an existing extract when not appending
                    self.log("Output file '" + tde_filename + "' already exists.")
                    self.log(
                        "Append mode is off, please delete file and then rerun..."
                    )
                    sys.exit()
            # This is if you actually have data to put into the extract. Implement later
            # tde_row = Row(tableDef)
            # colNo = 0
            # for field in db_row:
            # Possible for database to have types that do not map, we skip them
            # if cursor.description[colNo][1].__name__ in PyTypeMap:
            # if( (field == "" or field == None) ) :
            # tde_row.setNull( colNo )
            # else :
            # From any given row from the cursor object, we can use the cursor_description collection to find information
            # for example, the column names and the datatypes. [0] is the column name string, [1] is the python type object. Mirrors cursor.description on the Row level
            # Second item is a Python Type object, to get the actual name as a string for comparison, have to use __name__ property
            # fieldSetterMap[PyTypeMap[ cursor.description[colNo][1].__name__ ] ](tde_row, colNo, field);
            # colNo += 1
            # table.insert(tde_row)
            # row_count += 1
            # print "TDE creation complete, " + str(row_count) + " rows inserted\n"
            # if len(skipped_cols) > 0:
            # print "The following columns were skipped due to datatypes that were not recognized:\n"
            # print skipped_cols
    except TableauException, e:
        self.log('Tableau TDE creation error:{}'.format(e))
        raise
def __init__(self, extract_file_path, input_schema):
    """Open a fresh Tableau extract at `extract_file_path` and create its
    single 'Extract' table from `input_schema`."""
    # Counters updated as rows are written
    self.errors = 0
    self.nrows = 0
    # Where the extract lives on disk
    self.extract_file_path = extract_file_path
    self.output_path = os.path.dirname(extract_file_path)
    self.input_schema = input_schema
    self.format_type = TABLEAU_FORMAT
    print("Writing Extract {} file: {}".format(self.format_type, self.extract_file_path))
    # Open the extract; a fresh file must not already contain the table
    ExtractAPI.initialize()
    self.extract = Extract(extract_file_path)
    assert (not self.extract.hasTable('Extract'))
    self.table_def = TableauExport.make_table_definition(input_schema)
    self.table = self.extract.addTable('Extract', self.table_def)
def publishCsvDatasource(serverURL, siteName, username, password, filename):
    """Convert a CSV file into a .tde extract and publish it to Tableau
    Server under the 'default' project, named after the CSV's base name.

    The first CSV row is taken as the column headers; column types are
    inferred (by makeTableDefinition) from the first data row.
    """
    extractName = os.path.basename(filename).split('.', 2)[0]
    extractFilename = extractName + '.tde'
    # Start from a clean slate; a missing previous extract is fine
    try:
        os.remove(extractFilename)
    except OSError:
        pass

    ExtractAPI.initialize()
    with Extract(extractFilename) as extract:
        with open(filename) as csvfile:
            fileReader = csv.reader(csvfile, delimiter=',')
            headers = []
            table = None
            for fileRow in fileReader:
                print('Reading: ' + str(fileRow))
                if not headers:
                    # First row carries the column names
                    headers = fileRow
                else:
                    # Fixed: identity comparison with None (was '== None')
                    if table is None:
                        # Infer the schema from the first data row
                        tableDef = makeTableDefinition(headers, fileRow)
                        table = extract.addTable('Extract', tableDef)
                    insertData(table, fileRow)
        # (the redundant csvfile.close() was removed: 'with' closes the file)
        extract.close()
    ExtractAPI.cleanup()

    # Initialize Tableau Server API
    ServerAPI.initialize()
    serverConnection = ServerConnection()
    serverConnection.connect(serverURL, username, password, siteName)
    # Publish the extract to the server under the default project
    serverConnection.publishExtract(extractFilename, 'default', extractName, False)
    # Disconnect from the server
    serverConnection.disconnect()
    # Destroy the server connection object
    serverConnection.close()
    # Clean up Tableau Server API
    ServerAPI.cleanup()
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file
    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns
    """
    # Delete Extract and debug log if they already exist
    for stale in [path, os.path.dirname(path) + '/debug.log',
                  './DataExtract.log', './debug.log']:
        if os.path.isfile(stale):
            os.remove(stale)

    # Create Extract and Table
    ExtractAPI.initialize()
    new_extract = Extract(path)
    table_def = TableDefinition()

    # Set columns in Tableau
    if add_index:
        # Pick an index column name that doesn't collide with an existing
        # dataframe column. Fixed: the original compounded suffixes
        # ('index' -> 'index_1' -> 'index_1_2' ...) instead of counting up.
        index_col = 'index'
        suffix = 1
        while index_col in self._dataframe.columns:
            index_col = 'index_%s' % suffix
            suffix += 1
        table_def.addColumn(index_col, Type.INTEGER)
    for col_index, col_name in enumerate(self._dataframe):
        table_def.addColumn(col_name, self._column_static_type[col_index])

    # Create table
    new_table = new_extract.addTable(table_name, table_def)

    # Set Column values
    self.set_column_values(new_table, table_def, add_index)

    # Close extract
    new_extract.close()
    ExtractAPI.cleanup()
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file
    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns
    """
    # Remove stale SDK logs and hyper_db temp files left from earlier runs
    stale_files = glob('DataExtract*.log') + glob('hyper_db_*')
    stale_files += [os.path.dirname(path) + '/debug.log',
                    './DataExtract.log', './debug.log']
    for candidate in stale_files:
        if os.path.isfile(candidate):
            os.remove(candidate)

    # Open the extract, creating the table structure on first use
    ExtractAPI.initialize()
    extract = Extract(path)
    if not extract.hasTable(table_name):
        print(
            'Table \'{}\' does not exist in extract {}, creating.'.format(
                table_name, path))
        self.set_table_structure(extract, table_name, add_index)

    # Append the dataframe's values to the (new or existing) table
    target_table = extract.openTable(table_name)
    self.set_column_values(target_table, target_table.getTableDefinition(),
                           add_index)

    # Flush everything to disk and release the API
    extract.close()
    ExtractAPI.cleanup()
def to_tde(dataset, tde_mode, tde_name):
    """Write `dataset` (a DataFrame with Station/Time/Date/all/n7/n8/y_serries
    columns) into the extract file `tde_name`.

    tde_mode -- 'new' creates the extract, refusing to proceed when it
                already contains an 'Extract' table; 'append' adds rows to
                an existing extract using its stored schema.
    """
    def _write_rows(table, row_schema):
        # Row-insertion loop shared by both modes (the original duplicated
        # this block verbatim in each branch).
        newRow = Row(row_schema)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            d = dataset['Date'][i]
            newRow.setDate(2, d.year, d.month, d.day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

    if tde_mode == 'new':
        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)
        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")
        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)
        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)
        # Step 4: Create rows and insert them one by one
        _write_rows(table, dataSchema)
        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':
        # Step 1: Open the existing extract file
        preExtract = Extract(tde_name)
        # Step 2: Open the existing table
        table = preExtract.openTable('Extract')
        # Steps 3-4: reuse the stored schema and insert the rows
        _write_rows(table, table.getTableDefinition())
        # Step 5: Close the extract
        preExtract.close()
from tableausdk import Extract as tde  # **Module change from py2 for py3
from tableausdk import Types as tdeTypes  # **
import os

# Step 1: start from a clean extract file
if os.path.isfile('trivialExample.tde'):
    os.remove('trivialExample.tde')
tdefile = tde.Extract('trivialExample.tde')

# Step 2: describe the two columns (index 0: rowID, index 1: value)
tableDef = tde.TableDefinition()
for col_name, col_type in (('rowID', tdeTypes.Type.CHAR_STRING),
                           ('value', tdeTypes.Type.DOUBLE)):
    tableDef.addColumn(col_name, col_type)

# Step 3: materialise the table from the definition
table = tdefile.addTable('Extract', tableDef)

# Step 4: insert rows 'Row 1'..'Row 100' with values 1..100
newrow = tde.Row(tableDef)
for n in range(1, 101):
    newrow.setCharString(0, 'Row ' + str(n))
    newrow.setDouble(1, n)
    table.insert(newrow)

# Step 5: close the tde
tdefile.close()
def assemble_tde(self):
    """
    Gather the data information and create the Tde.

    Appends self.data_frame to the 'Extract' table of self.file_name,
    creating the table from self.data_types when the extract is new.
    On any failure the dataframe is dumped to a pickle and a CSV next to
    the tde for post-mortem, and a summary Exception is raised.
    :return: None
    """
    try:
        print "Creating extract:", self.file_name
        ExtractAPI.initialize()
        with Extract(self.file_name) as data_extract:
            # If extract Does exist add to the Extract table and file
            if data_extract.hasTable('Extract'):
                # Open an existing table to add more rows
                table = data_extract.openTable('Extract')
                table_definition = table.getTableDefinition()
            else:
                table_definition = TableDefinition()
                for each in self.data_types.reset_index(level=0).values:
                    # Add the column info to the table definition
                    # (each[0] = column name, each[1] = declared data type)
                    table_definition.addColumn(
                        str(each[0]), schema_type_map[str(each[1])])
                # Create the Table with the table definition
                table = data_extract.addTable("Extract", table_definition)
            new_row = Row(table_definition)
            count = self.data_frame.shape[0]
            pbar = Bar(count)
            # Run through dataframe data and add data to the table object
            for i in range(count):
                for j, column_name in enumerate(
                        self.data_types.index.values.tolist()):
                    self.add_to_row(new_row, j,
                                    self.data_frame[column_name].iloc[i],
                                    self.data_types['data_type'][j],
                                    column_name)
                table.insert(new_row)
                # Keep the last inserted row around to aid debugging if a
                # later insert fails
                self.last_data_row_extracted = self.data_frame.iloc[
                    i].to_frame().transpose()
                # if i == 1:
                #     raise Exception
                pbar.passed()
            data_extract.close()
        ExtractAPI.cleanup()
    except:
        # NOTE(review): bare except -- also swallows KeyboardInterrupt etc.;
        # the intent here is "dump the data, then re-raise a summary error".
        file_name = self.tde_file()
        # Clean up resources
        Extract(file_name).close()
        ExtractAPI.cleanup()
        # Create csv and pickle of the data
        self.data_frame.to_pickle(file_name.replace('.tde', '_pickle'))
        self.data_frame.to_csv(file_name.replace('.tde', '.csv'),
                               index=False,
                               encoding='utf-8')
        raise Exception(
            "Error in creating tde file please consult data files. \n%s\n%s. \n%s, %s, %s"
            % (file_name.replace(
                '.tde', '_pickle'), file_name.replace('.tde', '.csv'),
               'Error on line {}'.format(sys.exc_info()[-1].tb_lineno),
               sys.exc_info()[0], sys.exc_info()[1]))
pass ###Setting Tableau recognized data types### for i in column_types: if i is numpy.int64: column_headers_types.append(Type.INTEGER) elif i is numpy.float64: column_headers_types.append(Type.DOUBLE) elif i is pd.Timestamp: column_headers_types.append(Type.DATETIME) else: column_headers_types.append(Type.UNICODE_STRING) ###Getting the existing table schema for incremental refresh### if incremental_refresh == 1: dataExtract = Extract(extract_name) if dataExtract.hasTable('Extract'): table = dataExtract.openTable('Extract') dataSchema = table.getTableDefinition() else: ###Removing extract if it already exists (for full refresh)### try: cwd = os.getcwd() for f in os.listdir(cwd): if re.search(extract_name, f): os.remove(os.path.join(cwd, f)) except OSError, e: pass ###Initializng the Extract API, and applying the schema to the table###
# NOTE(review): these constants appear to be column indices into the raw
# flight data rows processed elsewhere in this script -- confirm against the
# data reader.
DEST_LON = 4
SEG_NAME = 9
DEP_TIME = 6  # Departure time (minutes from midnight)
DEP_TIME_HRS = 0
FLIGHT_NUM = 2
FLIGHT_TIME = 5

#####################################################################
## Process the data and write the TDE
#####################################################################

# 1. initialize a new extract
ExtractAPI.initialize()

# 2. Create a table definition
new_extract = Extract(extractLocation)

# 3. Add column definitions to the table definition, one per output field
table_definition = TableDefinition()
table_definition.addColumn('route', Type.UNICODE_STRING)  # column 0
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)
# Successively inner-join each (dataframe, key) pair in join_info onto the
# accumulating joinDF_auto dataframe.
for i in join_info:
    #joinedDF = pd.merge(outputDF, VPS_DF, on='IntubationCycle', how='left')
    joinDF_auto = pd.merge(joinDF_auto, i[0], on=i[1], how='inner')

#output to CSV
joinDF_auto.to_csv(output_CSV, index=False, encoding='utf-8')

#typecast to date format
joinDF_auto['ClaimDate'] = pd.to_datetime(joinDF_auto['ClaimDate'])

#------------------------------------------------------------------------------
# Create Extract
#------------------------------------------------------------------------------
ExtractAPI.initialize()
new_extract = Extract(output_Extract)

# Create a new table definition with 3 columns
table_definition = TableDefinition()
table_definition.addColumn('Patient_ID', Type.INTEGER)  # column 0
table_definition.addColumn('ClaimDate', Type.DATE)  # column 1
table_definition.addColumn('Patient Name', Type.UNICODE_STRING)  # column 2
#table_definition.addColumn('Claim_ID', Type.INTEGER)  # column 3
#table_definition.addColumn('Discount', Type.DOUBLE)  # column 4

#------------------------------------------------------------------------------
# Populate Extract
#------------------------------------------------------------------------------
new_table = new_extract.addTable('Extract', table_definition)
def tableau_extract(resultset, data_file):
    """Create a TDE extract at `data_file` from `resultset`.

    resultset -- a ResultSet (converted via .DataFrame()) or a DataFrame
    data_file -- output .tde path; any existing file is removed first

    Column types are inferred from the Python type of each column's first
    value; unmapped types fall back to UNICODE_STRING. Cells that fail
    conversion (TypeError) are stored as NULL.
    """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset
    data_type = []
    fieldnames = []
    # Python type -> Tableau extract type.
    # NOTE(review): pd._libs.tslib.Timestamp is a private pandas path that
    # moved in newer pandas versions -- confirm against the pinned pandas.
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd._libs.tslib.Timestamp: Type.DATETIME
                     }
    # Sample the first value of each column to decide its extract type
    for col in df_name:
        fieldnames.append(col)
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))
    for col_name in data_dict:
        if data_dict[col_name] in data_type_map:
            data_dict[col_name] = data_type_map[data_dict[col_name]]
        else:
            # Anything unmapped is stored as a unicode string
            data_dict[col_name] = Type.UNICODE_STRING
    # Initialize a new extract (remove any previous file first)
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)
    # NOTE(review): hard-coded numeric codes assumed to equal the Type enum
    # values above -- confirm they match the installed SDK's constants.
    tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12,
                 'DATETIME': 13, 'DURATION': 14,
                 'CHAR_STRING': 15, 'UNICODE_STRING': 16}
    # Write every dataframe cell through the setter matching its column type
    for i in range(0, len(df_name)):
        for col in range(0, table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            try:
                if data_dict[col_name] == tde_types['INTEGER']:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DOUBLE']:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['BOOLEAN']:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DATE']:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif data_dict[col_name] == tde_types['DATETIME']:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year,
                                        data.month, data.day, data.hour,
                                        data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['DURATION']:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute,
                                        data.second, 0)
                elif data_dict[col_name] == tde_types['CHAR_STRING']:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['UNICODE_STRING']:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Error')
                    new_row.setNull(col)
            except TypeError:
                # Unconvertible cell (e.g. NaN passed to int()) -> NULL
                new_row.setNull(col)
        new_table.insert(new_row)
    new_extract.close()
    ExtractAPI.cleanup()
    # Remove the SDK's log files left in the working directory
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
def create(target_filepath, dataframe): column = [] ## 宣告Dataframe(左)pandas資料格式 對應 到TDE的資料格式(右)。 fieldMap = { 'float64': tde.Types.Type.DOUBLE, 'float32': tde.Types.Type.DOUBLE, 'int64': tde.Types.Type.INTEGER, 'int8': tde.Types.Type.INTEGER, 'object': tde.Types.Type.UNICODE_STRING, 'bool': tde.Types.Type.DOUBLE, 'datetime64[ns]': tde.Types.Type.DATETIME } ## 若TDE已存在,則刪除。 if os.path.isfile(target_filepath): os.remove(target_filepath) tdefile = tde.Extract(target_filepath) schema = tde.TableDefinition() # define the table definition colnames = dataframe.columns # dataframe all column name coltypes = dataframe.dtypes # dataframe all column's datatype ## 根據Dataframe建立Tde Schema for i in range(0, len(colnames)): cname = colnames[i] ctype = fieldMap.get(str(coltypes[i])) schema.addColumn(cname, ctype) # 加入一行欄位(名稱,資料格式) column.append(cname) ## 開啟TDE FILE寫入資料 with tdefile as extract: table = extract.addTable("Extract", schema) for r in range(0, dataframe.shape[0]): row = tde.Row(schema) ## Loop 顯示剩餘筆數 count = dataframe.shape[0] - r - 1 print('剩' + str(count) + '筆', end="\r") ## 每筆資料依照格式寫入TDE內 for c in range(0, len(coltypes)): if str(coltypes[c]) == 'float64': if dataframe.iloc[r, c] is None: row.setNull(c) else: row.setDouble(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'float32': row.setDouble(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'int64': if dataframe.iloc[r, c] is None: row.setNull(c) else: row.setInteger(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'int8': row.setInteger(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'object': if dataframe.iloc[r, c] is None: row.setNull(c) else: row.setString(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'bool': row.setBoolean(c, dataframe.iloc[r, c]) elif str(coltypes[c]) == 'datetime64[ns]': try: dt = dataframe.iloc[r, c] row.setDateTime(c, dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, 0) except: row.setNull(c) else: row.setNull(c) ## insert the row table.insert(row) ## close the tdefile 
tdefile.close()
def build(self): # Set filename tdefileName = self.tde_settings_ins.tde_file if os.path.isfile(tdefileName): os.remove(tdefileName) #set the extract object instance tdefile = tdeEx.Extract(tdefileName) # create the column schema tableDef = tdeEx.TableDefinition() columnsListedInOrder = [] outputColIndex = {} outputColNumber = 0 for columnName, index in sorted( self.tde_settings_ins.columnIndex.items(), key=lambda x: x[1]): BasicDataType = self.tde_settings_ins.columns[columnName] tableDef.addColumn(columnName, tdeSettings.schemaIniTypeMap[BasicDataType]) outputColIndex[columnName] = outputColNumber outputColNumber += 1 #add the column schema to the ojbect instance of EXTRACT table = tdefile.addTable("Extract", tableDef) rowNo = 0 rowsIterator = self.tde_settings_ins.inputInfo.fileInformation.yieldRowsBase( ) printedBool = False startTime = datetime.datetime.now() print "Build start time:", datetime.datetime.strftime( startTime, '%Y-%m-%d %H:%M:%S') prevPct = -1 for myReaderRow in rowsIterator: putRow = tdeEx.Row(tableDef) for columnName, index in self.tde_settings_ins.columnIndex.items(): #if not column headers need to change this to index of the column if (tdeSettings.schemaIniTypeMap[ self.tde_settings_ins.columns[columnName]] != tde.Type.UNICODE_STRING and myReaderRow[index] == ""): putRow.setNull(outputColIndex[columnName]) else: try: #valueDecoded=myReaderRow[index].decode(self.tde_settings_ins.inputInfo.fileInformation.encodeing) self.fieldSetterMap[tdeSettings.schemaIniTypeMap[ self.tde_settings_ins.columns[columnName]]]( putRow, outputColIndex[columnName], myReaderRow[index]) except: print "column name", columnName, "contains invalid data" print "value: ", myReaderRow[index] print sys.exc_info()[0] raise table.insert(putRow) # Output progress line #only print percent one time when we hit that percent rowNo += 1 pct = math.floor( self.tde_settings_ins.inputInfo.fileInformation.getPercent( rowNo)) # print "row:",rowNo # print "pct:",pct # print "value: ",pct 
%2 if pct % 2 == 0: if printedBool == False or prevPct != pct: print "Percent:", round(pct, 0), "%, row:", rowNo printedBool = True prevPct = pct else: printedBool = False print "tde file built to:", tdefileName endTime = datetime.datetime.now() print "End Time:", endTime diff = endTime - startTime print "Build elapsed time:", diff
## import the libraries import tableausdk.Extract as tde import pandas as pd import os ## bring in a sample Graduate School Admissions datasets file_name = "http://www.ats.ucla.edu/stat/data/binary.csv" df = pd.read_csv(file_name) df.head() df.shape ## create the extract name, but remove the extract if it already exists fname = "example.tde" try: tdefile = tde.Extract(fname) except: os.system('del ' + fname) os.system('del DataExtract.log') tdefile = tde.Extract(fname) ## define the table definition tableDef = tde.TableDefinition() ## create a list of column names and types colnames = df.columns coltypes = df.dtypes ## create a dict for the field maps ## Caveat: I am not including all of the possibilities below fieldMap = { 'float64': tde.Types.Type.DOUBLE,
from tableausdk import *
from tableausdk.Server import *
from tableausdk.Extract import *
import tableausdk.Extract as tde

#Define a new tde file.
tdefile = tde.Extract('test1.tde')

#Defining a new data set/ table defination in tde
# NOTE: the second argument is a raw SDK type code. The rows below are filled
# with setCharString/setDouble, so column 0 (code 15) is a CHAR_STRING and
# columns 1-2 (code 10) are DOUBLEs -- the original '#INTEGER' comment on
# 'company' was wrong.
tableDef = tde.TableDefinition()
tableDef.addColumn("company", 15)    #CHAR_STRING
tableDef.addColumn("projected", 10)  #DOUBLE
tableDef.addColumn("realRev", 10)    #DOUBLE

#Let's add dataset to the file
tabletran = tdefile.addTable("Extract", tableDef)

#Create new Row
newrow = tde.Row(tableDef)

# Adding data value
# First param represents sequence of the column
newrow.setCharString(0, 'myCompany')
newrow.setDouble(1, 1000)
newrow.setDouble(2, 888)

# adding new row to the dataset
tabletran.insert(newrow)

#Create new Row
newrow = tde.Row(tableDef)
newrow.setCharString(0, 'myCompany2')
def extract(file_name):
    """Move `file_name` into WORKING_DIRECTORY/extract, then load its CSV
    rows into the TRACK_TERM .tde -- creating the 'Extract' table on first
    run, appending on later runs. The processed CSV is renamed to a unix
    timestamp so re-runs never collide on file names.
    """
    # move file to /extract
    # cd to /extract
    # if there is no extract called TRACK_TERM then create one, otherwise append to TRACK_TERM
    # define data model for extract
    global WORKING_DIRECTORY, TRACK_TERM
    if not os.path.exists(WORKING_DIRECTORY + '/extract/'):
        os.makedirs(WORKING_DIRECTORY + '/extract/')
    from_path = WORKING_DIRECTORY + '/' + file_name
    to_path = WORKING_DIRECTORY + '/extract/' + file_name
    os.rename(from_path, to_path)
    os.chdir(WORKING_DIRECTORY + '/extract')
    # define the extract
    with data.Extract(TRACK_TERM + '.tde') as extract:
        tableDef = data.TableDefinition()
        # define the columns and the data types in the extract
        tableDef.addColumn('lang', types.Type.CHAR_STRING)  #0
        tableDef.addColumn('sentiment', types.Type.DOUBLE)  #1
        tableDef.addColumn('country', types.Type.CHAR_STRING)  #2
        tableDef.addColumn('created_at', types.Type.DATETIME)  #3
        tableDef.addColumn('tweet_text', types.Type.CHAR_STRING)  #4
        tableDef.addColumn('Longitude', types.Type.DOUBLE)  #5
        tableDef.addColumn('source', types.Type.CHAR_STRING)  #6
        tableDef.addColumn('user', types.Type.CHAR_STRING)  #7
        tableDef.addColumn('Latitude', types.Type.DOUBLE)  #8
        table = None
        if not extract.hasTable('Extract'):
            # Table does not exist, so create it.
            print "Creating a new extract"
            table = extract.addTable('Extract', tableDef)
        else:
            # Table exists, so append the new data.
            print "Appending to an existing extract"
            table = extract.openTable('Extract')
        new_row = data.Row(tableDef)
        # read the data from the CSV into the extract row object
        with open(file_name, 'r') as inf:
            reader = csv.DictReader(inf, delimiter=',', lineterminator='\n')
            for row in reader:
                # insert data into the row object in the correct order as defined above
                new_row.setCharString(0, row['lang'])
                sentiment = float(row['sentiment'])
                new_row.setDouble(1, sentiment)
                new_row.setCharString(2, row['country'])
                # parse the twitter date string:
                # Mon Sep 21 11:03:53 +0000 2015
                # %a %b %d %H:%M:%S +0000 %Y
                date_object = datetime.strptime(row['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                year = int(datetime.strftime(date_object, '%Y'))
                month = int(datetime.strftime(date_object, '%m'))
                day = int(datetime.strftime(date_object, '%d'))
                hour = int(datetime.strftime(date_object, '%H'))
                # NOTE(review): 'min' shadows the builtin within this loop
                min = int(datetime.strftime(date_object, '%M'))
                sec = int(datetime.strftime(date_object, '%S'))
                frac = 0  # fractions of a second aka milliseconds
                new_row.setDateTime(3, year, month, day, hour, min, sec, frac)
                new_row.setCharString(4, row['tweet_text'])
                # check if there is a value for longitude, otherwise write a 0
                try:
                    longitude = float(row['longitude'])
                except:
                    longitude = 0
                new_row.setDouble(5, longitude)
                new_row.setCharString(6, row['source'])
                new_row.setCharString(7, row['user'])
                # check if there is a value for latitude, otherwise write a 0
                try:
                    latitude = float(row['latitude'])
                except:
                    latitude = 0
                new_row.setDouble(8, latitude)
                table.insert(new_row)
    # if the process fails we want to be able to re-run it without collisions between file names
    # so give each file a unique name (unix time stamp in this case).
    os.rename(file_name, str(time.time()).split('.')[0] + '.csv')
    # cd back to working directory
    os.chdir(WORKING_DIRECTORY)
    return
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
    """Write a 2-D heatmap grid into a TDE extract, one output row per grid
    cell, including a WKT polygon (GEOM column) for the cell footprint.

    heatmap  -- 2-D array of cell values (heatmap[i][j])
    extent   -- (x0, x1, y0, y1) bounds of the grid in map coordinates
    smoothing -- smoothing factor, stored verbatim in every row
    extractLocation -- path of the .tde to create or append to
    """
    xLen = len(heatmap)
    yLen = len(heatmap[0])
    xRange = abs(extent[0] - extent[1])
    yRange = abs(extent[2] - extent[3])
    xMin = min(extent[0], extent[1])
    yMin = min(extent[2], extent[3])
    # NOTE(review): the x step divides by the column count and the y step by
    # the row count -- confirm this cross-pairing matches the grid layout.
    xIncrement = xRange / yLen
    yIncrement = yRange / xLen

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
    table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
    table_definition.addColumn('VALUE', Type.DOUBLE)  # column 2
    table_definition.addColumn('ID', Type.UNICODE_STRING)
    table_definition.addColumn('CellCount', Type.INTEGER)
    table_definition.addColumn('Smoothing', Type.DOUBLE)
    table_definition.addColumn('GEOM', Type.SPATIAL)

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add table if it doesn't already exist
    if (new_extract.hasTable('Extract') == False):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    # NOTE(review): both coordinates are incremented BEFORE first use, so the
    # first cell is anchored at (xMin + xIncrement, yMin + yIncrement) and
    # the last at the max edge -- confirm this offset is intended.
    yCoord = yMin
    for i in range(0, xLen):
        yCoord += yIncrement
        xCoord = xMin
        for j in range(0, yLen):
            xCoord += xIncrement
            # WKT polygon for this cell's four corners
            cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)
            new_row.setString(0, str(i))  # ROW
            new_row.setString(1, str(j))  # COL
            new_row.setDouble(2, heatmap[i][j])  # VAL
            new_row.setString(3, str(i) + '-' + str(j))  # id
            new_row.setInteger(4, len(heatmap[0]))  # cell count
            new_row.setDouble(5, smoothing)  # smoothing
            new_row.setSpatial(6, cellWkt)  # WKT spatial
            new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
# Choose the output TDE name: an explicit argument wins, otherwise derive it
# from the CSV filename.
if arg_tde_filename:
    tde_filename = arg_tde_filename
else:
    tde_filename = csv_filename.replace(".csv", ".tde")

# print "Will write to [%s]" % os.path.join(tde_dirpath, tde_filename)
# print

# Step 1: Create the Extract file and open the csv
tde_fullpath = os.path.join(tde_dirpath, tde_filename)
# if the file doesn't exist, revoke the append
if arg_append and not os.path.isfile(tde_fullpath):
    print "Couldn't append -- file doesn't exist."
    arg_append = False
# Full (non-append) runs start from a clean file
if not arg_append and os.path.isfile(tde_fullpath):
    os.remove(tde_fullpath)
new_extract = Extract(tde_fullpath)

# Define the columns by the first csv
table_df = pandas.read_csv(os.path.join(csv_dirpaths[0], csv_filename),
                           names=arg_header)
# Left-join every auxiliary table onto the base dataframe
for join_table_file in arg_join:
    join_df = pandas.read_csv(join_table_file)
    table_df = pandas.merge(table_df, join_df, how='left')

# Step 2: Create the tableDef
table_definition = TableDefinition()
old_colnames = []
colnames_to_types = {}
new_colnames_to_idx = {}
for (col, dtype) in itertools.izip(table_df.columns, table_df.dtypes):
    colname = col.strip()  # strip whitespace
def new_tde(self):
    """Create a new extract file at self.tde_name.

    Returns the open Extract object, or None (after printing a warning)
    when the file already contains an 'Extract' table, i.e. it was
    created by a previous run.
    """
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        # Fixed: the original wrote 'return print(...)', hiding the fact
        # that this path returns None.
        # NOTE(review): the just-opened extract is not closed on this path
        # -- confirm whether a close() is wanted here.
        print("tde already exist use another name")
        return None
    return dataExtract