def to_tde(df, tde_filename='extract.tde'): if os.path.isfile(tde_filename): os.remove(tde_filename) ExtractAPI.initialize() new_extract = Extract(tde_filename) df = dedup_column_name(df) row_cnt, col_cnt = df.shape list_dtypes = df.dtypes table_definition = make_table_definition(df) new_table = new_extract.addTable('Extract', table_definition) # for j, row in df.iterrows(): for j, row in tqdm.tqdm(df.iterrows(), total=row_cnt): new_row = Row(table_definition) for i, (cell, column_data_type) in enumerate(zip(row, list_dtypes)): value = mapper[column_data_type]['value_modifier'](cell) if value: params = [new_row, i] + value mapper[column_data_type]['tableau_set_function'](*params) # for i,column in enumerate(df.columns): # column_data_type = df[column].dtype # value = mapper[column_data_type]['value_modifier'](row[1][i]) # # if value: # params = [new_row,i]+value # mapper[column_data_type]['tableau_set_function'](*params) # new_table.insert(new_row) new_extract.close() ExtractAPI.cleanup()
def to_tde_new(df, tde_filepath, showProgress=True): # try to use apply to test how it speeds up. # apply is a little faster than iterrows(), about 1.5x # testing by my dataset which contains 100000 rows and 25 cols if os.path.isfile(tde_filepath): os.remove(tde_filepath) ExtractAPI.initialize() new_extract = Extract(tde_filepath) df = dedup_column_name(df) row_cnt, col_cnt = df.shape list_dtypes = df.dtypes table_definition = make_table_definition(df) new_table = new_extract.addTable('Extract', table_definition) def insert_tde(x): new_row = Row(table_definition) for i, (cell, column_data_type) in enumerate(zip(x, list_dtypes)): value = mapper[column_data_type]['value_modifier'](cell) if value: params = [new_row, i] + value mapper[column_data_type]['tableau_set_function'](*params) new_table.insert(new_row) tqdm.tqdm.pandas(desc='My bar!') df.progress_apply(insert_tde, axis=1) new_extract.close() ExtractAPI.cleanup()
def to_tde(df, extractname, data_type, index = False, new_extract = False, append = True): ''' change pandas dataframe to tableau data extract ''' table_definition_map = { 'bool' : Type.BOOLEAN, 'integer': Type.INTEGER, 'double': Type.DOUBLE, 'date': Type.DATE, 'datetime': Type.DATETIME, 'unicode': Type.UNICODE_STRING, 'char': Type.CHAR_STRING } value_set_map = { Type.BOOLEAN: lambda row, col_num, value: row.setBoolean( col_num, bool(value) ), Type.INTEGER: lambda row, col_num, value: row.setInteger( col_num, int(value) ), Type.DOUBLE: lambda row, col_num, value: row.setDouble( col_num, float(value) ), Type.UNICODE_STRING: lambda row, col_num, value: row.setString( col_num, unicode(value) ), Type.CHAR_STRING: lambda row, col_num, value: row.setCharString( col_num, value ), Type.DATE: lambda row, col_num, value: row.setDate(col_num, value.year, value.month, value.day), Type.DATETIME: lambda row, col_num, value: row.setDateTime( col_num, value.year, value.month, value.day, value.hour, value.minute, value.second, value.microsecond/100 ) } if index: df_new = df.reset_index() else: df_new = df if extractname[-4:] != '.tde': extractname += '.tde' if new_extract & (extractname in os.listdir('.')): os.remove(extractname) extract_to_save = Extract(extractname) if extract_to_save.hasTable("Extract") & append: table_to_save = extract_to_save.openTable("Extract") table_definition = table_to_save.getTableDefinition() else: table_definition = TableDefinition() for colname in df_new.columns: type_code = table_definition_map[data_type.get(colname, 'unicode').lower()] table_definition.addColumn(colname, type_code) table_to_save = extract_to_save.addTable("Extract", table_definition) # currnetly table cannot be Extract!!! for df_row_tuple in df_new.iterrows(): new_row = Row(table_definition) # new_rwo is a tuple of len 2, index and the row as series df_row = df_row_tuple[1] for col_num, (col_val, null_col) in enumerate(zip(df_row, df_row.isnull())): if null_col: new_row.setNull(col_num) else: value_set_map[table_definition.getColumnType(col_num)](new_row, col_num, col_val) table_to_save.insert(new_row) extract_to_save.close() ExtractAPI.cleanup()
def __init__(self, extract_path): try: if path.exists(extract_path): #import os.remove as del_file del_file(extract_path) self.extract = Extract(extract_path) self.table_definition = TableDefinition() self.table = None self.tde_columns = list() except Exception as ex: log.exception(ex.message)
def to_tde(df,tde_filename = 'extract.tde'): ExtractAPI.initialize() new_extract = Extract(tde_filename) df = dedup_column_name(df) table_definition = make_table_definition(df) new_table = new_extract.addTable('Extract', table_definition) for row in df.iterrows(): new_row = Row(table_definition) for i,column in enumerate(df.columns): column_data_type = df[column].dtype value = mapper[column_data_type]['value_modifier'](row[1][i]) if value: params = [new_row,i]+value mapper[column_data_type]['tableau_set_function'](*params) new_table.insert(new_row) new_extract.close() ExtractAPI.cleanup()
def _initialise_extract(self): """ initialises extract with the defined tde path """ self._extract = Extract(self._tde_path)
class ExtractBuilder(object): """ Builds the tableau Extract by creating a Tableau Extract, defines the table skeleton, adds the table to the extract and fills this table with the relevant data for the TDE. """ def __init__(self): self._db_type = None self._db_conn = None self._cursor = None self._view_or_table_name = None self._extract = None self.columns = dict() self._db_consumer = None self._tde_path = None def _build_temp_tdepath(self): """ creates a temporary directory and defines the tde path as temp_dir+view_or_table_name.tde """ temp_dir_path = tempfile.mkdtemp() _, __, extract_name = get_db_components(self._view_or_table_name) file_name = "{}.tde".format(extract_name) self._tde_path = os.path.join(temp_dir_path, file_name) def _initialise_extract(self): """ initialises extract with the defined tde path """ self._extract = Extract(self._tde_path) def _build_skeleton(self): """ Gets the columns from the table or view and then auto builds the Tableau table definition - the skeleton of the extract we want to create :return: table_def: Tableau table definition """ table_def = TableDefinition() db_table_columns = self._db_consumer.get_table_definition() for field_name, position, field_type in db_table_columns: table_def.addColumn(field_name, field_type) self.columns[position] = field_type return table_def def _add_table_to_extract(self, table_def): """ Assigns the table definition (skeleton) to the extract :param table_def: the skeleton of the view or table :return: updated tableau `Extract` """ return self._extract.addTable('Extract', table_def) def _fill_extract(self): """ Fills the Extract with data from the view/table """ extract_table_definition = self._build_skeleton() tde_table = self._add_table_to_extract(extract_table_definition) extract_feed = ExtractFiller(tde_table, extract_table_definition, self.columns) for row in self._db_consumer.get_table_data(): extract_feed.insert_data_to_extract(row) self._extract.close() def connect_to_db(self, view_or_table_name, conn_string, dbtype=None): """ Connect to the view or table that needs to be turned into a .tde extract :param view_or_table_name: View or Table that needs to be an extract :param conn_string: connection string to the database where the view or table exists :param dbtype: type of db so the right pyodbc wrapper is used to connect """ self._db_conn = AnyDB(conn_string, dbtype) self._cursor = self._db_conn.get_cursor() self._db_type = dbtype self._view_or_table_name = view_or_table_name def create_extract(self): """ Creates the extract by - connecting to the database, - building the tde path - initializing the extract - pulling data from db and filling the extract :return: path to the created .tde extract """ self._db_consumer = DBConsumer(self._cursor, self._view_or_table_name, self._db_type) self._build_temp_tdepath() self._initialise_extract() self._fill_extract() return self._tde_path def close(self): self._db_conn.close()
class TdeWriter(object): def __init__(self, extract_path): try: if path.exists(extract_path): #import os.remove as del_file del_file(extract_path) self.extract = Extract(extract_path) self.table_definition = TableDefinition() self.table = None self.tde_columns = list() except Exception as ex: log.exception(ex.message) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): try: self.extract.close() except Exception as ex: pass def write_row(self, row_data): tr = self.get_tde_row(self.table_definition, row_data) self.table.insert(tr) tr.close() def set_metadata(self, metadata): self.tde_columns = [TdeColumn(c[0], c[1]) for c in metadata] self.table_definition = self.get_table_definition(self.tde_columns) self.table = self.extract.addTable('Extract', self.table_definition) @staticmethod def get_tde_row(table_definition, row_data): row = Row(table_definition) for idx, data in enumerate(row_data): t = table_definition.getColumnType(idx) try: if t is Type.DOUBLE: row.setDouble(idx, data) elif t is Type.BOOLEAN: row.setBoolean(idx, data) elif t is Type.CHAR_STRING: row.setCharString(idx, data) elif t is Type.DATE: row.setDate(idx, data.year, data.month, data.day) elif t is Type.DATETIME: row.setDateTime(idx, data.year, data.month, data.day, data.hour, data.minute, data.second, 0) elif t is Type.INTEGER: row.setInteger(idx, data) elif t is Type.UNICODE_STRING: row.setString(idx, data) except Exception as ex: row.setNull(idx) return row @staticmethod def get_table_definition(tde_columns): td = TableDefinition() for c in tde_columns: td.addColumn(c.column_name, c.tde_type) return td