Example #1
0
def to_tde(df, tde_filename='extract.tde'):
    if os.path.isfile(tde_filename):
        os.remove(tde_filename)
    ExtractAPI.initialize()
    new_extract = Extract(tde_filename)

    df = dedup_column_name(df)
    row_cnt, col_cnt = df.shape
    list_dtypes = df.dtypes

    table_definition = make_table_definition(df)
    new_table = new_extract.addTable('Extract', table_definition)

    # for j, row in df.iterrows():
    for j, row in tqdm.tqdm(df.iterrows(), total=row_cnt):
        new_row = Row(table_definition)
        for i, (cell, column_data_type) in enumerate(zip(row, list_dtypes)):
            value = mapper[column_data_type]['value_modifier'](cell)
            if value:
                params = [new_row, i] + value
                mapper[column_data_type]['tableau_set_function'](*params)


#        for i,column in enumerate(df.columns):
#            column_data_type = df[column].dtype
#            value = mapper[column_data_type]['value_modifier'](row[1][i])
#
#            if value:
#                params = [new_row,i]+value
#                mapper[column_data_type]['tableau_set_function'](*params)
#
        new_table.insert(new_row)

    new_extract.close()
    ExtractAPI.cleanup()
Example #2
0
def to_tde_new(df, tde_filepath, showProgress=True):

    # try to use apply to test how it speeds up.
    # apply is a little faster than iterrows(), about 1.5x
    # testing by my dataset which contains 100000 rows and 25 cols

    if os.path.isfile(tde_filepath):
        os.remove(tde_filepath)
    ExtractAPI.initialize()
    new_extract = Extract(tde_filepath)

    df = dedup_column_name(df)
    row_cnt, col_cnt = df.shape
    list_dtypes = df.dtypes
    table_definition = make_table_definition(df)
    new_table = new_extract.addTable('Extract', table_definition)

    def insert_tde(x):
        new_row = Row(table_definition)
        for i, (cell, column_data_type) in enumerate(zip(x, list_dtypes)):
            value = mapper[column_data_type]['value_modifier'](cell)
            if value:
                params = [new_row, i] + value
                mapper[column_data_type]['tableau_set_function'](*params)

        new_table.insert(new_row)

    tqdm.tqdm.pandas(desc='My bar!')
    df.progress_apply(insert_tde, axis=1)
    new_extract.close()
    ExtractAPI.cleanup()
Example #3
0
def to_tde(df, extractname, data_type, index = False, new_extract = False, append = True):
    '''
    change pandas dataframe to tableau data extract
    '''
    table_definition_map = { 
                            'bool' :    Type.BOOLEAN,
                            'integer':  Type.INTEGER,
                            'double':   Type.DOUBLE,
                            'date':     Type.DATE,
                            'datetime': Type.DATETIME,
                            'unicode':  Type.UNICODE_STRING,
                            'char':     Type.CHAR_STRING
                            }
    value_set_map = {
                  Type.BOOLEAN:        lambda row, col_num, value: row.setBoolean( col_num, bool(value) ),
                  Type.INTEGER:        lambda row, col_num, value: row.setInteger( col_num, int(value) ),
                  Type.DOUBLE:         lambda row, col_num, value: row.setDouble( col_num, float(value) ),
                  Type.UNICODE_STRING: lambda row, col_num, value: row.setString( col_num, unicode(value) ),
                  Type.CHAR_STRING:    lambda row, col_num, value: row.setCharString( col_num, value ),
                  Type.DATE:           lambda row, col_num, value: row.setDate(col_num, value.year, value.month, value.day),
                  Type.DATETIME:       lambda row, col_num, value: row.setDateTime( col_num,  value.year,  value.month,  value.day,
                                                                                  value.hour,  value.minute,  value.second,
                                                                                  value.microsecond/100 )
                  }
    
    if index:
        df_new = df.reset_index()
    else:
        df_new = df        
        
    if extractname[-4:] != '.tde':
        extractname += '.tde'    
    
    if new_extract & (extractname in os.listdir('.')):
            os.remove(extractname)
    extract_to_save = Extract(extractname)
            
    if extract_to_save.hasTable("Extract") & append:
        table_to_save = extract_to_save.openTable("Extract")
        table_definition = table_to_save.getTableDefinition()
    else:
        table_definition = TableDefinition()
        for colname in df_new.columns:
            type_code = table_definition_map[data_type.get(colname, 'unicode').lower()]
            table_definition.addColumn(colname, type_code)
        table_to_save = extract_to_save.addTable("Extract", table_definition) # currnetly table cannot be Extract!!!
    
    for df_row_tuple in df_new.iterrows():
        new_row = Row(table_definition)
        # new_rwo is a tuple of len 2, index and the row as series
        df_row = df_row_tuple[1]
        for col_num, (col_val, null_col) in enumerate(zip(df_row, df_row.isnull())):
            if null_col:
                new_row.setNull(col_num)
            else:
                value_set_map[table_definition.getColumnType(col_num)](new_row, col_num, col_val)
        table_to_save.insert(new_row)
    
    extract_to_save.close()
    ExtractAPI.cleanup()
Example #4
0
 def __init__(self, extract_path):
     try:
         if path.exists(extract_path):
             #import os.remove as del_file
             del_file(extract_path)
         self.extract = Extract(extract_path)
         self.table_definition = TableDefinition()
         self.table = None
         self.tde_columns = list()
     except Exception as ex:
         log.exception(ex.message)
def to_tde(df,tde_filename = 'extract.tde'):
    ExtractAPI.initialize()
    new_extract = Extract(tde_filename)
    
    df = dedup_column_name(df)
    table_definition = make_table_definition(df)
    new_table = new_extract.addTable('Extract', table_definition)
    
    for row in df.iterrows():
        new_row = Row(table_definition)
        for i,column in enumerate(df.columns):
            column_data_type = df[column].dtype
            value = mapper[column_data_type]['value_modifier'](row[1][i])
                
            if value:
                params = [new_row,i]+value
                mapper[column_data_type]['tableau_set_function'](*params)

        new_table.insert(new_row)
        
    new_extract.close()
    ExtractAPI.cleanup()
Example #6
0
 def _initialise_extract(self):
     """
     initialises extract with the defined tde path
     """
     self._extract = Extract(self._tde_path)
Example #7
0
class ExtractBuilder(object):
    """
    Builds the tableau Extract by creating a Tableau Extract, defines the table skeleton, adds the 
    table to the extract and fills this table with the relevant data for the TDE. 
    """
    def __init__(self):
        self._db_type = None
        self._db_conn = None
        self._cursor = None
        self._view_or_table_name = None
        self._extract = None
        self.columns = dict()
        self._db_consumer = None
        self._tde_path = None

    def _build_temp_tdepath(self):
        """
        creates a temporary directory and defines the tde path as temp_dir+view_or_table_name.tde
        """
        temp_dir_path = tempfile.mkdtemp()
        _, __, extract_name = get_db_components(self._view_or_table_name)
        file_name = "{}.tde".format(extract_name)
        self._tde_path = os.path.join(temp_dir_path, file_name)

    def _initialise_extract(self):
        """
        initialises extract with the defined tde path
        """
        self._extract = Extract(self._tde_path)

    def _build_skeleton(self):
        """
        Gets the columns from the table or view and then auto builds the Tableau
        table definition - the skeleton of the extract we want to create
        
        :return: table_def: Tableau table definition
        """
        table_def = TableDefinition()
        db_table_columns = self._db_consumer.get_table_definition()
        for field_name, position, field_type in db_table_columns:
            table_def.addColumn(field_name, field_type)
            self.columns[position] = field_type
        return table_def

    def _add_table_to_extract(self, table_def):
        """
        Assigns the table definition (skeleton) to the extract
         
        :param table_def: the skeleton of the view or table
        :return: updated tableau `Extract`
        """
        return self._extract.addTable('Extract', table_def)

    def _fill_extract(self):
        """
        Fills the Extract with data from the view/table
        """
        extract_table_definition = self._build_skeleton()
        tde_table = self._add_table_to_extract(extract_table_definition)
        extract_feed = ExtractFiller(tde_table, extract_table_definition,
                                     self.columns)
        for row in self._db_consumer.get_table_data():
            extract_feed.insert_data_to_extract(row)
        self._extract.close()

    def connect_to_db(self, view_or_table_name, conn_string, dbtype=None):
        """
        Connect to the view or table that needs to be turned into a .tde extract
        
        :param view_or_table_name: View or Table that needs to be an extract
        :param conn_string: connection string to the database where the view or table exists
        :param dbtype: type of db so the right pyodbc wrapper is used to connect
        """
        self._db_conn = AnyDB(conn_string, dbtype)
        self._cursor = self._db_conn.get_cursor()
        self._db_type = dbtype
        self._view_or_table_name = view_or_table_name

    def create_extract(self):
        """
        Creates the extract by 
        - connecting to the database,
        - building the tde path
        - initializing the extract
        - pulling data from db and filling the extract
        
        :return: path to the created .tde extract 
        """
        self._db_consumer = DBConsumer(self._cursor, self._view_or_table_name,
                                       self._db_type)
        self._build_temp_tdepath()
        self._initialise_extract()
        self._fill_extract()
        return self._tde_path

    def close(self):
        self._db_conn.close()
Example #8
0
class TdeWriter(object):
    def __init__(self, extract_path):
        try:
            if path.exists(extract_path):
                #import os.remove as del_file
                del_file(extract_path)
            self.extract = Extract(extract_path)
            self.table_definition = TableDefinition()
            self.table = None
            self.tde_columns = list()
        except Exception as ex:
            log.exception(ex.message)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.extract.close()
        except Exception as ex:
            pass

    def write_row(self, row_data):
        tr = self.get_tde_row(self.table_definition, row_data)
        self.table.insert(tr)
        tr.close()

    def set_metadata(self, metadata):
        self.tde_columns = [TdeColumn(c[0], c[1]) for c in metadata]
        self.table_definition = self.get_table_definition(self.tde_columns)
        self.table = self.extract.addTable('Extract', self.table_definition)

    @staticmethod
    def get_tde_row(table_definition, row_data):
        row = Row(table_definition)
        for idx, data in enumerate(row_data):
            t = table_definition.getColumnType(idx)
            try:
                if t is Type.DOUBLE:
                    row.setDouble(idx, data)
                elif t is Type.BOOLEAN:
                    row.setBoolean(idx, data)
                elif t is Type.CHAR_STRING:
                    row.setCharString(idx, data)
                elif t is Type.DATE:
                    row.setDate(idx, data.year, data.month, data.day)
                elif t is Type.DATETIME:
                    row.setDateTime(idx, data.year, data.month, data.day, data.hour, data.minute, data.second, 0)
                elif t is Type.INTEGER:
                    row.setInteger(idx, data)
                elif t is Type.UNICODE_STRING:
                    row.setString(idx, data)
            except Exception as ex:
                row.setNull(idx)
        return row

    @staticmethod
    def get_table_definition(tde_columns):
        td = TableDefinition()
        for c in tde_columns:
            td.addColumn(c.column_name, c.tde_type)
        return td