def createOrOpenExtract(filename, useSpatial):
    """Open an existing .tde extract or create a new one with the demo schema.

    @param filename: path of the .tde file to open or create
    @param useSpatial: when truthy, add a SPATIAL 'Destination' column
    @return: the opened Extract object (exits the process on fatal errors)
    """
    try:
        # The Extract constructor opens the file if it already exists,
        # or creates a new extract at that path if it does not.
        extract = Extract(filename)

        # Define the table schema only when the extract is brand new.
        # (In the Tableau Data Engine, the table must be named 'Extract'.)
        if not extract.hasTable('Extract'):
            schema = TableDefinition()
            schema.setDefaultCollation(Collation.EN_GB)
            schema.addColumn('Purchased', Type.DATETIME)
            schema.addColumn('Product', Type.CHAR_STRING)
            schema.addColumn('uProduct', Type.UNICODE_STRING)
            schema.addColumn('Price', Type.DOUBLE)
            schema.addColumn('Quantity', Type.INTEGER)
            schema.addColumn('Taxed', Type.BOOLEAN)
            schema.addColumn('Expiration Date', Type.DATE)
            # German collation for the localized product-name column.
            schema.addColumnWithCollation('Produkt', Type.CHAR_STRING,
                                          Collation.DE)
            if useSpatial:
                schema.addColumn('Destination', Type.SPATIAL)
            table = extract.addTable('Extract', schema)
            # addTable returns None on failure; original compared with '== None'.
            if table is None:
                print('A fatal error occurred while creating the table:\nExiting now\n.')
                exit(-1)
        # Return the extract so callers can actually use it (the original
        # built it and dropped it; Python 2 syntax also fixed for Python 3).
        return extract

    except TableauException as e:
        print('A fatal error occurred while creating the new extract:\n', e, '\nExiting now.')
        exit(-1)
Exemple #2
0
    def to_tableau(self, path, add_index=False):
        """Convert the wrapped Pandas DataFrame to a Tableau .tde file.

        @param path = path to write the .tde file to
        @param add_index = when True, prepend an integer 'index' column

        (Original docstring documented a nonexistent 'tableName' parameter,
        and lines in the cleanup loop mixed tabs and spaces, which raises
        TabError under Python 3.)
        """
        # Remove a stale extract and the SDK's debug logs so the SDK creates
        # a fresh file instead of appending to an old one.
        for stale in [path, os.path.dirname(path) + '/debug.log',
                      './DataExtract.log', './debug.log']:
            if os.path.isfile(stale):
                os.remove(stale)

        # Create the extract and an empty table definition.
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        # Optional leading integer index column.
        if add_index:
            table_def.addColumn('index', Type.INTEGER)

        # One column per DataFrame column, using precomputed SDK types.
        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])

        # In the TDE format the single table must be named 'Extract'.
        new_table = new_extract.addTable("Extract", table_def)

        # Write the row data.
        self.set_column_values(new_table, table_def, add_index)

        # Flush the extract and release the SDK.
        new_extract.close()
        ExtractAPI.cleanup()
Exemple #3
0
    def new_tde(self, data):
        """Create a brand-new .tde at self.tde_name from DataFrame *data*,
        storing every column as UNICODE_STRING.

        @param data: pandas DataFrame to export
        @return: None (prints a message and aborts if the file already holds data)
        """
        # Step 1: Create the Extract File.
        dataExtract = Extract(self.tde_name)
        if dataExtract.hasTable('Extract'):
            # Refuse to clobber an extract that already contains a table.
            return print("tde already exist use another name")

        # Step 2: Table definition — every column stored as unicode text.
        cols = data.columns.tolist()
        dataSchema = TableDefinition()
        for col in cols:
            dataSchema.addColumn(col, Type.UNICODE_STRING)

        # Step 3: Create a table in the image of the table definition.
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Reuse one Row object and insert once per record.
        # (The original called table.insert() inside the column loop, which
        #  inserted a duplicate, partially-populated row for every column,
        #  and recomputed cols.index(col) per cell.)
        newRow = Row(dataSchema)
        for i in range(len(data)):
            for col_index, col in enumerate(cols):
                newRow.setString(col_index, data[col][i])
            table.insert(newRow)

        # Step 5: Close the tde (the original closed it twice).
        dataExtract.close()
def tde_export():
    """Dataiku recipe entry point: export the 'input' dataset to a .tde file
    inside the 'output_folder' managed folder.

    Raises if the input dataset is partitioned (the TDE APIs cannot address
    a single partition).
    """
    print("Start export to TDE")
    input_name = get_input_names_for_role('input')[0]
    input_dataset = dataiku.Dataset(input_name)
    input_schema = input_dataset.read_schema()
    partitions = input_dataset.list_partitions(raise_if_empty=False)
    if partitions not in [[], [u'NP']]:
        raise Exception(
            "Due to the current APIs, this plugin cannot support partitioned input "
            "(and it seems the input dataset " + input_name +
            " is partitioned). "
            "A workaround is to first run a sync recipe "
            "from " + input_name + " into a non partitioned dataset, "
            "then take the latter as input for tde export.")
    output_name = get_output_names_for_role('output_folder')[0]
    output_folder = dataiku.Folder(output_name)
    output_path = output_folder.get_path()

    os.chdir(output_path)

    # Clean output dir. We assume there is no subfolder.
    # (because this recipe never creates one. If there is, better fail than remove someone else's data)
    for file in os.listdir(output_path):
        os.remove(file)

    ExtractAPI.initialize()

    with Extract(output_filename()) as extract:
        assert (not extract.hasTable('Extract'))
        tableDef = makeTableDefinition(input_schema)
        table = extract.addTable('Extract', tableDef)
        insertData(input_dataset, table)
        extract.close()
    # Release the Extract API only after the 'with' block has let go of the
    # extract (the original called cleanup() while the block still held it).
    # Python 2 print statements also converted for Python 3.
    ExtractAPI.cleanup()
    print("End export to TDE")
    def __init__(self, tde_file_path, input_schema):
        """Prepare an empty TDE extract at *tde_file_path* for *input_schema*.

        @param tde_file_path: path of the .tde file to create
        @param input_schema: dataset schema used to build the table definition

        Side effects: initializes the Extract API and creates the 'Extract'
        table; self.extract/self.table stay open for subsequent row writes.
        """
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        # Running counters updated while rows are written.
        self.errors = 0
        self.nrows = 0

        # Python 2 print statement converted for Python 3.
        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        # The file must be fresh: a pre-existing 'Extract' table would mean
        # we are silently appending to someone else's data.
        assert (not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)
def writeTableToTDE(pgFields, pgData, extractLocation):
    """Write a Postgres result set to a .tde extract.

    @param pgFields: sequence of (column_name, tableausdk Type) pairs
    @param pgData: sequence of rows, each indexable by column position
    @param extractLocation: path of the .tde file to create or append to
    """
    # Original bug: print("...{0}").format(x) called .format on print's
    # return value (None), so the placeholder was never substituted.
    print("writing table to {0}".format(extractLocation))

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create (or open) the extract
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for field_name, field_type in pgFields:
        table_definition.addColumn(field_name, field_type)

    # 4. Initialize a new table in the extract
    # Only add the table if it doesn't already exist.
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create one Row object, reused for every record
    new_row = Row(table_definition)

    # 6. Populate each new row
    # Note that this doesn't cover all possible TDE data types.
    for i in range(len(pgData)):
        for j in range(len(pgFields)):
            # Original bugs fixed here: the BOOLEAN/DOUBLE branches indexed
            # pgFields[j][i] instead of [j][1], and the DOUBLE branch read
            # pgData[j][i] with the row/column indices swapped.
            field_type = pgFields[j][1]
            value = pgData[i][j]
            if field_type == Type.INTEGER:
                new_row.setInteger(j, value)
            elif field_type == Type.UNICODE_STRING:
                new_row.setString(j, value)
            elif field_type == Type.SPATIAL:
                new_row.setSpatial(j, value)
            elif field_type == Type.BOOLEAN:
                new_row.setBoolean(j, value)
            elif field_type == Type.DOUBLE:
                new_row.setDouble(j, value)
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
    def create_tde(self, tde_filename, append=False):
        """Create a .tde file, or reopen an existing one when append=True.

        @param tde_filename: path of the .tde file
        @param append: when True, open the existing 'Extract' table to add
            rows; when False and the file already holds data, log and exit.

        Raises TableauException (re-raised after logging).
        """
        try:
            # "with" handles closing the TDE correctly even on error.
            with Extract("{}".format(tde_filename)) as extract:
                self.tde_object = None
                # Create the table if the extract is fresh.
                if not extract.hasTable('Extract'):
                    # Table does not exist; create it.
                    self.log(u'Creating Extract with table definition')
                    self.tde_object = extract.addTable('Extract',
                                                       self.table_definition)
                else:
                    # File already contains data: append or bail out.
                    if append is True:
                        self.tde_object = extract.openTable('Extract')
                    else:
                        self.log("Output file '" + tde_filename +
                                 "' already exists.")
                        self.log(
                            "Append mode is off, please delete file and then rerun..."
                        )
                        sys.exit()
                    # TODO: row insertion (building Row objects from a DB
                    # cursor and inserting into self.tde_object) is not
                    # implemented yet; the original kept a large block of
                    # commented-out prototype code here.

        # Python 2 'except TableauException, e' syntax converted for Python 3.
        except TableauException as e:
            self.log('Tableau TDE creation error:{}'.format(e))
            raise
    def __init__(self, extract_file_path, input_schema):
        """Prepare a fresh Tableau extract file for writing.

        @param extract_file_path: path of the extract file to create
        @param input_schema: dataset schema used to build the table definition

        Side effects: initializes the Extract API and creates the single
        'Extract' table; self.extract/self.table stay open for row writes.
        """
        self.extract_file_path = extract_file_path
        # Directory containing the extract (also used for auxiliary output).
        self.output_path = os.path.dirname(self.extract_file_path)
        self.input_schema = input_schema
        self.format_type = TABLEAU_FORMAT

        # Counters updated while rows are written.
        self.errors = 0
        self.nrows = 0

        print("Writing Extract {} file: {}".format(self.format_type,
                                                   self.extract_file_path))
        ExtractAPI.initialize()
        self.extract = Extract(self.extract_file_path)
        # The target file must be fresh: an existing 'Extract' table would
        # mean we are appending to previously written data.
        assert (not self.extract.hasTable('Extract'))
        self.table_def = TableauExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)
def publishCsvDatasource(serverURL, siteName, username, password, filename):
    """Build a .tde from a CSV file and publish it to a Tableau Server site.

    @param serverURL / siteName / username / password: server credentials
    @param filename: CSV file; first row = headers, first data row is used
        to infer column types (via makeTableDefinition)
    """
    # Derive the extract name/path from the CSV filename ('data.csv' -> 'data.tde').
    extractName = os.path.basename(filename).split('.', 2)[0]
    extractFilename = extractName + '.tde'
    # Start from a fresh file; a missing file is fine.
    try:
        os.remove(extractFilename)
    except OSError:
        pass

    ExtractAPI.initialize()
    with Extract(extractFilename) as extract:
        with open(filename) as csvfile:
            fileReader = csv.reader(csvfile, delimiter=',')
            headers = []
            table = None
            for fileRow in fileReader:
                print('Reading: ' + str(fileRow))
                if not headers:
                    # First row holds the column names.
                    headers = fileRow
                else:
                    # Lazily create the table from the first data row
                    # (original compared with '== None' / '== []').
                    if table is None:
                        tableDef = makeTableDefinition(headers, fileRow)
                        table = extract.addTable('Extract', tableDef)
                    insertData(table, fileRow)
            # (The original also called csvfile.close() here, which the
            #  'with open(...)' block already does.)
        extract.close()
    ExtractAPI.cleanup()

    # Initialize Tableau Server API
    ServerAPI.initialize()
    serverConnection = ServerConnection()
    serverConnection.connect(serverURL, username, password, siteName)

    # Publish the extract to the server under the default project
    serverConnection.publishExtract(extractFilename, 'default', extractName,
                                    False)

    # Disconnect from the server
    serverConnection.disconnect()

    # Destroy the server connection object
    serverConnection.close()

    # Clean up Tableau Server API
    ServerAPI.cleanup()
Exemple #10
0
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """
        # Sweep away any previous extract plus the SDK's debug logs.
        stale_files = [path,
                       os.path.dirname(path) + '/debug.log',
                       './DataExtract.log',
                       './debug.log']
        for stale in stale_files:
            if os.path.isfile(stale):
                os.remove(stale)

        # Spin up the Extract API, the extract, and an empty table definition.
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        if add_index:
            # Pick an index column name that does not collide with the frame.
            index_col, suffix = 'index', 1
            while index_col in self._dataframe.columns:
                index_col = '%s_%s' % (index_col, suffix)
                suffix += 1
            table_def.addColumn(index_col, Type.INTEGER)

        # Mirror every DataFrame column into the table definition.
        for position, column in enumerate(self._dataframe):
            table_def.addColumn(column, self._column_static_type[position])

        # Materialize the table.
        new_table = new_extract.addTable(table_name, table_def)

        # Push the row data into the table.
        self.set_column_values(new_table, table_def, add_index)

        # Flush the extract and release the API.
        new_extract.close()
        ExtractAPI.cleanup()
Exemple #11
0
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """
        # Sweep away SDK log files and hyper_db temp artifacts from prior runs.
        leftovers = glob('DataExtract*.log') + glob('hyper_db_*')
        leftovers += [os.path.dirname(path) + '/debug.log',
                      './DataExtract.log',
                      './debug.log']
        for leftover in leftovers:
            if os.path.isfile(leftover):
                os.remove(leftover)

        # Open (or create) the extract.
        ExtractAPI.initialize()
        new_extract = Extract(path)

        # Build the table only when this extract does not already contain it.
        if not new_extract.hasTable(table_name):
            print(
                'Table \'{}\' does not exist in extract {}, creating.'.format(
                    table_name, path))
            self.set_table_structure(new_extract, table_name, add_index)

        # Reopen the (new or pre-existing) table and fetch its definition.
        new_table = new_extract.openTable(table_name)
        table_def = new_table.getTableDefinition()

        # Write the DataFrame rows.
        self.set_column_values(new_table, table_def, add_index)

        # Persist and shut down the API.
        new_extract.close()
        ExtractAPI.cleanup()
Exemple #12
0
def to_tde(dataset, tde_mode, tde_name):
    """Write *dataset* (a frame with columns Station, Time, Date, all, n7,
    n8, y_serries) into the .tde file *tde_name*.

    @param tde_mode: 'new' creates a fresh extract (refuses to overwrite an
        existing one); 'append' adds rows to an existing extract.
    """
    if tde_mode == 'new':

        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)

        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Insert the rows
        _to_tde_insert_rows(table, dataSchema, dataset)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':

        # Step 1: Open the existing Extract File
        preExtract = Extract(tde_name)

        # Step 2: Open the existing table and import its definition
        table = preExtract.openTable('Extract')
        tableDef = table.getTableDefinition()

        # Step 3: Insert the rows
        _to_tde_insert_rows(table, tableDef, dataset)

        # Step 4: Close the extract
        preExtract.close()


def _to_tde_insert_rows(table, schema, dataset):
    """Insert every record of *dataset* into *table*.

    Shared by the 'new' and 'append' branches of to_tde — the original
    duplicated this loop verbatim in both.
    """
    newRow = Row(schema)
    for i in range(len(dataset)):
        print(i)
        newRow.setString(0, dataset['Station'][i])
        newRow.setString(1, dataset['Time'][i])
        newRow.setDate(2, dataset['Date'][i].year,
                       dataset['Date'][i].month, dataset['Date'][i].day)
        newRow.setDouble(3, dataset['all'][i])
        newRow.setDouble(4, dataset['n7'][i])
        newRow.setDouble(5, dataset['n8'][i])
        newRow.setDouble(6, dataset['y_serries'][i])
        table.insert(newRow)
from tableausdk import Extract as tde  # **Module change from py2 for py3
from tableausdk import Types as tdeTypes  # **
import os

# Step 1: start from a clean slate — drop any previous extract file.
if os.path.isfile('trivialExample.tde'):
    os.remove('trivialExample.tde')

tdefile = tde.Extract('trivialExample.tde')

# Step 2: describe the table — a string row label and a numeric value.
tableDef = tde.TableDefinition()
tableDef.addColumn('rowID', tdeTypes.Type.CHAR_STRING)  # column index 0
tableDef.addColumn('value', tdeTypes.Type.DOUBLE)  # column index 1

# Step 3: materialize the table from the definition (TDE tables must be
# named 'Extract').
table = tdefile.addTable('Extract', tableDef)

# Step 4: write 100 rows ('Row 1' .. 'Row 100'), reusing one Row object.
newrow = tde.Row(tableDef)
for row_number in range(1, 101):
    newrow.setCharString(0, 'Row ' + str(row_number))
    newrow.setDouble(1, row_number)
    table.insert(newrow)

# Step 5: flush and close the tde.
tdefile.close()
    def assemble_tde(self):
        """ Gather the data information and create the Tde.

        On any failure, dumps the DataFrame to pickle/csv next to the tde so
        the data is not lost, then raises with the original error location.
        :return:
        """
        try:
            # Python 2 print statement converted for Python 3.
            print("Creating extract:", self.file_name)

            ExtractAPI.initialize()

            with Extract(self.file_name) as data_extract:
                # If the extract already exists, append to its table.
                if data_extract.hasTable('Extract'):
                    # Open an existing table to add more rows
                    table = data_extract.openTable('Extract')
                    table_definition = table.getTableDefinition()
                else:
                    # Fresh extract: build the schema from the stored dtypes.
                    table_definition = TableDefinition()
                    for each in self.data_types.reset_index(level=0).values:
                        # each = (column_name, pandas dtype name)
                        table_definition.addColumn(
                            str(each[0]), schema_type_map[str(each[1])])
                    # Create the Table with the table definition
                    table = data_extract.addTable("Extract", table_definition)

                new_row = Row(table_definition)
                count = self.data_frame.shape[0]
                pbar = Bar(count)

                # Copy the dataframe into the table row by row.
                for i in range(count):
                    for j, column_name in enumerate(
                            self.data_types.index.values.tolist()):
                        self.add_to_row(new_row, j,
                                        self.data_frame[column_name].iloc[i],
                                        self.data_types['data_type'][j],
                                        column_name)
                    table.insert(new_row)

                    # Remember the last row written, for post-mortem debugging.
                    self.last_data_row_extracted = self.data_frame.iloc[
                        i].to_frame().transpose()
                    pbar.passed()

            # The 'with' block already closed the extract (the original also
            # called data_extract.close() here, redundantly); only the API
            # itself still needs releasing.
            ExtractAPI.cleanup()
        except Exception:
            # Narrowed from a bare 'except:' so Ctrl-C still interrupts.
            file_name = self.tde_file()

            # Clean up resources
            Extract(file_name).close()
            ExtractAPI.cleanup()

            # Create csv and pickle of the data
            self.data_frame.to_pickle(file_name.replace('.tde', '_pickle'))
            self.data_frame.to_csv(file_name.replace('.tde', '.csv'),
                                   index=False,
                                   encoding='utf-8')
            raise Exception(
                "Error in creating tde file please consult data files. \n%s\n%s. \n%s, %s, %s"
                % (file_name.replace(
                    '.tde', '_pickle'), file_name.replace('.tde', '.csv'),
                   'Error on line {}'.format(sys.exc_info()[-1].tb_lineno),
                   sys.exc_info()[0], sys.exc_info()[1]))
    pass

### Map pandas/numpy column types onto Tableau SDK data types ###
# NOTE(review): 'column_types', 'column_headers_types', 'incremental_refresh'
# and 'extract_name' must be defined earlier in the script.
for i in column_types:
    if i is numpy.int64:
        column_headers_types.append(Type.INTEGER)
    elif i is numpy.float64:
        column_headers_types.append(Type.DOUBLE)
    elif i is pd.Timestamp:
        column_headers_types.append(Type.DATETIME)
    else:
        # Anything unrecognized is stored as text.
        column_headers_types.append(Type.UNICODE_STRING)

### Getting the existing table schema for incremental refresh ###
if incremental_refresh == 1:
    dataExtract = Extract(extract_name)
    if dataExtract.hasTable('Extract'):
        table = dataExtract.openTable('Extract')
        dataSchema = table.getTableDefinition()

else:
    ### Removing extract if it already exists (for full refresh) ###
    try:
        cwd = os.getcwd()
        for f in os.listdir(cwd):
            if re.search(extract_name, f):
                os.remove(os.path.join(cwd, f))
    # Fixed from Python 2 'except OSError, e' (a SyntaxError under
    # Python 3); the bound exception was never used anyway.
    except OSError:
        pass

    ### Initializing the Extract API, and applying the schema to the table ###
Exemple #16
0
# Column-index constants into the raw flight-data rows.
# NOTE(review): this fragment starts mid-list; earlier constants are outside view.
DEST_LON =  4
SEG_NAME = 9
DEP_TIME = 6 # Departure time (minutes from midnight)
DEP_TIME_HRS = 0
FLIGHT_NUM = 2
FLIGHT_TIME = 5

#####################################################################
## Process the data and write the TDE
#####################################################################

# 1. initialize a new extract
ExtractAPI.initialize()

# 2. Create a table definition
# NOTE(review): 'extractLocation' must be defined earlier in the script.
new_extract = Extract(extractLocation)

# 3. Add column definitions to the table definition
# One column per field of the flight-route output, in insertion order.
table_definition = TableDefinition()
table_definition.addColumn('route', Type.UNICODE_STRING)  # column 0
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)
# Inner-join each (dataframe, join_key) pair in join_info onto the running
# result frame.
# NOTE(review): 'join_info', 'joinDF_auto', 'output_CSV' and 'output_Extract'
# must be defined earlier in the script; 'pd' is pandas.
for i in join_info:
    #joinedDF = pd.merge(outputDF, VPS_DF, on='IntubationCycle', how='left')
    joinDF_auto = pd.merge(joinDF_auto, i[0], on=i[1], how='inner')

# Write the joined result to CSV.
joinDF_auto.to_csv(output_CSV, index=False, encoding='utf-8')

# Typecast ClaimDate to a datetime column (required for the DATE column below).
joinDF_auto['ClaimDate'] = pd.to_datetime(joinDF_auto['ClaimDate'])

#------------------------------------------------------------------------------
#   Create Extract
#------------------------------------------------------------------------------
ExtractAPI.initialize()

new_extract = Extract(output_Extract)

# Create a new table definition with 3 columns
table_definition = TableDefinition()
table_definition.addColumn('Patient_ID', Type.INTEGER)  # column 0
table_definition.addColumn('ClaimDate', Type.DATE)  # column 1
table_definition.addColumn('Patient Name', Type.UNICODE_STRING)  # column 2

#table_definition.addColumn('Claim_ID', Type.INTEGER)        		# column 3
#table_definition.addColumn('Discount', Type.DOUBLE) 				# column 4

#------------------------------------------------------------------------------
#   Populate Extract
#------------------------------------------------------------------------------

# In the TDE format the single table must be named 'Extract'.
new_table = new_extract.addTable('Extract', table_definition)
Exemple #18
0
def tableau_extract(resultset, data_file):
    """
    Create a TDE extract at *data_file* from *resultset* (a ResultSet or a
    pandas DataFrame).

    Column SDK types are inferred from the Python type of the first value of
    each column; unmapped types fall back to UNICODE_STRING. Unconvertible
    cell values are written as NULL.
    """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset
    data_type = []
    fieldnames = []
    # Python type -> Tableau SDK type.
    # (pd.Timestamp is the public alias; the original imported it from the
    #  private pd._libs.tslib module, which newer pandas versions removed.)
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd.Timestamp: Type.DATETIME,
                     }

    # Infer each column's SDK type from its first value.
    for col in df_name:
        fieldnames.append(col)
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))

    for col_name in data_dict:
        data_dict[col_name] = data_type_map.get(data_dict[col_name],
                                                Type.UNICODE_STRING)

    # Initialize a new extract, starting from a fresh file.
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)

    # Write every row, dispatching directly on the SDK Type constants.
    # (The original compared against a hand-maintained magic-number table
    #  {'INTEGER': 7, ...} that merely mirrored the Type enum values.)
    for i in range(len(df_name)):
        for col in range(table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            sdk_type = data_dict[col_name]
            try:
                if sdk_type == Type.INTEGER:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif sdk_type == Type.DOUBLE:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif sdk_type == Type.BOOLEAN:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif sdk_type == Type.DATE:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif sdk_type == Type.DATETIME:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year, data.month, data.day,
                                        data.hour, data.minute, data.second, 0)
                elif sdk_type == Type.DURATION:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute,
                                        data.second, 0)
                elif sdk_type == Type.CHAR_STRING:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif sdk_type == Type.UNICODE_STRING:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Error')
                    new_row.setNull(col)
            except TypeError:
                # Unconvertible value: store NULL rather than abort the export.
                new_row.setNull(col)
        new_table.insert(new_row)

    new_extract.close()
    ExtractAPI.cleanup()
    # Remove the SDK's log files.
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
Exemple #19
0
def create(target_filepath, dataframe):
    """Write *dataframe* (pandas) to a brand-new TDE at *target_filepath*.

    @param target_filepath: path of the .tde file (replaced if it exists)
    @param dataframe: pandas DataFrame; column SDK types are chosen from the
        pandas dtype of each column

    Missing values (None or NaN) are stored as NULL.
    """
    column = []
    # pandas dtype name (left) -> Tableau SDK data type (right).
    fieldMap = {
        'float64': tde.Types.Type.DOUBLE,
        'float32': tde.Types.Type.DOUBLE,
        'int64': tde.Types.Type.INTEGER,
        'int8': tde.Types.Type.INTEGER,
        'object': tde.Types.Type.UNICODE_STRING,
        'bool': tde.Types.Type.DOUBLE,
        'datetime64[ns]': tde.Types.Type.DATETIME
    }

    def _missing(value):
        # True for None and for float NaN (NaN != NaN). The original tested
        # only 'is None', which never matches pandas' NaN placeholder, so
        # missing floats/objects reached the setters and crashed.
        return value is None or value != value

    # Remove a pre-existing TDE so we always start fresh.
    if os.path.isfile(target_filepath):
        os.remove(target_filepath)

    tdefile = tde.Extract(target_filepath)
    schema = tde.TableDefinition()  # define the table definition

    colnames = dataframe.columns  # all column names
    coltypes = dataframe.dtypes  # all column dtypes

    # Build the TDE schema from the DataFrame.
    for i in range(len(colnames)):
        cname = colnames[i]
        ctype = fieldMap.get(str(coltypes[i]))
        schema.addColumn(cname, ctype)  # add one column (name, SDK type)
        column.append(cname)

    # Open the TDE file and write the data.
    with tdefile as extract:
        table = extract.addTable("Extract", schema)
        for r in range(dataframe.shape[0]):
            row = tde.Row(schema)
            # Progress: rows remaining (message text kept verbatim).
            count = dataframe.shape[0] - r - 1
            print('剩' + str(count) + '筆', end="\r")
            # Write each cell according to its column dtype.
            for c in range(len(coltypes)):
                value = dataframe.iloc[r, c]
                dtype = str(coltypes[c])
                if dtype == 'float64':
                    if _missing(value):
                        row.setNull(c)
                    else:
                        row.setDouble(c, value)
                elif dtype == 'float32':
                    row.setDouble(c, value)
                elif dtype == 'int64':
                    if _missing(value):
                        row.setNull(c)
                    else:
                        row.setInteger(c, value)
                elif dtype == 'int8':
                    row.setInteger(c, value)
                elif dtype == 'object':
                    if _missing(value):
                        row.setNull(c)
                    else:
                        row.setString(c, value)
                elif dtype == 'bool':
                    row.setBoolean(c, value)
                elif dtype == 'datetime64[ns]':
                    try:
                        dt = value
                        row.setDateTime(c, dt.year, dt.month, dt.day, dt.hour,
                                        dt.minute, dt.second, 0)
                    except Exception:
                        # NaT or malformed timestamp -> NULL
                        # (narrowed from a bare 'except:').
                        row.setNull(c)
                else:
                    row.setNull(c)
            # insert the completed row
            table.insert(row)
    # The 'with' block already closed the extract; the original's extra
    # tdefile.close() here was redundant and has been removed.
Exemple #20
0
    def build(self):
        """Stream the configured input file into a fresh .tde, printing
        progress at every even percent of input consumed.

        Raises whatever the field setter raised when a cell contains data
        invalid for its column type (after logging the column and value).
        """
        # Start from a clean output file.
        tdefileName = self.tde_settings_ins.tde_file
        if os.path.isfile(tdefileName):
            os.remove(tdefileName)
        # Set the extract object instance.
        tdefile = tdeEx.Extract(tdefileName)
        # Create the column schema, adding columns in configured index order.
        tableDef = tdeEx.TableDefinition()
        outputColIndex = {}
        outputColNumber = 0
        for columnName, index in sorted(
                self.tde_settings_ins.columnIndex.items(), key=lambda x: x[1]):
            BasicDataType = self.tde_settings_ins.columns[columnName]
            tableDef.addColumn(columnName,
                               tdeSettings.schemaIniTypeMap[BasicDataType])
            outputColIndex[columnName] = outputColNumber
            outputColNumber += 1
        # Attach the column schema to the extract.
        table = tdefile.addTable("Extract", tableDef)
        rowNo = 0

        rowsIterator = self.tde_settings_ins.inputInfo.fileInformation.yieldRowsBase(
        )
        printedBool = False
        startTime = datetime.datetime.now()
        # Python 2 print statements converted for Python 3 throughout.
        print("Build start time:", datetime.datetime.strftime(
            startTime, '%Y-%m-%d %H:%M:%S'))
        prevPct = -1
        for myReaderRow in rowsIterator:
            putRow = tdeEx.Row(tableDef)
            for columnName, index in self.tde_settings_ins.columnIndex.items():
                # Empty non-string fields become NULL (strings keep "").
                if (tdeSettings.schemaIniTypeMap[
                        self.tde_settings_ins.columns[columnName]] !=
                        tde.Type.UNICODE_STRING and myReaderRow[index] == ""):
                    putRow.setNull(outputColIndex[columnName])
                else:
                    try:
                        # Dispatch to the setter for this column's SDK type.
                        self.fieldSetterMap[tdeSettings.schemaIniTypeMap[
                            self.tde_settings_ins.columns[columnName]]](
                                putRow, outputColIndex[columnName],
                                myReaderRow[index])
                    except Exception:
                        # Narrowed from a bare 'except:'; log then re-raise.
                        print("column name", columnName, "contains invalid data")
                        print("value: ", myReaderRow[index])
                        print(sys.exc_info()[0])
                        raise
            table.insert(putRow)
            rowNo += 1
            # Progress reporting: print each even percent exactly once.
            pct = math.floor(
                self.tde_settings_ins.inputInfo.fileInformation.getPercent(
                    rowNo))
            if pct % 2 == 0:
                if printedBool == False or prevPct != pct:
                    print("Percent:", round(pct, 0), "%, row:", rowNo)
                    printedBool = True
                    prevPct = pct
            else:
                printedBool = False
        print("tde file built to:", tdefileName)
        endTime = datetime.datetime.now()
        print("End Time:", endTime)
        diff = endTime - startTime
        print("Build elapsed time:", diff)
Exemple #21
0
## import the libraries
import tableausdk.Extract as tde
import pandas as pd
import os

## Bring in a sample dataset: the UCLA Graduate School Admissions data.
## NOTE: read_csv fetches straight from the URL, so this needs network access.
file_name = "http://www.ats.ucla.edu/stat/data/binary.csv"
df = pd.read_csv(file_name)
## REPL-style inspection calls; their results are discarded when run as a script.
df.head()
df.shape

## Create the extract; if opening fails (e.g. a stale or locked file from a
## previous run), delete the .tde and the SDK log, then retry once.
## NOTE(review): the bare except hides the real error, and 'del' is a
## Windows-only shell command -- this script is not portable as written.
fname = "example.tde"
try:
    tdefile = tde.Extract(fname)
except:
    os.system('del ' + fname)
    os.system('del DataExtract.log')
    tdefile = tde.Extract(fname)

## define the table definition (schema) for the extract
tableDef = tde.TableDefinition()

## capture the DataFrame's column names and pandas dtypes
colnames = df.columns
coltypes = df.dtypes

## Map pandas dtype names to Tableau SDK column types.
## Caveat: not all of the possible dtypes are covered below.
fieldMap = {
    'float64': tde.Types.Type.DOUBLE,
from tableausdk import *
from tableausdk.Server import *
from tableausdk.Extract import *
import tableausdk.Extract as tde

# Create (or open) a new .tde extract file on disk.
tdefile = tde.Extract('test1.tde')

# Define the table schema. The second addColumn argument here is the raw
# Tableau SDK type code rather than the Type enum constant.
tableDef = tde.TableDefinition()
tableDef.addColumn("company", 15)  # 15 = Type.CHAR_STRING (matches setCharString below)
tableDef.addColumn("projected", 10)  # 10 = Type.DOUBLE (matches setDouble below)
tableDef.addColumn("realRev", 10)  # 10 = Type.DOUBLE

# Add the table to the extract file.
# (In the Tableau Data Engine, the table must be named 'Extract'.)
tabletran = tdefile.addTable("Extract", tableDef)

# Create a new row matching the table definition.
newrow = tde.Row(tableDef)
# Populate the row's values.
# The first parameter is the zero-based column index.
newrow.setCharString(0, 'myCompany')
newrow.setDouble(1, 1000)
newrow.setDouble(2, 888)
# Append the populated row to the table.
tabletran.insert(newrow)

# Start a second row.
newrow = tde.Row(tableDef)
newrow.setCharString(0, 'myCompany2')
# NOTE(review): the fragment is truncated here -- this second row is never
# inserted (and the extract is never closed) in the visible code.
Exemple #23
0
def extract(file_name):
    """Load a CSV of collected tweets into the TRACK_TERM Tableau extract.

    Moves *file_name* into WORKING_DIRECTORY/extract/, appends every row of
    the CSV to the extract named '<TRACK_TERM>.tde' (creating the file and
    its table on first use), then renames the consumed CSV to a unique
    unix-timestamp name so a failed run can be retried without filename
    collisions.

    :param file_name: CSV file name, relative to WORKING_DIRECTORY
    :returns: None
    """
    global WORKING_DIRECTORY, TRACK_TERM

    if not os.path.exists(WORKING_DIRECTORY + '/extract/'):
        os.makedirs(WORKING_DIRECTORY + '/extract/')

    # Move the input file into the working extract directory.
    from_path = WORKING_DIRECTORY + '/' + file_name
    to_path = WORKING_DIRECTORY + '/extract/' + file_name

    os.rename(from_path, to_path)

    os.chdir(WORKING_DIRECTORY + '/extract')

    # Open (or create) the extract; the 'with' block guarantees it is closed.
    with data.Extract(TRACK_TERM + '.tde') as extract:

        tableDef = data.TableDefinition()

        # Define the columns and the data types in the extract.
        tableDef.addColumn('lang', types.Type.CHAR_STRING)  #0
        tableDef.addColumn('sentiment', types.Type.DOUBLE)  #1
        tableDef.addColumn('country', types.Type.CHAR_STRING)  #2
        tableDef.addColumn('created_at', types.Type.DATETIME)  #3
        tableDef.addColumn('tweet_text', types.Type.CHAR_STRING)  #4
        tableDef.addColumn('Longitude', types.Type.DOUBLE)  #5
        tableDef.addColumn('source', types.Type.CHAR_STRING)  #6
        tableDef.addColumn('user', types.Type.CHAR_STRING)  #7
        tableDef.addColumn('Latitude', types.Type.DOUBLE)  #8

        # (The Tableau Data Engine requires the table to be named 'Extract'.)
        if not extract.hasTable('Extract'):
            # Table does not exist, so create it.
            print("Creating a new extract")
            table = extract.addTable('Extract', tableDef)
        else:
            # Table exists, so append the new data.
            print("Appending to an existing extract")
            table = extract.openTable('Extract')

        # A single Row object is reused for every insert.
        new_row = data.Row(tableDef)

        # Read the data from the CSV into the extract, row by row.
        with open(file_name, 'r') as inf:
            reader = csv.DictReader(inf, delimiter=',', lineterminator='\n')
            for row in reader:
                # Insert data into the row object in the order defined above.
                new_row.setCharString(0, row['lang'])

                new_row.setDouble(1, float(row['sentiment']))

                new_row.setCharString(2, row['country'])

                # Parse the twitter date string, e.g.:
                #   Mon Sep 21 11:03:53 +0000 2015
                created = datetime.strptime(row['created_at'],
                                            '%a %b %d %H:%M:%S +0000 %Y')
                # Read the components straight off the datetime object
                # instead of round-tripping through strftime()/int().
                # Final 0 = fractions of a second (milliseconds).
                new_row.setDateTime(3, created.year, created.month,
                                    created.day, created.hour,
                                    created.minute, created.second, 0)

                new_row.setCharString(4, row['tweet_text'])

                # Missing or unparseable longitude defaults to 0.
                try:
                    longitude = float(row['longitude'])
                except (KeyError, TypeError, ValueError):
                    longitude = 0
                new_row.setDouble(5, longitude)

                new_row.setCharString(6, row['source'])
                new_row.setCharString(7, row['user'])

                # Missing or unparseable latitude defaults to 0.
                try:
                    latitude = float(row['latitude'])
                except (KeyError, TypeError, ValueError):
                    latitude = 0
                new_row.setDouble(8, latitude)

                table.insert(new_row)

    # If the process fails we want to be able to re-run it without collisions
    # between file names, so give each consumed file a unique name
    # (unix time stamp in this case).
    os.rename(file_name, str(time.time()).split('.')[0] + '.csv')

    # cd back to the working directory.
    os.chdir(WORKING_DIRECTORY)

    return
Exemple #24
0
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
    """Write a 2-D heatmap grid to a Tableau extract, one row per cell.

    heatmap         -- 2-D sequence of cell values (rows x cols)
    extent          -- (x0, x1, y0, y1) bounds of the grid in map units
    smoothing       -- smoothing factor recorded alongside every cell
    extractLocation -- path of the extract file to create or extend
    """
    n_rows = len(heatmap)
    n_cols = len(heatmap[0])
    x_span = abs(extent[0] - extent[1])
    y_span = abs(extent[2] - extent[3])
    x_origin = min(extent[0], extent[1])
    y_origin = min(extent[2], extent[3])
    x_step = x_span / n_cols
    y_step = y_span / n_rows

    # Bring up the Extract API, then open (or create) the extract file.
    ExtractAPI.initialize()
    extract = Extract(extractLocation)

    # Schema: one output row per heatmap cell.
    schema = TableDefinition()
    schema.addColumn('ROW', Type.UNICODE_STRING)    # column 0
    schema.addColumn('COL', Type.UNICODE_STRING)    # column 1
    schema.addColumn('VALUE', Type.DOUBLE)          # column 2
    schema.addColumn('ID', Type.UNICODE_STRING)
    schema.addColumn('CellCount', Type.INTEGER)
    schema.addColumn('Smoothing', Type.DOUBLE)
    schema.addColumn('GEOM', Type.SPATIAL)

    # Reuse the table when it already exists, otherwise create it.
    if extract.hasTable('Extract'):
        table = extract.openTable('Extract')
    else:
        table = extract.addTable('Extract', schema)

    # A single Row object is reused for every insert.
    row = Row(schema)

    # Walk the grid. Coordinates are accumulated step-by-step, so each
    # cell's reference corner sits one increment beyond the minimum.
    y_coord = y_origin
    for r in range(n_rows):
        y_coord += y_step
        x_coord = x_origin
        for c in range(n_cols):
            x_coord += x_step

            cell_wkt = getFourCorners([x_coord, y_coord], x_step, y_step)

            row.setString(0, str(r))                 # ROW
            row.setString(1, str(c))                 # COL
            row.setDouble(2, heatmap[r][c])          # VALUE
            row.setString(3, str(r) + '-' + str(c))  # ID
            row.setInteger(4, n_cols)                # cell count
            row.setDouble(5, smoothing)              # smoothing
            row.setSpatial(6, cell_wkt)              # WKT spatial
            table.insert(row)

    # Flush the table to disk, then release the Extract API.
    extract.close()
    ExtractAPI.cleanup()
    return
Exemple #25
0
    # Decide the output filename: an explicit override wins, otherwise it is
    # derived from the CSV name.
    if arg_tde_filename:
        tde_filename = arg_tde_filename
    else:
        tde_filename = csv_filename.replace(".csv", ".tde")
    # print "Will write to [%s]" % os.path.join(tde_dirpath, tde_filename)
    # print

    # Step 1: Create the Extract file and open the csv
    tde_fullpath = os.path.join(tde_dirpath, tde_filename)
    # if the file doesn't exist, revoke the append
    if arg_append and not os.path.isfile(tde_fullpath):
        print "Couldn't append -- file doesn't exist."
        arg_append = False
    # Fresh build: remove any stale extract so Extract() creates a new one.
    if not arg_append and os.path.isfile(tde_fullpath):
        os.remove(tde_fullpath)
    new_extract = Extract(tde_fullpath)

    # Define the columns by the first csv. arg_header supplies the column
    # names (presumably the CSVs are headerless -- TODO confirm with caller).
    table_df = pandas.read_csv(os.path.join(csv_dirpaths[0], csv_filename),
                               names=arg_header)
    # Left-join each auxiliary table on whatever column names they share.
    for join_table_file in arg_join:
        join_df = pandas.read_csv(join_table_file)
        table_df = pandas.merge(table_df, join_df, how='left')

    # Step 2: Create the tableDef, mapping pandas dtypes to extract types.
    table_definition = TableDefinition()
    old_colnames = []
    colnames_to_types = {}
    new_colnames_to_idx = {}
    for (col, dtype) in itertools.izip(table_df.columns, table_df.dtypes):
        colname = col.strip()  # strip whitespace
Exemple #26
0
 def new_tde(self):
     """Open the extract at self.tde_name and return it.

     Returns the open Extract object when its 'Extract' table does not yet
     exist; otherwise prints a warning and returns None.
     """
     dataExtract = Extract(self.tde_name)
     if dataExtract.hasTable('Extract'):
         # Release the file handle before bailing out -- the original left
         # the extract open, leaking the handle/lock on the .tde file.
         dataExtract.close()
         print("tde already exist use another name")
         return None
     return dataExtract