Example no. 1
    def to_tableau(self, path, add_index=False):
        '''
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param add_index = adds incrementing integer index before dataframe columns

        '''
        
        # Delete the extract and debug logs if they already exist
        for file in [path, os.path.dirname(path) + '/debug.log',
                     './DataExtract.log', './debug.log']:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        # Set columns in Tableau
        if add_index:
            table_def.addColumn('index', Type.INTEGER)

        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])

        # Create table
        new_table = new_extract.addTable("Extract", table_def)

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)
        
        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
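A minimal usage sketch for the method above, assuming it lives on a pandleau-style wrapper that stores the DataFrame as self._dataframe; the Pandleau name and constructor are assumptions, not part of the example:

import pandas as pd

df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})
wrapper = Pandleau(df)  # hypothetical wrapper exposing to_tableau()
wrapper.to_tableau('output.tde', add_index=True)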
def createOrOpenExtract(filename, useSpatial):
    try:
        # Create Extract Object
        # (NOTE: The Extract constructor opens an existing extract with the
        #  given filename if one exists or creates a new extract with the given
        #  filename if one does not)
        extract = Extract(filename)

        # Define Table Schema (If we are creating a new extract)
        # (NOTE: In Tableau Data Engine, all tables must be named 'Extract')
        if (not extract.hasTable('Extract')):
            schema = TableDefinition()
            schema.setDefaultCollation(Collation.EN_GB)
            schema.addColumn('Purchased', Type.DATETIME)
            schema.addColumn('Product', Type.CHAR_STRING)
            schema.addColumn('uProduct', Type.UNICODE_STRING)
            schema.addColumn('Price', Type.DOUBLE)
            schema.addColumn('Quantity', Type.INTEGER)
            schema.addColumn('Taxed', Type.BOOLEAN)
            schema.addColumn('Expiration Date', Type.DATE)
            schema.addColumnWithCollation('Produkt', Type.CHAR_STRING,
                                          Collation.DE)
            if (useSpatial):
                schema.addColumn('Destination', Type.SPATIAL)
            table = extract.addTable('Extract', schema)
            if table is None:
                print 'A fatal error occurred while creating the table:\nExiting now.'
                exit(-1)

    except TableauException, e:
        print 'A fatal error occurred while creating the new extract:\n', e, '\nExiting now.'
        exit(-1)
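A hedged sketch of the row-insertion step that would typically follow inside the if-block above; schema and table are the locals created there, and the values are illustrative (column ordinals follow the addColumn order):

row = Row(schema)  # pass the table definition to the Row constructor
row.setDateTime(0, 2018, 7, 3, 11, 40, 12, 4550)  # Purchased
row.setCharString(1, 'Beans')                     # Product
row.setString(2, u'uniBeans')                     # uProduct
row.setDouble(3, 1.08)                            # Price
row.setInteger(4, 6)                              # Quantity
row.setBoolean(5, True)                           # Taxed
row.setDate(6, 2029, 1, 1)                        # Expiration Date
table.insert(row)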
class TDEExport(object):
    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"],Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        self.errors = 0
        self.nrows = 0

        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print "Encountered %d errors" % self.errors

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                self.errors += 1
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 == 0):
                    try:
                        print "[err #%s] Failed setting: col=%s type=%s val=%s  err=%s" % (self.errors, col["name"], col["type"], data, e)
                    except Exception, e2:
                        print "[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED  err=%s" % (self.errors, col["name"], col["type"], e)
        self.table.insert(output_row)
Example no. 5
class TDEExport(object):

    @staticmethod
    def convert_type(type):
        return typeMap.get(type,Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                print "Failed setting field %s to value %s: %s" % (col["name"], data, e)
                pass
        self.table.insert(output_row)
Example no. 7
    def new_tde(self, data):
        # Step 1: Create the Extract File
        dataExtract = Extract(self.tde_name)
        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")

        # Step 2: Create the table definition
        cols = data.columns.tolist()
        dataSchema = TableDefinition()
        for col in cols:
            dataSchema.addColumn(col, Type.UNICODE_STRING)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        cols = data.columns.tolist()
        for i in range(0, len(data)):
            for col_index, col in enumerate(cols):
                newRow.setString(col_index, data[col][i])
            table.insert(newRow)

        # Step 5: Close the tde
        dataExtract.close()
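A usage sketch for new_tde; the TdeWriter owner class is hypothetical and stands for whatever object sets self.tde_name, and the DataFrame content is illustrative:

import pandas as pd

writer = TdeWriter('sales.tde')  # hypothetical owner that sets self.tde_name
writer.new_tde(pd.DataFrame({'region': ['North', 'South'], 'rep': ['Ann', 'Bo']}))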
def tde_export():
    print "Start export to TDE"
    input_name = get_input_names_for_role('input')[0]
    input_dataset = dataiku.Dataset(input_name)
    input_schema = input_dataset.read_schema()
    partitions = input_dataset.list_partitions(raise_if_empty=False)
    if partitions not in [[], [u'NP']]:
        raise Exception(
            "Due to the current APIs, this plugin cannot support partitioned input "
            "(and it seems the input dataset " + input_name +
            " is partitioned). "
            "A workaround is to first run a sync recipe "
            "from " + input_name + " into a non partitioned dataset, "
            "then take the latter as input for tde export.")
    output_name = get_output_names_for_role('output_folder')[0]
    output_folder = dataiku.Folder(output_name)
    output_path = output_folder.get_path()

    os.chdir(output_path)

    # Clean output dir. We assume there is no subfolder.
    # (because this recipe never creates one. If there is, better fail than remove someone else's data)
    for file in os.listdir(output_path):
        os.remove(file)

    ExtractAPI.initialize()

    with Extract(output_filename()) as extract:
        assert (not extract.hasTable('Extract'))
        tableDef = makeTableDefinition(input_schema)
        table = extract.addTable('Extract', tableDef)
        insertData(input_dataset, table)
        extract.close()
        ExtractAPI.cleanup()
    print "End export to TDE"
Example no. 9
    def __init__(self, extract_file_path, input_schema):
        self.extract_file_path = extract_file_path
        self.output_path = os.path.dirname(self.extract_file_path)
        self.input_schema = input_schema
        self.format_type = TABLEAU_FORMAT

        self.errors = 0
        self.nrows = 0

        print("Writing Extract {} file: {}".format(self.format_type,
                                                   self.extract_file_path))
        ExtractAPI.initialize()
        self.extract = Extract(self.extract_file_path)
        assert (not self.extract.hasTable('Extract'))
        self.table_def = TableauExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)
Example no. 11
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """

        # Delete the extract and debug logs if they already exist
        for file in [path, os.path.dirname(path) + '/debug.log',
                     './DataExtract.log', './debug.log']:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        # Set columns in Tableau
        if add_index:
            index_col = 'index'
            suffix = 1
            while index_col in self._dataframe.columns:
                index_col = '%s_%s' % (index_col, suffix)
                suffix += 1
            table_def.addColumn(index_col, Type.INTEGER)

        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])

        # Create table
        new_table = new_extract.addTable(table_name, table_def)

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
    def create_tde(self, tde_filename, append=False):
        try:
            # Using "with" handles closing the TDE correctly
            with Extract("{}".format(tde_filename)) as extract:
                self.tde_object = None
                row_count = 0
                # Create the Extract object (or set it for updating) if there are actually results
                if not extract.hasTable('Extract'):
                    # Table does not exist; create it
                    self.log(u'Creating Extract with table definition')
                    self.tde_object = extract.addTable('Extract',
                                                       self.table_definition)
                else:
                    # Open an existing table to add more rows
                    if append is True:
                        self.tde_object = extract.openTable('Extract')
                    else:
                        self.log("Output file '" + tde_filename +
                                 "' already exists.")
                        self.log(
                            "Append mode is off, please delete file and then rerun..."
                        )
                        sys.exit()

                    # This is if you actually have data to put into the extract. Implement later
                    #	tde_row = Row(tableDef)
                    #	colNo = 0
                    #	for field in db_row:
                    # Possible for database to have types that do not map, we skip them
                    #		if cursor.description[colNo][1].__name__ in PyTypeMap:
                    #			if( (field == "" or field == None) ) :
                    #				tde_row.setNull( colNo )
                    #			else :
                    # From any given row from the cursor object, we can use the cursor_description collection to find information
                    # for example, the column names and the datatypes. [0] is the column name string, [1] is the python type object. Mirrors cursor.description on the Row level
                    # Second item is a Python Type object, to get the actual name as a string for comparison, have to use __name__ property
                    #				fieldSetterMap[PyTypeMap[ cursor.description[colNo][1].__name__ ] ](tde_row, colNo, field);
                    #		colNo += 1
                    #	table.insert(tde_row)
                    #	row_count += 1
                    # print "TDE creation complete, " + str(row_count) + " rows inserted\n"
                    # if len(skipped_cols) > 0:
                    #	print "The following columns were skipped due to datatypes that were not recognized:\n"
                    #	print skipped_cols

        except TableauException, e:
            self.log('Tableau TDE creation error:{}'.format(e))
            raise
def writeTableToTDE(pgFields, pgData, extractLocation):
    print("writing table to {0}").format(extractLocation)

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for i in range(0, len(pgFields)):
        table_definition.addColumn(pgFields[i][0], pgFields[i][1])

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add table if it doesn't already exist
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(
        table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    numberRecords = len(pgData)
    for i in range(0, numberRecords):
        # Note that this doesn't cover all possible TDE data types
        for j in range(0, len(pgFields)):
            if pgFields[j][1] == Type.INTEGER:
                new_row.setInteger(j, pgData[i][j])
            elif pgFields[j][1] == Type.UNICODE_STRING:
                new_row.setString(j, pgData[i][j])
            elif pgFields[j][1] == Type.SPATIAL:
                new_row.setSpatial(j, pgData[i][j])
            elif pgFields[j][1] == Type.BOOLEAN:
                new_row.setBoolean(j, pgData[i][j])
            elif pgFields[j][1] == Type.DOUBLE:
                new_row.setDouble(j, pgData[i][j])
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
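A usage sketch for writeTableToTDE: pgFields pairs each column name with a Tableau type constant and pgData holds the row tuples (the sample values are made up):

fields = [('id', Type.INTEGER), ('city', Type.UNICODE_STRING)]
rows = [(1, u'Austin'), (2, u'Oslo')]
writeTableToTDE(fields, rows, 'cities.tde')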
Example no. 14
def publishCsvDatasource(serverURL, siteName, username, password, filename):

    extractName = os.path.basename(filename).split('.', 2)[0]
    extractFilename = extractName + '.tde'
    try:
        os.remove(extractFilename)
    except OSError:
        pass

    ExtractAPI.initialize()
    with Extract(extractFilename) as extract:
        with open(filename) as csvfile:
            fileReader = csv.reader(csvfile, delimiter=',')
            headers = []
            table = None
            for fileRow in fileReader:
                print('Reading: ' + str(fileRow))
                if headers == []:
                    headers = fileRow
                else:
                    if table is None:
                        tableDef = makeTableDefinition(headers, fileRow)
                        table = extract.addTable('Extract', tableDef)
                    insertData(table, fileRow)
            csvfile.close()
        extract.close()
    ExtractAPI.cleanup()

    # Initialize Tableau Server API
    ServerAPI.initialize()
    serverConnection = ServerConnection()
    serverConnection.connect(serverURL, username, password, siteName)

    # Publish order-py.tde to the server under the default project with name Order-py
    serverConnection.publishExtract(extractFilename, 'default', extractName,
                                    False)

    # Disconnect from the server
    serverConnection.disconnect()

    # Destroy the server connection object
    serverConnection.close()

    # Clean up Tableau Server API
    ServerAPI.cleanup()
Example no. 15
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """

        # Delete dataextract log and hyper_db temp files if they already exist
        files = (glob('DataExtract*.log') + glob('hyper_db_*') + [
            os.path.dirname(path) + '/debug.log', './DataExtract.log',
            './debug.log'
        ])

        for file in files:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)

        if not new_extract.hasTable(table_name):
            print(
                'Table \'{}\' does not exist in extract {}, creating.'.format(
                    table_name, path))
            self.set_table_structure(new_extract, table_name, add_index)

        new_table = new_extract.openTable(table_name)
        table_def = new_table.getTableDefinition()

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
Example no. 16
for i in join_info:
    #joinedDF = pd.merge(outputDF, VPS_DF, on='IntubationCycle', how='left')
    joinDF_auto = pd.merge(joinDF_auto, i[0], on=i[1], how='inner')

#output to CSV
joinDF_auto.to_csv(output_CSV, index=False, encoding='utf-8')

#typecast to date format
joinDF_auto['ClaimDate'] = pd.to_datetime(joinDF_auto['ClaimDate'])

#------------------------------------------------------------------------------
#   Create Extract
#------------------------------------------------------------------------------
ExtractAPI.initialize()

new_extract = Extract(output_Extract)

# Create a new table definition with 3 columns
table_definition = TableDefinition()
table_definition.addColumn('Patient_ID', Type.INTEGER)  # column 0
table_definition.addColumn('ClaimDate', Type.DATE)  # column 1
table_definition.addColumn('Patient Name', Type.UNICODE_STRING)  # column 2

#table_definition.addColumn('Claim_ID', Type.INTEGER)        		# column 3
#table_definition.addColumn('Discount', Type.DOUBLE) 				# column 4

#------------------------------------------------------------------------------
#   Populate Extract
#------------------------------------------------------------------------------

new_table = new_extract.addTable('Extract', table_definition)
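The fragment stops after creating the table; a minimal sketch of the row-population and cleanup steps that would normally follow, with column ordinals matching the three addColumn calls above:

new_row = Row(table_definition)
for _, rec in joinDF_auto.iterrows():
    new_row.setInteger(0, int(rec['Patient_ID']))
    claim_date = rec['ClaimDate']
    new_row.setDate(1, claim_date.year, claim_date.month, claim_date.day)
    new_row.setString(2, rec['Patient Name'])
    new_table.insert(new_row)

new_extract.close()
ExtractAPI.cleanup()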
Example no. 17
DEST_LON =  4
SEG_NAME = 9
DEP_TIME = 6 # Departure time (minutes from midnight)
DEP_TIME_HRS = 0
FLIGHT_NUM = 2
FLIGHT_TIME = 5

#####################################################################
## Process the data and write the TDE
#####################################################################

# 1. initialize a new extract
ExtractAPI.initialize()

# 2. Create a table definition
new_extract = Extract(extractLocation)

# 3. Add column definitions to the table definition
table_definition = TableDefinition()
table_definition.addColumn('route', Type.UNICODE_STRING)  # column 0
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)
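The example ends at the schema definition; creating the table and releasing the API would follow the same pattern as the other examples here, roughly:

new_table = new_extract.addTable('Extract', table_definition)
# ... build Row(table_definition) objects per flight segment and insert each
# with new_table.insert(row), then:
new_extract.close()
ExtractAPI.cleanup()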
Example no. 18
def create(target_filepath, dataframe):
    column = []
    ## Map pandas dtypes (left, from the DataFrame) to TDE data types (right).
    fieldMap = {
        'float64': tde.Types.Type.DOUBLE,
        'float32': tde.Types.Type.DOUBLE,
        'int64': tde.Types.Type.INTEGER,
        'int8': tde.Types.Type.INTEGER,
        'object': tde.Types.Type.UNICODE_STRING,
        'bool': tde.Types.Type.BOOLEAN,  # was DOUBLE; the row loop below calls setBoolean
        'datetime64[ns]': tde.Types.Type.DATETIME
    }

    ## Delete the TDE file if it already exists.
    if os.path.isfile(target_filepath):
        os.remove(target_filepath)

    tdefile = tde.Extract(target_filepath)
    schema = tde.TableDefinition()  # define the table definition

    colnames = dataframe.columns  # dataframe all column name
    coltypes = dataframe.dtypes  # dataframe all column's datatype

    ## Build the TDE schema from the DataFrame
    for i in range(0, len(colnames)):
        cname = colnames[i]
        ctype = fieldMap.get(str(coltypes[i]))
        schema.addColumn(cname, ctype)  # add one column (name, data type)
        column.append(cname)

    ## Open the TDE file and write the data
    with tdefile as extract:
        table = extract.addTable("Extract", schema)
        for r in range(0, dataframe.shape[0]):
            row = tde.Row(schema)
            ## Show how many rows remain
            count = dataframe.shape[0] - r - 1
            print(str(count) + ' rows remaining', end="\r")
            ## Write each value into the TDE according to its dtype
            for c in range(0, len(coltypes)):
                if str(coltypes[c]) == 'float64':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setDouble(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'float32':
                    row.setDouble(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'int64':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setInteger(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'int8':
                    row.setInteger(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'object':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setString(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'bool':
                    row.setBoolean(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'datetime64[ns]':
                    try:
                        dt = dataframe.iloc[r, c]
                        row.setDateTime(c, dt.year, dt.month, dt.day, dt.hour,
                                        dt.minute, dt.second, 0)
                    except:
                        row.setNull(c)
                else:
                    row.setNull(c)
            ## insert the row
            table.insert(row)
    ## no explicit close needed: the "with" block above already closed the tdefile
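A usage sketch for create(); the DataFrame is illustrative and its dtypes are resolved through fieldMap above:

import pandas as pd

df = pd.DataFrame({'score': [1.5, 2.5], 'label': ['a', 'b']})
create('scores.tde', df)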
Example no. 19
def main():
    pp = pprint.PrettyPrinter(indent=2)

    key = raw_input("API Key: ")

    limit = 100
    offset = 0

    # Create TDE extract file

    if os.path.isfile(extract_file):
        os.remove(extract_file)

    ExtractAPI.initialize()
    tdefile = Extract(extract_file)

    # Create the table definition
    # Data types: INTEGER, DOUBLE, BOOLEAN, DATE, DATETIME, DURATION, CHAR_STRING, UNICODE_STRING
    tableDef = TableDefinition()
    tableDef.addColumn('alert_id', Type.CHAR_STRING)
    tableDef.addColumn('timestamp', Type.DATETIME)
    tableDef.addColumn('alert_type', Type.CHAR_STRING)
    tableDef.addColumn('alert_status', Type.CHAR_STRING)
    tableDef.addColumn('alert_network', Type.CHAR_STRING)
    tableDef.addColumn('notes', 16) #UNICODE_STRING
    tableDef.addColumn('metadata', 16) #UNICODE_STRING
    tableDef.addColumn('rule_name', Type.CHAR_STRING)
    tableDef.addColumn('severity', Type.INTEGER)
    tableDef.addColumn('asset_name', 16) #UNICODE_STRING
    tableDef.addColumn('asset_image', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_username', 16) #UNICODE_STRING
    tableDef.addColumn('perpetrator_displayname', 16) #UNICODE_STRING
    tableDef.addColumn('perpetrator_type', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_image', Type.CHAR_STRING)
    tableDef.addColumn('takedown_request_time', Type.DATETIME)
    tableDef.addColumn('takedown_accept_time', Type.DATETIME)
    tableDef.addColumn('close_time', Type.DATETIME)

    # Create table in image of tableDef
    table = tdefile.addTable('Extract', tableDef)

    rowcount = 0

    done = 'n'
    while done == 'n':
        response = get_page(key, limit, offset)
        alerts = response['alerts']
        if len(alerts) == 0:
            done = 'y'
            break
        for alert in alerts:
            newrow = createrow(alert,tableDef)

            table.insert(newrow)

            rowcount +=1
            if rowcount >= LIMIT:
                done = 'y'
                break

        offset += 100
    tdefile.close()
    ExtractAPI.cleanup()
    print str(rowcount) + " alerts processed"
Example no. 20
    pass

###Setting Tableau recognized data types###
for i in column_types:
    if i is numpy.int64:
        column_headers_types.append(Type.INTEGER)
    elif i is numpy.float64:
        column_headers_types.append(Type.DOUBLE)
    elif i is pd.Timestamp:
        column_headers_types.append(Type.DATETIME)
    else:
        column_headers_types.append(Type.UNICODE_STRING)

###Getting the existing table schema for incremental refresh###
if incremental_refresh == 1:
    dataExtract = Extract(extract_name)
    if dataExtract.hasTable('Extract'):
        table = dataExtract.openTable('Extract')
        dataSchema = table.getTableDefinition()

else:
    ###Removing extract if it already exists (for full refresh)###
    try:
        cwd = os.getcwd()
        for f in os.listdir(cwd):
            if re.search(extract_name, f):
                os.remove(os.path.join(cwd, f))
    except OSError, e:
        pass

    ###Initializing the Extract API, and applying the schema to the table###
Example no. 21
def createTDE(query, dbname, tde_filename):
    start_time = time.time()
    db = sql.connect(host="127.0.0.1", port=9999, user="******", db=dbname)
    cursor = db.cursor()
    cursor.execute(query)
    
    ExtractAPI.initialize()

    if os.path.isfile(tde_filename):
        os.remove(tde_filename)
    tde = Extract(tde_filename)
    
    table_def = makeTableDefinition(cursor.description)
    
    tde_table = tde.addTable('Extract', table_def)

    tde_row = Row(table_def)   # Pass the table definition to the constructor

    size = 1000
    many = cursor.fetchmany(size)
    rows_counter = 0
    while many:
        # add counter for rows incase of database disconnection
        for row in many:
            # Create new row
            for colno, col in enumerate(row):

                col_type = table_def.getColumnType(colno)
                setType = tde_set_type_function_map[col_type]
                # print setType
                # print type(setType)
                setType(tde_row, colno, col)
            tde_table.insert(tde_row)
            rows_counter += 1
        many = cursor.fetchmany(size)
        # tde_row.close()
    print "cleared while loop"
    # Set column values. The first parameter is the column number (its
    # ordinal position); the second parameter (or second and subsequent
    # parameters) is the value to set the column to.
# new_row.setInteger(0, 1)
# new_row.setString(1, 'Amanda')
# new_row.setDate(2, 2014, 6, 9)
# new_table.insert(new_row) # Add the new row to the table

# new_row.setInteger(0, 2)
# new_row.setString(1, 'Brian')
# new_row.setDate(2, 2014, 10, 13)
# new_table.insert(new_row)

# new_row.setInteger(0, 3)
# new_row.setString(1, 'Christina')
# new_row.setDate(2, 2015, 2, 16)
# new_table.insert(new_row)

    # Close the extract in order to save the .tde file and clean up resources
    tde.close()
    print "TDE Closed"
    ExtractAPI.cleanup()
    print "ExtractAPI cleaned up"
    cursor.close()
    print "cursor Closed"
    db.close()
    print "db closed"

    timetaken = time.time() - start_time
    print str(rows_counter) + ' rows inserted in ' + str(timetaken) + ' seconds'
    print '    (' + str(rows_counter/timetaken) + ' rows per second)'
    return
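A usage sketch for createTDE; the query, database name, and output file are placeholders (the connection details are hard-coded inside the function):

createTDE("SELECT * FROM sales", "analytics", "sales.tde")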
Example no. 22
def to_tde(dataset, tde_mode, tde_name):

    if tde_mode == 'new':

        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)

        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':

        # Step 1: Import the Extract File
        preExtract = Extract(tde_name)

        # Step 2: Open existing table
        table = preExtract.openTable('Extract')

        # Step 3: Import the table definition
        tableDef = table.getTableDefinition()

        # Step 4: Create rows and insert them one by one
        newRow = Row(tableDef)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the extract
        preExtract.close()
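A sketch of driving to_tde with a matching DataFrame; the columns must line up with the schema hard-coded above, and the values are illustrative:

import pandas as pd

dataset = pd.DataFrame({
    'Station': ['S1'], 'Time': ['08:00'],
    'Date': pd.to_datetime(['2020-01-01']),
    'all': [1.0], 'n7': [0.5], 'n8': [0.25], 'y_serries': [0.1],
})
to_tde(dataset, 'new', 'stations.tde')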
Example no. 23
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):


  xLen = len(heatmap)
  yLen = len(heatmap[0])
  xRange = abs(extent[0] - extent[1])
  yRange = abs(extent[2] - extent[3])
  xMin = min(extent[0], extent[1])
  yMin = min(extent[2], extent[3])
  xIncrement = xRange / yLen
  yIncrement = yRange / xLen

  # 1. initialize a new extract
  ExtractAPI.initialize()

  # 2. Create a table definition
  new_extract = Extract(extractLocation)

  # 3. Add column definitions to the table definition
  table_definition = TableDefinition()
  table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
  table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
  table_definition.addColumn('VALUE', Type.DOUBLE)  # column 2
  table_definition.addColumn('ID', Type.UNICODE_STRING)
  table_definition.addColumn('CellCount', Type.INTEGER)
  table_definition.addColumn('Smoothing', Type.DOUBLE)
  table_definition.addColumn('GEOM', Type.SPATIAL)

  # 4. Initialize a new table in the extract
  # a. check if the table already exists
  # Only add table if it doesn't already exist
  if not new_extract.hasTable('Extract'):
    new_table = new_extract.addTable('Extract', table_definition)
  else:
    new_table = new_extract.openTable('Extract')

  # 5. Create a new row
  new_row = Row(table_definition)  # Pass the table definition to the constructor

  # 6. Populate each new row
  yCoord = yMin
  for i in range(0, xLen):
    yCoord += yIncrement
    xCoord = xMin
    for j in range(0, yLen):
      xCoord += xIncrement

      cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)

      new_row.setString(0, str(i)) # ROW
      new_row.setString(1, str(j)) # COL
      new_row.setDouble(2, heatmap[i][j]) # VAL
      new_row.setString(3, str(i) + '-' + str(j)) # id
      new_row.setInteger(4, len(heatmap[0])) # cell count
      new_row.setDouble(5, smoothing) # smoothing
      new_row.setSpatial(6, cellWkt) # WKT spatial
      new_table.insert(new_row) # Add the new row to the table

  # 7. Save the table and extract
  new_extract.close()

  # 8. Release the extract API
  ExtractAPI.cleanup()
  return
from tableausdk import *
from tableausdk.Server import *
from tableausdk.Extract import *
import tableausdk.Extract as tde

#Define a new tde file.
tdefile = tde.Extract('test1.tde')

#Defining a new data set/ table defination in tde
tableDef = tde.TableDefinition()
tableDef.addColumn("company", 15)  #INTEGER
tableDef.addColumn("projected", 10)  #DOUBLE
tableDef.addColumn("realRev", 10)  #DOUBLE

#Let's add dataset to the file

tabletran = tdefile.addTable("Extract", tableDef)

#Create new Row
newrow = tde.Row(tableDef)
# Adding data value
# First param represents sequence of the column
newrow.setCharString(0, 'myCompany')
newrow.setDouble(1, 1000)
newrow.setDouble(2, 888)
# adding new row to the dataset
tabletran.insert(newrow)

#Create new Row (the source snippet is truncated here; the lines below
#complete it with illustrative values, mirroring the first row)
newrow = tde.Row(tableDef)
newrow.setCharString(0, 'myCompany2')
newrow.setDouble(1, 2000)
newrow.setDouble(2, 1750)
tabletran.insert(newrow)

#Close the tde to save it
tdefile.close()
Example no. 25
    def new_tde(self):
        dataExtract = Extract(self.tde_name)
        if dataExtract.hasTable('Extract'):
            return print("TDE already exists, use another name")
        return dataExtract
Example no. 26
    def build(self):
        # Set filename
        tdefileName = self.tde_settings_ins.tde_file
        if os.path.isfile(tdefileName):
            os.remove(tdefileName)
        #set the extract object instance
        tdefile = tdeEx.Extract(tdefileName)
        # create the column schema
        tableDef = tdeEx.TableDefinition()
        columnsListedInOrder = []
        outputColIndex = {}
        outputColNumber = 0
        for columnName, index in sorted(
                self.tde_settings_ins.columnIndex.items(), key=lambda x: x[1]):
            BasicDataType = self.tde_settings_ins.columns[columnName]
            tableDef.addColumn(columnName,
                               tdeSettings.schemaIniTypeMap[BasicDataType])
            outputColIndex[columnName] = outputColNumber
            outputColNumber += 1
        #add the column schema to the object instance of EXTRACT
        table = tdefile.addTable("Extract", tableDef)
        rowNo = 0

        rowsIterator = self.tde_settings_ins.inputInfo.fileInformation.yieldRowsBase()
        printedBool = False
        startTime = datetime.datetime.now()
        print "Build start time:", datetime.datetime.strftime(
            startTime, '%Y-%m-%d %H:%M:%S')
        prevPct = -1
        for myReaderRow in rowsIterator:
            putRow = tdeEx.Row(tableDef)
            for columnName, index in self.tde_settings_ins.columnIndex.items():
                #if not column headers need to change this to index of the column
                if (tdeSettings.schemaIniTypeMap[
                        self.tde_settings_ins.columns[columnName]] !=
                        tde.Type.UNICODE_STRING and myReaderRow[index] == ""):
                    putRow.setNull(outputColIndex[columnName])
                else:
                    try:
                        #valueDecoded=myReaderRow[index].decode(self.tde_settings_ins.inputInfo.fileInformation.encodeing)
                        self.fieldSetterMap[tdeSettings.schemaIniTypeMap[
                            self.tde_settings_ins.columns[columnName]]](
                                putRow, outputColIndex[columnName],
                                myReaderRow[index])
                    except:
                        print "column name", columnName, "contains invalid data"
                        print "value: ", myReaderRow[index]
                        print sys.exc_info()[0]
                        raise
            table.insert(putRow)
            # Output progress line
            #only print percent one time when we hit that percent
            rowNo += 1
            pct = math.floor(
                self.tde_settings_ins.inputInfo.fileInformation.getPercent(
                    rowNo))
            #            print "row:",rowNo
            #            print "pct:",pct
            #            print "value: ",pct %2
            if pct % 2 == 0:
                if printedBool == False or prevPct != pct:
                    print "Percent:", round(pct, 0), "%, row:", rowNo
                    printedBool = True
                    prevPct = pct
            else:
                printedBool = False
        print "tde file built to:", tdefileName
        endTime = datetime.datetime.now()
        print "End Time:", endTime
        diff = endTime - startTime
        print "Build elapsed time:", diff
import threading
import time


import config_sql_bdi_call

# TAB_SDK_LOGDIR The folder where the Tableau SDK writes log files.
# TAB_SDK_TMPDIR The temporary directory where the SDK keeps intermediate (temporary) files, such as when it's creating an extract.
# os.environ['TAB_SDK_LOGDIR'] = 'G:\Scripts\ServerAdmin\Logs\Back_Dated_Inc'
# os.environ['TAB_SDK_TMPDIR'] = 'G:\Scripts\ServerAdmin\Logs\Back_Dated_Inc'

# Initialize the extract API process
ExtractAPI.initialize()

# create new extract file
tdenew = Extract("./" + str(sys.argv[1] + "_histmonth.tde"))

print str(datetime.datetime.now()) + " begin time of sql"

# Connect to Greenplum
# cnxn = pyodbc.connect('DRIVER={DataDirect 7.1 Greenplum Wire Protocol};HOST=dtord01gpv01p.dc.dotomi.net;PORT=5432;DATABASE=vds_prd;UID=generic_sql_report_utility;PWD=DTMSqls3rv3r')
cnxn = pyodbc.connect("DSN=PostgreSQL-test")
# cnxn = pyodbc.connect('DRIVER={psqlodbca.so};HOST=dtord01gpv01p.dc.dotomi.net;PORT=5432;DATABASE=vds_prd;UID=generic_sql_report_utility;PWD=DTMSqls3rv3r')
cursor = cnxn.cursor()

# Get SQL
sqlrun = config_sql_bdi_call.execScriptsFromFile("config_" + str(sys.argv[1]) + ".sql")

# execute SQL
cursor.execute(sqlrun)
Example no. 28
#         predict_date.append(pd.to_datetime('2012-01-01') + timedelta(i))
# # #xp = preprocessing.scale(ml_data)
# # #print dataw
# # print ml_data
# df = pd.concat([data4_1,DataFrame(predict_date,columns=['predict value'],index=get_data_model(get_date_diff2(data4_1,t=4.1),model=4.1).index)],ignore_index=False,axis=1)

combined_data = pd.concat([a,b,c,d],axis=0,ignore_index=True)

#print combined_data

ExtractAPI.initialize()

if os.path.isfile("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde"):
    os.remove("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")

createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")


for i in combined_data.columns:
    if i in ['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release']:
        combined_data[i] = pd.to_datetime(combined_data[i], errors='coerce')



#combined_data = combined_data.drop(['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release'],1)



if createExtract.hasTable('Extract'):
    table = createExtract.openTable('Extract')
Example no. 29
def extract(file_name):
    # move file to /extract
    # cd to /extract
    # if there is no extract called TRACK_TERM then create one, otherwise append to TRACK_TERM
    # define data model for extract

    global WORKING_DIRECTORY, TRACK_TERM

    if not os.path.exists(WORKING_DIRECTORY + '/extract/'):
        os.makedirs(WORKING_DIRECTORY + '/extract/')

    from_path = WORKING_DIRECTORY + '/' + file_name
    to_path = WORKING_DIRECTORY + '/extract/' + file_name

    os.rename(from_path, to_path)

    os.chdir(WORKING_DIRECTORY + '/extract')

    # define the extract
    with data.Extract(TRACK_TERM + '.tde') as extract:

        tableDef = data.TableDefinition()

        # define the columns and the data types in the extract
        tableDef.addColumn('lang', types.Type.CHAR_STRING)  #0
        tableDef.addColumn('sentiment', types.Type.DOUBLE)  #1
        tableDef.addColumn('country', types.Type.CHAR_STRING)  #2
        tableDef.addColumn('created_at', types.Type.DATETIME)  #3
        tableDef.addColumn('tweet_text', types.Type.CHAR_STRING)  #4
        tableDef.addColumn('Longitude', types.Type.DOUBLE)  #5
        tableDef.addColumn('source', types.Type.CHAR_STRING)  #6
        tableDef.addColumn('user', types.Type.CHAR_STRING)  #7
        tableDef.addColumn('Latitude', types.Type.DOUBLE)  #8

        table = None

        if not extract.hasTable('Extract'):
            # Table does not exist, so create it.
            print "Creating a new extract"
            table = extract.addTable('Extract', tableDef)
        else:
            # Table exists, so append the new data.
            print "Appending to an existing extract"
            table = extract.openTable('Extract')

        new_row = data.Row(tableDef)

        # read the data from the CSV into the extract row object
        with open(file_name, 'r') as inf:
            reader = csv.DictReader(inf, delimiter=',', lineterminator='\n')
            for row in reader:
                # insert data into the row object in the correct order as defined above
                new_row.setCharString(0, row['lang'])

                sentiment = float(row['sentiment'])
                new_row.setDouble(1, sentiment)

                new_row.setCharString(2, row['country'])

                # parse the twitter date string:
                # Mon Sep 21 11:03:53 +0000 2015
                # %a %b %d %H:%M:%S +0000 %Y
                date_object = datetime.strptime(row['created_at'],
                                                '%a %b %d %H:%M:%S +0000 %Y')
                year = int(datetime.strftime(date_object, '%Y'))
                month = int(datetime.strftime(date_object, '%m'))
                day = int(datetime.strftime(date_object, '%d'))
                hour = int(datetime.strftime(date_object, '%H'))
                min = int(datetime.strftime(date_object, '%M'))
                sec = int(datetime.strftime(date_object, '%S'))
                frac = 0  # fractions of a second aka milliseconds
                new_row.setDateTime(3, year, month, day, hour, min, sec, frac)

                new_row.setCharString(4, row['tweet_text'])

                # check if there is a value for longitude, otherwise write a 0
                try:
                    longitude = float(row['longitude'])
                except:
                    longitude = 0
                new_row.setDouble(5, longitude)

                new_row.setCharString(6, row['source'])
                new_row.setCharString(7, row['user'])

                # check if there is a value for latitude, otherwise write a 0
                try:
                    latitude = float(row['latitude'])
                except:
                    latitude = 0
                new_row.setDouble(8, latitude)

                table.insert(new_row)

    # if the process fails we want to be able to re-run it without collisions between file names
    # so give each file a unique name (unix time stamp in this case).
    os.rename(file_name, str(time.time()).split('.')[0] + '.csv')

    # cd back to working directory
    os.chdir(WORKING_DIRECTORY)

    return
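A usage sketch: extract() depends on the module-level WORKING_DIRECTORY and TRACK_TERM globals, so both must be set before calling it (the values here are placeholders):

WORKING_DIRECTORY = '/tmp/tweets'
TRACK_TERM = 'python'
extract('batch_001.csv')  # moves the CSV into ./extract and appends to python.tde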
Example no. 30
    if arg_tde_filename:
        tde_filename = arg_tde_filename
    else:
        tde_filename = csv_filename.replace(".csv", ".tde")
    # print "Will write to [%s]" % os.path.join(tde_dirpath, tde_filename)
    # print

    # Step 1: Create the Extract file and open the csv
    tde_fullpath = os.path.join(tde_dirpath, tde_filename)
    # if the file doesn't exist, revoke the append
    if arg_append and not os.path.isfile(tde_fullpath):
        print "Couldn't append -- file doesn't exist."
        arg_append = False
    if not arg_append and os.path.isfile(tde_fullpath):
        os.remove(tde_fullpath)
    new_extract = Extract(tde_fullpath)

    # Define the columns by the first csv
    table_df = pandas.read_csv(os.path.join(csv_dirpaths[0], csv_filename),
                               names=arg_header)
    for join_table_file in arg_join:
        join_df = pandas.read_csv(join_table_file)
        table_df = pandas.merge(table_df, join_df, how='left')

    # Step 2: Create the tableDef
    table_definition = TableDefinition()
    old_colnames = []
    colnames_to_types = {}
    new_colnames_to_idx = {}
    for (col, dtype) in itertools.izip(table_df.columns, table_df.dtypes):
        colname = col.strip()  # strip whitespace
Example no. 31
    def assemble_tde(self):
        """ Gather the data information and create the Tde.
        :return:
        """
        try:
            print "Creating extract:", self.file_name

            ExtractAPI.initialize()

            with Extract(self.file_name) as data_extract:
                # If extract Does exist add to the Extract table and file
                if data_extract.hasTable('Extract'):
                    # Open an existing table to add more rows
                    table = data_extract.openTable('Extract')
                    table_definition = table.getTableDefinition()
                else:
                    table_definition = TableDefinition()
                    for each in self.data_types.reset_index(level=0).values:
                        # Add the column info to the table definition
                        table_definition.addColumn(
                            str(each[0]), schema_type_map[str(each[1])])
                    # Create the Table with the table definition
                    table = data_extract.addTable("Extract", table_definition)

                new_row = Row(table_definition)
                count = self.data_frame.shape[0]
                pbar = Bar(count)

                # Run through dataframe data and add data to the table object
                for i in range(count):
                    for j, column_name in enumerate(
                            self.data_types.index.values.tolist()):
                        self.add_to_row(new_row, j,
                                        self.data_frame[column_name].iloc[i],
                                        self.data_types['data_type'][j],
                                        column_name)
                    table.insert(new_row)

                    self.last_data_row_extracted = self.data_frame.iloc[
                        i].to_frame().transpose()
                    # if i == 1:
                    #     raise Exception
                    pbar.passed()

            data_extract.close()
            ExtractAPI.cleanup()
        except:
            file_name = self.tde_file()

            # Clean up resources
            Extract(file_name).close()
            ExtractAPI.cleanup()

            # Create csv and pickle of the data
            self.data_frame.to_pickle(file_name.replace('.tde', '_pickle'))
            self.data_frame.to_csv(file_name.replace('.tde', '.csv'),
                                   index=False,
                                   encoding='utf-8')
            raise Exception(
                "Error in creating tde file please consult data files. \n%s\n%s. \n%s, %s, %s"
                % (file_name.replace(
                    '.tde', '_pickle'), file_name.replace('.tde', '.csv'),
                   'Error on line {}'.format(sys.exc_info()[-1].tb_lineno),
                   sys.exc_info()[0], sys.exc_info()[1]))
Example no. 33
from tableausdk import Extract as tde  # **Module change from py2 for py3
from tableausdk import Types as tdeTypes  # **
import os

#Step 1: Create the Extract File
if os.path.isfile('trivialExample.tde'):
    os.remove('trivialExample.tde')

tdefile = tde.Extract('trivialExample.tde')

#Step 2: Create the tableDef
tableDef = tde.TableDefinition()
tableDef.addColumn('rowID', tdeTypes.Type.CHAR_STRING)  # Column index 0. **
tableDef.addColumn('value', tdeTypes.Type.DOUBLE)  #Column index 1. **

#Step 3: Create the table in the image of the tableDef
table = tdefile.addTable('Extract', tableDef)

#Step 4: Create some rows and insert them into the table
newrow = tde.Row(tableDef)
for i in range(1, 101):
    newrow.setCharString(0, 'Row ' + str(i))
    newrow.setDouble(1, i)
    table.insert(newrow)

#Step 5: Close the tde
tdefile.close()
Example no. 34
def tableau_extract(resultset, data_file):
    """
    Create TDE extract.
    """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset
    data_type = []
    fieldnames = []
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd._libs.tslib.Timestamp: Type.DATETIME
                     }

    for col in df_name:
        fieldnames.append(col)
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))

    for col_name in data_dict:
        if data_dict[col_name] in data_type_map:
            data_dict[col_name] = data_type_map[data_dict[col_name]]
        else:
            data_dict[col_name] = Type.UNICODE_STRING
    # Initialize a new extract
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)
    tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12,
                 'DATETIME': 13, 'DURATION': 14,
                 'CHAR_STRING': 15, 'UNICODE_STRING': 16}
    for i in range(0, len(df_name)):
        for col in range(0, table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            try:
                if data_dict[col_name] == tde_types['INTEGER']:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DOUBLE']:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['BOOLEAN']:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DATE']:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif data_dict[col_name] == tde_types['DATETIME']:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year, data.month, data.day,
                                        data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['DURATION']:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['CHAR_STRING']:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['UNICODE_STRING']:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Error')
                    new_row.setNull(col)
            except TypeError:
                new_row.setNull(col)
        new_table.insert(new_row)

    new_extract.close()
    ExtractAPI.cleanup()
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
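A usage sketch for tableau_extract; it accepts either a ResultSet or a plain DataFrame plus an output path (the DataFrame here is illustrative):

import pandas as pd

tableau_extract(pd.DataFrame({'n': [1, 2], 's': ['x', 'y']}), 'out.tde')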
    type_non_obj.append(i)

obj_dict = {x: None for x in type_obj}
non_obj_dict = {x: None for x in type_non_obj}

df = df.replace(obj_dict,'na')
df = df.replace(non_obj_dict,0)

# STEP 1 - Initialize the Extract API
ExtractAPI.initialize()

# STEP 2 - Initialize a new extract by calling the Extract() constructor
if os.path.isfile(tde_file):
    os.remove(tde_file)

td_extract = Extract(tde_file)

# STEP 3 - Create a table definition
table_definition = TableDefinition()

# STEP 4 - Create new column definition
for i in range(len(df.columns)):
    if df.dtypes[i] == 'object':
        table_definition.addColumn(df.columns[i], Type.UNICODE_STRING)
    elif df.dtypes[i] == 'float64':
        table_definition.addColumn(df.columns[i], Type.DOUBLE)
    elif df.dtypes[i] == 'int64':
        table_definition.addColumn(df.columns[i], Type.INTEGER)
    elif df.dtypes[i] == 'datetime64[ns]':
        table_definition.addColumn(df.columns[i], Type.DATE)
Example no. 36
## import the libraries
import tableausdk.Extract as tde
import pandas as pd
import os

## bring in a sample Graduate School Admissions datasets
file_name = "http://www.ats.ucla.edu/stat/data/binary.csv"
df = pd.read_csv(file_name)
df.head()
df.shape

## create the extract name, but remove the extract if it already exists
fname = "example.tde"
try:
    tdefile = tde.Extract(fname)
except:
    os.system('del ' + fname)
    os.system('del DataExtract.log')
    tdefile = tde.Extract(fname)

## define the table definition
tableDef = tde.TableDefinition()

## create a list of column names and types
colnames = df.columns
coltypes = df.dtypes

## create a dict for the field maps
## Caveat: I am not including all of the possibilities below
fieldMap = {
    'float64': tde.Types.Type.DOUBLE,