Beispiel #1
0
    def new_tde(self, data):
        # Step 1: Create the Extract File
        dataExtract = Extract(self.tde_name)
        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")

        # Step 2: Create the table definition
        cols = data.columns.tolist()
        dataSchema = TableDefinition()
        for col in cols:
            dataSchema.addColumn(col, Type.UNICODE_STRING)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        cols = data.columns.tolist()
        for i in range(0, len(data)):
            for col in cols:
                col_index = cols.index(col)
                newRow.setString(col_index, data[col][i])
                table.insert(newRow)
        dataExtract.close()

        # Step 5: Close the tde
        dataExtract.close()
def createOrOpenExtract(filename, useSpatial):
    try:
        # Create Extract Object
        # (NOTE: The Extract constructor opens an existing extract with the
        #  given filename if one exists or creates a new extract with the given
        #  filename if one does not)
        extract = Extract(filename)

        # Define Table Schema (If we are creating a new extract)
        # (NOTE: In Tableau Data Engine, all tables must be named 'Extract')
        if (not extract.hasTable('Extract')):
            schema = TableDefinition()
            schema.setDefaultCollation(Collation.EN_GB)
            schema.addColumn('Purchased', Type.DATETIME)
            schema.addColumn('Product', Type.CHAR_STRING)
            schema.addColumn('uProduct', Type.UNICODE_STRING)
            schema.addColumn('Price', Type.DOUBLE)
            schema.addColumn('Quantity', Type.INTEGER)
            schema.addColumn('Taxed', Type.BOOLEAN)
            schema.addColumn('Expiration Date', Type.DATE)
            schema.addColumnWithCollation('Produkt', Type.CHAR_STRING,
                                          Collation.DE)
            if (useSpatial):
                schema.addColumn('Destination', Type.SPATIAL)
            table = extract.addTable('Extract', schema)
            if (table == None):
                print 'A fatal error occurred while creating the table:\nExiting now\n.'
                exit(-1)

    except TableauException, e:
        print 'A fatal error occurred while creating the new extract:\n', e, '\nExiting now.'
        exit(-1)
class TDEExport(object):
    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"],Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        self.errors = 0
        self.nrows = 0

        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close();

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print "Encountered %d errors" % self.errors

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                self.errors += 1
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 ==0):
                    try:
                        print "[err #%s] Failed setting: col=%s type=%s val=%s  err=%s" % (self.errors, col["name"],  col["type"], data, e)
                    except Exception, e2:
                        print "[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED  err=%s" % (self.errors, col["name"],  col["type"],e)
class TDEExport(object):
    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"],Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        self.errors = 0
        self.nrows = 0

        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close();

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print "Encountered %d errors" % self.errors

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                self.errors += 1
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 ==0):
                    try:
                        print "[err #%s] Failed setting: col=%s type=%s val=%s  err=%s" % (self.errors, col["name"],  col["type"], data, e)
                    except Exception, e2:
                        print "[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED  err=%s" % (self.errors, col["name"],  col["type"],e)
class TDEExport(object):

    @staticmethod
    def convert_type(type):
        return typeMap.get(type,Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close();

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                print "Failed setting field %s to value %s: %s" % (col["name"], data, e)
                pass
        self.table.insert(output_row)
def writeTableToTDE(pgFields, pgData, extractLocation):
    print("writing table to {0}").format(extractLocation)

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for i in range(0, len(pgFields)):
        table_definition.addColumn(pgFields[i][0], pgFields[i][1])

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add table if it doesn't already exist
    if (new_extract.hasTable('Extract') == False):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(
        table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    numberRecords = len(pgData)
    for i in range(0, numberRecords):
        # Note that this doesn't cover all possible TDE data types
        for j in range(0, len(pgFields)):
            if pgFields[j][1] == Type.INTEGER:
                new_row.setInteger(j, pgData[i][j])
            elif pgFields[j][1] == Type.UNICODE_STRING:
                new_row.setString(j, pgData[i][j])
            elif pgFields[j][1] == Type.SPATIAL:
                new_row.setSpatial(j, pgData[i][j])
            elif pgFields[j][i] == Type.BOOLEAN:
                new_row.setBoolean(j, pgData[i][j])
            elif pgFields[j][i] == Type.DOUBLE:
                new_row.setDouble(j, pgData[j][i])
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
Beispiel #7
0
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """

        # Delete dataextract log and hyper_db temp files if already exists
        files = (glob('DataExtract*.log') + glob('hyper_db_*') + [
            os.path.dirname(path) + '/debug.log', './DataExtract.log',
            './debug.log'
        ])

        for file in files:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)

        if not new_extract.hasTable(table_name):
            print(
                'Table \'{}\' does not exist in extract {}, creating.'.format(
                    table_name, path))
            self.set_table_structure(new_extract, table_name, add_index)

        new_table = new_extract.openTable(table_name)
        table_def = new_table.getTableDefinition()

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
###Setting Tableau recognized data types###
for i in column_types:
    if i is numpy.int64:
        column_headers_types.append(Type.INTEGER)
    elif i is numpy.float64:
        column_headers_types.append(Type.DOUBLE)
    elif i is pd.Timestamp:
        column_headers_types.append(Type.DATETIME)
    else:
        column_headers_types.append(Type.UNICODE_STRING)

###Getting the existing table schema for incremental refresh###
if incremental_refresh == 1:
    dataExtract = Extract(extract_name)
    if dataExtract.hasTable('Extract'):
        table = dataExtract.openTable('Extract')
        dataSchema = table.getTableDefinition()

else:
    ###Removing extract if it already exists (for full refresh)###
    try:
        cwd = os.getcwd()
        for f in os.listdir(cwd):
            if re.search(extract_name, f):
                os.remove(os.path.join(cwd, f))
    except OSError, e:
        pass

    ###Initializng the Extract API, and applying the schema to the table###
    ExtractAPI.initialize()
Beispiel #9
0
createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")


for i in combined_data.columns:
     if i in ['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release']:

             combined_data[i] = pd.to_datetime(combined_data[i],coerce=True)



#combined_data = combined_data.drop(['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release'],1)



if createExtract.hasTable('Extract'):
    table = createExtract.openTable('Extract')
    tabledef = table.getTableDefinition()
else:
    tabledef = TableDefinition()


for i in combined_data.columns :
    #print combined_data[i].dtype
    if str(combined_data[i].dtype) == "int64":
        tabledef.addColumn(i,Types.Type.INTEGER)
    if str(combined_data[i].dtype) == "object":
        tabledef.addColumn(i,Types.Type.CHAR_STRING)
    if str(combined_data[i].dtype) == "datetime64[ns]":
        tabledef.addColumn(i,Types.Type.DATE)
    if str(combined_data[i].dtype) == "float64":
Beispiel #10
0
# 3. Add column definitions to the table definition
table_definition = TableDefinition()
table_definition.addColumn('route', Type.UNICODE_STRING)  # column 0
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)

# 4. Initialize a new table in the extract
if (new_extract.hasTable('Extract') == False):
  new_table = new_extract.addTable('Extract', table_definition)
else:
  new_table = new_extract.openTable('Extract')

# 5. Create a new row
new_row = Row(table_definition)  # Pass the table definition to the constructor

# 6. walk through the origin/destination data from CSV, write each path to TDE
with open(csvLocation, 'rb') as csvfile:
  csvreader = csv.reader(csvfile, delimiter='|')

  next(csvreader) # skip header


  for row in csvreader:
Beispiel #11
0
def to_tde(dataset, tde_mode, tde_name):

    if tde_mode == 'new':

        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)

        if dataExtract.hasTable('Extract'):
            return print("tde already exist use another name")

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString        (0, dataset['Station'][i])
            newRow.setString        (1, dataset['Time'][i])
            newRow.setDate          (2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble        (3, dataset['all'][i])
            newRow.setDouble        (4, dataset['n7'][i])
            newRow.setDouble        (5, dataset['n8'][i])
            newRow.setDouble        (6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':

        # Step 1: Import the Extract File
        preExtract = Extract(tde_name)

        # Step 2: Open existing table
        table = preExtract.openTable('Extract')

        # Step 3: Import the table definition
        tableDef = table.getTableDefinition()

        # Step 4: Create rows and insert them one by one
        newRow = Row(tableDef)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString	    (0, dataset['Station'][i])
            newRow.setString		(1, dataset['Time'][i])
            newRow.setDate          (2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble	    (3, dataset['all'][i])
            newRow.setDouble		(4, dataset['n7'][i])
            newRow.setDouble	    (5, dataset['n8'][i])
            newRow.setDouble        (6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the extract
        preExtract.close()
Beispiel #12
0
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):


  xLen = len(heatmap)
  yLen = len(heatmap[0])
  xRange = abs(extent[0] - extent[1])
  yRange = abs(extent[2] - extent[3])
  xMin = min(extent[0], extent[1])
  yMin = min(extent[2], extent[3])
  xIncrement = xRange / yLen
  yIncrement = yRange / xLen

  # 1. initialize a new extract
  ExtractAPI.initialize()

  # 2. Create a table definition
  new_extract = Extract(extractLocation)

  # 3. Add column definitions to the table definition
  table_definition = TableDefinition()
  table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
  table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
  table_definition.addColumn('VALUE', Type.DOUBLE)  # column 2
  table_definition.addColumn('ID', Type.UNICODE_STRING)
  table_definition.addColumn('CellCount', Type.INTEGER)
  table_definition.addColumn('Smoothing', Type.DOUBLE)
  table_definition.addColumn('GEOM', Type.SPATIAL)

  # 4. Initialize a new table in the extract
  # a. check if the table already exists
  # Only add table if it doesn't already exist
  if (new_extract.hasTable('Extract') == False):
    new_table = new_extract.addTable('Extract', table_definition)
  else:
    new_table = new_extract.openTable('Extract')

  # 5. Create a new row
  new_row = Row(table_definition)  # Pass the table definition to the constructor

  # 6. Populate each new row
  yCoord = yMin
  for i in range(0, xLen):
    yCoord += yIncrement
    xCoord = xMin
    for j in range(0, yLen):
      xCoord += xIncrement

      cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)

      new_row.setString(0, str(i)) # ROW
      new_row.setString(1, str(j)) # COL
      new_row.setDouble(2, heatmap[i][j]) # VAL
      new_row.setString(3, str(i) + '-' + str(j)) # id
      new_row.setInteger(4, len(heatmap[0])) # cell count
      new_row.setDouble(5, smoothing) # smoothing
      new_row.setSpatial(6, cellWkt) # WKT spatial
      new_table.insert(new_row) # Add the new row to the table

  # 7. Save the table and extract
  new_extract.close()

  # 8. Release the extract API
  ExtractAPI.cleanup()
  return
Beispiel #13
0
 def new_tde(self):
     dataExtract = Extract(self.tde_name)
     if dataExtract.hasTable('Extract'):
         return print("tde already exist use another name")
     return dataExtract