Example #1
0
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """Write this Pandas DataFrame out as a Tableau .tde extract.

        @param path = destination path for the .tde file
        @param table_name = name of the table inside the extract
        @param add_index = prepend an auto-incrementing integer index column
        """
        # Clear out stale Data Extract API logs / hyper temp files left
        # over from a previous run, so this run starts clean.
        leftovers = (glob('DataExtract*.log') + glob('hyper_db_*') +
                     [os.path.dirname(path) + '/debug.log',
                      './DataExtract.log',
                      './debug.log'])
        for leftover in leftovers:
            if os.path.isfile(leftover):
                os.remove(leftover)

        # Spin up the Extract API and open (or create) the target extract.
        ExtractAPI.initialize()
        extract = Extract(path)

        # Build the table schema first if the extract does not have it yet.
        if not extract.hasTable(table_name):
            print("Table '{}' does not exist in extract {}, creating.".format(
                table_name, path))
            self.set_table_structure(extract, table_name, add_index)

        table = extract.openTable(table_name)
        definition = table.getTableDefinition()

        # Stream the DataFrame's values into the table.
        self.set_column_values(table, definition, add_index)

        # Flush to disk and release the API.
        extract.close()
        ExtractAPI.cleanup()
### Map pandas/numpy dtypes onto Tableau extract column types ###
# NOTE(review): `column_types` and `column_headers_types` are defined
# above the visible fragment — presumably per-column dtypes collected
# from a DataFrame; confirm upstream. Anything unrecognized falls back
# to a unicode string column.
for i in column_types:
    if i is numpy.int64:
        column_headers_types.append(Type.INTEGER)
    elif i is numpy.float64:
        column_headers_types.append(Type.DOUBLE)
    elif i is pd.Timestamp:
        column_headers_types.append(Type.DATETIME)
    else:
        column_headers_types.append(Type.UNICODE_STRING)

### Reuse the existing table schema when doing an incremental refresh ###
if incremental_refresh == 1:
    dataExtract = Extract(extract_name)
    if dataExtract.hasTable('Extract'):
        table = dataExtract.openTable('Extract')
        dataSchema = table.getTableDefinition()

else:
    ### Full refresh: delete any existing extract file(s) first ###
    # NOTE(review): `re.search(extract_name, f)` treats the extract name
    # as a regex pattern against every file in the cwd — a name containing
    # regex metacharacters (e.g. ".") may match unintended files; verify.
    try:
        cwd = os.getcwd()
        for f in os.listdir(cwd):
            if re.search(extract_name, f):
                os.remove(os.path.join(cwd, f))
    # NOTE(review): Python 2-only except syntax; best-effort cleanup —
    # OS errors are deliberately ignored.
    except OSError, e:
        pass

    ### Initializing the Extract API and creating a fresh extract ###
    # (fragment is cut here — schema application continues past the cut)
    ExtractAPI.initialize()
    dataExtract = Extract(extract_name)
Example #3
0
# Dump `combined_data` (a DataFrame built upstream of this fragment)
# into a Tableau extract.
createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")

# Parse the milestone-date columns; unparseable values become NaT.
# BUGFIX: `coerce=True` was removed from pandas long ago —
# `errors='coerce'` is the supported spelling of the same behavior.
for i in combined_data.columns:
    if i in ['Contract Execute', 'Contract Sent to Site', 'Essential Doc',
             'IRB', 'Ip Release']:
        combined_data[i] = pd.to_datetime(combined_data[i], errors='coerce')

# Reuse the existing 'Extract' table's definition if present, otherwise
# start a fresh table definition built from the DataFrame's dtypes.
if createExtract.hasTable('Extract'):
    table = createExtract.openTable('Extract')
    tabledef = table.getTableDefinition()
else:
    tabledef = TableDefinition()

# Map each pandas dtype to the closest Tableau column type.
# (dtypes are mutually exclusive, so this is an elif chain.)
for i in combined_data.columns:
    dtype = str(combined_data[i].dtype)
    if dtype == "int64":
        tabledef.addColumn(i, Types.Type.INTEGER)
    elif dtype == "object":
        tabledef.addColumn(i, Types.Type.CHAR_STRING)
    elif dtype == "datetime64[ns]":
        tabledef.addColumn(i, Types.Type.DATE)
    elif dtype == "float64":
        tabledef.addColumn(i, Types.Type.DOUBLE)
Example #4
0
# Describe the output schema: one record per flight-path point.
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)

# 4. Initialize a new table in the extract (reuse it if it already exists)
if (new_extract.hasTable('Extract') == False):
  new_table = new_extract.addTable('Extract', table_definition)
else:
  new_table = new_extract.openTable('Extract')

# 5. Create a single reusable row buffer
new_row = Row(table_definition)  # Pass the table definition to the constructor

# 6. walk through the origin/destination data from CSV, write each path to TDE
# NOTE(review): opening in 'rb' and handing the file to csv.reader is the
# Python 2 pattern; under Python 3 csv wants text mode ('r', newline='').
with open(csvLocation, 'rb') as csvfile:
  csvreader = csv.reader(csvfile, delimiter='|')

  next(csvreader) # skip header

  # NOTE(review): loop body continues past the visible cut — ORIG_LAT /
  # ORIG_LON are column indices defined outside this fragment.
  for row in csvreader:
    # print row
    olat = float(row[ORIG_LAT])
    olon = float(row[ORIG_LON])
Example #5
0
def _write_rows(table, table_def, dataset):
    """Append every record of `dataset` to `table`, one Row per record.

    `dataset` is indexed like a DataFrame with columns Station, Time,
    Date (date-like with .year/.month/.day), all, n7, n8, y_serries.
    """
    row = Row(table_def)
    for i in range(len(dataset)):
        print(i)  # progress indicator, one line per inserted record
        row.setString(0, dataset['Station'][i])
        row.setString(1, dataset['Time'][i])
        d = dataset['Date'][i]
        row.setDate(2, d.year, d.month, d.day)
        row.setDouble(3, dataset['all'][i])
        row.setDouble(4, dataset['n7'][i])
        row.setDouble(5, dataset['n8'][i])
        row.setDouble(6, dataset['y_serries'][i])
        table.insert(row)


def to_tde(dataset, tde_mode, tde_name):
    """Write `dataset` to the Tableau extract `tde_name`.

    @param dataset  = DataFrame-like source with the expected columns
    @param tde_mode = 'new' to create a fresh extract (refuses to
                      overwrite), 'append' to add rows to an existing one
    @param tde_name = path of the .tde file
    """
    if tde_mode == 'new':
        # Step 1: Create the extract file; refuse to clobber an existing table.
        dataExtract = Extract(tde_name)
        if dataExtract.hasTable('Extract'):
            print("tde already exist use another name")
            return

        # Step 2: Build the table definition.
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Steps 3-4: Create the table and fill it.
        table = dataExtract.addTable('Extract', dataSchema)
        _write_rows(table, dataSchema, dataset)

        # Step 5: Flush and close.
        dataExtract.close()

    elif tde_mode == 'append':
        # Steps 1-3: Open the existing extract and reuse its schema.
        preExtract = Extract(tde_name)
        table = preExtract.openTable('Extract')
        tableDef = table.getTableDefinition()

        # Step 4: Fill with the same row-writing logic as 'new' mode.
        _write_rows(table, tableDef, dataset)

        # Step 5: Flush and close.
        preExtract.close()
Example #6
0
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
  """Write a 2-D heatmap grid to a Tableau extract, one record per cell.

  heatmap:         2-D array-like of cell values (rows x cols)
  extent:          (x0, x1, y0, y1) bounds of the grid in map units
  smoothing:       smoothing parameter recorded on every record
  extractLocation: path of the .tde file to create or reopen
  """
  rowCount = len(heatmap)
  colCount = len(heatmap[0])
  xSpan = abs(extent[0] - extent[1])
  ySpan = abs(extent[2] - extent[3])
  xStart = min(extent[0], extent[1])
  yStart = min(extent[2], extent[3])
  # Columns step along x, rows step along y.
  xStep = xSpan / colCount
  yStep = ySpan / rowCount

  # 1. Start the Extract API.
  ExtractAPI.initialize()

  # 2. Open (or create) the target extract.
  extract = Extract(extractLocation)

  # 3. Describe the output schema.
  schema = TableDefinition()
  schema.addColumn('ROW', Type.UNICODE_STRING)   # column 0
  schema.addColumn('COL', Type.UNICODE_STRING)   # column 1
  schema.addColumn('VALUE', Type.DOUBLE)         # column 2
  schema.addColumn('ID', Type.UNICODE_STRING)
  schema.addColumn('CellCount', Type.INTEGER)
  schema.addColumn('Smoothing', Type.DOUBLE)
  schema.addColumn('GEOM', Type.SPATIAL)

  # 4. Reuse the table if the extract already has one, else create it.
  if extract.hasTable('Extract'):
    table = extract.openTable('Extract')
  else:
    table = extract.addTable('Extract', schema)

  # 5. One reusable row buffer for all inserts.
  record = Row(schema)

  # 6. Walk the grid. Coordinates are accumulated (not multiplied out)
  # so the float values match the original accumulation exactly; note
  # each increment is applied before the first cell in its run.
  yCoord = yStart
  for i, cells in enumerate(heatmap):
    yCoord += yStep
    xCoord = xStart
    for j in range(colCount):
      xCoord += xStep

      cellWkt = getFourCorners([xCoord, yCoord], xStep, yStep)

      record.setString(0, str(i))                  # ROW
      record.setString(1, str(j))                  # COL
      record.setDouble(2, cells[j])                # VALUE
      record.setString(3, str(i) + '-' + str(j))   # ID
      record.setInteger(4, colCount)               # cell count
      record.setDouble(5, smoothing)               # smoothing
      record.setSpatial(6, cellWkt)                # WKT geometry
      table.insert(record)

  # 7. Flush the extract to disk.
  extract.close()

  # 8. Release the Extract API.
  ExtractAPI.cleanup()
  return
        # NOTE(review): fragment starts mid-function — the `if` guarding
        # this timeperiod column lives above the visible cut.
        # add the time period column
        table_definition.addColumn("timeperiod", Type.UNICODE_STRING)
        new_colnames_to_idx["timeperiod"] = len(new_colnames_to_idx)
    # add the src column (which input directory each row came from)
    table_definition.addColumn("src", Type.UNICODE_STRING)
    new_colnames_to_idx["src"] = len(new_colnames_to_idx)
    table_definition.addColumn("Scenario", Type.UNICODE_STRING)
    new_colnames_to_idx["Scenario"] = len(new_colnames_to_idx)
    
    # print "old_colnames = %s" % str(old_colnames)
    # print "new colnames = %s" % str(new_colnames_to_idx)
    # print colnames_to_types
    
    # Step 3: Create the table in the image of the tableDef
    # (open the existing table when appending, otherwise add a new one)
    if arg_append:
        new_table = new_extract.openTable('Extract')
    else:
        new_table = new_extract.addTable('Extract', table_definition)

    # Step 4: Loop through the csv grab all the data, put it into rows
    # and insert the rows in the table
    new_row = Row(table_definition)
    for csv_dirpath in csv_dirpaths:
        csv_fullpath = os.path.join(csv_dirpath, csv_filename)

        # Derive `src`: strip the filename, then peel off trailing
        # well-known directory names until a meaningful path remains.
        src = os.path.split(csv_fullpath)[0] # remove the filename part of the path
        tail = ""
        while tail in ["", "trn", "iter3", "hwy", "OUTPUT"]:
            (src,tail) = os.path.split(src)      # remove one more dir from path (e.g. core_summaries)
        # NOTE(review): Python 2 print statement — this fragment predates
        # Python 3; loop body continues past the visible cut.
        print "tail=[%s] src=[%s]" % (tail,src)