Example #1
    def to_tableau(self, path, add_index=False):
        '''
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param add_index = adds an incrementing integer index before the dataframe columns
        '''

        # Delete the extract and any debug logs if they already exist
        for file in [path, os.path.dirname(path) + '/debug.log',
                     './DataExtract.log', './debug.log']:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        # Set columns in Tableau
        if add_index:
            table_def.addColumn('index', Type.INTEGER)

        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])

        # Create table
        new_table = new_extract.addTable("Extract", table_def)

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
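
For context, this to_tableau is an instance method on a DataFrame wrapper (the self._dataframe and self._column_static_type attributes suggest a pandleau-style class). A minimal usage sketch, assuming such a wrapper is importable as pandleau; the package name and import path are assumptions:

import pandas as pd
from pandleau import pandleau  # assumed wrapper package

df = pd.DataFrame({'city': ['Berlin', 'Paris'], 'sales': [10.5, 7.2]})
wrapper = pandleau(df)                           # infers a Tableau type per column
wrapper.to_tableau('sales.tde', add_index=True)  # writes the extract to disk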
Example #2
    def new_tde(self, data):
        # Step 1: Create the Extract File
        dataExtract = Extract(self.tde_name)
        if dataExtract.hasTable('Extract'):
            print("TDE already exists; use another name")
            return

        # Step 2: Create the table definition
        cols = data.columns.tolist()
        dataSchema = TableDefinition()
        for col in cols:
            dataSchema.addColumn(col, Type.UNICODE_STRING)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        for i in range(len(data)):
            for col_index, col in enumerate(cols):
                newRow.setString(col_index, data[col][i])
            table.insert(newRow)  # insert once per row, not once per column

        # Step 5: Close the tde
        dataExtract.close()
class TDEExport(object):
    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col['type'], Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        self.errors = 0
        self.nrows = 0

        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.close()

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print("Encountered %d errors" % self.errors)

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:
                self.errors += 1
                # Throttle error logging: all of the first 100, then samples
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 == 0):
                    try:
                        print("[err #%s] Failed setting: col=%s type=%s val=%s  err=%s" % (self.errors, col['name'], col['type'], data, e))
                    except Exception:
                        print("[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED  err=%s" % (self.errors, col['name'], col['type'], e))
        self.table.insert(output_row)
        self.nrows += 1
class TDEExport(object):

    @staticmethod
    def convert_type(col_type):
        return typeMap.get(col_type, Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema

        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.close()

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:
                # Log and keep going; the failed column is left unset
                print("Failed setting field %s to value %s: %s" % (col['name'], data, e))
        self.table.insert(output_row)
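
Because TDEExport implements __enter__ and __exit__, it can drive a with block. A hedged usage sketch: the list-of-dicts schema shape is inferred from make_table_definition, and the 'string'/'double' type keys are assumptions about what typeMap and fieldSetterMap accept:

schema = [
    {'name': 'customer', 'type': 'string'},  # assumed typeMap key
    {'name': 'amount', 'type': 'double'},    # assumed typeMap key
]
with TDEExport('/tmp/orders.tde', schema) as exporter:
    exporter.insert_array_row([u'ACME Corp', 129.99])
    exporter.insert_array_row([u'Globex', 42.0])
# leaving the block calls close(), which saves the extract and releases the API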
def writeTableToTDE(pgFields, pgData, extractLocation):
    print("writing table to {0}").format(extractLocation)

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for i in range(0, len(pgFields)):
        table_definition.addColumn(pgFields[i][0], pgFields[i][1])

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add table if it doesn't already exist
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(
        table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    numberRecords = len(pgData)
    for i in range(0, numberRecords):
        # Note that this doesn't cover all possible TDE data types
        for j in range(0, len(pgFields)):
            if pgFields[j][1] == Type.INTEGER:
                new_row.setInteger(j, pgData[i][j])
            elif pgFields[j][1] == Type.UNICODE_STRING:
                new_row.setString(j, pgData[i][j])
            elif pgFields[j][1] == Type.SPATIAL:
                new_row.setSpatial(j, pgData[i][j])
            elif pgFields[j][1] == Type.BOOLEAN:
                new_row.setBoolean(j, pgData[i][j])
            elif pgFields[j][1] == Type.DOUBLE:
                new_row.setDouble(j, pgData[i][j])
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
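
A hedged usage sketch for writeTableToTDE, with hand-built field and data lists standing in for a real query result; each field pairs a column name with the Type constant the loop in step 6 dispatches on:

pgFields = [('id', Type.INTEGER),
            ('name', Type.UNICODE_STRING),
            ('score', Type.DOUBLE)]
pgData = [(1, u'alpha', 0.75),
          (2, u'beta', 0.25)]
writeTableToTDE(pgFields, pgData, 'scores.tde')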
Example #7
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """

        # Delete the extract and any debug logs if they already exist
        for file in [path, os.path.dirname(path) + '/debug.log',
                     './DataExtract.log', './debug.log']:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)
        table_def = TableDefinition()

        # Set columns in Tableau
        if add_index:
            # Find an index column name that does not collide with the data
            index_col = 'index'
            suffix = 1
            while index_col in self._dataframe.columns:
                index_col = 'index_%s' % suffix
                suffix += 1
            table_def.addColumn(index_col, Type.INTEGER)

        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])

        # Create table
        new_table = new_extract.addTable(table_name, table_def)

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
Example #8
    def to_tableau(self, path, table_name='Extract', add_index=False):
        """
        Converts a Pandas DataFrame to a Tableau .tde file
        @param path = path to write file
        @param table_name = name of the table in the extract
        @param add_index = adds incrementing integer index before dataframe columns

        """

        # Delete dataextract logs and hyper_db temp files if they already exist
        files = (glob('DataExtract*.log') + glob('hyper_db_*') + [
            os.path.dirname(path) + '/debug.log', './DataExtract.log',
            './debug.log'
        ])

        for file in files:
            if os.path.isfile(file):
                os.remove(file)

        # Create Extract and Table
        ExtractAPI.initialize()
        new_extract = Extract(path)

        if not new_extract.hasTable(table_name):
            print("Table '{}' does not exist in extract {}, creating.".format(
                table_name, path))
            self.set_table_structure(new_extract, table_name, add_index)

        new_table = new_extract.openTable(table_name)
        table_def = new_table.getTableDefinition()

        # Set Column values
        self.set_column_values(new_table, table_def, add_index)

        # Close extract
        new_extract.close()
        ExtractAPI.cleanup()
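
Example #8 delegates column setup to set_table_structure, which is not shown here. A plausible sketch, reconstructed from how Example #7 builds its TableDefinition; the real method may differ:

    def set_table_structure(self, extract, table_name, add_index):
        # Hedged reconstruction mirroring the column loop in Example #7
        table_def = TableDefinition()
        if add_index:
            table_def.addColumn('index', Type.INTEGER)
        for col_index, col_name in enumerate(self._dataframe):
            table_def.addColumn(col_name, self._column_static_type[col_index])
        extract.addTable(table_name, table_def)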
# STEP 4 - Add column definitions, mapping pandas dtypes to extract types
# (the opening of this snippet was truncated; the loop header and first two
# branches below are reconstructed to match the dtype mapping in STEP 7)
for i in range(len(df.columns)):
    if df.dtypes[i] == 'object':
        table_definition.addColumn(df.columns[i], Type.UNICODE_STRING)
    elif df.dtypes[i] == 'float64':
        table_definition.addColumn(df.columns[i], Type.DOUBLE)
    elif df.dtypes[i] == 'int64':
        table_definition.addColumn(df.columns[i], Type.INTEGER)
    elif df.dtypes[i] == 'datetime64[ns]':
        table_definition.addColumn(df.columns[i], Type.DATE)

# STEP 5 - Initialize a new table in the extract with the addTable() method
new_table = td_extract.addTable('Extract', table_definition)

# STEP 6 - Create a new row with the Row() constructor
new_row = Row(table_definition)

# STEP 7 - Populate each new row
for j in range(0, df.shape[0]):
    for i in range(len(df.columns)):
        if df.dtypes[i] == 'object':
            new_row.setString(i, df.iloc[j,i])
        elif df.dtypes[i] == 'float64':
            new_row.setDouble(i, df.iloc[j,i])
        elif df.dtypes[i] == 'int64':
            new_row.setInteger(i, df.iloc[j,i])
        elif df.dtypes[i] == 'datetime64[ns]':
            new_row.setDate(i, df.iloc[j,i].year, df.iloc[j,i].month, df.iloc[j,i].day)
    new_table.insert(new_row)

# STEP 8 - Save table, extract, and performs cleanup
td_extract.close()

# STEP 9 - Release the Extract API. ONLY if ExtractAPI.initialize() was used
ExtractAPI.cleanup()


            elif column_headers_types[j] == 13:  # 13 == Type.DATETIME
                try:
                    newRow.setDateTime(j,
                                       i.get(column_headers[j]).year,
                                       i.get(column_headers[j]).month,
                                       i.get(column_headers[j]).day,
                                       i.get(column_headers[j]).hour,
                                       i.get(column_headers[j]).minute,
                                       i.get(column_headers[j]).second, 0)
                except:
                    newRow.setNull(j)
            elif column_headers_types[j] == 10:  # 10 == Type.DOUBLE
                try:
                    newRow.setDouble(j, i.get(column_headers[j]))
                except:
                    newRow.setNull(j)
            else:
                try:
                    newRow.setString(j, i.get(column_headers[j]))
                except:
                    newRow.setNull(j)
        table.insert(newRow)
    except:
        pass

###Final Procedures###
client.close()
dataExtract.close()
ExtractAPI.cleanup()
#####################################################
                        except:
                            print("line = ", line)
                            print("exception!  colname=[%s] val=[%s]" % (colname, str(line[orig_colname])))
                            raise
                            
                    # unwind case
                    else:
                        new_colname = unwind_timeperiods[colname][0]
                        col_tp      = unwind_timeperiods[colname][1]
                        col_idx     = new_colnames_to_idx[new_colname]
                        if col_tp == timeperiod:
                            new_row.setDouble(col_idx, float(line[orig_colname]))

                if unwinding:
                    # and the time period
                    new_row.setString(new_colnames_to_idx["timeperiod"], timeperiod)
                # and the src
                new_row.setString(new_colnames_to_idx["src"], src)
                new_row.setString(new_colnames_to_idx["Scenario"], scenario)
                new_table.insert(new_row)
                
                tde_lines_written += 1
        
        print "Read  %6d rows from %s" % (csv_lines_read, csv_fullpath)
        
    # Step 5: Close the tde
    new_extract.close()
    print "Wrote %6d rows to   %s" % (tde_lines_written, tde_fullpath)
	
ExtractAPI.cleanup()	
Example #12
                newrow.setNull(n_col)
        if str(combined_data[name].dtype) == "datetime64[ns]":
            #newrow.setCharString(n_col,c[name])
            if c[name] is not None and str(c[name])!="NaT":
                date = datetime.datetime.strptime(str(c[name])[:10],"%Y-%m-%d")
                newrow.setDate(n_col,date.year,date.month,date.day)
            if c[name] is None or str(c[name])=="NaT":
                newrow.setNull(n_col)
        if str(combined_data[name].dtype) == "float64":
            if c[name] is not None:
                newrow.setDouble(n_col,c[name])
            if c[name] is None:
                newrow.setNull(n_col)
    table.insert(newrow)
#print combined_data._slice(slice(1,2))
createExtract.close()


# ServerAPI.initialize()
# con=ServerConnection()
# con.connect("https://tableau-dev.gobalto.com","*****@*****.**","blahblahblah^","Storm")
# print "connect success"
#
# con.publishExtract("/Users/justinlin/Desktop/data_stuff/predict_data_Extract.tde","default",'predict_stuff',overwrite=True)
# print "publish datasource"
#
# con.close()
# print "close server connection"


    print("row:", n, "column:1", "value:", joinDF_auto.iloc[n, 1].year,
          joinDF_auto.iloc[n, 1].month, joinDF_auto.iloc[n, 1].day)
    print("row:", n, "column:2", "value:", joinDF_auto.loc[n, 'Patient Name'])

    #print("row:",n, "column:3", "value:",joinDF_auto.iloc[n,3])
    #print("row:",n, "column:4", "value:",joinDF_auto.iloc[n,4])

    new_row.setInteger(0, joinDF_auto.iloc[n, 0])
    new_row.setDate(1, joinDF_auto.iloc[n, 1].year,
                    joinDF_auto.iloc[n, 1].month, joinDF_auto.iloc[n, 1].day)
    new_row.setString(2, joinDF_auto.loc[n, 'Patient Name'])
    #parse out and set date parts: year, month, day

    #new_row.setString(3, joinDF_auto.iloc[n,3])
    #new_row.setString(4, joinDF_auto.iloc[n,4])

    new_table.insert(new_row)  # Add the new row to the table

# Close the extract in order to save the .tde file and clean up resources
new_extract.close()
ExtractAPI.cleanup()

print("------------------------------------------------")
print("------------------------------------------------")
print "data prep finsished..."

# Output elapsed time
print "Elapsed:", locale.format("%.2f", time.clock() - startTime), "seconds"
print("------------------------------------------------")
print("------------------------------------------------")
Example #14
def tableau_extract(resultset, data_file):
    """
    Create TDE extract.
    """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset
    data_type = []
    fieldnames = []
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd.Timestamp: Type.DATETIME
                     }

    for col in df_name:
        fieldnames.append(col)
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))

    for col_name in data_dict:
        if data_dict[col_name] in data_type_map:
            data_dict[col_name] = data_type_map[data_dict[col_name]]
        else:
            data_dict[col_name] = Type.UNICODE_STRING
    # Initialize a new extract
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)
    # Numeric values of the dataextract Type enum, used for dispatch below
    tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12,
                 'DATETIME': 13, 'DURATION': 14,
                 'CHAR_STRING': 15, 'UNICODE_STRING': 16}
    for i in range(0, len(df_name)):
        for col in range(0, table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            try:
                if data_dict[col_name] == tde_types['INTEGER']:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DOUBLE']:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['BOOLEAN']:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DATE']:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif data_dict[col_name] == tde_types['DATETIME']:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year, data.month, data.day,
                                        data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['DURATION']:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['CHAR_STRING']:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['UNICODE_STRING']:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Unmapped type for column %s; writing NULL' % col_name)
                    new_row.setNull(col)
            except TypeError:
                new_row.setNull(col)
        new_table.insert(new_row)

    new_extract.close()
    ExtractAPI.cleanup()
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
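
A hedged usage sketch for tableau_extract, passing a plain DataFrame (the ResultSet branch is bypassed when the argument is already a frame); column types are inferred from the first value of each column, as the function does:

import pandas as pd

df = pd.DataFrame({
    'user': [u'ann', u'bob'],
    'spent': [12.5, 3.0],
    'when': pd.to_datetime(['2016-01-01', '2016-02-01']),
})
tableau_extract(df, 'users.tde')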
Example #15
def to_tde(dataset, tde_mode, tde_name):

    if tde_mode == 'new':

        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)

        if dataExtract.hasTable('Extract'):
            print("TDE already exists; use another name")
            return

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':

        # Step 1: Import the Extract File
        preExtract = Extract(tde_name)

        # Step 2: Open existing table
        table = preExtract.openTable('Extract')

        # Step 3: Import the table definition
        tableDef = table.getTableDefinition()

        # Step 4: Create rows and insert them one by one
        newRow = Row(tableDef)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the extract
        preExtract.close()
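
A hedged usage sketch for both modes of to_tde; the frame must carry exactly the seven columns the schema hardcodes, including the 'y_serries' spelling:

import datetime
import pandas as pd

df = pd.DataFrame({
    'Station': [u'S1'], 'Time': [u'08:00'],
    'Date': [datetime.date(2017, 1, 1)],
    'all': [1.0], 'n7': [0.5], 'n8': [0.25], 'y_serries': [0.1],
})
to_tde(df, 'new', 'stations.tde')     # create a fresh extract
to_tde(df, 'append', 'stations.tde')  # append more rows to the same table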
Example #16
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):


  xLen = len(heatmap)
  yLen = len(heatmap[0])
  xRange = abs(extent[0] - extent[1])
  yRange = abs(extent[2] - extent[3])
  xMin = min(extent[0], extent[1])
  yMin = min(extent[2], extent[3])
  xIncrement = xRange / yLen
  yIncrement = yRange / xLen

  # 1. initialize a new extract
  ExtractAPI.initialize()

  # 2. Create a table definition
  new_extract = Extract(extractLocation)

  # 3. Add column definitions to the table definition
  table_definition = TableDefinition()
  table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
  table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
  table_definition.addColumn('VALUE', Type.DOUBLE)  # column 2
  table_definition.addColumn('ID', Type.UNICODE_STRING)
  table_definition.addColumn('CellCount', Type.INTEGER)
  table_definition.addColumn('Smoothing', Type.DOUBLE)
  table_definition.addColumn('GEOM', Type.SPATIAL)

  # 4. Initialize a new table in the extract
  # a. check if the table already exists
  # Only add table if it doesn't already exist
  if not new_extract.hasTable('Extract'):
    new_table = new_extract.addTable('Extract', table_definition)
  else:
    new_table = new_extract.openTable('Extract')

  # 5. Create a new row
  new_row = Row(table_definition)  # Pass the table definition to the constructor

  # 6. Populate each new row
  yCoord = yMin
  for i in range(0, xLen):
    yCoord += yIncrement
    xCoord = xMin
    for j in range(0, yLen):
      xCoord += xIncrement

      cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)

      new_row.setString(0, str(i)) # ROW
      new_row.setString(1, str(j)) # COL
      new_row.setDouble(2, heatmap[i][j]) # VAL
      new_row.setString(3, str(i) + '-' + str(j)) # id
      new_row.setInteger(4, len(heatmap[0])) # cell count
      new_row.setDouble(5, smoothing) # smoothing
      new_row.setSpatial(6, cellWkt) # WKT spatial
      new_table.insert(new_row) # Add the new row to the table

  # 7. Save the table and extract
  new_extract.close()

  # 8. Release the extract API
  ExtractAPI.cleanup()
  return
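
A hedged usage sketch for heatmapToTDE: a 2x2 grid with the extent ordered (x0, x1, y0, y1), as implied by the abs()/min() arithmetic at the top of the function; getFourCorners must be in scope:

heatmap = [[0.1, 0.4],
           [0.7, 1.0]]
extent = (0.0, 2.0, 0.0, 2.0)  # x0, x1, y0, y1
heatmapToTDE(heatmap, extent, 0.5, 'heatmap.tde')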
Example #17
def createTDE(query, dbname, tde_filename):
    start_time = time.time()
    db = sql.connect(host="127.0.0.1", port=9999, user="******", db=dbname)
    cursor = db.cursor()
    cursor.execute(query)
    
    ExtractAPI.initialize()

    if os.path.isfile(tde_filename):
        os.remove(tde_filename)
    tde = Extract(tde_filename)
    
    table_def = makeTableDefinition(cursor.description)
    
    tde_table = tde.addTable('Extract', table_def)

    tde_row = Row(table_def)   # Pass the table definition to the constructor

    size = 1000
    many = cursor.fetchmany(size)
    rows_counter = 0
    while many:
        # add counter for rows incase of database disconnection
        for row in many:
            # Create new row
            for colno, col in enumerate(row):

                col_type = table_def.getColumnType(colno)
                setType = tde_set_type_function_map[col_type]
                # print setType
                # print type(setType)
                setType(tde_row, colno, col)
            tde_table.insert(tde_row)
            rows_counter += 1
        many = cursor.fetchmany(size)
            #tde_row.close()
    print "cleared while loop"
            # Set column values. The first parameter is the column number (its
            # ordinal position) The second parameter (or second and subsequent paramaters) is 
            # the value to set the column to.    
                
#                 new_row.setInteger(0, 1)
#                 new_row.setString(1, 'Amanda')
#                 new_row.setDate(2, 2014, 6, 9)
#                 new_table.insert(new_row) # Add the new row to the table


# new_row.setInteger(0, 1)
# new_row.setString(1, 'Amanda')
# new_row.setDate(2, 2014, 6, 9)
# new_table.insert(new_row) # Add the new row to the table

# new_row.setInteger(0, 2)
# new_row.setString(1, 'Brian')
# new_row.setDate(2, 2014, 10, 13)
# new_table.insert(new_row)

# new_row.setInteger(0, 3)
# new_row.setString(1, 'Christina')
# new_row.setDate(2, 2015, 2, 16)
# new_table.insert(new_row)

    # Close the extract in order to save the .tde file and clean up resources
    tde.close()
    print("TDE closed")
    ExtractAPI.cleanup()
    print("ExtractAPI cleaned up")
    cursor.close()
    print("cursor closed")
    db.close()
    print("db closed")

    timetaken = time.time() - start_time
    print(str(rows_counter) + ' rows inserted in ' + str(timetaken) + ' seconds')
    print('    (' + str(rows_counter / timetaken) + ' rows per second)')
    return
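
createTDE leans on two helpers that are not shown, makeTableDefinition and tde_set_type_function_map. A hedged sketch of what they might look like; the all-strings column mapping and the setter map contents are assumptions, not the author's originals:

def makeTableDefinition(description):
    # description is DB-API cursor.description: a sequence of 7-tuples
    # whose first element is the column name. Type codes are driver-
    # specific, so this sketch maps every column to UNICODE_STRING.
    table_def = TableDefinition()
    for column in description:
        table_def.addColumn(column[0], Type.UNICODE_STRING)
    return table_def

# Maps an extract column type to a setter called as f(row, colno, value)
tde_set_type_function_map = {
    Type.INTEGER: Row.setInteger,
    Type.DOUBLE: Row.setDouble,
    Type.UNICODE_STRING: lambda row, colno, val: row.setString(colno, str(val)),
}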
def methodMultithread(chunkSize):
    threads = []

    writeThread = WriteThread()
    writeThread.start()
    threads.append(writeThread)

    readThread = ReadThread(chunkSize)
    readThread.start()
    threads.append(readThread)

    for t in threads:
        t.join()


print str(datetime.datetime.now()) + " start time of fetch"
methodFetchOne()
# methodFetchMany(100000)
# methodFetchMany(500000)
# methodMultithread(100000)
# methodMultithread(500000)
print str(datetime.datetime.now()) + " end time of fetch"

# Disconnect from Greenplum
cnxn.close()
# Close the extract in order to save the .tde file and clean up resources
tdenew.close()
# Cleanup the extract API process
ExtractAPI.cleanup()
def main():
    pp = pprint.PrettyPrinter(indent=2)

    key = input("API Key: ")

    limit = 100
    offset = 0

    # Create TDE extract file

    if os.path.isfile(extract_file):
        os.remove(extract_file)

    ExtractAPI.initialize()
    tdefile = Extract(extract_file)

    # Create the table definition
    # Data types: INTEGER, DOUBLE, BOOLEAN, DATE, DATETIME, DURATION, CHAR_STRING, UNICODE_STRING
    tableDef = TableDefinition()
    tableDef.addColumn('alert_id', Type.CHAR_STRING)
    tableDef.addColumn('timestamp', Type.DATETIME)
    tableDef.addColumn('alert_type', Type.CHAR_STRING)
    tableDef.addColumn('alert_status', Type.CHAR_STRING)
    tableDef.addColumn('alert_network', Type.CHAR_STRING)
    tableDef.addColumn('notes', Type.UNICODE_STRING)
    tableDef.addColumn('metadata', Type.UNICODE_STRING)
    tableDef.addColumn('rule_name', Type.CHAR_STRING)
    tableDef.addColumn('severity', Type.INTEGER)
    tableDef.addColumn('asset_name', Type.UNICODE_STRING)
    tableDef.addColumn('asset_image', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_username', Type.UNICODE_STRING)
    tableDef.addColumn('perpetrator_displayname', Type.UNICODE_STRING)
    tableDef.addColumn('perpetrator_type', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_image', Type.CHAR_STRING)
    tableDef.addColumn('takedown_request_time', Type.DATETIME)
    tableDef.addColumn('takedown_accept_time', Type.DATETIME)
    tableDef.addColumn('close_time', Type.DATETIME)

    # Create table in image of tableDef
    table = tdefile.addTable('Extract', tableDef)

    rowcount = 0

    done = 'n'
    while done == 'n':
        response = get_page(key, limit, offset)
        alerts = response['alerts']
        if len(alerts) == 0:
            done = 'y'
            break
        for alert in alerts:
            newrow = createrow(alert,tableDef)

            table.insert(newrow)

            rowcount +=1
            if rowcount >= LIMIT:
                done = 'y'
                break

        offset += 100
    tdefile.close()
    ExtractAPI.cleanup()
    print(str(rowcount) + " alerts processed")
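
main relies on createrow (not shown) to turn one alert dict into a Row. A hedged sketch of the null-safe pattern it presumably follows, shown for the first two columns only; the alert key names are assumptions:

def createrow(alert, tableDef):
    row = Row(tableDef)
    if alert.get('alert_id') is not None:
        row.setCharString(0, alert['alert_id'])
    else:
        row.setNull(0)
    ts = alert.get('timestamp')  # assumed to be a datetime
    if ts is not None:
        row.setDateTime(1, ts.year, ts.month, ts.day,
                        ts.hour, ts.minute, ts.second, 0)
    else:
        row.setNull(1)
    # ...remaining columns follow the same set-or-null pattern...
    return row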