def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file.

    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = prepend an incrementing integer index column

    BUG FIX: the docstring documented a table-name parameter that did not
    exist and the name was hardcoded; it is now a real, defaulted parameter.
    """
    # Delete the extract and debug logs if they already exist: the Extract
    # API appends to an existing .tde rather than overwriting it.
    for file in [path, os.path.dirname(path) + '/debug.log',
                 './DataExtract.log', './debug.log']:
        if os.path.isfile(file):
            os.remove(file)

    # Create the extract and an empty table definition.
    ExtractAPI.initialize()
    new_extract = Extract(path)
    table_def = TableDefinition()

    # Declare the Tableau columns: optional integer index first, then the
    # frame's columns using their pre-computed static types.
    if add_index:
        table_def.addColumn('index', Type.INTEGER)
    for col_index, col_name in enumerate(self._dataframe):
        table_def.addColumn(col_name, self._column_static_type[col_index])

    # Create the table and populate it row by row.
    new_table = new_extract.addTable(table_name, table_def)
    self.set_column_values(new_table, table_def, add_index)

    # Close the extract to flush it to disk and release the API.
    new_extract.close()
    ExtractAPI.cleanup()
def new_tde(self, data):
    """
    Create a new .tde extract at self.tde_name from *data* (a DataFrame),
    writing every column as a unicode string.

    BUG FIXES: the extract was closed twice (the second close operated on an
    already-closed extract); per-cell `cols.index(col)` made row writing
    O(columns^2) — replaced with enumerate.
    """
    # Step 1: Create the Extract File
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        # Same message/return-None behavior as before, without the
        # confusing `return print(...)` idiom.
        print("tde already exist use another name")
        return

    # Step 2: Create the table definition — every column as a string.
    cols = data.columns.tolist()
    dataSchema = TableDefinition()
    for col in cols:
        dataSchema.addColumn(col, Type.UNICODE_STRING)

    # Step 3: Create a table in the image of the table definition
    table = dataExtract.addTable('Extract', dataSchema)

    # Step 4: Create rows and insert them one by one (the Row object is
    # reusable; insert() snapshots its current values).
    newRow = Row(dataSchema)
    for i in range(len(data)):
        for col_index, col in enumerate(cols):
            newRow.setString(col_index, data[col][i])
        table.insert(newRow)

    # Step 5: Close the tde (exactly once).
    dataExtract.close()
class TDEExport(object):
    """Streams array-shaped rows into a Tableau .tde extract, logging (but
    tolerating) per-cell conversion failures. Usable as a context manager."""

    @staticmethod
    def make_table_definition(schema):
        """Build a TableDefinition from a schema of {'name','type'} dicts;
        unknown types fall back to unicode strings."""
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"], Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        self.errors = 0   # count of cell-level conversion failures
        self.nrows = 0    # count of rows inserted
        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Flush the extract, release the Extract API, and report errors."""
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print("Encountered %d errors" % self.errors)

    def insert_array_row(self, input_row):
        """Convert one array-shaped row and insert it into the extract.

        BUG FIX: the assembled Row was built but never inserted into the
        table (and nrows was never incremented) — compare the sibling
        TDEExport variant in this file, which does insert it.
        """
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:
                self.errors += 1
                # Log with decreasing frequency as errors accumulate.
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 == 0):
                    try:
                        print("[err #%s] Failed setting: col=%s type=%s val=%s err=%s" % (self.errors, col["name"], col["type"], data, e))
                    except Exception as e2:
                        print("[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED err=%s" % (self.errors, col["name"], col["type"], e))
        self.table.insert(output_row)
        self.nrows += 1
class TDEExport(object):
    """Streams array-shaped rows into a Tableau .tde extract, logging (but
    tolerating) per-cell conversion failures. Usable as a context manager."""

    @staticmethod
    def make_table_definition(schema):
        """Build a TableDefinition from a schema of {'name','type'} dicts;
        unknown types fall back to unicode strings."""
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], typeMap.get(col["type"], Type.UNICODE_STRING))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        self.errors = 0   # count of cell-level conversion failures
        self.nrows = 0    # count of rows inserted
        print("Writing TDE file: %s" % self.tde_file_path)
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert not self.extract.hasTable('Extract')
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Flush the extract, release the Extract API, and report errors."""
        self.extract.close()
        ExtractAPI.cleanup()
        if self.errors > 0:
            print("Encountered %d errors" % self.errors)

    def insert_array_row(self, input_row):
        """Convert one array-shaped row and insert it into the extract.

        BUG FIX: the assembled Row was built but never inserted into the
        table (and nrows was never incremented) — compare the sibling
        TDEExport variant in this file, which does insert it.
        """
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception as e:
                self.errors += 1
                # Log with decreasing frequency as errors accumulate.
                if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 == 0):
                    try:
                        print("[err #%s] Failed setting: col=%s type=%s val=%s err=%s" % (self.errors, col["name"], col["type"], data, e))
                    except Exception as e2:
                        print("[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED err=%s" % (self.errors, col["name"], col["type"], e))
        self.table.insert(output_row)
        self.nrows += 1
class TDEExport(object):
    """Streams array-shaped rows into a Tableau .tde extract.

    Python 2 code. Usable as a context manager; field conversion errors are
    printed and skipped (best-effort export), the row is inserted regardless.
    """

    @staticmethod
    def convert_type(type):
        # Map a schema type name to a Tableau extract type; unknown types
        # fall back to unicode string.
        return typeMap.get(type,Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        # Build a TableDefinition from a schema of {'name','type'} dicts.
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        # A fresh extract must not already contain the 'Extract' table.
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close();

    def close(self):
        # Flush the extract to disk and release the Extract API.
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        """Convert one array-shaped row and insert it into the extract."""
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                # fieldSetterMap holds per-type Row setter callables.
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                # Best-effort: log the failed cell and keep going; the cell
                # is simply left unset on the Row.
                print "Failed setting field %s to value %s: %s" % (col["name"], data, e)
                pass
        self.table.insert(output_row)
def writeTableToTDE(pgFields, pgData, extractLocation):
    """
    Write a Postgres result set into a Tableau .tde extract.

    @param pgFields = sequence of (column_name, Type.*) pairs
    @param pgData = sequence of rows, each indexable by column position
    @param extractLocation = path of the .tde file to create/open

    BUG FIXES: `.format` was applied to print()'s None return value
    (AttributeError); the BOOLEAN/DOUBLE branches tested `pgFields[j][i]`
    instead of `pgFields[j][1]`; the DOUBLE branch read `pgData[j][i]`
    instead of `pgData[i][j]`.
    """
    print("writing table to {0}".format(extractLocation))

    # 1. initialize a new extract
    ExtractAPI.initialize()

    # 2. Create/open the extract file
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for field in pgFields:
        table_definition.addColumn(field[0], field[1])

    # 4. Initialize a new table in the extract —
    #    only add the table if it doesn't already exist.
    if not new_extract.hasTable('Extract'):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a single reusable row (insert() snapshots its values)
    new_row = Row(table_definition)

    # 6. Populate each new row.
    # Note that this doesn't cover all possible TDE data types.
    for i in range(len(pgData)):
        for j in range(len(pgFields)):
            col_type = pgFields[j][1]
            value = pgData[i][j]
            if col_type == Type.INTEGER:
                new_row.setInteger(j, value)
            elif col_type == Type.UNICODE_STRING:
                new_row.setString(j, value)
            elif col_type == Type.SPATIAL:
                new_row.setSpatial(j, value)
            elif col_type == Type.BOOLEAN:
                new_row.setBoolean(j, value)
            elif col_type == Type.DOUBLE:
                new_row.setDouble(j, value)
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the extract API
    ExtractAPI.cleanup()
    return
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file

    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns

    BUG FIX: the index-column collision loop compounded suffixes
    ('index_1', 'index_1_2', ...); candidates are now derived from the
    base name ('index_1', 'index_2', ...).
    """
    # Delete Extract and debug log if they already exist
    for file in [path, os.path.dirname(path) + '/debug.log',
                 './DataExtract.log', './debug.log']:
        if os.path.isfile(file):
            os.remove(file)

    # Create Extract and Table
    ExtractAPI.initialize()
    new_extract = Extract(path)
    table_def = TableDefinition()

    # Set columns in Tableau
    if add_index:
        base_name = 'index'
        index_col = base_name
        suffix = 1
        # Pick the first 'index', 'index_1', 'index_2', ... not already
        # used by the dataframe.
        while index_col in self._dataframe.columns:
            index_col = '%s_%s' % (base_name, suffix)
            suffix += 1
        table_def.addColumn(index_col, Type.INTEGER)

    for col_index, col_name in enumerate(self._dataframe):
        table_def.addColumn(col_name, self._column_static_type[col_index])

    # Create table
    new_table = new_extract.addTable(table_name, table_def)

    # Set Column values
    self.set_column_values(new_table, table_def, add_index)

    # Close extract
    new_extract.close()
    ExtractAPI.cleanup()
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file

    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns
    """
    # Remove stale dataextract logs and hyper_db temp files left over from
    # previous runs before writing a fresh extract.
    stale_files = glob('DataExtract*.log') + glob('hyper_db_*')
    stale_files += [os.path.dirname(path) + '/debug.log',
                    './DataExtract.log',
                    './debug.log']
    for leftover in stale_files:
        if os.path.isfile(leftover):
            os.remove(leftover)

    # Open (or create) the extract, then make sure the target table exists.
    ExtractAPI.initialize()
    extract = Extract(path)
    if not extract.hasTable(table_name):
        print(
            'Table \'{}\' does not exist in extract {}, creating.'.format(
                table_name, path))
        self.set_table_structure(extract, table_name, add_index)

    # Write the dataframe through the table's own definition.
    tde_table = extract.openTable(table_name)
    tde_def = tde_table.getTableDefinition()
    self.set_column_values(tde_table, tde_def, add_index)

    # Flush the extract to disk and release the API.
    extract.close()
    ExtractAPI.cleanup()
table_definition.addColumn(df.columns[i], Type.INTEGER) elif df.dtypes[i] == 'datetime64[ns]': table_definition.addColumn(df.columns[i], Type.DATE) # STEP 5 - Initialize a new table in the extract with the addTable() method new_table = td_extract.addTable('Extract', table_definition) # STEP 6 - Create a new row with the Row() constructor new_row = Row(table_definition) # STEP 7 - Populate each new row for j in range(0, df.shape[0]): for i in range(len(df.columns)): if df.dtypes[i] == 'object': new_row.setString(i, df.iloc[j,i]) if df.dtypes[i] == 'float64': new_row.setDouble(i, df.iloc[j,i]) elif df.dtypes[i] == 'int64': new_row.setInteger(i, df.iloc[j,i]) elif df.dtypes[i] == 'datetime64[ns]': new_row.setDate(i, df.iloc[j,i].year, df.iloc[j,i].month, df.iloc[j,i].day) new_table.insert(new_row) # STEP 8 - Save table, extract, and performs cleanup td_extract.close() # STEP 9 - Release the Extract API. ONLY if ExtractAPI.initialize() was used ExtractAPI.cleanup()
elif column_headers_types[j] == 13: try: newRow.setDateTime(j, i.get(column_headers[j]).year, i.get(column_headers[j]).month, i.get(column_headers[j]).day, i.get(column_headers[j]).hour, i.get(column_headers[j]).minute, i.get(column_headers[j]).second, 0) except: newRow.setNull(j) elif column_headers_types[j] == 10: try: newRow.setDouble(j, i.get(column_headers[j])) except: newRow.setNull(j) else: try: newRow.setString(j, i.get(column_headers[j])) except: newRow.setNull(j) table.insert(newRow) except: pass ###Final Procedures### client.close() dataExtract.close() ExtractAPI.cleanup() #####################################################
except: print "line = ", line print "exception! colname=[%s] val=[%s]" % (colname, str(line[orig_colname])) raise # unwind case else: new_colname = unwind_timeperiods[colname][0] col_tp = unwind_timeperiods[colname][1] col_idx = new_colnames_to_idx[new_colname] if col_tp == timeperiod: new_row.setDouble(col_idx, float(line[orig_colname])) if unwinding: # and the time period new_row.setString(new_colnames_to_idx["timeperiod"], timeperiod) # and the src new_row.setString(new_colnames_to_idx["src"], src) new_row.setString(new_colnames_to_idx["Scenario"], scenario) new_table.insert(new_row) tde_lines_written += 1 print "Read %6d rows from %s" % (csv_lines_read, csv_fullpath) # Step 5: Close the tde new_extract.close() print "Wrote %6d rows to %s" % (tde_lines_written, tde_fullpath) ExtractAPI.cleanup()
newrow.setNull(n_col) if str(combined_data[name].dtype) == "datetime64[ns]": #newrow.setCharString(n_col,c[name]) if c[name] is not None and str(c[name])!="NaT": date = datetime.datetime.strptime(str(c[name])[:10],"%Y-%m-%d") newrow.setDate(n_col,date.year,date.month,date.day) if c[name] is None or str(c[name])=="NaT": newrow.setNull(n_col) if str(combined_data[name].dtype) == "float64": if c[name] is not None: newrow.setDouble(n_col,c[name]) if c[name] is None: newrow.setNull(n_col) table.insert(newrow) #print combined_data._slice(slice(1,2)) createExtract.close() # ServerAPI.initialize() # con=ServerConnection() # con.connect("https://tableau-dev.gobalto.com","*****@*****.**","blahblahblah^","Storm") # print "connect success" # # con.publishExtract("/Users/justinlin/Desktop/data_stuff/predict_data_Extract.tde","default",'predict_stuff',overwrite=True) # print "publish datasource" # # con.close() # print "close server connection"
print("row:", n, "column:1", "value:", joinDF_auto.iloc[n, 1].year, joinDF_auto.iloc[n, 1].month, joinDF_auto.iloc[n, 1].day) print("row:", n, "column:2", "value:", joinDF_auto.loc[n, 'Patient Name']) #print("row:",n, "column:3", "value:",joinDF_auto.iloc[n,3]) #print("row:",n, "column:4", "value:",joinDF_auto.iloc[n,4]) new_row.setInteger(0, joinDF_auto.iloc[n, 0]) new_row.setDate(1, joinDF_auto.iloc[n, 1].year, joinDF_auto.iloc[n, 1].month, joinDF_auto.iloc[n, 1].day) new_row.setString(2, joinDF_auto.loc[n, 'Patient Name']) #parse out and set date parts: year, month, day #new_row.setString(3, joinDF_auto.iloc[n,3]) #new_row.setString(4, joinDF_auto.iloc[n,4]) new_table.insert(new_row) # Add the new row to the table # Close the extract in order to save the .tde file and clean up resources new_extract.close() ExtractAPI.cleanup() print("------------------------------------------------") print("------------------------------------------------") print "data prep finsished..." # Output elapsed time print "Elapsed:", locale.format("%.2f", time.clock() - startTime), "seconds" print("------------------------------------------------") print("------------------------------------------------")
def tableau_extract(resultset, data_file):
    """
    Create TDE extract.

    @param resultset = a ResultSet (converted via .DataFrame()) or a DataFrame
    @param data_file = path of the .tde file to (re)create

    Column types are inferred from the first value of each column and mapped
    to Tableau extract types; unknown types become unicode strings.

    BUG FIX: `pd._libs.tslib.Timestamp` is a private pandas module path that
    was removed in modern pandas; the public alias `pd.Timestamp` is the
    same class.
    """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset
    data_type = []
    fieldnames = []
    # First-value Python type -> Tableau extract type.
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd.Timestamp: Type.DATETIME
                     }
    for col in df_name:
        fieldnames.append(col)
        # Type of the first value stands in for the whole column.
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))
    for col_name in data_dict:
        if data_dict[col_name] in data_type_map:
            data_dict[col_name] = data_type_map[data_dict[col_name]]
        else:
            data_dict[col_name] = Type.UNICODE_STRING

    # Initialize a new extract, removing any stale file first.
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)

    # Numeric values of the Tableau Type enum, used to dispatch setters.
    tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12,
                 'DATETIME': 13, 'DURATION': 14, 'CHAR_STRING': 15,
                 'UNICODE_STRING': 16}
    for i in range(0, len(df_name)):
        for col in range(0, table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            try:
                if data_dict[col_name] == tde_types['INTEGER']:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DOUBLE']:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['BOOLEAN']:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DATE']:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif data_dict[col_name] == tde_types['DATETIME']:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year, data.month, data.day,
                                        data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['DURATION']:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute,
                                        data.second, 0)
                elif data_dict[col_name] == tde_types['CHAR_STRING']:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['UNICODE_STRING']:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Error')
                    new_row.setNull(col)
            except TypeError:
                # Unconvertible cells (e.g. None) become NULL in the extract.
                new_row.setNull(col)
        new_table.insert(new_row)
    new_extract.close()
    ExtractAPI.cleanup()
    # The SDK drops log files in the CWD; tidy them up.
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
def _to_tde_write_rows(table, row_schema, dataset):
    """Append every row of *dataset* to *table* using *row_schema*.

    Shared by both branches of to_tde(); previously this loop was duplicated
    verbatim in the 'new' and 'append' paths.
    """
    newRow = Row(row_schema)
    for i in range(0, len(dataset)):
        print(i)
        newRow.setString(0, dataset['Station'][i])
        newRow.setString(1, dataset['Time'][i])
        date_val = dataset['Date'][i]
        newRow.setDate(2, date_val.year, date_val.month, date_val.day)
        newRow.setDouble(3, dataset['all'][i])
        newRow.setDouble(4, dataset['n7'][i])
        newRow.setDouble(5, dataset['n8'][i])
        newRow.setDouble(6, dataset['y_serries'][i])
        table.insert(newRow)


def to_tde(dataset, tde_mode, tde_name):
    """
    Write *dataset* to the .tde extract *tde_name*.

    @param dataset = frame-like object with Station/Time/Date/all/n7/n8/y_serries columns
    @param tde_mode = 'new' to create the extract, 'append' to add to an existing one
    @param tde_name = path of the .tde file
    """
    if tde_mode == 'new':
        # Step 1: Create the Extract File
        dataExtract = Extract(tde_name)
        if dataExtract.hasTable('Extract'):
            # Same message/return-None behavior as the old `return print(...)`.
            print("tde already exist use another name")
            return

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        _to_tde_write_rows(table, dataSchema, dataset)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':
        # Step 1: Import the Extract File
        preExtract = Extract(tde_name)

        # Step 2: Open existing table
        table = preExtract.openTable('Extract')

        # Steps 3-4: reuse the existing definition and append the rows
        _to_tde_write_rows(table, table.getTableDefinition(), dataset)

        # Step 5: Close the extract
        preExtract.close()
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
    """
    Write a 2-D heatmap grid into a Tableau .tde extract, one record per
    cell, including a WKT polygon (GEOM) for each cell.

    @param heatmap = 2-D value grid (rows x cols)
    @param extent = (x0, x1, y0, y1) bounding box
    @param smoothing = smoothing factor stored alongside each cell
    @param extractLocation = path of the .tde file
    """
    n_rows = len(heatmap)
    n_cols = len(heatmap[0])
    x_span = abs(extent[0] - extent[1])
    y_span = abs(extent[2] - extent[3])
    x_origin = min(extent[0], extent[1])
    y_origin = min(extent[2], extent[3])
    x_step = x_span / n_cols
    y_step = y_span / n_rows

    # 1-2. initialize the API and open/create the extract
    ExtractAPI.initialize()
    extract = Extract(extractLocation)

    # 3. declare the columns
    schema = TableDefinition()
    schema.addColumn('ROW', Type.UNICODE_STRING)    # column 0
    schema.addColumn('COL', Type.UNICODE_STRING)    # column 1
    schema.addColumn('VALUE', Type.DOUBLE)          # column 2
    schema.addColumn('ID', Type.UNICODE_STRING)
    schema.addColumn('CellCount', Type.INTEGER)
    schema.addColumn('Smoothing', Type.DOUBLE)
    schema.addColumn('GEOM', Type.SPATIAL)

    # 4. create the table unless a previous run already did
    if extract.hasTable('Extract'):
        table = extract.openTable('Extract')
    else:
        table = extract.addTable('Extract', schema)

    # 5. one reusable row; insert() snapshots its current values
    cell_row = Row(schema)

    # 6. populate — coordinates advance before each cell is written
    y_coord = y_origin
    for i in range(n_rows):
        y_coord += y_step
        x_coord = x_origin
        for j in range(n_cols):
            x_coord += x_step
            cell_wkt = getFourCorners([x_coord, y_coord], x_step, y_step)
            cell_row.setString(0, str(i))                # ROW
            cell_row.setString(1, str(j))                # COL
            cell_row.setDouble(2, heatmap[i][j])         # VALUE
            cell_row.setString(3, str(i) + '-' + str(j)) # ID
            cell_row.setInteger(4, len(heatmap[0]))      # cell count
            cell_row.setDouble(5, smoothing)             # smoothing
            cell_row.setSpatial(6, cell_wkt)             # WKT spatial
            table.insert(cell_row)

    # 7. save the table and extract
    extract.close()

    # 8. release the extract API
    ExtractAPI.cleanup()
    return
def createTDE(query, dbname, tde_filename):
    # Runs `query` against `dbname` and streams the entire result set into a
    # new Tableau .tde extract at `tde_filename`, reporting throughput.
    # Python 2 code; column setters are dispatched per column type via
    # tde_set_type_function_map.
    start_time = time.time()
    # NOTE(review): the user appears masked ("******"); presumably real
    # credentials are substituted elsewhere — confirm before running.
    db = sql.connect(host="127.0.0.1", port=9999, user="******", db=dbname)
    cursor = db.cursor()
    cursor.execute(query)
    ExtractAPI.initialize()
    # Overwrite any previous extract with the same name (the SDK would
    # otherwise open and append to it).
    if os.path.isfile(tde_filename):
        os.remove(tde_filename)
    tde = Extract(tde_filename)
    # Table schema is derived from the cursor's column metadata.
    table_def = makeTableDefinition(cursor.description)
    tde_table = tde.addTable('Extract', table_def)
    tde_row = Row(table_def)  # Pass the table definition to the constructor
    # Fetch in batches of `size` rows to bound memory use.
    size = 1000
    many = cursor.fetchmany(size)
    rows_counter = 0
    while many:  # add counter for rows incase of database disconnection
        for row in many:
            # Create new row: set each column with the setter matching its
            # declared extract type, then snapshot it into the table.
            for colno, col in enumerate(row):
                col_type = table_def.getColumnType(colno)
                setType = tde_set_type_function_map[col_type]
                # print setType
                # print type(setType)
                setType(tde_row, colno, col)
            tde_table.insert(tde_row)
            rows_counter += 1
        many = cursor.fetchmany(size)
    #tde_row.close()
    print "cleared while loop"
    # Set column values. The first parameter is the column number (its
    # ordinal position) The second parameter (or second and subsequent paramaters) is
    # the value to set the column to.
    # new_row.setInteger(0, 1)
    # new_row.setString(1, 'Amanda')
    # new_row.setDate(2, 2014, 6, 9)
    # new_table.insert(new_row)  # Add the new row to the table
    # new_row.setInteger(0, 1)
    # new_row.setString(1, 'Amanda')
    # new_row.setDate(2, 2014, 6, 9)
    # new_table.insert(new_row)  # Add the new row to the table
    # new_row.setInteger(0, 2)
    # new_row.setString(1, 'Brian')
    # new_row.setDate(2, 2014, 10, 13)
    # new_table.insert(new_row)
    # new_row.setInteger(0, 3)
    # new_row.setString(1, 'Christina')
    # new_row.setDate(2, 2015, 2, 16)
    # new_table.insert(new_row)
    # Close the extract in order to save the .tde file and clean up resources
    tde.close()
    print "TDE Closed"
    ExtractAPI.cleanup()
    print "ExtractAPI cleaned up"
    cursor.close()
    print "cursor Closed"
    db.close()
    print "db closed"
    timetaken = time.time() - start_time
    print str(rows_counter) + ' rows inserted in ' + str(timetaken) + ' seconds'
    print ' (' + str(rows_counter/timetaken) + ' rows per second)'
    return
def methodMultithread(chunkSize): threads = [] writeThread = WriteThread() writeThread.start() threads.append(writeThread) readThread = ReadThread(chunkSize) readThread.start() threads.append(readThread) for t in threads: t.join() print str(datetime.datetime.now()) + " start time of fetch" methodFetchOne() # methodFetchMany(100000) # methodFetchMany(500000) # methodMultithread(100000) # methodMultithread(500000) print str(datetime.datetime.now()) + " end time of fetch" # Disconnect from Greenplum cnxn.close # Close the extract in order to save the .tde file and clean up resources tdenew.close() # Cleanup the extract API process ExtractAPI.cleanup()
def main(): pp = pprint.PrettyPrinter(indent=2) key = raw_input("API Key: ") limit = 100 offset = 0 # Create TDE extract file if os.path.isfile(extract_file): os.remove(extract_file) ExtractAPI.initialize() tdefile = Extract(extract_file) # Create the table definition # Data types: INTEGER, DOUBLE, BOOLEAN, DATE, DATETIME, DURATION, CHAR_STRING, UNICODE_STRING tableDef = TableDefinition() tableDef.addColumn('alert_id', Type.CHAR_STRING) tableDef.addColumn('timestamp', Type.DATETIME) tableDef.addColumn('alert_type', Type.CHAR_STRING) tableDef.addColumn('alert_status', Type.CHAR_STRING) tableDef.addColumn('alert_network', Type.CHAR_STRING) tableDef.addColumn('notes', 16) #UNICODE_STRING tableDef.addColumn('metadata', 16) #UNICODE_STRING tableDef.addColumn('rule_name', Type.CHAR_STRING) tableDef.addColumn('severity', Type.INTEGER) tableDef.addColumn('asset_name', 16) #UNICODE_STRING tableDef.addColumn('asset_image', Type.CHAR_STRING) tableDef.addColumn('perpetrator_username', 16) #UNICODE_STRING tableDef.addColumn('perpetrator_displayname', 16) #UNICODE_STRING tableDef.addColumn('perpetrator_type', Type.CHAR_STRING) tableDef.addColumn('perpetrator_image', Type.CHAR_STRING) tableDef.addColumn('takedown_request_time', Type.DATETIME) tableDef.addColumn('takedown_accept_time', Type.DATETIME) tableDef.addColumn('close_time', Type.DATETIME) # Create table in image of tableDef table = tdefile.addTable('Extract', tableDef) rowcount = 0 done = 'n' while done == 'n': response = get_page(key, limit, offset) alerts = response['alerts'] if len(alerts) == 0: done = 'y' break for alert in alerts: newrow = createrow(alert,tableDef) table.insert(newrow) rowcount +=1 if rowcount >= LIMIT: done = 'y' break offset += 100 tdefile.close() ExtractAPI.cleanup() print str(rowcount) + " alerts processed"