def processYearPartition(self, year):
    """Rebuild the cell catch partition for one year.

    Repopulates allocation.allocation_data_partition_udi for *year*, runs the
    generated insert statements through the parallel SQL processor, then
    vacuums and re-indexes the resulting web_partition.cell_catch_p<year>.

    Fix: the session is now acquired BEFORE the try block; previously a
    connection failure left `dbSession` unbound and the finally clause raised
    NameError, masking the original error.
    """
    opts = self.dbPane.getDbOptions()
    dbSession = getDbConnection(optparse.Values(opts)).getSession()
    try:
        dbSession.execute("TRUNCATE TABLE allocation.allocation_data_partition_udi")
        dbSession.execute("SELECT allocation.populate_allocation_data_partition_udi(%s)" % year)
        dbSession.execute("VACUUM ANALYZE allocation.allocation_data_partition_udi")
        opts['sqlfile'] = None
        opts['sqlcmd'] = "SELECT allocation.generate_insert_cell_catch_partition_statements(%s)" % year
        opts['threads'] = 16
        sp.process(optparse.Values(opts))
        # Post insertions operation to finalize the target cell catch partition for immediate use
        cellCatchPartition = "cell_catch_p%s" % year
        dbSession.execute("VACUUM ANALYZE web_partition.%s" % cellCatchPartition)
        for indexSql in dbSession.execute(
                "SELECT web_partition.maintain_cell_catch_indexes('%s') AS cmd" % cellCatchPartition).fetchall():
            dbSession.execute(indexSql.cmd)
    finally:
        dbSession.close()
def postAggregationOperations(self):
    """Finalize an aggregation run on web.v_fact_data.

    Folds fishing_entity_id 223 into 213, vacuums v_fact_data, then refreshes
    and vacuums every materialized view in the web schema (most depend on
    v_fact_data) via the parallel SQL processor.

    Fix: the connection is now closed in a finally block; previously any
    failing statement leaked the connection.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 2
        print("Merging Unknown fishing entity in catch data...")
        dbConn.execute("UPDATE web.v_fact_data SET fishing_entity_id = 213 WHERE fishing_entity_id = 223")
        print("Vacuuming v_fact_data afterward...")
        dbConn.execute("vacuum analyze web.v_fact_data")
        # And now refresh all materialized views as most are dependent on data in the v_fact_data table
        opts['sqlcmd'] = "SELECT 'refresh materialized view web.' || table_name FROM matview_v('web') WHERE table_name NOT LIKE 'TOTALS%'"
        sp.process(optparse.Values(opts))
        opts['sqlcmd'] = "SELECT 'vacuum analyze web.' || table_name FROM matview_v('web') WHERE table_name NOT LIKE 'TOTALS%'"
        sp.process(optparse.Values(opts))
        print("Aggregation process completed...")
    finally:
        dbConn.close()
def postAggregationOperations(self, summaryTable):
    """Vacuum/analyze the allocation data and summary table(s) after a
    summarization run.

    summaryTable -- name of the summary table just rebuilt, or None to
    vacuum-analyze every table in SummarizeCommandPane.SUMMARY_TABLES.

    Unit price is no longer updated here: the price is added in the aggregate
    step to account for end use type (the old update code was commented out
    and has been removed).

    NOTE(review): sp.process is invoked without setting 'sqlfile'/'sqlcmd',
    so it runs with whatever the DB pane options already carry — confirm this
    call is still intended now that the unit-price SQL file setup is gone.

    Fix: the connection is now closed in a finally block.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        sp.process(optparse.Values(opts))
        dbConn.execute("VACUUM ANALYZE allocation.allocation_data")
        print("Vacuum and analyze target summary table(s)...")
        if summaryTable:
            dbConn.execute("VACUUM ANALYZE allocation.%s" % summaryTable)
        else:
            # if input summaryTable = None, it's really the signal to vacuum analyze all summary tables
            for tab in SummarizeCommandPane.SUMMARY_TABLES:
                if tab:
                    dbConn.execute("VACUUM ANALYZE allocation.%s" % tab)
        print("Summarization process completed...")
    finally:
        dbConn.close()
def refresh_all_materialized_views(dbPane):
    """Refresh every materialized view in the database by running the
    refresh_matviews.sql script through the parallel SQL processor."""
    options = dbPane.getDbOptions()
    options['sqlfile'] = "sql/refresh_matviews.sql"
    options['threads'] = 4
    sp.process(optparse.Values(options))
    print('All materialized views in db refreshed.')
def postAggregationOperations(self, summaryTable):
    """Update allocation data unit prices, then vacuum/analyze the allocation
    data and summary table(s).

    summaryTable -- name of the summary table just rebuilt, or None to
    vacuum-analyze every table in SummarizeCommandPane.SUMMARY_TABLES.

    Fix: the connection is now closed in a finally block; previously any
    failing statement leaked the connection.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        print("Updating allocation data unit price...")
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 8
        opts['sqlfile'] = "sql/update_allocation_data_unit_price.sql"
        sp.process(optparse.Values(opts))
        # Rows still without a price fall back to the global average.
        dbConn.execute("UPDATE allocation.allocation_data SET unit_price = %s WHERE unit_price IS NULL"
                       % SummarizeCommandPane.GLOBAL_AVERAGE_UNIT_PRICE)
        dbConn.execute("VACUUM ANALYZE allocation.allocation_data")
        print("Vacuum and analyze target summary table(s)...")
        if summaryTable:
            dbConn.execute("VACUUM ANALYZE allocation.%s" % summaryTable)
        else:
            # if input summaryTable = None, it's really the signal to vacuum analyze all summary tables
            for tab in SummarizeCommandPane.SUMMARY_TABLES:
                if tab:
                    dbConn.execute("VACUUM ANALYZE allocation.%s" % tab)
        print("Summarization process completed...")
    finally:
        dbConn.close()
def processTable(self, tabDescriptor):
    """Download one source table into the local allocation schema.

    Ordinary tables are copied directly. allocation_result is too large to
    copy in one shot, so it is split: first a UniversalDataID distribution
    table is built, then a partition map is computed and each partition's
    slice is downloaded separately, after which the partitions are vacuumed
    and indexed via the parallel SQL processor.
    """
    opts = self.sourceDbPane.getDbOptions()

    if tabDescriptor.target_table_name != "allocation_result":
        self.downloadAndCopyTable(tabDescriptor, opts)
        return

    # Special processing for allocation_result due to its excessive size.
    # Clone the input descriptor to create one for allocation_result_distribution.
    desc = copy.deepcopy(tabDescriptor)
    desc.target_table_name = "allocation_result_distribution"
    desc.source_select_clause = "UniversalDataID, count(*)"
    desc.source_where_clause = "GROUP BY UniversalDataID"
    self.dbSession.execute("TRUNCATE TABLE allocation.%s" % desc.target_table_name)
    self.downloadAndCopyTable(desc, opts)
    self.dbSession.execute("VACUUM ANALYZE allocation.%s" % desc.target_table_name)

    # Ready the allocation result partition map table to receive download data.
    for cmd in (
            "TRUNCATE TABLE allocation.allocation_result_partition_map",
            "select allocation.maintain_allocation_result_partition()",
            "select allocation.calculate_allocation_result_partition_map(%s)" % NUM_OF_ALLOCATION_RESULT_PARTITIONS,
            "VACUUM ANALYZE allocation.allocation_result_partition_map",
            "select allocation.maintain_allocation_result_partition()"):
        self.dbSession.execute(cmd)

    # Pull each partition's slice of rows from the source.
    partitionMaps = self.dbSession.query(AllocationResultPartitionMap) \
        .order_by(AllocationResultPartitionMap.partition_id).all()
    desc.target_schema_name = "allocation_partition"
    desc.source_select_clause = "*"
    for pmap in partitionMaps:
        desc.target_table_name = "allocation_result_%s" % pmap.partition_id
        desc.source_where_clause = "WHERE AllocatedCatch > 0 AND UniversalDataID BETWEEN %s AND %s" \
            % (pmap.begin_universal_data_id, pmap.end_universal_data_id)
        self.downloadAndCopyTable(desc, opts)

    mainDbOpts = self.mainDbPane.getDbOptions()
    mainDbOpts['sqlfile'] = None
    mainDbOpts['threads'] = 8
    # Vacuum and analyze all the partitions we just populated above.
    mainDbOpts['sqlcmd'] = "SELECT format('VACUUM ANALYZE allocation_partition.%s', table_name) " + \
                           " FROM schema_v('allocation_partition') " + \
                           " WHERE table_name NOT LIKE 'TOTALS%'"
    sp.process(optparse.Values(mainDbOpts))
    # And now add necessary indexes to all the partitions we just created above.
    mainDbOpts['sqlcmd'] = "SELECT allocation.maintain_allocation_result_indexes(table_name)" + \
                           " FROM schema_v('allocation_partition')" + \
                           " WHERE table_name NOT LIKE 'TOTALS%'"
    sp.process(optparse.Values(mainDbOpts))
def process(dbPane):
    """Run the SQL processor with the pane's DB options, but only once the
    connection has been tested successfully; otherwise show an info dialog."""
    if dbPane.isConnectionTestedSuccessfully():
        sp.process(optparse.Values(dbPane.getDbOptions()))
        return
    messagebox.showinfo(
        "Connection not yet tested",
        "The DB Connection has not been tested successfully.\n"
        "Once the DB Connection has been tested successfully, you can click the Process button again.")
def kickoffSqlProcessor(self, sqlFileName, isPostOpsRequired=True):
    """Run the given SQL file through the parallel SQL processor (defaulting
    to 2 threads when none configured), then optionally run the
    post-aggregation operations."""
    options = self.dbPane.getDbOptions()
    options['sqlfile'] = sqlFileName
    if 'threads' not in options or options['threads'] == 0:
        options['threads'] = 2
    sp.process(optparse.Values(options))
    if isPostOpsRequired:
        self.postAggregationOperations()
def kickoffSqlProcessor(self, sqlFileName, isPostOpsRequired=True):
    """Run the given SQL file through the parallel SQL processor (defaulting
    to 8 threads when none configured), then optionally run the
    post-aggregation operations."""
    options = self.dbPane.getDbOptions()
    options['sqlfile'] = sqlFileName
    if 'threads' not in options or options['threads'] == 0:
        options['threads'] = 8
    sp.process(optparse.Values(options))
    if isPostOpsRequired:
        self.postAggregationOperations()
def processTable(self, tabDescriptor):
    """Download one source table into the local allocation schema.

    Ordinary tables are copied directly. allocation_result is too large to
    copy in one shot, so it is split: first a UniversalDataID distribution
    table is built, then a partition map is computed and each partition's
    slice is downloaded separately, after which the partitions are vacuumed
    and indexed via the parallel SQL processor.
    """
    opts = self.sourceDbPane.getDbOptions()

    if tabDescriptor.target_table_name != "allocation_result":
        self.downloadAndCopyTable(tabDescriptor, opts)
        return

    # Special processing for allocation_result due to its excessive size.
    # Clone the input descriptor to create one for allocation_result_distribution.
    desc = copy.deepcopy(tabDescriptor)
    desc.target_table_name = "allocation_result_distribution"
    desc.source_select_clause = "UniversalDataID, count(*)"
    desc.source_where_clause = "GROUP BY UniversalDataID"
    self.dbSession.execute("TRUNCATE TABLE allocation.%s" % desc.target_table_name)
    self.downloadAndCopyTable(desc, opts)
    self.dbSession.execute("VACUUM ANALYZE allocation.%s" % desc.target_table_name)

    # Ready the allocation result partition map table to receive download data.
    for cmd in (
            "TRUNCATE TABLE allocation.allocation_result_partition_map",
            "select allocation.maintain_allocation_result_partition()",
            "select allocation.calculate_allocation_result_partition_map(%s)" % NUM_OF_ALLOCATION_RESULT_PARTITIONS,
            "VACUUM ANALYZE allocation.allocation_result_partition_map",
            "select allocation.maintain_allocation_result_partition()"):
        self.dbSession.execute(cmd)

    # Pull each partition's slice of rows from the source.
    partitionMaps = self.dbSession.query(AllocationResultPartitionMap) \
        .order_by(AllocationResultPartitionMap.partition_id).all()
    desc.target_schema_name = "allocation_partition"
    desc.source_select_clause = "*"
    for pmap in partitionMaps:
        desc.target_table_name = "allocation_result_%s" % pmap.partition_id
        desc.source_where_clause = "WHERE AllocatedCatch > 0 AND UniversalDataID BETWEEN %s AND %s" \
            % (pmap.begin_universal_data_id, pmap.end_universal_data_id)
        self.downloadAndCopyTable(desc, opts)

    mainDbOpts = self.mainDbPane.getDbOptions()
    mainDbOpts['sqlfile'] = None
    mainDbOpts['threads'] = 8
    # Vacuum and analyze all the partitions we just populated above.
    mainDbOpts['sqlcmd'] = "SELECT format('VACUUM ANALYZE allocation_partition.%s', table_name) " + \
                           " FROM schema_v('allocation_partition') " + \
                           " WHERE table_name NOT LIKE 'TOTALS%'"
    sp.process(optparse.Values(mainDbOpts))
    # And now add necessary indexes to all the partitions we just created above.
    mainDbOpts['sqlcmd'] = "SELECT allocation.maintain_allocation_result_indexes(table_name)" + \
                           " FROM schema_v('allocation_partition')" + \
                           " WHERE table_name NOT LIKE 'TOTALS%'"
    sp.process(optparse.Values(mainDbOpts))
def kickoffSqlProcessor(self, summaryTable, isPostOpsRequired=True):
    """Truncate and rebuild one summary table via its summarize_* SQL script,
    then optionally run the post-aggregation operations.

    Fix: the connection opened for the TRUNCATE was previously never closed;
    it is now released in a finally block before the processor runs.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        dbConn.execute("TRUNCATE allocation.%s" % summaryTable)
    finally:
        dbConn.close()
    opts['sqlfile'] = "sql/summarize_%s.sql" % summaryTable
    if 'threads' not in opts or opts['threads'] == 0:
        opts['threads'] = 8
    sp.process(optparse.Values(opts))
    if isPostOpsRequired:
        self.postAggregationOperations(summaryTable)
def process(self):
    """Collect the SQL file (when prompted for), thread count, and SQL command
    from the UI widgets and hand them to the SQL processor. Refuses to run
    until the DB connection has been tested successfully."""
    if not self.dbPane.isConnectionTestedSuccessfully():
        messagebox.showinfo(
            "Connection not yet tested",
            "The DB Connection has not been tested successfully.\n"
            "Once the DB Connection has been tested successfully, you can click the Process button again.")
        return

    dbOpts = self.dbPane.getDbOptions()
    if self.promptForSqlFile:
        dbOpts['sqlfile'] = self.db_sqlfile.get()
    dbOpts['threads'] = self.db_threads.get()
    dbOpts['sqlcmd'] = self.db_sqlcmd.get()
    sp.process(optparse.Values(dbOpts))
def process(self, entity_layer_id):
    """Maintain the catch CSV partition for the given entity layer, then
    repopulate the CSV cache tables (4 threads) via the SQL processor.

    Fix: the DB session was previously never closed; it is now released in a
    finally block once the partition maintenance call completes.
    """
    if not self.dbPane.isConnectionTestedSuccessfully():
        popup_message("Connection not yet tested",
                      "The DB Connection has not been tested successfully.\n" +
                      "Once the DB Connection has been tested successfully, you can click the Process button again.")
        return
    dbOpts = self.dbPane.getDbOptions()
    dbSession = getDbConnection(optparse.Values(dbOpts)).getSession()
    try:
        dbSession.execute("SELECT * FROM web_cache.maintain_catch_csv_partition(%s)" % entity_layer_id)
    finally:
        dbSession.close()
    dbOpts['sqlfile'] = "sql/populate_catch_data_in_csv.sql"
    dbOpts['sqlcmd'] = "select format('vacuum analyze web_cache.%s', table_name) from schema_v('web_cache') where table_name not like 'TOTAL%'"
    dbOpts['threads'] = 4
    sp.process(optparse.Values(dbOpts))
def postAggregationOperations(self):
    """Finalize an aggregation run on web.v_fact_data.

    Folds fishing_entity_id 223 into 213, vacuums v_fact_data, then refreshes
    and vacuums every materialized view in the web schema (most depend on
    v_fact_data) via the parallel SQL processor.

    Fix: the connection is now closed in a finally block; previously any
    failing statement leaked the connection.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 8
        print("Merging Unknown fishing entity in catch data...")
        dbConn.execute("UPDATE web.v_fact_data SET fishing_entity_id = 213 WHERE fishing_entity_id = 223")
        print("Vacuuming v_fact_data afterward...")
        dbConn.execute("vacuum analyze web.v_fact_data")
        # And now refresh all materialized views as most are dependent on data in the v_fact_data table
        opts['sqlcmd'] = "SELECT 'refresh materialized view web.' || table_name FROM matview_v('web') WHERE table_name NOT LIKE 'TOTALS%'"
        sp.process(optparse.Values(opts))
        opts['sqlcmd'] = "SELECT 'vacuum analyze web.' || table_name FROM matview_v('web') WHERE table_name NOT LIKE 'TOTALS%'"
        sp.process(optparse.Values(opts))
        print("Aggregation process completed...")
    finally:
        dbConn.close()
def process(self, entity_layer_id):
    """Maintain the catch CSV partition for the given entity layer, then
    repopulate the CSV cache tables (single-threaded) via the SQL processor.

    Fix: the DB session was previously never closed; it is now released in a
    finally block once the partition maintenance call completes.
    """
    if not self.dbPane.isConnectionTestedSuccessfully():
        popup_message("Connection not yet tested",
                      "The DB Connection has not been tested successfully.\n" +
                      "Once the DB Connection has been tested successfully, you can click the Process button again.")
        return
    dbOpts = self.dbPane.getDbOptions()
    dbSession = getDbConnection(optparse.Values(dbOpts)).getSession()
    try:
        dbSession.execute("SELECT * FROM web_cache.maintain_catch_csv_partition(%s)" % entity_layer_id)
    finally:
        dbSession.close()
    dbOpts['sqlfile'] = "sql/populate_catch_data_in_csv.sql"
    dbOpts['sqlcmd'] = "select format('vacuum analyze web_cache.%s', table_name) from schema_v('web_cache') where table_name not like 'TOTAL%'"
    dbOpts['threads'] = 1
    sp.process(optparse.Values(dbOpts))
def processYearPartition(self, year):
    """Rebuild the cell catch partition for one year.

    Repopulates allocation.allocation_data_partition_udi for *year*, runs the
    generated insert statements through the parallel SQL processor, then
    vacuums and re-indexes the resulting web_partition.cell_catch_p<year>.

    Fix: the session is now acquired BEFORE the try block; previously a
    connection failure left `dbSession` unbound and the finally clause raised
    NameError, masking the original error.
    """
    opts = self.dbPane.getDbOptions()
    dbSession = getDbConnection(optparse.Values(opts)).getSession()
    try:
        dbSession.execute("TRUNCATE TABLE allocation.allocation_data_partition_udi")
        dbSession.execute("SELECT allocation.populate_allocation_data_partition_udi(%s)" % year)
        dbSession.execute("VACUUM ANALYZE allocation.allocation_data_partition_udi")
        opts['sqlfile'] = None
        opts['sqlcmd'] = "SELECT allocation.generate_insert_cell_catch_partition_statements(%s)" % year
        opts['threads'] = 16
        sp.process(optparse.Values(opts))
        # Post insertions operation to finalize the target cell catch partition for immediate use
        cellCatchPartition = "cell_catch_p%s" % year
        dbSession.execute("VACUUM ANALYZE web_partition.%s" % cellCatchPartition)
        for indexSql in dbSession.execute(
                "SELECT web_partition.maintain_cell_catch_indexes('%s') AS cmd" % cellCatchPartition).fetchall():
            dbSession.execute(indexSql.cmd)
    finally:
        dbSession.close()
def postAggregationOperations(self, summaryTable):
    """Update allocation data unit prices, then vacuum/analyze the allocation
    data and summary table(s).

    summaryTable -- name of the summary table just rebuilt, or None to
    vacuum-analyze every table in SummarizeCommandPane.SUMMARY_TABLES.

    Fix: the connection is now closed in a finally block; previously any
    failing statement leaked the connection.
    """
    opts = self.dbPane.getDbOptions()
    dbConn = getDbConnection(optparse.Values(opts))
    try:
        print("Updating allocation data unit price...")
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 8
        opts['sqlfile'] = "sql/update_allocation_data_unit_price.sql"
        sp.process(optparse.Values(opts))
        # Rows still without a price fall back to the global average.
        dbConn.execute("UPDATE allocation.allocation_data SET unit_price = %s WHERE unit_price IS NULL"
                       % SummarizeCommandPane.GLOBAL_AVERAGE_UNIT_PRICE)
        dbConn.execute("VACUUM ANALYZE allocation.allocation_data")
        print("Vacuum and analyze target summary table(s)...")
        if summaryTable:
            dbConn.execute("VACUUM ANALYZE allocation.%s" % summaryTable)
        else:
            # if input summaryTable = None, it's really the signal to vacuum analyze all summary tables
            for tab in SummarizeCommandPane.SUMMARY_TABLES:
                if tab:
                    dbConn.execute("VACUUM ANALYZE allocation.%s" % tab)
        print("Summarization process completed...")
    finally:
        dbConn.close()
def setupCommandPane(self):
    """Rebuild the allocation_data partitions and (re)create one command
    button per available year partition, plus the aggregate-all button.

    Fixes: `dbSession` was bound inside the try block, so a connection
    failure made the finally clause raise NameError; it is now initialized to
    None and guarded. Also removed an unused counter (`i`) and replaced
    `== None` with `is None`.
    """
    if not self.dbPane.isConnectionTestedSuccessfully():
        popup_message("Connection not yet tested",
                      "The Main DB Connection has not been tested successfully.\n" +
                      "Once the Main DB Connection has been tested successfully, you can click that button again.")
        return

    # Clear out any buttons from a previous setup.
    for child in self.cmdFrame.winfo_children():
        child.destroy()

    dbSession = None
    try:
        opts = self.dbPane.getDbOptions()
        dbSession = getDbConnection(optparse.Values(opts)).getSession()
        # Rebuild the allocation_data partitions to make sure we are using the freshest allocation data
        partitions = dbSession.execute(
            "SELECT ('allocation_data_partition.' || table_name) AS partition_name" +
            " FROM schema_v('allocation_data_partition') " +
            " WHERE table_name NOT LIKE 'TOTALS%'" +
            " ORDER BY 1").fetchall()
        for partition in partitions:
            dbSession.execute("DROP TABLE %s" % partition.partition_name)
        dbSession.execute("SELECT allocation.maintain_allocation_data_partition()")
        opts['sqlfile'] = "sql/insert_allocation_data_eez_hs.sql"
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 8
        sp.process(optparse.Values(opts))

        # Add one button per year partition to the command pane.
        self.yearList = dbSession.execute(
            "SELECT replace(table_name, 'allocation_data_', '')::INT AS year " +
            " FROM schema_v('allocation_data_partition') " +
            " WHERE table_name NOT LIKE 'TOTALS%'" +
            " ORDER BY 1").fetchall()
        row = 0
        column = 0
        button_data = []
        for par in self.yearList:
            button_data.append([par.year, partial(self.processYearPartition, par.year), "blue"])
            column += 1
            if column >= self.buttonsPerRow:
                add_buttons(self.cmdFrame, button_data, row, 0, "horizontal")
                button_data = []
                column = 0
                row += 1
        if button_data != []:
            add_buttons(self.cmdFrame, button_data, row, 0, "horizontal")
            row += 1

        # Calling maintain cell catch just in case the cell catch partitions are not present for any reason
        dbSession.execute("SELECT web_partition.maintain_cell_catch_partition()")
    finally:
        if dbSession:
            dbSession.close()

    for child in self.cmdFrame.winfo_children():
        child.grid_configure(padx=5, pady=5)

    if self.aggregateAllBt is None:
        self.aggregateAllBt = tk.Button(self.parent,
                                        text="Aggregate All Year Partitions",
                                        fg="red",
                                        command=self.aggregateAllPartition,
                                        height=1)
        self.parent.add(self.aggregateAllBt)
        # Adding a filler pane for look only
        self.fillerPane = ttk.Panedwindow(self.parent, orient=VERTICAL)
        self.parent.add(self.fillerPane)
def setupCommandPane(self):
    """Rebuild the allocation_data partitions and (re)create one command
    button per available year partition, plus the aggregate-all button.

    Fixes: `dbSession` was bound inside the try block, so a connection
    failure made the finally clause raise NameError; it is now initialized to
    None and guarded. Also removed an unused counter (`i`) and replaced
    `== None` with `is None`.
    """
    if not self.dbPane.isConnectionTestedSuccessfully():
        popup_message("Connection not yet tested",
                      "The Main DB Connection has not been tested successfully.\n" +
                      "Once the Main DB Connection has been tested successfully, you can click that button again.")
        return

    # Clear out any buttons from a previous setup.
    for child in self.cmdFrame.winfo_children():
        child.destroy()

    dbSession = None
    try:
        opts = self.dbPane.getDbOptions()
        dbSession = getDbConnection(optparse.Values(opts)).getSession()
        # Rebuild the allocation_data partitions to make sure we are using the freshest allocation data
        partitions = dbSession.execute(
            "SELECT ('allocation_data_partition.' || table_name) AS partition_name" +
            " FROM schema_v('allocation_data_partition') " +
            " WHERE table_name NOT LIKE 'TOTALS%'" +
            " ORDER BY 1").fetchall()
        for partition in partitions:
            dbSession.execute("DROP TABLE %s" % partition.partition_name)
        dbSession.execute("SELECT allocation.maintain_allocation_data_partition()")
        opts['sqlfile'] = "sql/insert_allocation_data_eez_hs.sql"
        if 'threads' not in opts or opts['threads'] == 0:
            opts['threads'] = 8
        sp.process(optparse.Values(opts))

        # Add one button per year partition to the command pane.
        self.yearList = dbSession.execute(
            "SELECT replace(table_name, 'allocation_data_', '')::INT AS year " +
            " FROM schema_v('allocation_data_partition') " +
            " WHERE table_name NOT LIKE 'TOTALS%'" +
            " ORDER BY 1").fetchall()
        row = 0
        column = 0
        button_data = []
        for par in self.yearList:
            button_data.append([par.year, partial(self.processYearPartition, par.year), "blue"])
            column += 1
            if column >= self.buttonsPerRow:
                add_buttons(self.cmdFrame, button_data, row, 0, "horizontal")
                button_data = []
                column = 0
                row += 1
        if button_data != []:
            add_buttons(self.cmdFrame, button_data, row, 0, "horizontal")
            row += 1

        # Calling maintain cell catch just in case the cell catch partitions are not present for any reason
        dbSession.execute("SELECT web_partition.maintain_cell_catch_partition()")
    finally:
        if dbSession:
            dbSession.close()

    for child in self.cmdFrame.winfo_children():
        child.grid_configure(padx=5, pady=5)

    if self.aggregateAllBt is None:
        self.aggregateAllBt = tk.Button(self.parent,
                                        text="Aggregate All Year Partitions",
                                        fg="red",
                                        command=self.aggregateAllPartition,
                                        height=1)
        self.parent.add(self.aggregateAllBt)
        # Adding a filler pane for look only
        self.fillerPane = ttk.Panedwindow(self.parent, orient=VERTICAL)
        self.parent.add(self.fillerPane)