def loadRemainingFiles(input_queue, output_queue, message_queue, config,
                       db_config, start_time):
    """
    calls the modules that load the smaller shape files and csv files

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  All information to be
                        processed is loaded into the queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         the json variable that contains all configuration
                        data required for the data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    temp_time = time.localtime()
    continue_run = True
    build_list = []

    # build the household, housing unit, and population SQL when csv files are requested
    if config['step0']['census_csv']:
        continue_run = buildHH_HU_POP_sql(config, start_time)

    # signal the servant processes to start working on the smaller files
    if config['step0']['census_shape'] or config['step0']['census_csv']:
        for _ in range(config['number_servers']):
            message_queue.put('load_other_files')

    if continue_run and config['step0']['census_shape']:
        continue_run, build_list = loadSimpleShapeFiles(config, db_config,
                                                        build_list, start_time)

    if continue_run and config['step0']['census_csv']:
        continue_run, build_list = loadCSVFiles(build_list, config, db_config,
                                                start_time)

    if continue_run and len(build_list) > 0:
        # populate the input queue with the results of the list build
        for task in build_list:
            input_queue.put(task)
        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       len(build_list), start_time)

    # create the text file that contains county fips
    if continue_run:
        continue_run = makeCountyFIPsFile(config, db_config, start_time)

    if continue_run:
        if config['step0']['census_shape'] or config['step0']['census_csv']:
            my_message = """
                INFO - STEP 0 (MASTER): TASK 5 OF 13 - COMPLETED LOADING
                SHAPE FILES AND CSV FILES INTO DATABASE
                """
            my_message = ' '.join(my_message.split())
            print(nbmf.logMessage(my_message, temp_time, time.localtime(),
                  time.mktime(time.localtime()) - time.mktime(start_time)))
        del build_list
        gc.collect()
        return True
    else:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 5 OF 13 - FAILED LOADING SHAPE
            FILES AND CSV FILES INTO DATABASE
            """
        my_message = ' '.join(my_message.split())
        my_message += '\n' + traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        return False
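
# Illustrative only: a minimal sketch of how a master process might wire up the
# shared queues and call loadRemainingFiles.  The Manager-based queues and this
# helper's name are assumptions for illustration; the real driver that builds
# config and db_config lives elsewhere.
def _example_loadRemainingFiles_driver(config, db_config):
    import multiprocessing

    manager = multiprocessing.Manager()
    input_queue = manager.Queue()    # tasks handed to the servant processes
    output_queue = manager.Queue()   # results returned by the servants
    message_queue = manager.Queue()  # master -> servant control messages

    start_time = time.localtime()    # same struct_time format the loggers expect
    return loadRemainingFiles(input_queue, output_queue, message_queue,
                              config, db_config, start_time)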
def updateSpatialIntersections(input_queue, output_queue, message_queue,
                               config, db_config, start_time):
    """
    The main subprocess that manages the completion of the spatial
    intersections of blocks and key geographies (places, congressional
    districts, and tribal regions)

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  All information to be
                        processed is loaded into the queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         a dictionary that contains the configuration
                        information of various steps of NMB2 data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    continue_run = True
    temp_time = time.localtime()

    try:
        # connect to the database
        my_conn = psycopg2.connect(host=db_config['db_host'],
                                   user=db_config['db_user'],
                                   password=db_config['db_password'],
                                   database=db_config['db'])
        my_cursor = my_conn.cursor()
    except:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 8 OF 13 - FAILED TO CONNECT TO
            DATABASE
            """
        my_message = ' '.join(my_message.split())
        my_message += '\n%s' % traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        return False

    # fill the nulls that were left empty in step 7
    continue_run = fillNulls(my_cursor, config, db_config, start_time)

    # transfer data with the correct assigned area to the final block tables
    if continue_run:
        continue_run = transferData(my_cursor, config, db_config, start_time)

    # load queue for water blocks
    if continue_run:
        continue_run, file_count = loadWaterBlocksQueue(
            input_queue, my_cursor, config, db_config, start_time)

    # process the results coming from the distributed workers
    if continue_run:
        for _ in range(config['number_servers']):
            message_queue.put('assign_water_blocks')
        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       file_count, start_time)

    if continue_run:
        my_message = """
            INFO - STEP 0 (MASTER): TASK 8 OF 13 - COMPLETED ASSIGNING WATER
            BLOCKS TO CONGRESSIONAL DISTRICTS
            """
        my_message = ' '.join(my_message.split())
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        my_cursor.close()
        my_conn.close()
        gc.collect()
        return True
    else:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 8 OF 13 - FAILED PROCESSING WATER
            BLOCKS FOR CONGRESSIONAL DISTRICTS
            """
        my_message = ' '.join(my_message.split()) + '\n%s' % traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        my_cursor.close()
        my_conn.close()
        return False
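
# Illustrative only: the minimum db_config dictionary this module expects when
# opening its psycopg2 connection above.  The literal values are placeholders,
# not production settings.
_example_db_config = {
    'db_host': 'localhost',      # host name of the PostgreSQL/PostGIS server
    'db_user': 'nbm_user',       # database user
    'db_password': 'change_me',  # database password
    'db': 'nbm2',                # database name passed as database=...
}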
def parseFBData(input_queue, output_queue, message_queue, config, db_config,
                start_time):
    """
    main subroutine that manages parsing of the fixed broadband deployment
    data into county level csv files

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  All information to be
                        processed is loaded into the queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         the json variable that contains all configuration
                        data required for the data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    # get county fips
    temp_time = time.localtime()
    my_message = """
        INFO - STEP 0 (MASTER): TASK 13 OF 13 - STARTING TO MAKE COUNTY LEVEL
        FBD CSV FILES
        """
    my_message = ' '.join(my_message.split())
    print(nbmf.logMessage(my_message, temp_time, time.localtime(),
          time.mktime(time.localtime()) - time.mktime(start_time)))
    continue_run, county_fips = getCounty_fips(config, start_time)

    # load queue
    if continue_run:
        continue_run, file_count = loadFBDQueue(input_queue, county_fips,
                                                config, start_time)

    # process data
    if continue_run:
        for _ in range(config['number_servers']):
            message_queue.put('parse_fbd')
        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       file_count, start_time)

    # close out
    if continue_run:
        my_message = """
            INFO - STEP 0 (MASTER): TASK 13 OF 13 - COMPLETED CREATING COUNTY
            LEVEL FBD CSV FILES
            """
        my_message = ' '.join(my_message.split())
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        gc.collect()
        return True
    else:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 13 OF 13 - FAILED TO CREATE COUNTY
            LEVEL FBD CSV FILES
            """
        my_message = ' '.join(my_message.split()) + '\n' + traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        return False
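
# Illustrative only: a hypothetical servant-side loop showing how the control
# messages placed on message_queue by the master (e.g. 'parse_fbd',
# 'load_other_files') might be consumed.  The real servant implementation
# lives elsewhere; the 'stop' sentinel and the handler mapping are assumptions
# made for this sketch.
def _example_servant_loop(message_queue, handlers):
    while True:
        command = message_queue.get()   # block until the master sends a command
        if command == 'stop':
            break
        handler = handlers.get(command)
        if handler is not None:
            handler()                   # e.g. handlers['parse_fbd'] = parse_fbd_worker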
def breakOutBlockData(input_queue, output_queue, message_queue, config,
                      db_config, start_time):
    """
    main subroutine that manages the creation of the county level block data
    files

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  All information to be
                        processed is loaded into the queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         a dictionary that contains the configuration
                        information of various steps of NMB2 data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    try:
        temp_time = time.localtime()
        continue_run, county_fips = getCounty_fips(config, start_time)

        if continue_run:
            continue_run = changeTogeom(db_config, config, start_time)

        if continue_run:
            continue_run = makeBlockTablePickle(config, db_config, start_time)

        if continue_run:
            for _ in range(config['number_servers']):
                message_queue.put('parse_blockdf')
            continue_run, county_counter = loadBlockQueue(
                input_queue, county_fips, config, start_time)

        if continue_run:
            continue_run = s0f.processWork(config, input_queue, output_queue,
                                           county_counter, start_time)

        if continue_run:
            continue_run = changeToGEOMETRY(config, db_config, start_time)

        if continue_run:
            my_message = """
                INFO - STEP 0 (MASTER): TASK 6 OF 13 - COMPLETED CREATING
                COUNTY LEVEL GEOJSON BLOCK FILES
                """
            my_message = ' '.join(my_message.split())
            print(nbmf.logMessage(my_message, temp_time, time.localtime(),
                  time.mktime(time.localtime()) - time.mktime(start_time)))
            gc.collect()
            return True
        else:
            my_message = """
                ERROR - STEP 0 (MASTER): TASK 6 OF 13 - FAILED TO CREATE
                COUNTY LEVEL GEOJSON BLOCK FILES
                """
            my_message = ' '.join(my_message.split()) + '\n' + traceback.format_exc()
            print(nbmf.logMessage(my_message, temp_time, time.localtime(),
                  time.mktime(time.localtime()) - time.mktime(start_time)))
            return False

    except:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 6 OF 13 - FAILED TO CREATE COUNTY
            LEVEL GEOJSON BLOCK FILES
            """
        my_message = ' '.join(my_message.split()) + '\n' + traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        return False
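
# Illustrative only: changeTogeom and changeToGEOMETRY are helpers defined
# elsewhere in this step.  A typical PostGIS column-type conversion of the kind
# such helpers run looks like the statement below; the table and column names
# are placeholders, not the actual schema.
_example_geom_conversion_sql = """
    ALTER TABLE nbm2_block_table
    ALTER COLUMN geom TYPE geometry
    USING geom::geometry;
"""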
def loadComplexShapeFiles(input_queue, output_queue, message_queue, config,
                          db_config, start_time):
    """
    main routine for loading place and block shape files (56 files each)

    Arguments In:
        input_queue:    a multiprocessing queue variable that connects the
                        module to the main and contains the inputs used by
                        all subprocesses to do the required work
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         the json variable that contains all configuration
                        data required for the data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    continue_run = True
    block_counter = 0
    place_counter = 0

    # start the worker processes if the block and/or place shape files need to be loaded
    if config['step0']['census_block_shape'] or config['step0']['census_place_shape']:
        for _ in range(config['number_servers']):
            message_queue.put('load_complex_shape')

    # load the block file data into the queue
    if continue_run and config['step0']['census_block_shape']:
        continue_run, block_counter = loadBlockQueue(input_queue, config,
                                                     db_config, start_time)

    # load the place file data into the queue
    if continue_run and config['step0']['census_place_shape']:
        continue_run, place_counter = loadPlaceQueue(input_queue, config,
                                                     db_config, start_time)

    # process the results
    file_count = block_counter + place_counter
    if continue_run and file_count > 0:
        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       file_count, start_time)

    # add additional columns to the tables and change the geometry
    if continue_run and config['step0']['census_block_shape']:
        # modify block table to get the fields we need
        continue_run = modifyBlockTable(config, db_config, start_time)

    # adjust indexes and column names
    for shape_type in ['block', 'place']:
        if continue_run and config['step0']['census_%s_shape' % shape_type]:
            index_list = config['%s_indexes' % shape_type]
            continue_run = s0f.modifyGeoTables(config, db_config, shape_type,
                                               index_list, start_time)

    gc.collect()
    return continue_run
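
# Illustrative only: the configuration keys loadComplexShapeFiles reads.  The
# flag values and index names below are placeholders for the production config
# file, shown to document the expected structure.
_example_complex_shape_config = {
    'number_servers': 2,                  # how many servant servers to signal
    'step0': {
        'census_block_shape': True,       # load the 56 block shape files
        'census_place_shape': True,       # load the 56 place shape files
    },
    'block_indexes': ['block_fips_idx'],  # passed to s0f.modifyGeoTables
    'place_indexes': ['place_fips_idx'],
}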
def startSpatialIntersections(input_queue, output_queue, message_queue, config,
                              db_config, start_time):
    """
    The main subprocess that manages the creation of the spatial intersections
    of blocks and key geographies (places, congressional districts, and tribal
    regions)

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  All information to be
                        processed is loaded into the queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  All results from the
                        various processes are loaded into the queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants
        config:         a dictionary that contains the configuration
                        information of various steps of NMB2 data processing
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable that indicates when the
                        current step started

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    try:
        temp_time = time.localtime()

        # create the staging tables for all three geographies
        continue_run = createSpatialTables(config, db_config, start_time)

        # identify which counties intersect with each tribal, place, and congressional geometry
        if continue_run:
            continue_run, task_count = findIntersectingCounties(
                input_queue, config, db_config, start_time)

        # start the distributed worker tasks and process results
        if continue_run:
            for _ in range(config['number_servers']):
                message_queue.put('initial_spatial_intersection')
            continue_run = s0f.processWork(config, input_queue, output_queue,
                                           task_count, start_time)

        # end the procedure
        if continue_run:
            my_message = """
                INFO - STEP 0 (MASTER): TASK 7 OF 13 - COMPLETED INITIAL
                SPATIAL INTERSECTIONS
                """
            my_message = ' '.join(my_message.split())
            print(nbmf.logMessage(my_message, temp_time, time.localtime(),
                  time.mktime(time.localtime()) - time.mktime(start_time)))
            gc.collect()
            return True
        else:
            my_message = """
                ERROR - STEP 0 (MASTER): TASK 7 OF 13 - FAILED TO EXECUTE
                INITIAL SPATIAL INTERSECTIONS
                """
            my_message = ' '.join(my_message.split()) + '\n' + traceback.format_exc()
            print(nbmf.logMessage(my_message, temp_time, time.localtime(),
                  time.mktime(time.localtime()) - time.mktime(start_time)))
            return False

    except:
        my_message = """
            ERROR - STEP 0 (MASTER): TASK 7 OF 13 - FAILED TO EXECUTE INITIAL
            SPATIAL INTERSECTIONS
            """
        my_message = ' '.join(my_message.split()) + '\n' + traceback.format_exc()
        print(nbmf.logMessage(my_message, temp_time, time.localtime(),
              time.mktime(time.localtime()) - time.mktime(start_time)))
        return False
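
# Illustrative only: a hypothetical stand-in for nbmf.logMessage, consistent
# with how it is called throughout this module (message, start struct_time,
# end struct_time, elapsed seconds) and returning the string that gets printed.
# The real implementation lives in the nbmf module and may differ.
def _example_logMessage(message, started, finished, elapsed_seconds):
    return '%s | %s -> %s | %0.1f seconds' % (
        message,
        time.strftime('%Y-%m-%d %H:%M:%S', started),
        time.strftime('%Y-%m-%d %H:%M:%S', finished),
        elapsed_seconds)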