Beispiel #1
0
def loadRemainingFiles(input_queue, output_queue, message_queue, config,
                       db_config, start_time):
    """
    calls the modules that load the smaller shape files and csv files

    Arguments In:
        input_queue:    a multiprocessing queue that can be shared across
                        multiple servers and cores.  Every entry of the
                        build list is loaded into this queue
        output_queue:   a multiprocessing queue that can be shared across
                        multiple servers and cores.  It is handed to
                        s0f.processWork together with the input queue
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants.  One
                        'load_other_files' message is sent per configured
                        server
        config:         the json variable that contains all configuration
                        data required for the data processing.  This
                        routine reads config['step0']['census_csv'],
                        config['step0']['census_shape'] and
                        config['number_servers']
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     a time structure variable (as returned by
                        time.localtime()) that indicates when the step
                        began; it is passed to time.mktime()

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed
    """
    temp_time = time.localtime()
    continue_run = True
    build_list = []

    def _log(text):
        # the message literals are indented multi-line strings; collapse
        # all runs of whitespace so that a single clean line is logged
        print(nbmf.logMessage(
            ' '.join(text.split()), temp_time, time.localtime(),
            time.mktime(time.localtime()) - time.mktime(start_time)))

    if config['step0']['census_csv']:
        continue_run = buildHH_HU_POP_sql(config, start_time)

    # tell every server to start its workers for this task
    if config['step0']['census_shape'] or config['step0']['census_csv']:
        for _ in range(config['number_servers']):
            message_queue.put('load_other_files')

    if continue_run and config['step0']['census_shape']:
        continue_run, build_list = loadSimpleShapeFiles(
            config, db_config, build_list, start_time)

    if continue_run and config['step0']['census_csv']:
        continue_run, build_list = loadCSVFiles(
            build_list, config, db_config, start_time)

    if continue_run and len(build_list) > 0:
        # populate the input queue with the results of the list build
        for build_item in build_list:
            input_queue.put(build_item)
        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       len(build_list), start_time)

    # create the text file that contains county fips
    if continue_run:
        continue_run = makeCountyFIPsFile(config, db_config, start_time)

    if continue_run:
        # the completion message is only relevant when something was loaded
        if config['step0']['census_shape'] or config['step0']['census_csv']:
            _log("""
                INFO - STEP 0 (MASTER): TASK 5 OF 13 - COMPLETED LOADING SHAPE
                FILES AND CSV FILES INTO DATABASE
                """)

        gc.collect()
        return True

    # A failure arrives here through a False return value of a helper, not
    # through a raised exception, so there is no active traceback to attach
    # (traceback.format_exc() would only contribute "NoneType: None").
    _log("""
        ERROR - STEP 0 (MASTER): TASK 5 OF 13 - FAILED LOADING SHAPE
        FILES AND CSV FILES INTO DATABASE
        """)
    return False
Beispiel #2
0
def updateSpatialIntersections(input_queue, output_queue, message_queue,
                               config, db_config, start_time):
    """
    The main subprocess that manages the completion of the spatial
    intersections of blocks and key geographies (places, congressional
    districts, and tribal regions)

    Arguments In:
        input_queue:        a multiprocessing queue that can be shared across
                            multiple servers and cores.  All information to be
                            processed is loaded into the queue
        output_queue:       a multiprocessing queue that can be shared across
                            multiple servers and cores.  All results from the
                            various processes are loaded into the queue
        message_queue:      a multiprocessing queue variable that is used to
                            communicate between the master and servants.  One
                            'assign_water_blocks' message is sent per
                            configured server
        config:             a dictionary that contains the configuration
                            information of various steps of NMB2 data
                            processing
        db_config:          a dictionary that contains the configuration
                            information for the database and queue.  The
                            keys 'db_host', 'db_user', 'db_password' and
                            'db' are used to open the connection
        start_time:         a time structure variable that indicates when
                            the current step started

    Arguments Out:
        continue_run:       a boolean variable that indicates if the routine
                            successfully completed and whether the next
                            steps should be executed
    """

    continue_run = True
    temp_time = time.localtime()

    def _log(text, detail=''):
        # collapse the indented multi-line literal into one line and append
        # the optional detail (a traceback) unchanged
        print(
            nbmf.logMessage(
                ' '.join(text.split()) + detail, temp_time, time.localtime(),
                time.mktime(time.localtime()) - time.mktime(start_time)))

    try:
        # connect to the database
        my_conn = psycopg2.connect(host=db_config['db_host'],
                                   user=db_config['db_user'],
                                   password=db_config['db_password'],
                                   database=db_config['db'])
        my_cursor = my_conn.cursor()

    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not converted into a "failed to connect" result
        _log("""
            ERROR - STEP 0 (MASTER): TASK 8 OF 13 - FAILED TO CONNECT TO
            DATABASE
            """, '\n%s' % traceback.format_exc())
        return False

    try:
        # Fill the nulls that were left empty in step 7
        continue_run = fillNulls(my_cursor, config, db_config, start_time)

        # transfer data with the correct assigned area to the final block
        # tables
        if continue_run:
            continue_run = transferData(my_cursor, config, db_config,
                                        start_time)

        # load queue for water blocks
        if continue_run:
            continue_run, file_count = loadWaterBlocksQueue(
                input_queue, my_cursor, config, db_config, start_time)

        # process the results coming from the distributed workers
        if continue_run:
            for _ in range(config['number_servers']):
                message_queue.put('assign_water_blocks')

            continue_run = s0f.processWork(config, input_queue, output_queue,
                                           file_count, start_time)

        if continue_run:
            _log("""
                INFO - STEP 0 (MASTER): TASK 8 OF 13 - COMPLETED ASSIGNING WATER
                BLOCKS TO CONGRESSIONAL DISTRICTS
                """)
        else:
            # the helpers signal failure through their return value, so no
            # exception is active here and no traceback is attached
            _log("""
                ERROR - STEP 0 (MASTER): TASK 8 OF 13 - FAILED PROCESSING WATER BLOCKS
                FOR CONGRESSIONAL DISTRICTS
                """)

    finally:
        # release the database resources on every path, including when one
        # of the helpers raises; the connection is closed even if closing
        # the cursor fails
        try:
            my_cursor.close()
        finally:
            my_conn.close()

    if continue_run:
        gc.collect()
        return True

    return False
Beispiel #3
0
def parseFBData(input_queue, output_queue, message_queue, config, db_config,
                start_time):
    """
    main subroutine that manages parsing of the fixed broadband deployment
    (FBD) data into county level files

    NOTE(review): the original description spoke of geojson files while the
    log messages emitted below speak of CSV files - confirm which format the
    workers actually write.

    Arguments In:
        input_queue:        a multiprocessing queue that can be shared across
                            multiple servers and cores.  All information to be
                            processed is loaded into the queue
        output_queue:       a multiprocessing queue that can be shared across
                            multiple servers and cores.  All results from the
                            various processes are loaded into the queue
        message_queue:      a multiprocessing queue variable that is used to
                            communicate between the master and servants.  One
                            'parse_fbd' message is sent per configured server
        config:             the json variable that contains all configuration
                            data required for the data processing
        db_config:          a dictionary that contains the configuration
                            information for the database and queue (not used
                            directly by this routine)
        start_time:         a time structure variable (as returned by
                            time.localtime()) that indicates when the step
                            began; it is passed to time.mktime()

    Arguments Out:
        continue_run:   a boolean variable that indicates if the routine
                        successfully completed and whether the next steps
                        should be executed

    """

    temp_time = time.localtime()

    def _log(text):
        # the message literals are indented multi-line strings; collapse
        # all runs of whitespace so that a single clean line is logged
        print(nbmf.logMessage(
            ' '.join(text.split()), temp_time, time.localtime(),
            time.mktime(time.localtime()) - time.mktime(start_time)))

    _log("""
        INFO - STEP 0 (MASTER): TASK 13 OF 13 - STARTING TO MAKE COUNTY
        LEVEL FBD CSV FILES
        """)

    # get county fips
    continue_run, county_fips = getCounty_fips(config, start_time)

    # load queue
    if continue_run:
        continue_run, file_count = loadFBDQueue(input_queue, county_fips,
                                                config, start_time)

    # process data
    if continue_run:
        for _ in range(config['number_servers']):
            message_queue.put('parse_fbd')

        continue_run = s0f.processWork(config, input_queue, output_queue,
                                       file_count, start_time)

    # close out
    if continue_run:
        _log("""
            INFO - STEP 0 (MASTER): TASK 13 OF 13 - COMPLETED CREATING COUNTY
            LEVEL FBD CSV FILES
            """)
        gc.collect()
        return True

    # A failure arrives here through a False return value of a helper, not
    # through a raised exception, so there is no active traceback to attach
    # (traceback.format_exc() would only contribute "NoneType: None").
    _log("""
        ERROR - STEP 0 (MASTER): TASK 13 OF 13 - FAILED TO CREATE COUNTY
        LEVEL FBD CSV FILES
        """)
    return False
Beispiel #4
0
def breakOutBlockData(input_queue, output_queue, message_queue, config,
                      db_config, start_time):
    """
    main subroutine that manages the creation of the county level block data
    files

    Arguments In:
        input_queue:        a multiprocessing queue that can be shared across
                            multiple servers and cores.  All information to be
                            processed is loaded into the queue
        output_queue:       a multiprocessing queue that can be shared across
                            multiple servers and cores.  All results from the
                            various processes are loaded into the queue
        message_queue:      a multiprocessing queue variable that is used to
                            communicate between the master and servants.  One
                            'parse_blockdf' message is sent per configured
                            server
        config:             a dictionary that contains the configuration
                            information of various steps of NMB2 data
                            processing
        db_config:          a dictionary that contains the configuration
                            information for the database and queue
        start_time:         a time structure variable that indicates when
                            the current step started

    Arguments Out:
        continue_run:       a boolean variable that indicates if the routine
                            successfully completed and whether the next
                            steps should be executed.  Exceptions raised by
                            the sub-steps are logged and reported as False
    """

    # bound before the try block because the exception handler needs it
    temp_time = time.localtime()

    def _log(text, detail=''):
        # collapse the indented multi-line literal into one line and append
        # the optional detail (a traceback) unchanged
        print(
            nbmf.logMessage(
                ' '.join(text.split()) + detail, temp_time, time.localtime(),
                time.mktime(time.localtime()) - time.mktime(start_time)))

    failure_text = """
        ERROR - STEP 0 (MASTER): TASK 6 OF 13 - FAILED TO CREATE COUNTY
        LEVEL GEOJSON BLOCK FILES
        """

    try:
        continue_run, county_fips = getCounty_fips(config, start_time)

        if continue_run:
            continue_run = changeTogeom(db_config, config, start_time)

        if continue_run:
            continue_run = makeBlockTablePickle(config, db_config, start_time)

        if continue_run:
            # the workers are started before the queue is filled
            for _ in range(config['number_servers']):
                message_queue.put('parse_blockdf')

            continue_run, county_counter = loadBlockQueue(
                input_queue, county_fips, config, start_time)

        if continue_run:
            continue_run = s0f.processWork(config, input_queue, output_queue,
                                           county_counter, start_time)

        if continue_run:
            continue_run = changeToGEOMETRY(config, db_config, start_time)

        if continue_run:
            _log("""
                INFO - STEP 0 (MASTER): TASK 6 OF 13 - COMPLETED CREATING COUNTY
                LEVEL GEOJSON BLOCK FILES
                """)
            gc.collect()
            return True

        # a helper reported failure through its return value; no exception
        # is active, so there is no traceback to attach
        _log(failure_text)
        return False

    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not silently turned into a False result
        _log(failure_text, '\n' + traceback.format_exc())
        return False
Beispiel #5
0
def loadComplexShapeFiles(input_queue, output_queue, message_queue, config,
                          db_config, start_time):
    """
    main routine for loading place and block shape files

    Arguments In:
        input_queue:    a multiprocessing queue variable that is handed to
                        the queue loaders and to s0f.processWork
        output_queue:   a multiprocessing queue variable that is handed to
                        s0f.processWork
        message_queue:  a multiprocessing queue variable that is used to
                        communicate between the master and servants.  One
                        'load_complex_shape' message is sent per configured
                        server when at least one shape type is enabled
        config:         the json variable that contains all configuration
                        data required for the data processing.  The flags
                        config['step0']['census_block_shape'] and
                        config['step0']['census_place_shape'] select the
                        work that is done
        db_config:      a dictionary that contains the configuration
                        information for the database and queue
        start_time:     the time that the step began; passed through to
                        the sub-steps

    Arguments Out:
        continue_run:   the success indicator of the last sub-step that
                        ran (True when nothing had to be done); tells the
                        caller whether the next steps should be executed
    """

    def enabled(flag_name):
        # the flag is looked up on every call, exactly where it is needed
        return config['step0'][flag_name]

    ok = True
    blocks_queued = 0
    places_queued = 0

    # wake up the workers when any complex shape file has to be loaded
    if enabled('census_block_shape') or enabled('census_place_shape'):
        server_total = config['number_servers']
        for _ in range(server_total):
            message_queue.put('load_complex_shape')

    # queue the block files
    if ok and enabled('census_block_shape'):
        ok, blocks_queued = loadBlockQueue(input_queue, config, db_config,
                                           start_time)

    # queue the place files
    if ok and enabled('census_place_shape'):
        ok, places_queued = loadPlaceQueue(input_queue, config, db_config,
                                           start_time)

    # wait for the workers to finish everything that was queued
    queued_total = blocks_queued + places_queued
    if ok and queued_total > 0:
        ok = s0f.processWork(config, input_queue, output_queue, queued_total,
                             start_time)

    # extend the block table with the fields that are needed later
    if ok and enabled('census_block_shape'):
        ok = modifyBlockTable(config, db_config, start_time)

    # adjust indexes and column names, blocks first and places second
    for shape_type in ('block', 'place'):
        if not ok:
            # nothing further runs once a sub-step has failed
            break
        if enabled('census_%s_shape' % shape_type):
            shape_indexes = config['%s_indexes' % shape_type]
            ok = s0f.modifyGeoTables(config, db_config, shape_type,
                                     shape_indexes, start_time)

    gc.collect()
    return ok
Beispiel #6
0
def startSpatialIntersections(input_queue, output_queue, message_queue, config,
                              db_config, start_time):
    """
    The main subprocess that manages the creation of the spatial
    intersections of blocks and key geographies (places, congressional
    districts, and tribal regions)

    Arguments In:
        input_queue:        a multiprocessing queue that can be shared across
                            multiple servers and cores.  All information to be
                            processed is loaded into the queue
        output_queue:       a multiprocessing queue that can be shared across
                            multiple servers and cores.  All results from the
                            various processes are loaded into the queue
        message_queue:      a multiprocessing queue variable that is used to
                            communicate between the master and servants.  One
                            'initial_spatial_intersection' message is sent
                            per configured server
        config:             a dictionary that contains the configuration
                            information of various steps of NMB2 data
                            processing
        db_config:          a dictionary that contains the configuration
                            information for the database and queue
        start_time:         a time structure variable that indicates when
                            the current step started

    Arguments Out:
        continue_run:       a boolean variable that indicates if the routine
                            successfully completed and whether the next
                            steps should be executed.  Exceptions raised by
                            the sub-steps are logged and reported as False
    """

    # bound before the try block because the exception handler needs it
    temp_time = time.localtime()

    def _log(text, detail=''):
        # collapse the indented multi-line literal into one line and append
        # the optional detail (a traceback) unchanged
        print(
            nbmf.logMessage(
                ' '.join(text.split()) + detail, temp_time, time.localtime(),
                time.mktime(time.localtime()) - time.mktime(start_time)))

    failure_text = """
        ERROR - STEP 0 (MASTER): TASK 7 OF 13 - FAILED TO EXECUTE INITIAL
        SPATIAL INTERSECTIONS
        """

    try:
        # create the staging tables for all three geographies
        continue_run = createSpatialTables(config, db_config, start_time)

        # identify which counties intersect with each tribe, place, congress,
        # geom
        if continue_run:
            continue_run, task_count = findIntersectingCounties(
                input_queue, config, db_config, start_time)

        # start the distributed worker tasks and process results
        if continue_run:
            for _ in range(config['number_servers']):
                message_queue.put('initial_spatial_intersection')

            continue_run = s0f.processWork(config, input_queue, output_queue,
                                           task_count, start_time)

        # end the procedure
        if continue_run:
            _log("""
                INFO - STEP 0 (MASTER): TASK 7 OF 13 - COMPLETED INITIAL SPATIAL
                INTERSECTIONS
                """)
            gc.collect()
            return True

        # a helper reported failure through its return value; no exception
        # is active, so there is no traceback to attach
        _log(failure_text)
        return False

    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not silently turned into a False result
        _log(failure_text, '\n' + traceback.format_exc())
        return False