def split_line_if_valid(line, line_number):
    items = line.split()
    if len(items) != 3:
        logger.warning(
            "Skipping line {}: wrong number of fields. Expected: <file name> <md5|sha1|sha256> <checksum>."
            .format(line_number))
        return None
    elif not re.fullmatch(r'[^/><|:&]+', items[0]):  # fullmatch so illegal characters can't slip past a partial match
        logger.warning(
            "Skipping line {}: filename contains illegal symbols: {}.".format(
                line_number, items[0]))
        return None
    elif not re.fullmatch(r'sha1|sha256|md5', items[1]):
        logger.warning(
            "Skipping line {}: Unknown hashing algorithm: {}.".format(
                line_number, items[1]))
        return None
    elif not re.fullmatch(r'[a-fA-F0-9]+', items[2]):
        logger.warning(
            "Skipping line {}: checksum contains illegal symbols: {}.".format(
                line_number, items[2]))
        return None
    else:
        return items
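
# A minimal usage sketch of split_line_if_valid (assuming the module-level logger the
# function relies on); the checksum lines below are hypothetical, illustrative values only:
import logging
import re

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

lines = [
    "archive.tar.gz md5 d41d8cd98f00b204e9800998ecf8427e",  # valid
    "bad|name.txt md5 d41d8cd98f00b204e9800998ecf8427e",    # illegal '|' in filename
    "archive.tar.gz crc32 cbf43926",                        # unknown algorithm
]
for ix, text in enumerate(lines, start=1):
    print(split_line_if_valid(text, ix))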
def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
	"main entry point - loops over scenarios"
	
	msg='{} Starting benefits calculations using input file {}'
	logger.info(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
	print(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
	
	#This isn't the most efficient way to do it, but it's the most transparent:  we'll loop through each base:scenario pair.
	#  For each, we'll read the input file a line at a time and derive our consumer surplus benefits.
	
	
	base_scenario = scenarios[0]
	
	###for s in scenarios[1:]:
	for s in scenarios[0:]:   ###run this for the 'red line' case only
		
		arr_dict={}
		#grab a reader for the input file
		reader=csv.DictReader(open(map_file))
		
		#process one line at a time from the setup file
		for line_ix, line in enumerate(reader, start=1):
			
			# the info comes in as a dict - this cleans up the content, removing comments, etc.  Returns a dict.
			dmap = grabinfo(line)
			
			#these set processing parameters
			transpose=dmap['transpose']		#use transposed trip matrix?
			hov_adj=dmap['hov_adj']			#occupancy adjustment (hov2 implies double time costs, say)
			pct_hb=dmap['pct_hb']			#fraction of benefits accruing to origin node ('home base')
			
			#these set storage parameters
			arr_name=dmap['aggregate_to']
			column_name= arr_name
			table_name=s['name']+"_"+dmap['dbtable']			
			
			#get information for the base case
			base_dir=base_scenario['location']	#root directory location
			base_name=base_scenario['name']		#name for this scenario
			
			#Build fully specified path names from locations in mappings.py; subdirectory determined by file name,
			#   then create np arrays out of them
			base_cost_file=get_full_filename(location=base_dir, filename=dmap['cost_file'])
			base_trips_file=get_full_filename(location=base_dir,  filename=dmap['trip_file'])
			
			#try to create npa arrays from the raw data files; if they don't exist go on to the next line
			try:
				base_trips_raw = npa_from_file( base_trips_file)
				base_cost_per_trip_raw = npa_from_file( base_cost_file)	
			except Exception:
				exc_type, exc_value, exc_traceback = sys.exc_info()
				msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
				logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
				continue

			#Costs and trips for the base case - returns  base costs, base trips as  square np 
			#   arrays w/o OD headers, trips transposed if needed
			base_costs, base_trips=prep_data( base_cost_per_trip_raw , base_trips_raw,  transpose,  hov_adj )
			
			##Process the scenario costs and trips the same way
			#test_dir = s['location']
			##grab the files and put them in np arrays
			#try:
				#test_cost_file=get_full_filename(location=test_dir, filename=dmap['cost_file'])
				#test_trip_file=get_full_filename(location=test_dir,  filename=dmap['trip_file'])
			#except:
				#msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
				#logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))						
			#test_trips_raw = npa_from_file( test_trip_file)
			#test_cost_per_trip_raw = npa_from_file( test_cost_file)					
			#test_name=s['name']
			##Scenario case trips*cost/trip and trips used 
				
			#test_costs, test_trips=prep_data( cost_per_trip=test_cost_per_trip_raw , trips=test_trips_raw,  transpose=transpose,   hov_adj=hov_adj  )			
			
			#With all costs gathered, calculate the change in consumer surplus in square np array; produces a square np array
			###cs_delta = get_cs_delta(base_trips, test_trips, base_costs, test_costs)
			
			
			my_trips=get_base_trips_only(base_trips=base_trips)
			#From the cs_delta matrix, assign benefits to the origin and destination node; produces a vector of nodes w/o OD headers
			#  For home-based transit trips, both outbound and return accrue to home node, as do am and pm highway trips.
			#  For mid-day and night-time highway trips, the benefit is split between origin and dest nodes.
			###benefits_by_zone = calculate_benefits(cs_delta, pct_hb)
			
			trips_by_zone=calculate_benefits(my_trips, pct_hb)

			#the balance of this block stores the benefits and other information for posterity/more analysis

			#We'll aggregate the cs_delta by benefit type, denominated in natural units (minutes, dollars, miles).  We can use
			#  scalars to transform minutes to dollars, miles to CO2, etc. as a post-processing step.

			#We'll create an aggregation array if requested in the 'aggregate_to' column.  This bit of code adds the array to the
			#  arr_dict if needed, then creates a null np array if needed.  It adds a column of current benefits_by_zone to the
			#  array if the array is null; it increments the array by the benefits just calculated otherwise.  This essentially
			#  creates a bundle of an array containing all the summary information we've gleaned from a scenario and where to
			#  store it when we're done.

			#to hold the results, we'll make a dict:
			#   arr_dict = {'aggregate_to': {'data': npa_of_data,
			#                                'column': 'aggregate_to',
			#                                'table': 'db_table'}}
			#... where the 'aggregate_to' value comes from the input spreadsheet and serves as the name of the database column.
			#create the dict if needed (it doesn't yet exist)
			if arr_name not in arr_dict:
				arr_dict[arr_name]={}
				arr_dict[arr_name]['data']=None
				
			#update the dict with current state of the roll-up array
			arr_dict[arr_name]={'data': sum_col_to_np_array(npa=arr_dict[arr_name]['data'],
			                                                ###vector=benefits_by_zone,
			                                                vector=trips_by_zone,
			                                                max_index_val=len(base_trips)),
			                    'column': column_name,
			                    'table': table_name
			                    }
			logger.debug('line {}\n\t{} -versus- {} \n\t {} \n\t {} \n\t base trips: {}  test trips: {}  sum dlta cs: {}  (summary stats - not used in benefit calcs)'.format(
			            line_ix,
			            base_name, 'NA',  ###test_name,
			            dmap['trip_file'],
			            dmap['cost_file'].split()[0],
			            np.sum(base_trips), 'NA',  ###np.sum(test_trips),
			            np.sum(trips_by_zone)))
		
		#store the arrays in db tables	(written after all the processing for a scenario is completed.)
		store_data(arr_dict=arr_dict, db=DB)
	
	finish = datetime.now()
	msg='Finished at {}.  Processed {} input lines in {}.'
	elapsed=str(finish-start).split('.')[0]
	print(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
	logger.info(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
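
# The helpers called above (grabinfo, prep_data, calculate_benefits, sum_col_to_np_array, ...)
# are defined elsewhere in the module.  A minimal sketch of what the comments describe --
# these _sketch versions are assumptions for illustration, not the module's actual code:
import numpy as np

def calculate_benefits_sketch(cs_delta, pct_hb):
    # assign pct_hb of each OD cell to the origin ('home base') node and the
    # remainder to the destination node; returns a vector indexed by zone
    return pct_hb * cs_delta.sum(axis=1) + (1.0 - pct_hb) * cs_delta.sum(axis=0)

def sum_col_to_np_array_sketch(npa, vector, max_index_val):
    # start a null accumulator on first use, then increment it with the
    # benefits vector just calculated (mirrors the arr_dict roll-up above)
    if npa is None:
        npa = np.zeros(max_index_val)
    return npa + vector[:max_index_val]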
def rollup_hwy_metrics(
    scenario=None,
    base_scenario=None,  # used to build the base table names below
    income=None,
    purposes=purposes,
    master_header=None,
    purposes_round_trip=purposes_round_trip,
    np_rows=NP_ROWS,
    topic=None,
    occupancy=None,
):

    """Aggregate costs for highway-only travel.  Roll up topics over purpose, tod.  
        Keeps occupancies (sov, hov2, hov3) and costs (time, distance, toll) separate"""

    """Typical SELECTS


     """

    """   
                    #     hwy_toll_hov_am
                #     hbo_inc1_md_hov3"""
    # general info for this metric
    base_trips_table = "{}trips_purpose_income_tod_occ".format(base_scenario)
    test_trips_table = "{}trips_purpose_income_tod_occ".format(scenario)
    base_metrics_table = "{}loaded_hwy_od_timecost".format(base_scenario)
    test_metrics_table = "{}loaded_hwy_od_timecost".format(scenario)

    # We'll need to leverage the time to reflect vehicle occupancy.  Since the topic (metric) is an
    #  input parameter, we can calculate a uniform multiplier here.  Occupancy leverages only time.
    if topic == "time":
        mult = time_adjust[occupancy]
    else:
        mult = 1
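
    # (illustrative assumption: time_adjust presumably maps occupancy class to a per-vehicle
    #  time multiplier, e.g. {"sov": 1.0, "hov2": 2.0, "hov3": 3.5}, so that hov2 doubles
    #  the time cost as noted above; the actual values live elsewhere in the module)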

    cols_added = 5  # metric base, trips base, metric trial, trips trial, benefit

    logger.info("\n\n\n***** Beginning aggregation of highway data for {}".format(topic))
    logger.info("Trips using {}\n and {}".format(base_trips_table, test_trips_table))
    logger.info("Costs using {}\n and {}".format(base_metrics_table, test_metrics_table))

    # this array will aggregate the info gleaned here
    export_array = np.zeros(((np_rows - 1) ** 2, 3))
    this_export_array_col = -1

    # initialize null np array
    npa = np.zeros(((np_rows - 1) ** 2, 3))  # orig, dest, value
    fresh_npa_array = True

    logger.info("Beginning aggregation of {} data".format(topic))

    # we're passing in topic (metric), occupancy and income; purposes are grouped by business/personal as input, analyzed
    #   atomistically here, and provided as aggregate output values

    for purpose in purposes:  # rolls up whatever list of purposes provided (allows biz/personal segregation)

        # round trip or one-way (round trip for home-based journeys)?
        trip_legs = ["outbound"]
        if purpose in purposes_round_trip:
            trip_legs.append("return")

        for tod in tod_hwy_loaded:  #'am', 'pm', 'md', 'nt'

            logger.info("beginning benefit calcs for {} {} {} {}".format(purpose, tod, occupancy, topic))

            need_npa_combined = True  # holds outbound+return benefits rollup

            # flag for npa creation
            this_np_col = -1

            # calculate benefits for each leg of the trip separately; combine the benefits from a round-trip at the end
            #     of the 'trip_leg' loop.
            for trip_leg in trip_legs:

                if this_np_col < 0:
                    npa = np.zeros(((np_rows - 1) ** 2, cols_added + 2))  # scratch array

                # this selects from the base and trial case tables
                for metrics_table, trips_table, name in zip(
                    [base_metrics_table, test_metrics_table], [base_trips_table, test_trips_table], ["base", scenario]
                ):

                    #          --adding {topic} for {purpose} {tod} {occupancy}- {trip_leg} leg\n'
                    select = "--adding {} for {} {} {} - {} leg\n".format(topic, purpose, tod, occupancy, trip_leg)
                    select += "SELECT  DISTINCT\n "
                    #                 {metrics_table}.origin
                    select += "\t{}.origin,\n".format(metrics_table)
                    #                 {metrics_table}.dest
                    select += "\t{}.dest,\n".format(metrics_table)

                    #               '{trips_table}.{purpose}_{income}_{tod}_{occ} * {metrics_table}.hwy_{topic}_{occupancy}_{tod} * {mult}'
                    stmt = "\t{}.{}_{}_{}_{} * {}.hwy_{}_{}_{} * {},\n "  # mult leverages time for HOVs

                    select += stmt.format(
                        trips_table, purpose, income, tod, occupancy, metrics_table, topic, occupancy, tod, mult
                    )

                    #               '{trips_table}.{purpose}_{income}_{tod}_{occ} '
                    stmt = "\t{}.{}_{}_{}_{}\n "

                    select += stmt.format(trips_table, purpose, income, tod, occupancy)

                    #                FROM {trips_table} , {metrics_table}
                    select += "FROM \n\t {} , {} \n ".format(trips_table, metrics_table)

                    if trip_leg == "outbound":  # use OD pairs from trip table same as metric table's

                        #               WHERE  {trips_table}.origin={metrics_table}.origin AND
                        select += "WHERE  \n\t{}.origin={}.origin AND \n".format(trips_table, metrics_table)
                        #                   {metrics_table}.dest={trips_table}.dest
                        select += "\t{}.dest={}.dest \n".format(metrics_table, trips_table)

                    else:  # use transposed OD pairs from trip table (origin = metrics.dest, dest=metrics.origin)

                        #               WHERE  {trips_table}.dest={metrics_table}.origin AND
                        select += "WHERE  \n\t{}.dest={}.origin AND \n".format(trips_table, metrics_table)
                        #                   {trips_table}.origin={metrics_table}.dest
                        select += "\t{}.origin={}.dest \n".format(trips_table, metrics_table)

                    #             ORDER BY {metrics_table}.origin, {metrics_table}.dest
                    select += "ORDER BY \n\t{}.origin, {}.dest\n\n".format(metrics_table, metrics_table)

                    logger.debug(select)
                    try:
                        good_table = True
                        curs.execute(select)
                    except Exception:
                        # some columns were not produced (e.g., wexpbus_autodistance) because they don't apply
                        relations = [
                            "\t{}.{}_{}_{}_{} * {}.hwy_{}_{}_{} * {},\n ".format(
                                trips_table, purpose, income, tod, occupancy, metrics_table, topic, occupancy, tod, mult
                            ),
                            "\t{}.{}_{}_{}_{}".format(trips_table, purpose, income, tod, occupancy),
                        ]
                        logger.warning(
                            "This SELECT failed, probably because the data is n/a:  {}".format("  ".join(relations))
                        )
                        good_table = False
                        # close out any open transaction
                        curs.execute("END")

                    # if the query failed, we've busted out; so go to the next tod
                    if good_table:

                        res = np.array(curs.fetchall())

                        # This rolls up the costs and trips from both scenarios:  npa is rows of:  origin, dest, benefit base, trips base, benefit trial, trips trial
                        if this_np_col < 0:
                            # add first 4 columns of result to the first 4 columns of the scratch array  (origin, dest, base cost, base trips)
                            npa[:, :4] = res
                            this_np_col = 4
                        else:
                            # add the cost, trips columns from the result to cols 4-6 of the scratch array (trial cost, trial trips)
                            npa[:, 4:6] = res[:, -2:]
                            this_np_col += 2
                            # calculate the benefits
                            logger.info("calculating delta cs for {} {} {} {}".format(scenario, purpose, tod, occupancy))
                            npa = add_dlta_cons_surplus(npa)

                        # npa_combined rolls up the atomized benefits, calculated separately for each leg of the journey.
                        if need_npa_combined:
                            npa_combined = npa
                            need_npa_combined = False
                            logger.info("adding benefits to new npa_combined array")
                        else:
                            # otherwise add the benefits from the second leg (the last column) to the combined_npa array
                            npa_combined[:, -1] += npa[:, -1]
                            logger.info(
                                "done with both legs; adding return leg to npa_combined:     {}  {} ".format(
                                    purpose, tod
                                )
                            )

            # if a tod fails to produce a clean query, don't bother trying to add the info
            if good_table:
                if this_export_array_col < 0:
                    # not yet created; add the orig and destin columns, along with the cs deltas
                    export_array[:, :2] = npa_combined[:, :2]
                    export_array[:, -1] = npa_combined[:, -1]
                    this_export_array_col = 3
                    logger.debug("creating new export array")
                else:
                    # ... otherwise just add the new benefits to the cumulative total
                    export_array[:, -1] += npa_combined[:, -1]
                    logger.info("adding additional cs deltas to export array")

            logger.info("Done with tod {}\n\n".format(tod))
        logger.info("Done with purpose {}".format(purpose))

    return export_array
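
# Both rollup functions hand the scratch array to add_dlta_cons_surplus() to fill its last
# column.  A hedged sketch of one common formulation (the rule of half); the module's real
# helper may differ:
import numpy as np

def add_dlta_cons_surplus_sketch(npa):
    # columns: origin, dest, base cost, base trips, trial cost, trial trips, benefit
    base_cost, base_trips = npa[:, 2], npa[:, 3]
    test_cost, test_trips = npa[:, 4], npa[:, 5]
    # rule of half: delta CS = 0.5 * (q_base + q_test) * (p_base - p_test)
    npa[:, -1] = 0.5 * (base_trips + test_trips) * (base_cost - test_cost)
    return npa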
def rollup_transit_metrics(
    scenario=None,
    base_scenario=None,
    income=None,
    purposes=purposes,
    master_col=None,
    master_header=None,
    purposes_round_trip=purposes_round_trip,
    bus_modes=bus_modes,
    rail_modes=rail_modes,
    np_rows=NP_ROWS,
    topic=None,
):

    """Aggregate time costs for mass transit.  Roll up topics over purpose and mode.  
        Cf aggregate_bus_rail_fares() for more verbose documentation."""

    """Keeps topics (initialwaittime, bustime, etc.) separate for now.  For final analysis it may makes sense to consolodate 
         waiting:   initialwaittime, transfertime
         bus time: wexpbus, dexpbus, wbus, dbus
         train time: wrail, wcrail, drail, dcrail

         ... but it's easier to combine later than have to separate."""

    """Typical SELECTS

     """

    # general info for this metric

    scenario = scenario + "_"
    base_scenario = base_scenario + "_"

    base_trips_table = "{}mode_choice_od".format(base_scenario)
    test_trips_table = "{}mode_choice_od".format(scenario)
    base_metrics_table = "{}transit_od_timecost".format(base_scenario)
    test_metrics_table = "{}transit_od_timecost".format(scenario)
    cols_added = 5  # metric base, trips base, metric trial, trips trial, benefit

    logger.info("\n\n\n***** Beginning aggregation of transit data for {}".format(topic))
    logger.info("Trips using {}\n and {}".format(base_trips_table, test_trips_table))
    logger.info("Costs using {}\n and {}".format(base_metrics_table, test_metrics_table))

    # this array will aggregate the info gleaned here
    export_array = np.zeros(((np_rows - 1) ** 2, 3))
    this_export_array_col = -1

    # initialize null np array
    npa = np.zeros(((np_rows - 1) ** 2, 3))  # orig, dest, value
    fresh_npa_array = True

    for purpose in purposes:
        # peak or off peak as f(purpose)
        pk_flag = purpose_peak_flag[purpose]

        # round trip or one-way (round trip for home-based journeys)?
        trip_legs = ["outbound"]

        if purpose in purposes_round_trip:
            trip_legs.append("return")

        # loop through the appropriate modes and compose the SELECT
        for mode in bus_modes + rail_modes:

            logger.info("beginning benefit calcs for {} {} {}".format(purpose, mode, topic))

            # calculate benefits for each leg of the trip separately; combine the benefits from a round-trip at the end
            #     of the 'trip_leg' loop.

            need_npa_combined = True  # holds outbound+return benefits rollup

            # flag for npa creation
            this_np_col = -1

            # calculate each leg of the trip separately
            for trip_leg in trip_legs:

                if this_np_col < 0:
                    npa = np.zeros(((np_rows - 1) ** 2, cols_added + 2))  # scratch array

                # this selects from the base and trial case tables
                for metrics_table, trips_table, name in zip(
                    [base_metrics_table, test_metrics_table], [base_trips_table, test_trips_table], ["base", scenario]
                ):

                    logger.info(
                        "running {} case for {} {}  {} using {} and {}".format(
                            name, purpose, mode, trip_leg, trips_table, metrics_table
                        )
                    )

                    # create SELECT statements

                    #          --adding {topic} for {purpose}. {mode} - {trip_leg} leg\n'
                    select = "--adding {} for {}. {} - {} leg\n".format(topic, purpose, mode, trip_leg)

                    select += "SELECT  DISTINCT\n "
                    #                 {metrics_table}.origin        origin
                    select += "\t{}.origin,\n".format(metrics_table)
                    #                 {metrics_table}.dest          destination
                    select += "\t{}.dest,\n".format(metrics_table)

                    #               '{trips_table}.{purpose}_{income}_{mode} * {metrics_table}.{pk_flag}_{mode}_{topic}'   metric
                    stmt = "\t{}.{}_{}_{} * {}.{}_{}_{},\n "
                    select += stmt.format(trips_table, purpose, income, mode, metrics_table, pk_flag, mode, topic)

                    #               '{trips_table}.{purpose}_{income}_{mode}'   trips
                    stmt = "\t{}.{}_{}_{}\n "
                    select += stmt.format(trips_table, purpose, income, mode)

                    # print(select)
                    #                FROM {trips_table} , {metrics_table}
                    select += "FROM \n\t {} , {} \n ".format(trips_table, metrics_table)

                    if trip_leg == "outbound":  # use OD pairs from trip table same as metric table's

                        #               WHERE  {trips_table}.origin={metrics_table}.origin AND
                        select += "WHERE  \n\t{}.origin={}.origin AND \n".format(trips_table, metrics_table)
                        #                   {metrics_table}.dest={trips_table}.dest
                        select += "\t{}.dest={}.dest \n".format(metrics_table, trips_table)

                    else:  # use transposed OD pairs from trip table (origin = metrics.dest, dest=metrics.origin)

                        #               WHERE  {trips_table}.dest={metrics_table}.origin AND
                        select += "WHERE  \n\t{}.dest={}.origin AND \n".format(trips_table, metrics_table)
                        #                   {trips_table}.origin={metrics_table}.dest
                        select += "\t{}.origin={}.dest \n".format(trips_table, metrics_table)

                    #             ORDER BY {metrics_table}.origin, {metrics_table}.dest
                    select += "ORDER BY \n\t{}.origin, {}.dest\n\n".format(metrics_table, metrics_table)

                    logger.debug(select)
                    try:
                        good_table = True
                        curs.execute(select)
                    except Exception:
                        # some columns were not produced (e.g., wexpbus_autodistance) because they don't apply
                        relations = [
                            "\t{}.{}_{}_{} * {}.{}_{}_{},\n ".format(
                                trips_table, purpose, income, mode, metrics_table, pk_flag, mode, topic
                            ),
                            "\t{}.{}_{}_{}\n ".format(trips_table, purpose, income, mode),
                        ]
                        logger.warning(
                            "This SELECT failed, probably because the data is n/a:  {}".format("  ".join(relations))
                        )
                        good_table = False
                        # close out any open transaction
                        curs.execute("END")

                    # if the query failed, we've busted out; so go to the next mode
                    if good_table:

                        res = np.array(curs.fetchall())

                        # This rolls up the costs and trips from both scenarios:  npa is rows of:  origin, dest, benefit base, trips base, benefit trial, trips trial
                        if this_np_col < 0:
                            # add first 4 columns of result to the first 4 columns of the scratch array  (origin, dest, base cost, base trips)
                            npa[:, :4] = res
                            this_np_col = 4
                        else:
                            # add the cost, trips columns from the result to cols 4-6 of the scratch array (trial cost, trial trips)
                            npa[:, 4:6] = res[:, -2:]
                            this_np_col += 2
                            # calculate the benefits
                            logger.info("calculating delta cs for {} {} {} ".format(scenario, purpose, mode))
                            npa = add_dlta_cons_surplus(npa)

                        # npa_combined rolls up the atomized benefits, calculated separately for each leg of the journey.
                        if need_npa_combined:
                            npa_combined = npa
                            need_npa_combined = False
                            logger.info("adding benefits to new npa_combined array")
                        else:
                            # otherwise add the benefits from the second leg (the last column) to the combined_npa array
                            npa_combined[:, -1] += npa[:, -1]
                            logger.info(
                                "done with both legs; adding return leg to npa_combined:     {}  {} ".format(
                                    purpose, mode
                                )
                            )
            #   next mode at col 12

            # if a mode fails to produce a clean query, don't bother trying to add the info
            if good_table:
                if this_export_array_col < 0:
                    # not yet created; add the orig and destin columns, along with the cs deltas
                    export_array[:, :2] = npa_combined[:, :2]
                    export_array[:, -1] = npa_combined[:, -1]
                    this_export_array_col = 3
                    logger.debug("creating new export array")
                else:
                    # ... otherwise just add the new benefits to the cumulative total
                    export_array[:, -1] += npa_combined[:, -1]
                    logger.info("adding additional cs deltas to export array")

            logger.info("Done with mode {}\n\n".format(mode))
        logger.info("Done with purpose {}".format(purpose))

    return export_array
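
# A hedged usage sketch: roll up several transit topics for one scenario/income slice and
# accumulate the cs deltas.  The scenario names, income label, and topic list here are
# hypothetical:
transit_topics = ["initialwaittime", "transfertime", "bustime", "railtime"]
grand_total = None
for topic in transit_topics:
    ea = rollup_transit_metrics(scenario="red_line", base_scenario="base",
                                income="inc1", topic=topic)
    if grand_total is None:
        grand_total = ea.copy()          # keep origin/dest columns from the first rollup
    else:
        grand_total[:, -1] += ea[:, -1]  # sum cs deltas across topics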