def split_line_if_valid(line, line_number):
    """Split a checksum-manifest line into ``[filename, algorithm, checksum]``.

    A valid line has the form ``<file name> <md5|sha1|sha256> <checksum>``.
    Returns the 3-item list on success; otherwise logs a warning via the
    module-level ``logger`` and returns None.

    :param line: raw text line from the manifest file
    :param line_number: line number used only in the warning messages
    :return: list of three strings, or None if the line is rejected
    """
    items = line.split()
    if len(items) != 3:
        logger.warning(
            "Skipping line {}: wrong file information. Should be <file name> <md5|sha1|sha256> <checksum>."
            .format(line_number))
        return None
    # BUG FIX: the original used re.match() with unanchored patterns, which only
    # requires a matching *prefix* — e.g. "md5garbage" was accepted as an
    # algorithm, "a/b" as a filename (first char legal), and "abc!!" as a
    # checksum.  re.fullmatch() requires the entire string to match.
    elif not re.fullmatch(r'[^/><|:&]+', items[0]):
        logger.warning(
            "Skipping line {}: filename contains illegal symbols: {}.".format(
                line_number, items[0]))
        return None
    elif not re.fullmatch(r'sha(?:1|256)|md5', items[1]):
        logger.warning(
            "Skipping line {}: Unknown hashing algorithm: {}.".format(
                line_number, items[1]))
        return None
    elif not re.fullmatch(r'[a-fA-F0-9]+', items[2]):
        logger.warning(
            "Skipping line {}: checksum contains illegal symbols: {}.".format(
                line_number, items[2]))
        return None
    else:
        return items
def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
    "main entry point - loops over scenarios"
    # NOTE: the keyword defaults bind the module-level globals at definition time.
    msg='{} Starting benefits calculations using input file {}'
    logger.info(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
    print(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
    #This isn't the most efficient way to do it, but it's the most transparent: we'll loop through each
    # base:scenario pair. For each, we'll read the input file a line at a time and draw our consumer
    # surplus benefits
    base_scenario = scenarios[0]
    ###for s in scenarios[1:]:
    for s in scenarios[0:]:    ###run this for the 'red line' case only
        arr_dict={}
        #grab a reader for the input file
        # NOTE(review): the handle from open() is never closed explicitly; a 'with' block would be safer.
        reader=csv.DictReader(open(map_file))
        #process one line at a time from the setup file
        for line_ix, line in enumerate(reader, start=1):
            # the info comes in as a dict - this cleans up the content, removing comments, etc. Returns a dict.
            dmap = grabinfo(line)
            #these set processing parameters
            transpose=dmap['transpose']    #use transposed trip matrix?
            hov_adj=dmap['hov_adj']        #occupancy adjustment (hov2 implies double time costs, say)
            pct_hb=dmap['pct_hb']          #fraction of benefits accruing to origin node ('home base')
            #these set storage parameters
            arr_name=dmap['aggregate_to']  #also used as the database column name below
            column_name= arr_name
            table_name=s['name']+"_"+dmap['dbtable']
            #get information for the base case
            base_dir=base_scenario['location']    #root directory location
            base_name=base_scenario['name']       #name for this scenario
            #Build fully specified path names from locations in mappings.py; subdirectory determined
            # by file name, then create np arrays out of them
            base_cost_file=get_full_filename(location=base_dir, filename=dmap['cost_file'])
            base_trips_file=get_full_filename(location=base_dir, filename=dmap['trip_file'])
            #try to create npa arrays from the raw data files; if they don't exist go on to the next line
            try:
                base_trips_raw = npa_from_file( base_trips_file)
                base_cost_per_trip_raw = npa_from_file( base_cost_file)
            except:
                # deliberate best-effort: record why the files could not be read, then skip this line
                exc_type, exc_value, exc_traceback = sys.exc_info()
                msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
                logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
                continue
            #Costs and trips for the base case - returns base costs, base trips as square np
            # arrays w/o OD headers, trips transposed if needed
            base_costs, base_trips=prep_data( base_cost_per_trip_raw ,   base_trips_raw,  transpose,  hov_adj )
            ##Process the scenario costs and trips the same way
            #test_dir = s['location']
            ##grab the files and put them in np arrays
            #try:
            #    test_cost_file=get_full_filename(location=test_dir, filename=dmap['cost_file'])
            #    test_trip_file=get_full_filename(location=test_dir, filename=dmap['trip_file'])
            #except:
            #    msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
            #    logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
            #test_trips_raw = npa_from_file( test_trip_file)
            #test_cost_per_trip_raw = npa_from_file( test_cost_file)
            #test_name=s['name']
            ##Scenario case trips*cost/trip and trips used
            #test_costs, test_trips=prep_data( cost_per_trip=test_cost_per_trip_raw ,   trips=test_trips_raw,  transpose=transpose, hov_adj=hov_adj )
            #With all costs gathered, calculate the change in consumer surplus in square np array; produces a square np array
            ###cs_delta = get_cs_delta(base_trips, test_trips, base_costs, test_costs)
            my_trips=get_base_trips_only(base_trips=base_trips)
            #From the cs_delta matrix, assign benefits to the origin and destination node; produces a vector of nodes w/o OD headers
            # For home-based transit trips, both outbound and return accrue to home node, as do am and pm highway trips.
            # For mid-day and night-time highway trips, the benefit is split between origin and dest nodes.
            ###benefits_by_zone = calculate_benefits(cs_delta, pct_hb)
            trips_by_zone=calculate_benefits(my_trips, pct_hb)
            #The balance of this block stores the benefits and other information for posterity/more analysis.
            #We aggregate by benefit type, denominated in natural units (minutes, dollars, miles); scalars can
            # transform minutes to dollars, miles to CO2, etc. as a post-processing step.
            #arr_dict bundles each roll-up array with where to store it when we're done:
            #    arr_dict = {'aggregate_to': {'data': npa_of_data,
            #                                 'column': 'aggregate_to',
            #                                 'table': 'db_table'}}
            # ...where the 'aggregate_to' value comes from the input spreadsheet and serves as the
            # name of the database column.
            #create the dict entry if needed (it doesn't yet exist)
            if not arr_name in arr_dict:
                arr_dict[arr_name]={}
                arr_dict[arr_name]['data']=None
            #update the dict with current state of the roll-up array; sum_col_to_np_array adds the new
            # vector to the accumulated array (or seeds it when 'data' is None)
            arr_dict[arr_name]={ 'data': sum_col_to_np_array(npa=arr_dict[arr_name]['data'],
                                                             ###vector=benefits_by_zone,
                                                             vector=trips_by_zone,
                                                             max_index_val=len(base_trips)),
                                 'column': column_name,
                                 'table': table_name }
            logger.debug('line {}\n\t{} -versus- {} \n\t {} \n\t {} \n\t base trips: {} test trips: {} sum dlta cs: {} (summary stats - not used in benefit calcs)'.format(
                line_ix,
                base_name,
                'NA', ###test_name,
                dmap['trip_file'],
                dmap['cost_file'].split()[0],
                np.sum(base_trips),
                'NA', ###np.sum(test_trips),
                np.sum(trips_by_zone)))
        #store the arrays in db tables (written after all the processing for a scenario is completed.)
        store_data(arr_dict=arr_dict, db=DB)
    finish = datetime.now()
    msg='Finished at {}. Processed {} files in {}.'
    # NOTE(review): 'start' is presumably a module-level timestamp set at script start - confirm.
    # 'line_ix' will raise NameError here if the map file had no data rows.
    elapsed=str(finish-start).split('.')[0]
    print(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
    logger.info(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
def rollup_hwy_metrics(
    scenario=None,
    income=None,
    purposes=purposes,
    master_header=None,
    purposes_round_trip=purposes_round_trip,
    np_rows=NP_ROWS,
    topic=None,
    occupancy=None,
):
    """Aggregate costs for highway-only travel.

    Roll up topics over purpose and tod.  Keeps occupancies (sov, hov2, hov3)
    and costs (time, distance, toll) separate.

    Typical tables touched look like:
        # hwy_toll_hov_am
        # hbo_inc1_md_hov3

    Relies on module-level state: ``base_scenario``, ``time_adjust``,
    ``tod_hwy_loaded``, ``curs`` (open DB cursor), ``logger`` and
    ``add_dlta_cons_surplus``.

    Returns an np array of (np_rows-1)**2 rows: origin, dest, cumulative
    consumer-surplus delta summed over all purposes and time-of-day periods.
    """
    # general info for this metric
    base_trips_table = "{}trips_purpose_income_tod_occ".format(base_scenario)
    test_trips_table = "{}trips_purpose_income_tod_occ".format(scenario)
    base_metrics_table = "{}loaded_hwy_od_timecost".format(base_scenario)
    test_metrics_table = "{}loaded_hwy_od_timecost".format(scenario)

    # We'll need to leverage the time to reflect vehicle occupancy. Since the topic (metric) is an
    # input parameter, we can calculate a uniform multiplier here. Occupancy leverages only time.
    if topic == "time":
        mult = time_adjust[occupancy]
    else:
        mult = 1

    cols_added = 5  # metric base, trips base, metric trial, trips trial, benefit

    logger.info("\n\n\n***** Beginning aggregation of highway data for {}".format(topic))
    logger.info("Trips using {}\n and {}".format(base_trips_table, test_trips_table))
    logger.info("Costs using {}\n and {}".format(base_metrics_table, test_metrics_table))

    # this array will aggregate the info gleaned here
    export_array = np.zeros(((np_rows - 1) ** 2, 3))
    this_export_array_col = -1

    # initialize null np array (orig, dest, value)
    npa = np.zeros(((np_rows - 1) ** 2, 3))

    logger.info("Beginning aggregation of {} data".format(topic))
    # we're passing in topic (metric), occupancy and income; purposes (grouped by business/personal
    # as input) are analyzed atomistically here and provided as an aggregate in the output
    for purpose in purposes:  # rolls up whatever list of purposes provided (allows biz/personal segregation)
        # round trip or one-way (round trip for home based journeys)?
        trip_legs = ["outbound"]
        if purpose in purposes_round_trip:
            trip_legs.append("return")
        for tod in tod_hwy_loaded:  # 'am', 'pm', 'md', 'nt'
            logger.info("beginning benefit calcs for {} {} {} {}".format(purpose, tod, occupancy, topic))
            need_npa_combined = True  # holds outbound+return benefits rollup
            this_np_col = -1          # flag for scratch-array creation
            # calculate benefits for each leg of the trip separately; combine the benefits from a
            # round-trip at the end of the 'trip_leg' loop.
            for trip_leg in trip_legs:
                if this_np_col < 0:
                    npa = np.zeros(((np_rows - 1) ** 2, cols_added + 2))  # scratch array
                # this selects from the base and trial case tables
                for metrics_table, trips_table, name in zip(
                    [base_metrics_table, test_metrics_table],
                    [base_trips_table, test_trips_table],
                    ["base", scenario]
                ):
                    # --adding {topic} for {purpose} {tod} {occupancy} - {trip_leg} leg
                    select = "--adding {} for {} {} {} - {} leg\n".format(topic, purpose, tod, occupancy, trip_leg)
                    select += "SELECT DISTINCT\n "
                    # {metrics_table}.origin
                    select += "\t{}.origin,\n".format(metrics_table)
                    # {metrics_table}.dest
                    select += "\t{}.dest,\n".format(metrics_table)
                    # metric column: trips * cost-per-trip, leveraged by the occupancy multiplier.
                    # BUG FIX: the original emitted the literal token 'mult' into the SQL (an
                    # undefined name server-side) instead of the computed multiplier value, and
                    # omitted the comma separating this column from the trips column.
                    stmt = "\t{}.{}_{}_{}_{} * {}.hwy_{}_{}_{} * {},\n "  # mult leverages time for HOVs
                    select += stmt.format(
                        trips_table, purpose, income, tod, occupancy, metrics_table, topic, occupancy, tod, mult
                    )
                    # trips column: '{trips_table}.{purpose}_{income}_{tod}_{occ}'
                    # BUG FIX: the original had no trailing whitespace/newline here, so 'FROM'
                    # was fused onto the column name, producing invalid SQL.
                    stmt = "\t{}.{}_{}_{}_{}\n "
                    select += stmt.format(trips_table, purpose, income, tod, occupancy)
                    # FROM {trips_table} , {metrics_table}
                    select += "FROM \n\t {} , {} \n ".format(trips_table, metrics_table)
                    if trip_leg == "outbound":
                        # use OD pairs from trip table same as metric table's
                        select += "WHERE \n\t{}.origin={}.origin AND \n".format(trips_table, metrics_table)
                        select += "\t{}.dest={}.dest \n".format(metrics_table, trips_table)
                    else:
                        # use transposed OD pairs from trip table (origin = metrics.dest, dest = metrics.origin)
                        select += "WHERE \n\t{}.dest={}.origin AND \n".format(trips_table, metrics_table)
                        select += "\t{}.origin={}.dest \n".format(trips_table, metrics_table)
                    # ORDER BY {metrics_table}.origin, {metrics_table}.dest
                    select += "ORDER BY \n\t{}.origin, {}.dest\n\n".format(metrics_table, metrics_table)
                    logger.debug(select)
                    try:
                        good_table = True
                        curs.execute(select)
                    except Exception:  # narrowed from a bare except; DB-API errors derive from Exception
                        # some columns don't exist (e.g., wexpbus_autodistance) because they don't apply
                        relations = [
                            "\t{}.{}_{}_{}_{} * {}.hwy_{}_{}_{} * {},\n ".format(
                                trips_table, purpose, income, tod, occupancy, metrics_table, topic, occupancy, tod, mult
                            ),
                            # BUG FIX: log the trips column actually selected (occupancy suffix was missing)
                            "\t{}.{}_{}_{}_{}".format(trips_table, purpose, income, tod, occupancy),
                        ]
                        logger.warning(
                            "This SELECT failed, probably because the data is n/a: {}".format(" ".join(relations))
                        )
                        good_table = False
                    # close out any open transaction
                    curs.execute("END")
                    # if the query failed, we've busted out; so go to the next tod
                    if good_table:
                        res = np.array(curs.fetchall())
                        # npa rows: origin, dest, benefit base, trips base, benefit trial, trips trial
                        if this_np_col < 0:
                            # first 4 result columns -> origin, dest, base cost, base trips
                            npa[:, :4] = res
                            this_np_col = 4
                        else:
                            # cost, trips columns from the trial scenario -> cols 4-6
                            npa[:, 4:6] = res[:, -2:]
                            this_np_col += 2
                # calculate the benefits for this leg.
                # BUG FIX: these log lines referenced an undefined name 'mode' (copied from the
                # transit version) and raised NameError; this function varies over occupancy.
                logger.info("calculating delta cs for {} {} {} ".format(scenario, purpose, occupancy))
                npa = add_dlta_cons_surplus(npa)
                # npa_combined rolls up the atomized benefits, calculated separately for each leg of the journey
                if need_npa_combined:
                    npa_combined = npa
                    need_npa_combined = False
                    logger.info("adding benefits to new npa_combined array")
                else:
                    # add the benefits from the second leg (the last column) to the combined array
                    npa_combined[:, -1] += npa[:, -1]
                    logger.info(
                        "done with both legs; adding return leg to npa_combined: {} {} ".format(
                            purpose, occupancy
                        )
                    )
            # if a tod fails to produce a clean query, don't bother trying to add the info
            if good_table:
                if this_export_array_col < 0:
                    # not yet created; add the orig and destin columns, along with the cs deltas
                    export_array[:, :2] = npa_combined[:, :2]
                    export_array[:, -1] = npa_combined[:, -1]
                    this_export_array_col = 3
                    logger.debug("creating new export array")
                else:
                    # ... otherwise just add the new benefits to the cumulative total
                    export_array[:, -1] += npa_combined[:, -1]
                    logger.info("adding additional cs deltas to export array")
        logger.info("Done with purpose {}".format(purpose))
    return export_array
def rollup_transit_metrics(
    scenario=None,
    base_scenario=None,
    income=None,
    purposes=purposes,
    master_col=None,
    master_header=None,
    purposes_round_trip=purposes_round_trip,
    bus_modes=bus_modes,
    rail_modes=rail_modes,
    np_rows=NP_ROWS,
    topic=None,
):
    """Aggregate time costs for mass transit.  Roll up topics over purpose and mode.
    Cf aggregate_bus_rail_fares() for more verbose documentation.

    Keeps topics (initialwaittime, bustime, etc.) separate for now.  For final
    analysis it may make sense to consolidate:
        waiting:    initialwaittime, transfertime
        bus time:   wexpbus, dexpbus, wbus, dbus
        train time: wrail, wcrail, drail, dcrail
    ... but it's easier to combine later than have to separate.

    Relies on module-level state: ``curs`` (open DB cursor), ``logger``,
    ``purpose_peak_flag`` and ``add_dlta_cons_surplus``.
    Returns an np array of (np_rows-1)**2 rows: origin, dest, cumulative
    consumer-surplus delta summed over all purposes and modes.
    """
    # general info for this metric; all table names carry a "<scenario>_" prefix
    scenario = scenario + "_"
    base_scenario = base_scenario + "_"
    base_trips_table = "{}mode_choice_od".format(base_scenario)
    test_trips_table = "{}mode_choice_od".format(scenario)
    base_metrics_table = "{}transit_od_timecost".format(base_scenario)
    test_metrics_table = "{}transit_od_timecost".format(scenario)
    cols_added = 5  # metric base, trips base, metric trial, trips trial, benefit
    logger.info("\n\n\n***** Beginning aggregation of transit data for {}".format(topic))
    logger.info("Trips using {}\n and {}".format(base_trips_table, test_trips_table))
    logger.info("Costs using {}\n and {}".format(base_metrics_table, test_metrics_table))
    # this array will aggregate the info gleaned here
    export_array = np.zeros(((np_rows - 1) ** 2, 3))
    this_export_array_col = -1
    # initialize null np array (orig, dest, value)
    npa = np.zeros(((np_rows - 1) ** 2, 3))
    fresh_npa_array = True  # NOTE(review): never read again - candidate for removal
    for purpose in purposes:
        # peak or off peak as f(purpose)
        pk_flag = purpose_peak_flag[purpose]
        # round trip or one-way (round trip for home based journeys)?
        trip_legs = ["outbound"]
        if purpose in purposes_round_trip:
            trip_legs.append("return")
        # loop thru appropriate modes and compose SELECT
        for mode in bus_modes + rail_modes:
            logger.info("beginning benefit calcs for {} {} {}".format(purpose, mode, topic))
            # calculate benefits for each leg of the trip separately; combine the benefits
            # from a round-trip at the end of the 'trip_leg' loop.
            need_npa_combined = True  # holds outbound+return benefits rollup
            this_np_col = -1          # flag for scratch-array creation
            # calculate each leg of the trip separately
            for trip_leg in trip_legs:
                if this_np_col < 0:
                    npa = np.zeros(((np_rows - 1) ** 2, cols_added + 2))  # scratch array
                # this selects from the base and trial case tables
                for metrics_table, trips_table, name in zip(
                    [base_metrics_table, test_metrics_table],
                    [base_trips_table, test_trips_table],
                    ["base", scenario]
                ):
                    logger.info(
                        "running {} case for {} {} {} using {} and {}".format(
                            name, purpose, mode, trip_leg, trips_table, metrics_table
                        )
                    )
                    # create SELECT statements
                    # --adding {topic} for {purpose}. {mode} - {trip_leg} leg
                    select = "--adding {} for {}. {} - {} leg\n".format(topic, purpose, mode, trip_leg)
                    select += "SELECT DISTINCT\n "
                    # {metrics_table}.origin
                    select += "\t{}.origin,\n".format(metrics_table)
                    # {metrics_table}.dest
                    select += "\t{}.dest,\n".format(metrics_table)
                    # metric column: '{trips_table}.{purpose}_{income}_{mode} * {metrics_table}.{pk_flag}_{mode}_{topic}'
                    stmt = "\t{}.{}_{}_{} * {}.{}_{}_{},\n "
                    select += stmt.format(trips_table, purpose, income, mode, metrics_table, pk_flag, mode, topic)
                    # trips column: '{trips_table}.{purpose}_{income}_{mode}'
                    stmt = "\t{}.{}_{}_{}\n "
                    select += stmt.format(trips_table, purpose, income, mode)
                    # print(select)
                    # FROM {trips_table} , {metrics_table}
                    select += "FROM \n\t {} , {} \n ".format(trips_table, metrics_table)
                    if trip_leg == "outbound":
                        # use OD pairs from trip table same as metric table's
                        select += "WHERE \n\t{}.origin={}.origin AND \n".format(trips_table, metrics_table)
                        select += "\t{}.dest={}.dest \n".format(metrics_table, trips_table)
                    else:
                        # use transposed OD pairs from trip table (origin = metrics.dest, dest = metrics.origin)
                        select += "WHERE \n\t{}.dest={}.origin AND \n".format(trips_table, metrics_table)
                        select += "\t{}.origin={}.dest \n".format(trips_table, metrics_table)
                    # ORDER BY {metrics_table}.origin, {metrics_table}.dest
                    select += "ORDER BY \n\t{}.origin, {}.dest\n\n".format(metrics_table, metrics_table)
                    logger.debug(select)
                    try:
                        good_table = True
                        curs.execute(select)
                    except:
                        # some columns were not produced (e.g., wexpbus_autodistance) because they don't apply
                        relations = [
                            "\t{}.{}_{}_{} * {}.{}_{}_{},\n ".format(
                                trips_table, purpose, income, mode, metrics_table, pk_flag, mode, topic
                            ),
                            "\t{}.{}_{}_{}\n ".format(trips_table, purpose, income, mode),
                        ]
                        logger.warning(
                            "This SELECT failed, probably because the data is n/a: {}".format(" ".join(relations))
                        )
                        good_table = False
                    # close out any open transaction
                    curs.execute("END")
                    # if the query failed, we've busted out; so go to the next mode
                    if good_table:
                        res = np.array(curs.fetchall())
                        # npa rows: origin, dest, benefit base, trips base, benefit trial, trips trial
                        if this_np_col < 0:
                            # first 4 result columns -> origin, dest, base cost, base trips
                            npa[:, :4] = res
                            this_np_col = 4
                        else:
                            # cost, trips columns from the trial scenario -> cols 4-6
                            npa[:, 4:6] = res[:, -2:]
                            this_np_col += 2
                # calculate the benefits for this leg
                logger.info("calculating delta cs for {} {} {} ".format(scenario, purpose, mode))
                npa = add_dlta_cons_surplus(npa)
                # npa_combined rolls up the atomized benefits, calculated separately for each leg of the journey.
                if need_npa_combined:
                    npa_combined = npa
                    need_npa_combined = False
                    logger.info("adding benefits to new npa_combined array")
                else:
                    # otherwise add the benefits from the second leg (the last column) to the combined array
                    npa_combined[:, -1] += npa[:, -1]
                    logger.info(
                        "done with both legs; adding return leg to npa_combined: {} {} ".format(
                            purpose, mode
                        )
                    )
            # next mode at col 12
            # if a mode fails to produce a clean query, don't bother trying to add the info
            if good_table:
                if this_export_array_col < 0:
                    # not yet created; add the orig and destin columns, along with the cs deltas
                    export_array[:, :2] = npa_combined[:, :2]
                    export_array[:, -1] = npa_combined[:, -1]
                    this_export_array_col = 3
                    logger.debug("creating new export array")
                else:
                    # ... otherwise just add the new benefits to the cumulative total
                    export_array[:, -1] += npa_combined[:, -1]
                    logger.info("adding additional cs deltas to export array")
            ### logger.info("Done with mode {}\n\n".format(mode))
        logger.info("Done with purpose {}".format(purpose))
    return export_array