def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
    """Main entry point - compare each scenario against the base scenario.

    The first element of ``scenarios`` is used as the base case and every
    later element is compared against it. For each comparison the set-up
    file ``map_file`` is read one row at a time; for every usable row the
    base and scenario cost/trip matrices are loaded, the change in total
    cost is calculated, rolled up by zone and accumulated in the array named
    by the row's 'rollup_to' value. Once all rows are read, the accumulated
    arrays for that scenario are passed to store_data().

    Rows without a 'rollup_to' value are skipped. Rows whose data files
    cannot be loaded are logged as warnings and skipped.

    Args:
        scenarios: sequence of dicts providing at least 'name' and
            'location'; element 0 is the base case.
        DB: handed to store_data() as ``db``.
        ROOT_DIR: kept for interface compatibility; not used in this function.
        ZONES: handed to store_data() as ``zones``.
        map_file: path of the CSV set-up file (read with csv.DictReader).
    """
    # Taken here so the elapsed time reported at the end covers this call.
    start = datetime.now()
    timestamp_format = "%b %d %Y %H:%M:%S"
    load_failure_msg = 'Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'

    msg = '{} Starting total cost calculations using input file {}'
    logger.info(msg.format(datetime.now().strftime(timestamp_format), map_file))
    print(msg.format(datetime.now().strftime(timestamp_format), map_file))

    # Stays 0 if there is nothing to compare or the set-up file has no rows,
    # so the final report never hits an unbound name.
    line_ix = 0

    # This isn't the most efficient way to do it, but it's the most
    # transparent: loop through each base:scenario pair and, for each, read
    # the input file a line at a time.
    base_scenario = scenarios[0]
    base_dir = base_scenario['location']  # root directory of the base case
    base_name = base_scenario['name']

    for s in scenarios[1:]:
        test_dir = s['location']
        test_name = s['name']

        # Roll-up arrays for this scenario, keyed by the 'rollup_to' name:
        #   arr_dict[name] = {'data': np array, 'column': name, 'table': db table}
        arr_dict = {}

        # The context manager guarantees the set-up file is closed again,
        # even if processing a row raises.
        with open(map_file) as map_handle:
            reader = csv.DictReader(map_handle)

            # process one line at a time from the set-up file
            for line_ix, line in enumerate(reader, start=1):
                # grabinfo() cleans up the raw row (comments etc.) and
                # returns a dict.
                dmap = grabinfo(line)

                # processing parameters
                transpose = dmap['transpose']  # use transposed trip matrix?
                hov_adj = dmap['hov_adj']      # occupancy adjustment (hov2 implies double time costs, say)
                pct_hb = dmap['pct_hb']        # fraction of benefits accruing to origin node ('home base')

                # storage parameters (column name doubles as the array name)
                arr_name = dmap['rollup_to']
                column_name = arr_name
                table_name = test_name + "_" + dmap['rollup_dbtable']

                # nothing to roll up to on this line: go to the next one
                if not arr_name:
                    continue

                # Fully specified path names for the base case files.
                base_cost_file = get_full_filename(location=base_dir, filename=dmap['cost_file'])
                base_trips_file = get_full_filename(location=base_dir, filename=dmap['trip_file'])

                # Try to build arrays from the raw data files; if that
                # fails, report it and go on to the next line.
                try:
                    base_trips_raw = npa_from_file(base_trips_file)
                    base_cost_per_trip_raw = npa_from_file(base_cost_file)
                except Exception as exc:
                    logger.warning(load_failure_msg.format(test_name, type(exc), exc, line_ix, map_file))
                    continue

                # Costs and trips for the base case: square arrays w/o OD
                # headers, trips transposed if needed.
                base_costs, base_trips = prep_data(base_cost_per_trip_raw, base_trips_raw, transpose, hov_adj)

                # Process the scenario costs and trips the same way.
                test_cost_file = get_full_filename(location=test_dir, filename=dmap['cost_file'])
                test_trip_file = get_full_filename(location=test_dir, filename=dmap['trip_file'])
                try:
                    test_trips_raw = npa_from_file(test_trip_file)
                    test_cost_per_trip_raw = npa_from_file(test_cost_file)
                except Exception as exc:
                    logger.warning(load_failure_msg.format(test_name, type(exc), exc, line_ix, map_file))
                    continue

                test_costs, test_trips = prep_data(
                    cost_per_trip=test_cost_per_trip_raw,
                    trips=test_trips_raw,
                    transpose=transpose,
                    hov_adj=hov_adj,
                )

                # Identical base and scenario data usually means the same
                # file was picked up twice; FARE files are exempt from the
                # alert.
                if np.equal(test_costs, base_costs).all() and np.isclose(test_trips, base_trips).all():
                    if "FARE" not in test_cost_file and "FARE" not in base_cost_file:
                        msg = "\nWARN: Same file: {} ::: {}"
                        logger.critical(msg.format(base_trips_file, test_trip_file))
                        logger.critical(msg.format(base_cost_file, test_cost_file))
                        logger.critical('\n')

                # With all costs gathered, calculate the change in total
                # costs (this replaced the earlier consumer-surplus delta).
                total_cost_delta = get_total_cost_delta(base_trips, test_trips, base_costs, test_costs)
                logger.debug('total_cost_delta {}'.format(total_cost_delta.sum()))

                # Assign the delta to origin and destination nodes; pct_hb
                # controls the split between them.
                costs_by_zone = calculate_benefits(total_cost_delta, pct_hb)

                # Accumulate into the roll-up array for this name. The first
                # time a name is seen there is no previous array, so None is
                # passed and sum_col_to_np_array() starts a fresh one.
                previous = arr_dict.get(arr_name, {}).get('data')
                arr_dict[arr_name] = {
                    'data': sum_col_to_np_array(npa=previous, vector=costs_by_zone, max_index_val=len(base_trips)),
                    'column': column_name,
                    'table': table_name,
                }

                logger.debug('{} -versus- {} line {}\n\t {} \n\t {} \n\t base trips: {} test trips: {} sum dlta costs: {} (summary stats - not used in benefit calcs)'.format(
                    base_name, test_name, line_ix,
                    dmap['trip_file'], dmap['cost_file'].split()[0],
                    np.sum(base_trips), np.sum(test_trips),
                    np.sum(total_cost_delta)))

        # store this scenario's arrays in db tables
        store_data(arr_dict=arr_dict, db=DB, zones=ZONES)

    finish = datetime.now()
    msg = 'Finished at {}. Processed {} files in {}.'
    elapsed = str(finish - start).split('.')[0]  # drop the microseconds
    print(msg.format(datetime.now().strftime(timestamp_format), line_ix, elapsed))
    logger.info(msg.format(datetime.now().strftime(timestamp_format), line_ix, elapsed))
def test_cons_surplus_calcs(self):
    """Check the consumer-surplus delta and its roll-up to zones.

    Builds a small base case and a test case with triple the trips at half
    the cost per trip, then verifies:
      * analyze_main.get_cs_delta() against hand-calculated cell values;
      * analyze_main.calculate_benefits() with all benefits going to the
        origin zone (pct_hb = 100);
      * analyze_main.calculate_benefits() with benefits split evenly
        between origin and destination zone (pct_hb = 50);
      * that the split case still accounts for the total benefit.
    """
    # Spin up a couple of test arrays. The first row and first column
    # (0, 1, 2, 3) are not part of the data: they are sliced off with
    # [1:, 1:] below and absent from the 3x3 expected results.
    base_trips = np.array(([
        [0,  1,  2,  3],
        [1, 11, 12, 13],
        [2, 21, 22, 23],
        [3, 31, 32, 33],
    ]))
    base_cost_per_trip = np.array(([
        [0,   1,   2,   3],
        [1, .10, .20, .30],
        [2, .40, .50, .60],
        [3, .70, .80, .90],
    ]))

    # create a test case with triple trips, each at half price
    test_trips = base_trips*3
    test_cost_per_trip = base_cost_per_trip* 1/2

    # We expect the benefits to be a function of cost and quantity,
    # calculated cell-wise:
    #    .5 * (base_trips + test_trips) * (base_costs - test_costs)
    exp_cs_delta = np.array(([
        [ 1.1,  2.4,  3.9],   # [.5*(11+11*3)*(.10-.10/2), .5*(12+12*3)*(.20-.20/2), .5*(13+13*3)*(.30-.30/2)]
        [ 8.4, 11. , 13.8],   # [.5*(21+21*3)*(.40-.40/2), .5*(22+22*3)*(.50-.50/2), .5*(23+23*3)*(.60-.60/2)]
        [21.7, 25.6, 29.7],   # [.5*(31+31*3)*(.70-.70/2), .5*(32+32*3)*(.80-.80/2), .5*(33+33*3)*(.90-.90/2)]
    ]))

    # run prep_data() on both the base and the test case
    base_costs, base_trips_used = analyze_main.prep_data(base_cost_per_trip, base_trips)
    test_costs, test_trips_used = analyze_main.prep_data(test_cost_per_trip, test_trips)

    # calculate the consumer surplus delta with get_cs_delta(), cell-wise
    cs_delta = analyze_main.get_cs_delta(base_trips_used, test_trips_used, base_cost_per_trip[1:,1:], test_cost_per_trip[1:,1:])

    # do we have the expected values?
    self.assertTrue(np.isclose(cs_delta, exp_cs_delta).all())

    # So far so good. Now, we'll roll up these benefits by Zone - that's our
    # unit of analysis. For each origin Zone, total benefits will be a sum
    # of the benefits derived by trips to each destination. In other words,
    # the row sum.
    exp_benefits_by_zone = np.array(([[
         7.4,    #  1.1 +  2.4 +  3.9
        33.2,    #  8.4 + 11.0 + 13.8
        77.0,    # 21.7 + 25.6 + 29.7
    ]]))

    pct_hb = 100    # percent of benefits accruing to originating (home) Zone
    benefits_by_zone = analyze_main.calculate_benefits(cs_delta, pct_hb)
    self.assertTrue(np.isclose(benefits_by_zone, exp_benefits_by_zone).all())

    # For some trips (highway trips at night or mid-day), only half of the
    # benefits accrue to the originating Zone. The other half accrue to the
    # destination Zone. To operationalize this, we give the origin zone half
    # the row sum (each row is an origin) and the destination zone half the
    # column sum. Since any zone can be both an origin and a destination, we
    # add up the 'Zone as origin' and 'Zone as destination' values for
    # attribution to each zone.
    #
    # Or should we count the 'destination' part of this at all? *Someone*
    # gets the benefit - I'd argue that if Chinese food is delivered to your
    # Zone someone there benefits from a reduced-cost trip (cheaper food) or
    # less time (hotter food). On the other hand, the benefit might accrue
    # to the origin (lower labor cost for delivery guy, more repeat business
    # from timely delivery). Hmmmm.
    #
    #          credit for being origin + credit for being dest
    exp_benefits_by_zone = np.array(([[
        19.3,    # ( 1.1 +  2.4 +  3.9)/2 + (1.1 +  8.4 + 21.7)/2 = 1/2 row sum row 1 + 1/2 col sum col 1
        36.1,    # ( 8.4 + 11.0 + 13.8)/2 + (2.4 + 11.0 + 25.6)/2 = 1/2 row sum row 2 + 1/2 col sum col 2
        62.2,    # (21.7 + 25.6 + 29.7)/2 + (3.9 + 13.8 + 29.7)/2 = 1/2 row sum row 3 + 1/2 col sum col 3
    ]]))

    pct_hb = 50    # percent of benefits accruing to originating (home) Zone
    benefits_by_zone = analyze_main.calculate_benefits(cs_delta, pct_hb)
    self.assertTrue(np.isclose(benefits_by_zone, exp_benefits_by_zone).all())

    # Make sure that the total benefits are accounted for. The two sums are
    # floats accumulated in a different order, so compare with a tolerance
    # rather than with ==.
    self.assertTrue(np.isclose(np.sum(benefits_by_zone), np.sum(cs_delta)))