def create_estimation_dataset(G, path_list_collection, id_list, trip_times, master_config):
    """Write one CSV row per (observation, alternative) path for estimation.

    The file named by ``output_config['estimation_data']`` is overwritten.
    It starts with a header row (``occ``, ``alt``, ``trip_id``, ``chosen``,
    the configured aliases and, when ``output_config['path_size']`` is set,
    ``path_size``), followed by one row per path in ``path_list_collection``.
    The ``chosen`` column is ``True`` only for alternative 0 of each
    observation.  Rows are terminated with ``'\\r'``.

    Args:
        G: network object handed to ``path_size`` and ``path_trace``.
        path_list_collection: sequence of path lists, one list per observation.
        id_list: trip ids, indexed in parallel with ``path_list_collection``.
        trip_times: mapping from trip id to the trip time that is passed to
            ``get_time_dependent_variable``.
        master_config: configuration exposing ``output_config`` and
            ``choice_set_config`` attributes and a
            ``'time_dependent_relation'`` item.
    """
    output_config = master_config.output_config
    choice_set_config = master_config.choice_set_config

    # The context manager closes the file even when tracing a path raises;
    # a bare open()/close() pair would leak the handle in that case.
    with open(output_config['estimation_data'], 'w') as est_file:
        est_writer = csv.writer(est_file, lineterminator='\r')

        header = ['occ', 'alt', 'trip_id', 'chosen'] + output_config['aliases']
        if output_config['path_size']:
            header = header + ['path_size']
        est_writer.writerow(header)

        path_size_data = path_size(G, path_list_collection, choice_set_config)

        for occ_idx, path_list in enumerate(path_list_collection):
            for alt_idx, path in enumerate(path_list):
                trip_id = id_list[occ_idx]
                values = []
                for i, variable in enumerate(output_config['variables']):
                    if variable in master_config['time_dependent_relation']:
                        # Resolve the variable name through
                        # get_time_dependent_variable using this trip's time.
                        key = get_time_dependent_variable(
                            variable,
                            trip_times[trip_id],
                            master_config['time_dependent_relation'])
                    else:
                        key = variable
                    values.append(str(path_trace(
                        G, path, key,
                        output_config['trace_funs'][i],
                        output_config['final_funs'][i],
                        output_config['weights'][i])))

                if output_config['path_size']:
                    values.append(path_size_data[occ_idx][alt_idx])

                est_writer.writerow(
                    [str(occ_idx), str(alt_idx), str(trip_id), str(alt_idx == 0)]
                    + values)
def ds_generate_master(G, chosen, choice_set_config, link_randomizer,
                       time_dependent_relation, trip_time, ext_bound):
    """Build the master path set by repeated random-coefficient path searches.

    For each of ``choice_set_config['ds_num_draws']`` draws, one coefficient
    per bounding-box entry is sampled uniformly (in log space when
    ``choice_set_config['log_prior']`` is set), then
    ``bidirectional_dijkstra`` is run once, or
    ``choice_set_config['randomize_after_iters']`` times when
    ``choice_set_config['randomize_after']`` is set.

    Args:
        G: network object handed to the path search.
        chosen: sequence whose first and last elements are used as the
            source and target of the search.
        choice_set_config: configuration mapping read as described above.
        link_randomizer: passed through unchanged to ``bidirectional_dijkstra``.
        time_dependent_relation: passed to ``get_time_dependent_variable``.
        trip_time: passed to ``get_time_dependent_variable``.
        ext_bound: precomputed bounding box, or ``None`` to compute one with
            ``find_coef_bounding_box``.

    Returns:
        List with element ``[1]`` of every ``bidirectional_dijkstra`` result,
        in search order.
    """
    origin, destination = chosen[0], chosen[-1]

    if ext_bound is None:
        bounds = find_coef_bounding_box(
            G, origin, destination, choice_set_config,
            time_dependent_relation, trip_time)
    else:
        bounds = ext_bound

    draw_count = choice_set_config['ds_num_draws']
    # Deliberately shared across draws: every draw overwrites the same keys.
    coefficients = {}
    found_paths = []

    for _draw in range(draw_count):
        # Sample random coefficients from the bounding box.
        for raw_name in bounds:
            name = get_time_dependent_variable(
                raw_name, trip_time, time_dependent_relation)
            if choice_set_config['log_prior']:
                log_low = log(bounds[raw_name][0])
                log_high = log(bounds[raw_name][1])
                coefficients[name] = exp(random.uniform(log_low, log_high))
            else:
                coefficients[name] = random.uniform(
                    bounds[raw_name][0], bounds[raw_name][1])

        if choice_set_config['randomize_after']:
            search_count = choice_set_config['randomize_after_iters']
        else:
            search_count = 1

        # Perform the generalized-cost shortest path search(es).
        for _search in range(search_count):
            result = bidirectional_dijkstra(
                G, origin, destination, coefficients,
                choice_set_config['weights'], link_randomizer)
            found_paths.append(result[1])

    return found_paths
def create_holdback_prediction_dataset(G, path_list_collection, id_list,
                                       trip_times, master_config,
                                       chosen_overlap):
    """Write the holdback prediction CSV, one row per (observation, alternative).

    The file named by ``output_config['estimation_data']`` is overwritten.
    The header is ``occ``, ``alt``, ``trip_id``, ``chosen``, the configured
    aliases, ``path_size`` (only when ``output_config['path_size']`` is set)
    and ``overlap``.  Path sizes are computed on a copy of the collection
    with alternative 0 removed from every observation; alternative 0 itself
    is written with a path size of 0.  Rows are terminated with ``'\\r'``.

    Args:
        G: network object handed to ``path_size`` and ``path_trace``.
        path_list_collection: sequence of path lists, one list per
            observation; it is not modified.
        id_list: trip ids, indexed in parallel with ``path_list_collection``.
        trip_times: mapping from trip id to the trip time that is passed to
            ``get_time_dependent_variable``.
        master_config: configuration exposing ``output_config`` and
            ``choice_set_config`` attributes and a
            ``'time_dependent_relation'`` item.
        chosen_overlap: per-observation, per-alternative values written to
            the ``overlap`` column.
    """
    output_config = master_config.output_config
    choice_set_config = master_config.choice_set_config

    # The context manager closes the file even when tracing a path raises;
    # a bare open()/close() pair would leak the handle in that case.
    with open(output_config['estimation_data'], 'w') as est_file:
        est_writer = csv.writer(est_file, lineterminator='\r')

        header = ['occ', 'alt', 'trip_id', 'chosen'] + output_config['aliases']
        if output_config['path_size']:
            header = header + ['path_size']
        est_writer.writerow(header + ['overlap'])

        # Path sizes are computed without alternative 0; work on a deep copy
        # so the caller's collection keeps all of its alternatives.
        sans_chosen = copy.deepcopy(path_list_collection)
        for path_list in sans_chosen:
            path_list.pop(0)
        path_size_data = path_size(G, sans_chosen, choice_set_config)

        for occ_idx, path_list in enumerate(path_list_collection):
            for alt_idx, path in enumerate(path_list):
                trip_id = id_list[occ_idx]
                values = []
                for i, variable in enumerate(output_config['variables']):
                    if variable in master_config['time_dependent_relation']:
                        # Resolve the variable name through
                        # get_time_dependent_variable using this trip's time.
                        key = get_time_dependent_variable(
                            variable,
                            trip_times[trip_id],
                            master_config['time_dependent_relation'])
                    else:
                        key = variable
                    values.append(str(path_trace(
                        G, path, key,
                        output_config['trace_funs'][i],
                        output_config['final_funs'][i],
                        output_config['weights'][i])))

                if output_config['path_size']:
                    if alt_idx == 0:
                        values.append(0)
                    else:
                        # Shifted by one because alternative 0 was removed
                        # before the path sizes were computed.
                        values.append(path_size_data[occ_idx][alt_idx - 1])
                values.append(chosen_overlap[occ_idx][alt_idx])

                est_writer.writerow(
                    [str(occ_idx), str(alt_idx), str(trip_id), str(alt_idx == 0)]
                    + values)
def find_coef_bounding_box(G, source, target, choice_set_config,
                           time_dependent_relation, trip_time):
    """Find, per variable, the coefficient interval in which the path changes.

    Every entry of ``config['variables']`` is resolved through
    ``get_time_dependent_variable``.  The reference variable
    (``config['ref']``) is fixed at ``[1, 1]``.  For each other variable the
    shortest path from ``source`` to ``target`` is searched with that
    variable's coefficient varied over ``config['ranges'][variable]`` and the
    variable is summed along the resulting path with ``path_trace``.  Two
    bisections in log-coefficient space then locate

    * the point where the traced value stops matching the value obtained at
      the lower end of the range, and
    * the point where it starts matching the value obtained at the upper end.

    The final midpoints of the two bisections form the variable's bound.  A
    variable is left out of the result when both ends of its range trace to
    the same value, or when the two final midpoints coincide.

    Args:
        G: network object handed to ``bidirectional_dijkstra`` and
            ``path_trace``.
        source: start of the path search.
        target: end of the path search.
        choice_set_config: configuration mapping; the keys read here are
            ``variables``, ``ref``, ``median_compare``, ``log_prior``,
            ``randomize_compare``, ``randomize_after``,
            ``randomize_after_dev``, ``weights``, ``ranges`` and
            ``tolerance``.
        time_dependent_relation: passed to ``get_time_dependent_variable``.
        trip_time: passed to ``get_time_dependent_variable``.

    Returns:
        Dict mapping resolved variable name to ``[low, high]``.

    Raises:
        Exception: a variable is listed in ``randomize_compare`` while
            ``randomize_after`` is disabled.

    Note:
        The global ``random`` generator is re-seeded on every bisection step.
    """
    config = choice_set_config
    verbose = False  # config['verbose']
    final_bound = {}

    def debug(*parts):
        # Space-separated diagnostic output; silent unless verbose is enabled.
        if verbose:
            print(' '.join(str(part) for part in parts))

    for prelim_key in config['variables']:
        key = get_time_dependent_variable(
            prelim_key, trip_time, time_dependent_relation)

        if key == config['ref']:
            final_bound[key] = [1, 1]
            continue

        # Coefficients held fixed while this variable's coefficient is varied.
        vc = {config['ref']: 1}
        if key in config['median_compare']:
            for compare_key in final_bound:
                # Fixed: this used to test ``key``, which can never be absent
                # from median_compare inside the enclosing ``if``, so the
                # medians below were never applied.
                if compare_key not in config['median_compare']:
                    low, high = final_bound[compare_key]
                    if config['log_prior']:
                        vc[compare_key] = exp((log(low) + log(high)) / 2.0)
                    else:
                        vc[compare_key] = (low + high) / 2.0

        link_randomizer = None
        if key in config['randomize_compare']:
            if not config['randomize_after']:
                raise Exception(
                    "randomize_compare not allowed without randomize_after")
            link_randomizer = config['randomize_after_dev']

        # sys.maxint exists only on Python 2; fall back to sys.maxsize elsewhere.
        the_seed = random.randint(0, getattr(sys, 'maxint', sys.maxsize))
        debug(vc)

        cur_wgt = None
        if key in config['weights']:
            cur_wgt = config['weights'][key]

        def traced_value(cur_coef):
            # Sum of ``key`` along the shortest path found with this
            # coefficient.  Only called within the current loop iteration, so
            # the loop variables it closes over are the intended ones.
            coefs = dict(vc)
            coefs[key] = cur_coef
            path = bidirectional_dijkstra(
                G, source, target, coefs, config['weights'],
                link_randomizer)[1]
            return path_trace(G, path, key, 'sum', wgtvar=cur_wgt)

        # Both brackets start on the full configured range, in log space.
        coef_min_low = coef_min_high = log(config['ranges'][prelim_key][0])
        coef_max_low = coef_max_high = log(config['ranges'][prelim_key][1])
        val_min_low = val_min_high = traced_value(exp(coef_min_low))
        val_max_low = val_max_high = traced_value(exp(coef_max_low))

        debug(key, "coef_min_low:", exp(coef_min_low))
        debug(key, "coef_max_low:", exp(coef_max_low))
        debug(key, "val_min_low:", val_min_low)
        debug(key, "val_max_low:", val_max_low)

        if val_min_low == val_max_low:
            debug(key, "no range... ignoring")
            continue

        while True:
            # Same seed on every step so randomized searches stay comparable.
            random.seed(the_seed)

            coef_mid_low = (coef_min_low + coef_max_low) / 2
            coef_mid_high = (coef_min_high + coef_max_high) / 2
            val_mid_low = traced_value(exp(coef_mid_low))
            val_mid_high = traced_value(exp(coef_mid_high))

            debug(key, "coef_mid_low:", exp(coef_mid_low))
            debug(key, "coef_mid_high:", exp(coef_mid_high))
            debug(key, "val_mid_low:", val_mid_low)
            debug(key, "val_mid_high:", val_mid_high)

            # Low bracket: move up while the value still matches the low end.
            if val_mid_low == val_min_low:
                coef_min_low = coef_mid_low
            else:
                coef_max_low = coef_mid_low
                val_max_low = val_mid_low

            # High bracket: move down while the value still matches the high end.
            if val_mid_high == val_max_high:
                coef_max_high = coef_mid_high
            else:
                coef_min_high = coef_mid_high
                val_min_high = val_mid_high

            debug(key, "coef_low:", (exp(coef_min_low), exp(coef_max_low)))
            debug(key, "val_low:", (val_min_low, val_max_low))
            debug(key, "coef_high:", (exp(coef_min_high), exp(coef_max_high)))
            debug(key, "val_high:", (val_min_high, val_max_high))

            # Both brackets halve every step, so checking one width suffices.
            if (coef_max_low - coef_min_low) < config['tolerance']:
                break

        if coef_mid_low != coef_mid_high:
            final_bound[key] = [exp(coef_mid_low), exp(coef_mid_high)]

    debug(final_bound)
    return final_bound