# Shared imports for the snippets below. The standard-library and numpy imports
# are implied by usage; project-local modules (db_main, db_travel_times, db_trip,
# db_arc_flags, Map, cluster_kd, DijkstrasAlgorithm, and the various helpers)
# come from elsewhere in this repo, so their import paths are not shown here.
from collections import defaultdict
from datetime import datetime, date, time, timedelta
from functools import partial
from multiprocessing import Pool
from os import path
import timeit

from numpy import matrix, zeros


# Variant keyed by (begin_node_id, end_node_id) pairs, reading from the original
# travel-times table via get_travel_times_cursor.
def load_pace_vectors(dates, consistent_link_set):
    # Map (begin_node, connecting_node) --> index in the pace vector
    link_id_map = defaultdict(lambda: -1)  # -1 indicates an invalid ID number
    for i in xrange(len(consistent_link_set)):
        key = consistent_link_set[i]
        link_id_map[key] = i

    db_main.connect('db_functions/database.conf')
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))

        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor(date)

        # Assign travel times into the vector if the link is in the consistent link set
        for (begin_node_id, end_node_id, date_time, travel_time, num_trips) in curs:
            i = link_id_map[begin_node_id, end_node_id]
            # i will be -1 if the link is not in the consistent link set
            if i >= 0:
                vect[i] = travel_time
                weight[i] = num_trips

        vects.append(vect)
        weights.append(weight)
    db_main.close()

    return vects, weights
# Variant keyed by a single link_id, reading via get_travel_times_cursor_new.
def load_pace_vectors(dates, consistent_link_set):
    # Map link_id --> index in the pace vector
    link_id_map = defaultdict(lambda: -1)  # -1 indicates an invalid ID number
    for i in xrange(len(consistent_link_set)):
        key = long(consistent_link_set[i])
        link_id_map[key] = i

    db_main.connect("db_functions/database.conf")
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))

        # Get the paces for this datetime
        curs = db_travel_times.get_travel_times_cursor_new(date)

        # Assign paces into the vector if the link is in the consistent link set
        for (link_id, date_time, paces, num_cars) in curs:
            i = link_id_map[link_id]
            # i will be -1 if the link is not in the consistent link set
            if i >= 0:
                vect[i] = paces
                weight[i] = num_cars

        vects.append(vect)
        weights.append(weight)
    db_main.close()

    return vects, weights
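
# Example (hypothetical values, not from the repo): load one day of hourly pace
# vectors for a small consistent link set. The link IDs below are placeholders;
# real ones come from load_consistent_link_set. Note that load_pace_vectors
# manages its own database connection.
if __name__ == '__main__':
    hours = [datetime(2014, 6, 1, h) for h in xrange(24)]
    consistent_link_set = [10001L, 10002L, 10003L]  # placeholder link IDs
    vects, weights = load_pace_vectors(hours, consistent_link_set)
    # One (num_links x 1) column vector per hour, plus matching observation counts
    print len(vects), vects[0].shape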
# Variant that writes the estimated link speeds to a local CSV file.
def run_chunk(road_map, time):
    try:
        print("Connecting to db")
        db_main.connect("db_functions/database.conf", retry_interval=10)
        print(str(datetime.now()) + " : Analysing " + str(time))
        road_map.unflatten()

        t1 = datetime.now()
        trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1))
        t2 = datetime.now()
        db_main.close()
        print("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1))

        estimate_travel_times(road_map, trips, max_iter=2, test_set=None,
                              distance_weighting=None, model_idle_time=False,
                              initial_idle_time=0)

        t3 = datetime.now()
        print(str(t3) + " : Finished estimating traffic for " + str(time) +
              " after " + str(t3 - t2))

        road_map.save_speeds('tmp_speeds.csv')
    except Exception as e:
        print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
# Variant that saves the estimated travel times back to the database.
def run_chunk(road_map, time):
    try:
        print("Connecting to db")
        db_main.connect("db_functions/database.conf", retry_interval=10)
        print(str(datetime.now()) + " : Analysing " + str(time))
        road_map.unflatten()

        t1 = datetime.now()
        trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1))
        t2 = datetime.now()
        db_main.close()
        print("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1))

        estimate_travel_times(road_map, trips, max_iter=20, test_set=None,
                              distance_weighting=None, model_idle_time=False,
                              initial_idle_time=0)

        t3 = datetime.now()
        print(str(t3) + " : Finished estimating traffic for " + str(time) +
              " after " + str(t3 - t2))

        db_main.connect("db_functions/database.conf", retry_interval=10)
        t1 = datetime.now()
        db_travel_times.save_travel_times(road_map, time)
        t2 = datetime.now()
        print("Saved travel times after " + str(t2 - t1))
        db_main.close()
    except Exception as e:
        print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
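
# A minimal driver sketch (assumed, not from the repo): fan one day of run_chunk
# calls out over a process pool. flatten() before dispatch mirrors
# plot_speeds_in_parallel below, so the map pickles cleanly; partial(...) holds
# the shared map fixed across workers.
def run_day_in_parallel(road_map, day, pool):
    road_map.flatten()  # run_chunk unflattens its own copy in each worker
    hours = [day + timedelta(hours=h) for h in xrange(24)]
    pool.map(partial(run_chunk, road_map), hours)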
def run(region_size=250):
    # Alternatively, build an unclustered map directly:
    # nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
    #               lookup_kd_size=1, region_kd_size=region_size,
    #               limit_bbox=Map.reasonable_nyc_bbox)
    # nyc_map.assign_node_regions()
    nyc_map = cluster_kd.createMap(region_size)
    nyc_map.assign_link_arc_flags()
    # nyc_map.save_region("../nyc_map4/region.csv")

    print nyc_map.total_region_count
    for region_id in range(nyc_map.total_region_count):
        boundary_nodes = nyc_map.get_region_boundary_nodes(region_id)

        # Does a multi-origin bidirectional Dijkstra search to get an arc flag tree
        warmstart = True
        use_domination_value = False
        DijkstrasAlgorithm.bidirectional_dijkstra(boundary_nodes, nyc_map,
                                                  warmstart, use_domination_value)

        #####################################################################
        # DRAW ARC FLAGS USING THIS:
        # pace_dict = {}
        # for link in nyc_map.links:
        #     if link.backward_arc_flags_vector[region_id]:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
        #     else:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
        # plot_estimates.plot_speed(nyc_map, "Backward Arc Flags Region: " + str(region_id),
        #                           "Backward" + str(region_id), pace_dict)
        # ... and likewise with forward_arc_flags_vector for the forward flags.
        #####################################################################

    d = datetime(2012, 3, 5, 2)
    db_main.connect("db_functions/database.conf")
    db_arc_flags.create_arc_flag_table()
    db_arc_flags.save_arc_flags(nyc_map, d)
    db_main.close()
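
# Conceptual sketch (not the repo's query code): at query time, an edge is only
# worth relaxing if its arc flag for the destination's region is set. This is
# what the per-region flags computed above buy - most of the graph is skipped.
def should_relax(link, dest_region_id):
    return link.forward_arc_flags_vector[dest_region_id]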
def createMap(region_size):
    db_main.connect("db_functions/database.conf")
    d = date(2011, 3, 2)
    dt1 = datetime.combine(d, time(19, 40))
    dt2 = datetime.combine(d, time(20, 0))
    trips_arc = db_trip.find_pickup_dt(dt1, dt2)

    # Heuristic: scale the region kd-tree size by the number of sample trips
    region_size = region_size / 4
    approxSize = len(trips_arc) / region_size

    arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
                            lookup_kd_size=1, region_kd_size=approxSize,
                            limit_bbox=Map.Map.reasonable_nyc_bbox)
    arc_flags_map.assign_node_regions()

    # Weight each node by the number of trips that start or end there, then
    # rebuild the kd-tree so regions balance trip volume rather than node count
    trips_arc = arc_flags_map.match_trips_to_nodes(trips_arc)
    for trip in trips_arc:
        trip.origin_node.trip_weight += 1
        trip.dest_node.trip_weight += 1
    arc_flags_map.build_kd_trees(split_weights=True)
    arc_flags_map.assign_node_regions()

    region_graph_generator(arc_flags_map)
    db_main.close()
    return arc_flags_map
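
# Example (assumed entry point): build the trip-weighted clustered map and report
# how many boundary nodes each region has, since the boundary sets drive the cost
# of the bidirectional Dijkstra preprocessing in run() above.
if __name__ == '__main__':
    arc_flags_map = createMap(250)
    for region_id in range(arc_flags_map.total_region_count):
        boundary_nodes = arc_flags_map.get_region_boundary_nodes(region_id)
        print "Region %d: %d boundary nodes" % (region_id, len(boundary_nodes))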
# Counts how many date slices each link_id appears in (new-style table).
def compute_link_counts(dates):
    num_appearances = defaultdict(float)
    db_main.connect("db_functions/database.conf")
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor_new(date)
        for (link_id, date_time, paces, num_cars) in curs:
            num_appearances[link_id] += 1
    db_main.close()
    return num_appearances
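
# A hedged sketch of how counts like these can become a "consistent link set":
# keep links that appear in at least a perc_data_threshold fraction of the date
# slices. The load_consistent_link_set used below likely reads precomputed counts
# from a table instead; this in-memory version is for illustration only.
def consistent_links_from_counts(num_appearances, num_dates, perc_data_threshold):
    min_count = perc_data_threshold * num_dates
    return sorted(link_id for link_id, count in num_appearances.iteritems()
                  if count >= min_count)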
# Variant keyed by (begin_node_id, end_node_id); also accumulates the total
# number of observed trips per link.
def compute_link_counts(dates):
    num_obs = defaultdict(float)
    num_appearances = defaultdict(float)
    db_main.connect('db_functions/database.conf')
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor(date)
        for (begin_node_id, end_node_id, date_time, travel_time, num_trips) in curs:
            num_obs[begin_node_id, end_node_id] += num_trips
            num_appearances[begin_node_id, end_node_id] += 1
    db_main.close()
    return num_obs, num_appearances
def run_full_day():
    pool = Pool(8)
    dates = [datetime(2012, 4, 15, h) for h in xrange(24)]
    for start_date in dates:
        end_date = start_date + timedelta(hours=1)

        db_main.connect('db_functions/database.conf')
        trips = db_trip.find_pickup_dt(start_date, end_date)
        db_main.close()

        fn_prefix = "2012_4_15_%d" % start_date.hour
        perform_cv(trips, 'nyc_map4/nodes.csv', 'nyc_map4/links.csv', 8, pool,
                   fn_prefix=fn_prefix)
# Variant for the link_id schema, over a fixed June 2014 date range.
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                     "Saturday", "Sunday"]

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect("db_functions/database.conf")
    # dates = db_travel_times.get_available_dates()
    dates = list(dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)))

    # Only do this once for the whole dataset; results are stored in the
    # link_counts_chicago table:
    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    if len(consistent_link_set) == 0:
        logMsg("Found 0 consistent links. Returning.")
        return
    print "len of consistent_link_set", len(consistent_link_set)
    db_main.close()

    logMsg("Generating vectors")
    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists
               for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector into the timeseries and into its (weekday, hour) group
        pace_timeseries[(just_date, hour, weekday)] = vect
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on link ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
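
# Example (assumed usage): run the pipeline with a 95% availability threshold and
# unpack the grouped output. load_pace_data returns None when no link survives
# the threshold, so the result is checked before unpacking.
if __name__ == '__main__':
    result = load_pace_data(perc_data_threshold=0.95)
    if result is not None:
        (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
         trip_names, consistent_link_set) = result
        print "%d links, %d (weekday, hour) groups" % (len(consistent_link_set),
                                                       len(pace_grouped))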
def run_test(region_size, preprocess=False):
    if preprocess:
        start = timeit.default_timer()
        ArcFlagsPreProcess.run(region_size)
        stop = timeit.default_timer()
        print "The time for preprocessing was " + str(stop - start)

    arc_flags_map = cluster_kd.createMap(region_size)
    arc_flags_map.assign_link_arc_flags()
    # arc_flags_map.save_as_csv("nodeRegions.csv", "linkRegions.csv")
    print "loaded map"

    db_main.connect("db_functions/database.conf")
    d = datetime(2012, 3, 5, 2)
    db_arc_flags.load_arc_flags(arc_flags_map, d)
    print "loaded arcflags"

    # Load the same set of trips four times, so each routing method gets its
    # own copy to annotate
    d = date(2011, 3, 2)
    dt1 = datetime.combine(d, time(19, 40))
    dt2 = datetime.combine(d, time(20, 0))
    trips_arc = arc_flags_map.match_trips_to_nodes(db_trip.find_pickup_dt(dt1, dt2))
    trips_star = arc_flags_map.match_trips_to_nodes(db_trip.find_pickup_dt(dt1, dt2))
    trips_none = arc_flags_map.match_trips_to_nodes(db_trip.find_pickup_dt(dt1, dt2))
    trips_both = arc_flags_map.match_trips_to_nodes(db_trip.find_pickup_dt(dt1, dt2))
    db_main.close()

    print "got " + str(len(trips_arc)) + " trips"

    # The plain-Dijkstra control run is required for the comparisons below
    # (it was commented out in the original)
    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_none)
    stop = timeit.default_timer()
    print "Computed trips using normal Dijkstra in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_star, astar_used=True)
    stop = timeit.default_timer()
    print "Computed trips using A* in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_arc, arcflags_used=True)
    stop = timeit.default_timer()
    print "Computed trips using arc flags in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_both, arcflags_used=True, astar_used=True)
    stop = timeit.default_timer()
    print "Computed trips using arc flags and A* in " + str(stop - start)

    failed_trips = []
    same = True
    for i in range(len(trips_arc)):
        if trips_none[i].path_links != trips_star[i].path_links:
            same = False
        if trips_none[i].path_links != trips_both[i].path_links:
            same = False
        if trips_none[i].path_links != trips_arc[i].path_links:
            same = False
            draw_graphs(trips_none, trips_arc, arc_flags_map, i)
            time1 = sum(link.time for link in trips_none[i].path_links)
            time2 = sum(link.time for link in trips_arc[i].path_links)
            print ("The time for none is: " + str(time1) +
                   " The time for arc flags is: " + str(time2))
            failed_trips.append(trips_none[i])

    print "The four trips are the same: " + str(same)
    return failed_trips, arc_flags_map
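
# Example (assumed usage): run the benchmark with fresh preprocessing and report
# the trips whose arc-flag route disagreed with plain Dijkstra.
if __name__ == '__main__':
    failed_trips, arc_flags_map = run_test(250, preprocess=True)
    print "%d trips routed differently under arc flags" % len(failed_trips)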
def plot_group_of_speeds((dts, pace_dicts), road_map, tmp_dir):
    road_map.unflatten()
    db_main.connect("db_functions/database.conf")
    for i in range(len(dts)):
        dt = dts[i]
        if pace_dicts is None:
            pace_dict = None
        else:
            pace_dict = pace_dicts[i]

        out_file = path.join(tmp_dir, str(dt) + ".png")
        plot_speed(road_map, dt, out_file, pace_dict=pace_dict)
    db_main.close()


def plot_speeds_in_parallel(road_map, dts, speed_dicts=None,
                            tmp_dir="analysis/tmp", pool=DefaultPool()):
    road_map.flatten()
    plt_speeds_fun = partial(plot_group_of_speeds, road_map=road_map,
                             tmp_dir=tmp_dir)
    list_it = splitLists(dts, speed_dicts, pool._processes)
    pool.map(plt_speeds_fun, list_it)


def build_speed_dicts(consistent_link_set, zscore_vectors):
    speed_dicts = []
    for vect in zscore_vectors:
        # NOTE: the original source is truncated here. The body below is a
        # reconstruction under the assumption that each vector entry corresponds
        # positionally to a link in consistent_link_set.
        speed_dict = {}
        for i in xrange(len(consistent_link_set)):
            speed_dict[consistent_link_set[i]] = vect[i]
        speed_dicts.append(speed_dict)
    return speed_dicts
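
# Example (assumed usage): render one frame per hour across a worker pool. The
# Map constructor defaults and the effect of speed_dicts=None (plot_speed then
# receives pace_dict=None for every frame) are assumptions about the repo's API.
if __name__ == '__main__':
    pool = Pool(8)
    road_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")
    dts = [datetime(2012, 4, 15, h) for h in xrange(24)]
    plot_speeds_in_parallel(road_map, dts, speed_dicts=None, pool=pool)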
# Variant for the (begin_node, end_node) schema, over all available dates.
def load_pace_data(perc_data_threshold=.95, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                     'Saturday', 'Sunday']

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    dates = db_travel_times.get_available_dates()
    # dates = list(dateRange(datetime(2012, 10, 21), datetime(2012, 11, 11)))

    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    db_main.close()

    logMsg("Generating vectors")
    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists
               for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector into the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector into its (weekday, hour) group
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on node ids
    trip_names = ["%d-->%d" % (start, end) for (start, end) in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)