def load_pace_vectors(dates, consistent_link_set):
    # Map link_id --> position in the pace vector
    link_id_map = defaultdict(lambda: -1)  # -1 indicates an invalid ID number
    for i in xrange(len(consistent_link_set)):
        key = long(consistent_link_set[i])
        link_id_map[key] = i

    db_main.connect('db_functions/database.conf')
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))

        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor_new(date)

        # Assign travel times into the vector, if the link is in the consistent link set
        for (link_id, date_time, paces, num_cars) in curs:
            i = link_id_map[link_id]  # i will be -1 if the link is not in the consistent link set
            if i >= 0:
                vect[i] = paces
                weight[i] = num_cars

        vects.append(vect)
        weights.append(weight)
    db_main.close()

    return vects, weights
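
# Hedged usage sketch (not part of the original module): load one week of hourly pace
# vectors for a link set that was computed beforehand, e.g. by load_consistent_link_set.
# The helper name and the date range below are illustrative assumptions only.
def example_load_week_of_paces(consistent_link_set):
    from datetime import datetime, timedelta

    dates = [datetime(2012, 6, 4) + timedelta(hours=h) for h in range(7 * 24)]
    vects, weights = load_pace_vectors(dates, consistent_link_set)
    # vects[i] and weights[i] are (num_links x 1) column matrices aligned with
    # consistent_link_set, one pair per entry of dates
    return vects, weights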
def run_chunk(road_map, time): try: print("Connecting to db") db_main.connect("db_functions/database.conf", retry_interval=10) print (str(datetime.now()) + " : Analysing " + str(time)) road_map.unflatten() t1 = datetime.now() trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1)) t2 = datetime.now() db_main.close() print ("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1)) estimate_travel_times(road_map, trips, max_iter=2, test_set=None, distance_weighting=None, model_idle_time=False, initial_idle_time=0) t3 = datetime.now() print (str(t3) + " : Finished estimating traffic for " + str(time) + " after " + str(t3-t2)) road_map.save_speeds('tmp_speeds.csv') #db_main.close() except Exception as e: print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
def load_pace_vectors(dates, consistent_link_set):
    # Map (begin_node, connecting_node) --> position in the pace vector
    link_id_map = defaultdict(lambda: -1)  # -1 indicates an invalid ID number
    for i in xrange(len(consistent_link_set)):
        key = consistent_link_set[i]
        link_id_map[key] = i

    db_main.connect('db_functions/database.conf')
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))

        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor(date)

        # Assign travel times into the vector, if the link is in the consistent link set
        for (begin_node_id, end_node_id, date_time, travel_time, num_trips) in curs:
            i = link_id_map[begin_node_id, end_node_id]  # i will be -1 if the link is not in the consistent link set
            if i >= 0:
                vect[i] = travel_time
                weight[i] = num_trips

        vects.append(vect)
        weights.append(weight)
    db_main.close()

    return vects, weights
def run_chunk(road_map, time): try: print("Connecting to db") db_main.connect("db_functions/database.conf", retry_interval=10) print (str(datetime.now()) + " : Analysing " + str(time)) road_map.unflatten() t1 = datetime.now() trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1)) t2 = datetime.now() db_main.close() print ("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1)) estimate_travel_times(road_map, trips, max_iter=20, test_set=None, distance_weighting=None, model_idle_time=False, initial_idle_time=0) t3 = datetime.now() print (str(t3) + " : Finished estimating traffic for " + str(time) + " after " + str(t3-t2)) db_main.connect("db_functions/database.conf", retry_interval=10) t1 = datetime.now() db_travel_times.save_travel_times(road_map, time) t2 = datetime.now() print("Saved travel times after " + str(t2 - t1)) db_main.close() except Exception as e: print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
def extract_trips():
    print("Loading map")
    road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
                   limit_bbox=Map.reasonable_nyc_bbox)

    db_main.connect("db_functions/database.conf")
    dt1 = datetime(2012, 6, 1, 12)
    dt2 = datetime(2012, 6, 1, 12, 30)

    print("Loading trips")
    trips = db_trip.find_pickup_dt(dt1, dt2)

    print("Matching trips")
    samp = trips[1:100]
    new_samp = road_map.match_trips_to_nodes(samp)
    road_map.routeTrips(new_samp)

    with open('trip_links.csv', 'w') as f:
        w = csv.writer(f)
        w.writerow(['trip_id', 'from_lat', 'from_lon', 'to_lat', 'to_lon'])
        for i in range(len(new_samp)):
            print i
            if samp[i].path_links is not None:
                for link in samp[i].path_links:
                    w.writerow([i, link.origin_node.lat, link.origin_node.long,
                                link.connecting_node.lat, link.connecting_node.long])
def analyse_trip_times():
    db_main.connect('db_functions/database.conf')
    datelist = [datetime(year=2012, month=7, day=8, hour=0) + timedelta(hours=1) * x
                for x in range(168 * 3)]

    for date in datelist:
        trips = db_trip.find_pickup_dt(date, date + timedelta(hours=1))
        print("%s : %d" % (date, len(trips)))
def test():
    program_start = datetime.now()
    print "test started at: %s" % program_start.strftime("%Y-%m-%d %H:%M:%S")

    # Connect to the database
    db_main.connect("db_functions/database.conf")

    trips = db_trip.find_pickup_dt('2010-01-01 00:34:00', '2010-01-01 12:34:00')

    run_time = datetime.now() - program_start
    print run_time
    print len(trips)
def createMap(region_size):
    db_main.connect("db_functions/database.conf")
    d = date(2011, 3, 2)
    t = time(19, 40)
    t1 = time(20, 0)
    dt1 = datetime.combine(d, t)
    dt2 = datetime.combine(d, t1)
    trips_arc = db_trip.find_pickup_dt(dt1, dt2)

    region_size = region_size / 4
    approxSize = len(trips_arc) / region_size
    arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
                            lookup_kd_size=1, region_kd_size=approxSize,
                            limit_bbox=Map.Map.reasonable_nyc_bbox)
    arc_flags_map.assign_node_regions()
    trips_arc = arc_flags_map.match_trips_to_nodes(trips_arc)

    # Weight each node by the number of trips that start or end there, then rebuild
    # the KD-tree regions using those weights
    for trip in trips_arc:
        trip.origin_node.trip_weight += 1
        trip.dest_node.trip_weight += 1
    arc_flags_map.build_kd_trees(split_weights=True)
    arc_flags_map.assign_node_regions()

    # same_region = 0
    # for trip in trips_arc:
    #     if trip.origin_node.region_id == trip.dest_node.region_id:
    #         same_region += 1
    # print "number of trips in same region are: %d out of %d \n" % (same_region, len(trips_arc))

    region_graph_generator(arc_flags_map)
    db_main.close()

    # region_to_trips = {}
    # for node in arc_flags_map.nodes:
    #     if node.region_id in region_to_trips:
    #         region_to_trips[node.region_id] += node.trip_weight
    #     else:
    #         region_to_trips[node.region_id] = node.trip_weight
    # for i in region_to_trips:
    #     print "Region %d: %d trips" % (i, region_to_trips[i])

    return arc_flags_map
def run(region_size=250):
    # nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
    #               lookup_kd_size=1, region_kd_size=region_size,
    #               limit_bbox=Map.reasonable_nyc_bbox)
    # nyc_map.assign_node_regions()
    nyc_map = cluster_kd.createMap(region_size)
    nyc_map.assign_link_arc_flags()
    # nyc_map.save_region("../nyc_map4/region.csv")
    # get_correct_nodes(nyc_map, "../speeds_per_hour/" + map_file, None)

    i = 0
    print nyc_map.total_region_count
    for region_id in range(nyc_map.total_region_count):
        boundary_nodes = nyc_map.get_region_boundary_nodes(region_id)

        # Does a multi-origin bidirectional Dijkstra search to get an arc-flag tree
        warmstart = True
        use_domination_value = False
        DijkstrasAlgorithm.bidirectional_dijkstra(boundary_nodes, nyc_map, warmstart,
                                                  use_domination_value)

        #####################################################################
        # DRAW ARC_FLAGS USING THIS
        # pace_dict = {}
        # for link in nyc_map.links:
        #     if link.backward_arc_flags_vector[i] == True:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
        #     else:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
        # plot_estimates.plot_speed(nyc_map, "Backward Arc Flags Region: " + str(i), "Backward" + str(i), pace_dict)
        # pace_dict = {}
        # for link in nyc_map.links:
        #     if link.forward_arc_flags_vector[i] == True:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
        #     else:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
        # plot_estimates.plot_speed(nyc_map, "Forward Arc Flags Region: " + str(i), "Forward" + str(i), pace_dict)
        #####################################################################
        i += 1

    d = datetime(2012, 3, 5, 2)
    db_main.connect("db_functions/database.conf")
    db_arc_flags.create_arc_flag_table()
    db_arc_flags.save_arc_flags(nyc_map, d)
    db_main.close()
def test():
    pool = Pool(4)
    print("Connecting")
    db_main.connect('db_functions/database.conf')

    print("Loading Pace Data")
    data = load_pace_data(perc_data_threshold=.6, pool=pool)

    with open('tmp_vectors_chi_1_group.pickle', 'w') as f:
        pickle.dump(data, f)
def compute_link_counts(dates):
    num_appearances = defaultdict(float)

    db_main.connect('db_functions/database.conf')
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor_new(date)
        for [link_id, date_time, paces, num_cars] in curs:
            num_appearances[link_id] += 1
    db_main.close()

    return num_appearances
def approximate_job_sizes():
    global approx_job_size
    print("Approximating job sizes.")
    db_main.connect("db_functions/database.conf", retry_interval=10)

    d1 = datetime(2012, 6, 2)
    d2 = datetime(2012, 6, 9)
    for d in dateRange(d1, d2, timedelta(hours=1)):
        sql = ("SELECT count(*) FROM trip WHERE pickup_datetime >= '%s' "
               "AND pickup_datetime < '%s'" % (d, d + timedelta(hours=1)))
        (jsize,) = db_main.execute(sql).next()
        approx_job_size[d.weekday(), d.hour] = jsize
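
# Hedged sketch (an assumption about how approx_job_size is meant to be used): the
# per-(weekday, hour) trip counts give a rough cost for each hourly job, so a scheduler
# could dispatch the largest chunks first to keep a process pool evenly loaded. The
# helper name is illustrative.
def order_jobs_largest_first(hours):
    return sorted(hours, key=lambda t: approx_job_size[t.weekday(), t.hour], reverse=True)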
def compute_link_counts(dates):
    num_obs = defaultdict(float)
    num_appearances = defaultdict(float)

    db_main.connect('db_functions/database.conf')
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor(date)
        for [begin_node_id, end_node_id, date_time, travel_time, num_trips] in curs:
            num_obs[begin_node_id, end_node_id] += num_trips
            num_appearances[begin_node_id, end_node_id] += 1
    db_main.close()

    return num_obs, num_appearances
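
# Hedged sketch (an assumption, not the original load_consistent_link_set): one plausible
# way to turn the appearance counts above into a "consistent" link set is to keep only
# the links that report data on at least perc_data_threshold of the dates.
def threshold_link_set(num_appearances, num_dates, perc_data_threshold):
    consistent_links = [link for link, count in num_appearances.iteritems()
                        if count / float(num_dates) >= perc_data_threshold]
    return sorted(consistent_links)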
def run_full_day():
    pool = Pool(8)
    dates = [datetime(2012, 4, 15, h) for h in xrange(24)]

    for start_date in dates:
        end_date = start_date + timedelta(hours=1)

        db_main.connect('db_functions/database.conf')
        trips = db_trip.find_pickup_dt(start_date, end_date)
        db_main.close()

        fn_prefix = "2012_4_15_%d" % start_date.hour
        perform_cv(trips, 'nyc_map4/nodes.csv', 'nyc_map4/links.csv', 8, pool,
                   fn_prefix=fn_prefix)
def test_memory_usage():
    from db_functions import db_main, db_trip
    from datetime import datetime

    print("Before: %f" % getmem())
    nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
                  limit_bbox=Map.reasonable_nyc_bbox)
    print [nyc_map.min_lat, nyc_map.max_lat, nyc_map.min_lon, nyc_map.max_lon]

    db_main.connect('db_functions/database.conf')
    d1 = datetime(2012, 1, 10, 9)
    d2 = datetime(2012, 1, 10, 10)
    trips = db_trip.find_pickup_dt(d1, d2)

    print("Matching...")
    nyc_map.match_trips_to_nodes(trips)

    print("After : %f" % getmem())
    del nyc_map
def analyse_trip_locations():
    db_main.connect('db_functions/database.conf')
    datelist = [datetime(year=2012, month=7, day=8, hour=0) + timedelta(hours=1) * x
                for x in range(168 * 3)]

    # nyc_map = Map('nyc_map4/nodes.csv', 'nyc_map4/links.csv', limit_bbox=Map.reasonable_nyc_bbox)
    nyc_map = Map('nyc_map4/nodes.csv', 'nyc_map4/links.csv')
    print [nyc_map.min_lat, nyc_map.max_lat, nyc_map.min_lon, nyc_map.max_lon]

    valid_trips = 0
    bad_region_trips = 0
    jfk_trips = 0
    for date in datelist:
        trips = db_trip.find_pickup_dt(date, date + timedelta(hours=1))
        print("%s : %d" % (date, len(trips)))
        for trip in trips:
            if trip.isValid() == Trip.VALID:
                valid_trips += 1
                if (nyc_map.get_nearest_node(trip.fromLat, trip.fromLon) is None or
                        nyc_map.get_nearest_node(trip.toLat, trip.toLon) is None):
                    bad_region_trips += 1
                    if jfk(trip.fromLat, trip.fromLon) or jfk(trip.toLat, trip.toLon):
                        jfk_trips += 1

    print("Bad trips : %d / %d = %f" % (bad_region_trips, valid_trips,
                                        float(bad_region_trips) / valid_trips))
    perc = 0.0
    if bad_region_trips > 0:
        perc = float(jfk_trips) / bad_region_trips
    print("JFK trips : %d / %d = %f" % (jfk_trips, bad_region_trips, perc))
def plot_many_speeds():
    print("Getting dates")
    db_main.connect("db_functions/database.conf")
    # curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-03-04' and datetime < '2012-03-11';")
    # curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-06-17' and datetime < '2012-06-24';")
    # dates = [date for (date,) in curs]
    # dates = [datetime(2010,6,1,12) + timedelta(days=7)*x for x in range(208)]
    dates = [datetime(2010, 1, 6, 10) + timedelta(days=7) * x for x in range(208)]
    dates.sort()
    print("There are %d dates" % len(dates))

    print("Loading map.")
    road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")

    for date in dates:
        print("running %s" % str(date))
        plot_speed(road_map, date, "analysis/wednesdays/" + str(date) + ".png")
def run_independent(failed_trip, i):
    arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
                            lookup_kd_size=1, region_kd_size=1000,
                            limit_bbox=Map.Map.reasonable_nyc_bbox)
    arc_flags_map.assign_node_regions()
    arc_flags_map.assign_link_arc_flags()
    # region_graph_generator(arc_flags_map)

    db_main.connect("db_functions/database.conf")
    failed_trip.path_links = []
    trips = [failed_trip]
    trips_arc = arc_flags_map.match_trips_to_nodes(trips)

    # originNode = arc_flags_map.nodes_by_id[trips_arc[0].origin_node_id]
    # destNode = arc_flags_map.nodes_by_id[trips_arc[0].dest_node_id]
    originNode = trips_arc[0].origin_node
    destNode = trips_arc[0].dest_node

    boundary_nodes = arc_flags_map.get_region_boundary_nodes(originNode.region_id)
    DijkstrasAlgorithm.DijkstrasAlgorithm.independent_dijkstra(boundary_nodes, arc_flags_map)
    DijkstrasAlgorithm.DijkstrasAlgorithm.set_arc_flags(arc_flags_map, boundary_nodes[0].region_id)

    boundary_nodes = arc_flags_map.get_region_boundary_nodes(destNode.region_id)
    DijkstrasAlgorithm.DijkstrasAlgorithm.independent_dijkstra(boundary_nodes, arc_flags_map)
    DijkstrasAlgorithm.DijkstrasAlgorithm.set_arc_flags(arc_flags_map, boundary_nodes[0].region_id)

    draw_arc_flags(arc_flags_map, destNode.region_id, True)
    draw_arc_flags(arc_flags_map, originNode.region_id, False)

    pace_dict = {}
    arc_flags_map.routeTrips(trips_arc, arcflags_used=True)
    for link in arc_flags_map.links:
        pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
    for link in trips_arc[0].path_links:
        pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
    plot_estimates.plot_speed(arc_flags_map, "Independent" + str(i),
                              "Independent" + str(i), pace_dict)
def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)

    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)

    print "keys", len(merged_count_obs.keys())

    db_main.connect('db_functions/database.conf')
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # Issue of num of arguments
    db_travel_times.save_link_counts_new(merged_count_obs)
def test():
    pool = Pool(8)
    print("Connecting")
    db_main.connect('db_functions/database.conf')

    print("Getting dates")
    curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-06-17' and datetime < '2012-06-24' order by datetime;")
    # curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2013-01-01' and datetime < '2013-01-02' order by datetime;")
    # curs = db_main.execute("select distinct datetime from travel_times;")
    dates = [date for (date,) in curs]
    print("Found %d dates" % len(dates))

    compute_all_link_counts(dates, pool=pool)

    print("Loading Pace Data")
    data = load_pace_data(perc_data_threshold=.95, pool=pool)

    with open('tmp_vectors.pickle', 'w') as f:
        pickle.dump(data, f)
from datetime import datetime, timedelta
from db_functions import db_main, db_trip

db_main.connect('db_functions/database.conf')

dates = [datetime(2012, 4, 18, h) for h in range(24)]
s = ""
for d in dates:
    end = d + timedelta(hours=1)
    trips = db_trip.find_pickup_dt(d, end)
    total_dur = sum([trip.time for trip in trips])
    avg_dur = float(total_dur) / len(trips)
    print(avg_dur)
    s = s + "%f," % avg_dur

print(s)
""" Created on Sat Jan 10 21:12:41 2015 @author: brian """ from traffic_estimation.Trip import Trip from db_functions import db_main, db_travel_times, db_trip from routing.Map import Map from datetime import datetime from traffic_estimation.TrafficEstimation import estimate_travel_times, load_trips # Connect to the database db_main.connect("db_functions/database.conf") db_travel_times.drop_travel_time_table() db_travel_times.create_travel_time_table() # Load map print("Loading map") road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv") print("Loading trips") trips = db_trip.find_pickup_dt('2010-01-02 00:00:00', '2010-01-02 01:00:00') print (len(trips))
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                     'Saturday', 'Sunday']

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    # dates = db_travel_times.get_available_dates()
    dates = list(dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)))

    # Only do once for the whole dataset and store in the link_counts_chicago table
    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    if len(consistent_link_set) == 0:
        logMsg("Found 0 consistent links. Returning.")
        return
    else:
        print("len of consistent_link_set", len(consistent_link_set))
    db_main.close()

    logMsg("Generating vectors")

    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector into the timeseries and into its (weekday, hour) group
        pace_timeseries[(just_date, hour, weekday)] = vect
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on link ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
def load_pace_data(perc_data_threshold=.95, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                     'Saturday', 'Sunday']

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    dates = db_travel_times.get_available_dates()
    # dates = list(dateRange(datetime(2012,10,21), datetime(2012,11,11)))

    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    db_main.close()

    logMsg("Generating vectors")

    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector in the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector into its (weekday, hour) group
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on node ids
    trip_names = ["%d-->%d" % (start, end) for (start, end) in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
data = "\n".join(csv_lines) #print "\n".join(csv_lines[:1000]) p1 = Popen(['Rscript', 'traffic_estimation/plot_speeds_piped.R', filename, title, plot_type], stdout=PIPE, stdin=PIPE) _ = p1.communicate(data) # R output is discarded #print(_) del(_) #remove(filename + ".csv") def plot_group_of_speeds((dts, pace_dicts), road_map, tmp_dir): road_map.unflatten() db_main.connect("db_functions/database.conf") for i in range(len(dts)): dt = dts[i] if(pace_dicts==None): pace_dict = None else: pace_dict = pace_dicts[i] out_file = path.join(tmp_dir, str(dt) + ".png") plot_speed(road_map, dt, out_file, pace_dict=pace_dict) db_main.close() def plot_speeds_in_parallel(road_map, dts, speed_dicts=None, tmp_dir="analysis/tmp", pool=DefaultPool()): road_map.flatten() plt_speeds_fun = partial(plot_group_of_speeds, road_map=road_map, tmp_dir = tmp_dir)
def run_test(region_size, preprocess=False):
    if preprocess:
        start = timeit.default_timer()
        ArcFlagsPreProcess.run(region_size)
        stop = timeit.default_timer()
        print "The time for preprocessing was " + str(stop - start)

    # arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
    #                         lookup_kd_size=1, region_kd_size=region_size,
    #                         limit_bbox=Map.Map.reasonable_nyc_bbox)
    # arc_flags_map.assign_node_regions()
    arc_flags_map = cluster_kd.createMap(region_size)
    arc_flags_map.assign_link_arc_flags()
    # arc_flags_map.save_as_csv("nodeRegions.csv", "linkRegions.csv")
    print "loaded map"

    db_main.connect("db_functions/database.conf")
    d = datetime(2012, 3, 5, 2)
    db_arc_flags.load_arc_flags(arc_flags_map, d)
    print "loaded arcflags"

    d = date(2011, 3, 2)
    t = time(19, 40)
    t1 = time(20, 0)
    dt1 = datetime.combine(d, t)
    dt2 = datetime.combine(d, t1)

    # Load four identical copies of the trips, one for each routing method
    trips_arc = db_trip.find_pickup_dt(dt1, dt2)
    trips_arc = arc_flags_map.match_trips_to_nodes(trips_arc)
    trips_star = db_trip.find_pickup_dt(dt1, dt2)
    trips_star = arc_flags_map.match_trips_to_nodes(trips_star)
    trips_none = db_trip.find_pickup_dt(dt1, dt2)
    trips_none = arc_flags_map.match_trips_to_nodes(trips_none)
    trips_both = db_trip.find_pickup_dt(dt1, dt2)
    trips_both = arc_flags_map.match_trips_to_nodes(trips_both)
    db_main.close()

    same = True
    # for i in range(len(trips_star)):
    #     if trips_star[i].fromLon != trips_arc[i].fromLon:
    #         same = False
    #     if trips_star[i].toLon != trips_arc[i].toLon:
    #         same = False
    # print "the two trips are the same: " + str(same)
    print "got " + str(len(trips_arc)) + " trips"

    # start = timeit.default_timer()
    # arc_flags_map.routeTrips(trips_none)
    # stop = timeit.default_timer()
    # print "Computed trips using normal dijkstras in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_star, astar_used=True)
    stop = timeit.default_timer()
    print "Computed trips using Astar in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_arc, arcflags_used=True)
    stop = timeit.default_timer()
    print "Computed trips using arc_flags in " + str(stop - start)

    start = timeit.default_timer()
    arc_flags_map.routeTrips(trips_both, arcflags_used=True, astar_used=True)
    stop = timeit.default_timer()
    print "Computed trips using arc_flags and a_star in " + str(stop - start)

    # Compare each method's paths against the plain Dijkstra baseline
    failed_trips = []
    same = True
    for i in range(len(trips_arc)):
        if trips_none[i].path_links != trips_star[i].path_links:
            same = False
        if trips_none[i].path_links != trips_both[i].path_links:
            same = False
        if trips_none[i].path_links != trips_arc[i].path_links:
            time1 = 0
            time2 = 0
            draw_graphs(trips_none, trips_arc, arc_flags_map, i)
            for link in trips_none[i].path_links:
                time1 += link.time
            for link in trips_arc[i].path_links:
                time2 += link.time
            print ("The time for none is: " + str(time1) +
                   " The time for the arc_flags is: " + str(time2))
            failed_trips.append(trips_none[i])
            same = False
    # print "The four trips are the same: " + str(same)

    print "\n\n\n\n"
    return failed_trips, arc_flags_map