Example #1
def load_pace_vectors(dates, consistent_link_set):
    # Map (begin_node,connecting_node) --> ID in the pace vector
    link_id_map = defaultdict(lambda : -1) # -1 indicates an invalid ID number    
    for i in xrange(len(consistent_link_set)):
        # print i
        key = consistent_link_set[i]
        # print long(key)
        key = long(key)
        link_id_map[key] = i
        
    db_main.connect('db_functions/database.conf')
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        # print date
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))
        
        
        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor_new(date)
        # Assign travel times into the vector, if this link is in the consistent link set
        for (link_id, date_time, paces, num_cars) in curs:
            # print (link_id, date_time, paces, num_cars)
            # print "link_id",link_id
            i = link_id_map[link_id] # i will be -1 if the link is not in the consistent link set
            if(i>=0):
                vect[i] = paces
                weight[i] = num_cars
        vects.append(vect)
        weights.append(weight)
    db_main.close()
    return vects, weights
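A minimal usage sketch for load_pace_vectors above; the dates and link IDs are placeholders (in the real pipeline the IDs come from load_consistent_link_set, which is referenced in load_pace_data further down the page):

# Usage sketch -- placeholder dates and link IDs, not taken from the original repo
from datetime import datetime, timedelta

example_dates = [datetime(2014, 6, 2) + timedelta(hours=h) for h in range(24)]
example_links = [1001, 1002, 1003]  # hypothetical link IDs
vects, weights = load_pace_vectors(example_dates, example_links)
print(len(vects))      # one pace vector per date
print(vects[0].shape)  # column vector of shape (len(example_links), 1)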
Example #2
def run_chunk(road_map, time):
    try:
        print("Connecting to db")
        db_main.connect("db_functions/database.conf", retry_interval=10)
        
        print (str(datetime.now()) + " : Analysing " + str(time))
        road_map.unflatten()
    
        t1 = datetime.now()    
        trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1))
        t2 = datetime.now()
        db_main.close()
        print ("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1))
        
    
    
        estimate_travel_times(road_map, trips, max_iter=2, test_set=None, distance_weighting=None, model_idle_time=False, initial_idle_time=0)
        t3 = datetime.now()    
        print (str(t3) + " : Finished estimating traffic for " + str(time) + " after " + str(t3-t2))
        
        
        road_map.save_speeds('tmp_speeds.csv')
    

        #db_main.close()
    except Exception as e:
        print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
def load_pace_vectors(dates, consistent_link_set):
    # Map (begin_node,connecting_node) --> ID in the pace vector
    link_id_map = defaultdict(lambda : -1) # -1 indicates an invalid ID number    
    for i in xrange(len(consistent_link_set)):
        key = consistent_link_set[i]
        link_id_map[key] = i
        
    db_main.connect('db_functions/database.conf')
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))
        
        
        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor(date)
        
        # Assign travel times into the vector, if this link is in the consistent link set
        for (begin_node_id, end_node_id, date_time, travel_time, num_trips) in curs:
            i = link_id_map[begin_node_id, end_node_id] # i will be -1 if the link is not in the consistent link set
            if(i>=0):
                vect[i] = travel_time
                weight[i] = num_trips
        vects.append(vect)
        weights.append(weight)
    
    db_main.close()
    return vects, weights
def run_chunk(road_map, time):
    try:
        print("Connecting to db")
        db_main.connect("db_functions/database.conf", retry_interval=10)
        
        print (str(datetime.now()) + " : Analysing " + str(time))
        road_map.unflatten()
    
        t1 = datetime.now()    
        trips = db_trip.find_pickup_dt(time, time + timedelta(hours=1))
        t2 = datetime.now()
        db_main.close()
        print ("Loaded " + str(len(trips)) + " trips after " + str(t2 - t1))
        
    
    
        estimate_travel_times(road_map, trips, max_iter=20, test_set=None,
                              distance_weighting=None, model_idle_time=False, initial_idle_time=0)
        t3 = datetime.now()    
        print (str(t3) + " : Finished estimating traffic for " + str(time) + " after " + str(t3-t2))
    
        db_main.connect("db_functions/database.conf", retry_interval=10)
        t1 = datetime.now()
        db_travel_times.save_travel_times(road_map, time)
        t2 = datetime.now()
        print("Saved travel times after " + str(t2 - t1))
        db_main.close()
    except Exception as e:
        print("Failed to estimate traffic for %s : %s" % (str(time), e.message))
def load_pace_vectors(dates, consistent_link_set):
    # Map (begin_node,connecting_node) --> ID in the pace vector
    link_id_map = defaultdict(lambda: -1)  # -1 indicates an invalid ID number
    for i in xrange(len(consistent_link_set)):
        # print i
        key = consistent_link_set[i]
        # print long(key)
        key = long(key)
        link_id_map[key] = i

    db_main.connect("db_functions/database.conf")
    vects = []
    weights = []
    for date in dates:
        # Initialize to zero
        # print date
        vect = matrix(zeros((len(consistent_link_set), 1)))
        weight = matrix(zeros((len(consistent_link_set), 1)))

        # Get the travel times for this datetime
        curs = db_travel_times.get_travel_times_cursor_new(date)
        # Assign travel times into the vector, if this link is in the consistent link set
        for (link_id, date_time, paces, num_cars) in curs:
            # print (link_id, date_time, paces, num_cars)
            # print "link_id",link_id
            i = link_id_map[link_id]  # i will be -1 if the link is not in the consistent link set
            if i >= 0:
                vect[i] = paces
                weight[i] = num_cars
        vects.append(vect)
        weights.append(weight)
    db_main.close()
    return vects, weights
Example #6
def extract_trips():
    print("Loading map")
    road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv", limit_bbox=Map.reasonable_nyc_bbox)
    db_main.connect("db_functions/database.conf")
    
    
    
    dt1 = datetime(2012,6,1,12)
    dt2 = datetime(2012,6,1,12,30)
    print("Loading trips")
    trips = db_trip.find_pickup_dt(dt1, dt2)
    
    
    print("Matching trips")
    samp = trips[1:100]
    new_samp = road_map.match_trips_to_nodes(samp)
    road_map.routeTrips(new_samp)
    
    with open('trip_links.csv', 'w') as f:
        w = csv.writer(f)
        w.writerow(['trip_id', 'from_lat','from_lon','to_lat','to_lon'])
        for i in range(len(new_samp)):
            print i
            if(samp[i].path_links !=None):
                for link in samp[i].path_links:
                    w.writerow([i, link.origin_node.lat, link.origin_node.long, link.connecting_node.lat, link.connecting_node.long])
Example #7
def analyse_trip_times():
    db_main.connect('db_functions/database.conf')
    
    
    datelist = [datetime(year=2012, month=7, day=8, hour=0) + timedelta(hours=1)*x for x in range(168*3)]
    
    for date in datelist:
        trips = db_trip.find_pickup_dt(date, date+timedelta(hours=1))
        print("%s  :  %d" % (date, len(trips)))
Example #8
def test():
	program_start = datetime.now()
	print "test started at: %s" %program_start.strftime("%Y-%m-%d %H:%M:%S")
	# Connect to the database
	db_main.connect("db_functions/database.conf")
	trips = db_trip.find_pickup_dt('2010-01-01 00:34:00', '2010-01-01 12:34:00')
	run_time = datetime.now() - program_start
	print run_time
	print len(trips)
Example #9
def createMap(region_size):

	db_main.connect("db_functions/database.conf")


	d = date(2011, 3, 2)
	t = time(19, 40)
	t1 = time(20, 00)
	dt1 = datetime.combine(d, t)
	dt2 = datetime.combine(d, t1)

	trips_arc = db_trip.find_pickup_dt(dt1, dt2)
	
	region_size = region_size/4
	approxSize = len(trips_arc)/region_size

	arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
		                      lookup_kd_size=1, region_kd_size=approxSize,
		                      limit_bbox=Map.Map.reasonable_nyc_bbox)
	arc_flags_map.assign_node_regions()

	
	trips_arc = arc_flags_map.match_trips_to_nodes(trips_arc)


	for trip in trips_arc:
		trip.origin_node.trip_weight += 1
		trip.dest_node.trip_weight += 1

		

	arc_flags_map.build_kd_trees(split_weights=True)
	arc_flags_map.assign_node_regions()


	# same_region = 0
	# for trip in trips_arc:
	# 	if trip.origin_node.region_id == trip.dest_node.region_id:
	# 		same_region+=1

	# print "number of trips in same region are: %d out of %d \n" % (same_region, len(trips_arc))

	region_graph_generator(arc_flags_map)

	db_main.close()

	# region_to_trips = {}
	# for node in arc_flags_map.nodes:
	# 	if node.region_id in region_to_trips:
	# 		region_to_trips[node.region_id] += node.trip_weight
	# 	else:
	# 		region_to_trips[node.region_id] = node.trip_weight
	# for i in region_to_trips:
	# 	print "Region %d: %d trips" % (i, region_to_trips[i])

	return arc_flags_map
def run(region_size=250):
    # nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
    #               lookup_kd_size=1, region_kd_size=region_size,
    #               limit_bbox=Map.reasonable_nyc_bbox)
    # nyc_map.assign_node_regions()
    nyc_map = cluster_kd.createMap(region_size)
    nyc_map.assign_link_arc_flags()

    #nyc_map.save_region("../nyc_map4/region.csv")
    #get_correct_nodes(nyc_map, "../speeds_per_hour/" + map_file, None)

    i = 0
    print nyc_map.total_region_count
    for region_id in range(nyc_map.total_region_count):
        # print "Next Region!"

        boundary_nodes = nyc_map.get_region_boundary_nodes(region_id)

        # Does a multi-origin bidirectional dijkstra search to get an
        # arcflag tree
        warmstart = True
        use_domination_value = False
        DijkstrasAlgorithm.bidirectional_dijkstra(boundary_nodes,
                                                  nyc_map,
                                                  warmstart,
                                                  use_domination_value)
        #####################################################################
        # DRAW ARC_FLAGS USING THIS
        # pace_dict = {}
        # for link in nyc_map.links:
        #     if link.backward_arc_flags_vector[i] == True:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
        #     else:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
        # plot_estimates.plot_speed(nyc_map, "Backward Arc Flags Region: " + str(i), "Backward"+str(i), pace_dict)

        # pace_dict = {}
        # for link in nyc_map.links:
        #     if link.forward_arc_flags_vector[i] == True:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5
        #     else:
        #         pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5

        # plot_estimates.plot_speed(nyc_map, "Forward Arc Flags Region: " + str(i), "Forward"+str(i), pace_dict)

        #####################################################################
        i += 1


    d = datetime(2012,3,5,2)
    db_main.connect("db_functions/database.conf")
    db_arc_flags.create_arc_flag_table()
    db_arc_flags.save_arc_flags(nyc_map, d)


    db_main.close()
Example #11
def test():
    pool = Pool(4)

    print("Connecting")
    db_main.connect('db_functions/database.conf')
    
    print("Loading Pace Data")
    data = load_pace_data(perc_data_threshold=.6, pool=pool)
    with open('tmp_vectors_chi_1_group.pickle', 'w') as f:
        pickle.dump(data, f)
def test():
    pool = Pool(4)

    print ("Connecting")
    db_main.connect("db_functions/database.conf")

    print ("Loading Pace Data")
    data = load_pace_data(perc_data_threshold=0.6, pool=pool)
    with open("tmp_vectors_chi_1_group.pickle", "w") as f:
        pickle.dump(data, f)
Example #13
def compute_link_counts(dates):
    num_appearances = defaultdict(float)
    db_main.connect('db_functions/database.conf')
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor_new(date)
        for [link_id, date_time, paces, num_cars] in curs:
            num_appearances[link_id] += 1
    
    db_main.close()
    
    return num_appearances
def compute_link_counts(dates):
    num_appearances = defaultdict(float)
    db_main.connect("db_functions/database.conf")
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor_new(date)
        for [link_id, date_time, paces, num_cars] in curs:
            num_appearances[link_id] += 1

    db_main.close()

    return num_appearances
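A direct single-process call to compute_link_counts; the date range is a placeholder (the parallel wrapper compute_all_link_counts appears later on this page):

# Usage sketch -- placeholder date range
from datetime import datetime, timedelta

example_dates = [datetime(2014, 6, 2) + timedelta(hours=h) for h in range(48)]
num_appearances = compute_link_counts(example_dates)
print(len(num_appearances))           # number of distinct links observed
print(max(num_appearances.values()))  # highest appearance count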
def approximate_job_sizes():
    global approx_job_size
    
    print("Approximating job sizes.")
    db_main.connect("db_functions/database.conf", retry_interval=10)
    
    d1 = datetime(2012,6,2)
    d2 = datetime(2012,6,9)
    for d in dateRange(d1, d2, timedelta(hours=1)):
        sql = "SELECT count(*) FROM trip WHERE pickup_datetime >= '%s' AND pickup_datetime < '%s'" % (
                d, d+timedelta(hours=1))
        (jsize,) = db_main.execute(sql).next()
        approx_job_size[d.weekday(), d.hour] = jsize
def approximate_job_sizes():
    global approx_job_size

    print("Approximating job sizes.")
    db_main.connect("db_functions/database.conf", retry_interval=10)

    d1 = datetime(2012, 6, 2)
    d2 = datetime(2012, 6, 9)
    for d in dateRange(d1, d2, timedelta(hours=1)):
        sql = "SELECT count(*) FROM trip WHERE pickup_datetime >= '%s' AND pickup_datetime < '%s'" % (
            d, d + timedelta(hours=1))
        (jsize, ) = db_main.execute(sql).next()
        approx_job_size[d.weekday(), d.hour] = jsize
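approximate_job_sizes only fills the approx_job_size dictionary; one plausible use, which is an assumption and not shown in the source, is ordering the one-hour chunks largest-first before handing them to run_chunk:

# Scheduling sketch -- the largest-first ordering is an assumption, not taken from the source
from datetime import datetime, timedelta
from routing.Map import Map

approximate_job_sizes()
road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")
hours = [datetime(2012, 6, 2) + timedelta(hours=h) for h in range(7 * 24)]
hours.sort(key=lambda d: approx_job_size[d.weekday(), d.hour], reverse=True)
for h in hours:
    run_chunk(road_map, h)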
def compute_link_counts(dates):
    num_obs = defaultdict(float)
    num_appearances = defaultdict(float)
    db_main.connect('db_functions/database.conf')
    for date in dates:
        curs = db_travel_times.get_travel_times_cursor(date)
        for [begin_node_id, end_node_id, date_time, travel_time, num_trips] in curs:
            num_obs[begin_node_id, end_node_id] += num_trips
            num_appearances[begin_node_id, end_node_id] += 1
    
    db_main.close()
    
    return num_obs, num_appearances
def run_full_day():
    pool = Pool(8)
    
    dates = [datetime(2012,4,15,h) for h in xrange(24)]
    
    for start_date in dates:
        end_date = start_date + timedelta(hours=1)
        db_main.connect('db_functions/database.conf')
        trips = db_trip.find_pickup_dt(start_date, end_date)
        db_main.close()
        
        
        fn_prefix = "2012_4_15_%d" % start_date.hour
        perform_cv(trips, 'nyc_map4/nodes.csv', 'nyc_map4/links.csv', 8, pool, fn_prefix=fn_prefix)
Example #19
def test_memory_usage():
    from db_functions import db_main, db_trip
    from datetime import datetime
    print("Before: %f" % getmem())
    nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv", limit_bbox=Map.reasonable_nyc_bbox)
    print [nyc_map.min_lat, nyc_map.max_lat, nyc_map.min_lon, nyc_map.max_lon]
    
    db_main.connect('db_functions/database.conf')
    d1 = datetime(2012,1,10,9)
    d2 = datetime(2012,1,10,10)
    trips = db_trip.find_pickup_dt(d1, d2)
    print("Matching...")
    nyc_map.match_trips_to_nodes(trips)
    

    print("After : %f" % getmem())
    del(nyc_map)
Example #20
def test_memory_usage():
    from db_functions import db_main, db_trip
    from datetime import datetime
    print("Before: %f" % getmem())
    nyc_map = Map("nyc_map4/nodes.csv",
                  "nyc_map4/links.csv",
                  limit_bbox=Map.reasonable_nyc_bbox)
    print [nyc_map.min_lat, nyc_map.max_lat, nyc_map.min_lon, nyc_map.max_lon]

    db_main.connect('db_functions/database.conf')
    d1 = datetime(2012, 1, 10, 9)
    d2 = datetime(2012, 1, 10, 10)
    trips = db_trip.find_pickup_dt(d1, d2)
    print("Matching...")
    nyc_map.match_trips_to_nodes(trips)

    print("After : %f" % getmem())
    del (nyc_map)
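getmem() is called in both versions of test_memory_usage but is not defined in these snippets; a minimal sketch using the standard resource module (units are kilobytes on Linux and bytes on macOS) could look like this:

# Sketch of a getmem() helper -- the original implementation is not shown on this page
import resource

def getmem():
    # Peak resident set size of the current process (KB on Linux, bytes on macOS)
    return float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)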
Example #21
def analyse_trip_locations():
    db_main.connect('db_functions/database.conf')
    
    
    datelist = [datetime(year=2012, month=7, day=8, hour=0) + timedelta(hours=1)*x for x in range(168*3)]
    
    #nyc_map = Map('nyc_map4/nodes.csv', 'nyc_map4/links.csv', limit_bbox=Map.reasonable_nyc_bbox)
    nyc_map = Map('nyc_map4/nodes.csv', 'nyc_map4/links.csv')

    
    print [nyc_map.min_lat, nyc_map.max_lat, nyc_map.min_lon, nyc_map.max_lon]

    valid_trips = 0
    bad_region_trips = 0
    jfk_trips = 0
    
    for date in datelist:
        trips = db_trip.find_pickup_dt(date, date+timedelta(hours=1))
        print("%s  :  %d" % (date, len(trips)))
        
        for trip in trips:
            if(trip.isValid()==Trip.VALID):
                valid_trips += 1
                if(nyc_map.get_nearest_node(trip.fromLat, trip.fromLon)==None or
                    nyc_map.get_nearest_node(trip.toLat, trip.toLon)==None):
                    
                    bad_region_trips += 1
                    
                    if(jfk(trip.fromLat, trip.fromLon) or jfk(trip.toLat, trip.toLon)):
                        jfk_trips += 1
                    
                    
                        
        
        print ("Bad trips : %d / %d = %f" % (bad_region_trips, valid_trips, float(bad_region_trips)/valid_trips))
        perc = 0.0
        if(bad_region_trips>0):
            perc = float(jfk_trips)/bad_region_trips
        print ("JFK trips : %d / %d = %f" % (jfk_trips, bad_region_trips, perc))

        
        
            
Example #22
def plot_many_speeds():
    print("Getting dates")
    db_main.connect("db_functions/database.conf")
    #curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-03-04' and datetime < '2012-03-11';")
    #curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-06-17' and datetime < '2012-06-24';")

    #dates = [date for (date,) in curs]
    
    #dates = [datetime(2010,6,1,12) + timedelta(days=7)*x for x in range(208)]
    dates = [datetime(2010,1,6,10) + timedelta(days=7)*x for x in range(208)]
    
    dates.sort()    
    print ("There are %d dates" % len(dates))
    
    print ("Loading map.")
    road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")
    for date in dates:
        print("running %s" % str(date))
        plot_speed(road_map, date, "analysis/wednesdays/" + str(date) + ".png")
Example #24
def run_independent(failed_trip, i):
	arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
		                      lookup_kd_size=1, region_kd_size=1000,
		                      limit_bbox=Map.Map.reasonable_nyc_bbox)
	arc_flags_map.assign_node_regions()
	arc_flags_map.assign_link_arc_flags()
	# region_graph_generator(arc_flags_map)

	db_main.connect("db_functions/database.conf")

	failed_trip.path_links = []
	trips = [failed_trip]
	
	trips_arc = arc_flags_map.match_trips_to_nodes(trips)

	# originNode = arc_flags_map.nodes_by_id[trips_arc[0].origin_node_id]
	# destNode = arc_flags_map.nodes_by_id[trips_arc[0].dest_node_id]

	originNode = trips_arc[0].origin_node
	destNode = trips_arc[0].dest_node
	

	boundary_nodes = arc_flags_map.get_region_boundary_nodes(originNode.region_id)
	DijkstrasAlgorithm.DijkstrasAlgorithm.independent_dijkstra(boundary_nodes, arc_flags_map)
	DijkstrasAlgorithm.DijkstrasAlgorithm.set_arc_flags(arc_flags_map, boundary_nodes[0].region_id)

	boundary_nodes = arc_flags_map.get_region_boundary_nodes(destNode.region_id)
	DijkstrasAlgorithm.DijkstrasAlgorithm.independent_dijkstra(boundary_nodes, arc_flags_map)
	DijkstrasAlgorithm.DijkstrasAlgorithm.set_arc_flags(arc_flags_map, boundary_nodes[0].region_id)

	draw_arc_flags(arc_flags_map, destNode.region_id, True)
	draw_arc_flags(arc_flags_map, originNode.region_id, False)

	pace_dict = {}
	arc_flags_map.routeTrips(trips_arc, arcflags_used=True)
	for link in arc_flags_map.links:
		pace_dict[(link.origin_node_id, link.connecting_node_id)] = -5
	for link in trips_arc[0].path_links:
		pace_dict[(link.origin_node_id, link.connecting_node_id)] = 5

	plot_estimates.plot_speed(arc_flags_map, "Independent" + str(i), "Independent" + str(i), pace_dict)
Example #25
def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)
    # print "1", num_obs_list[0]
    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)
    
    print "keys", len(merged_count_obs.keys())
    db_main.connect('db_functions/database.conf')
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # Issue of num of arguments
    db_travel_times.save_link_counts_new(merged_count_obs)
def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)
    # print "1", num_obs_list[0]
    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)

    print "keys", len(merged_count_obs.keys())
    db_main.connect("db_functions/database.conf")
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # Issue of num of arguments
    db_travel_times.save_link_counts_new(merged_count_obs)
def test():
    pool = Pool(8)

    print("Connecting")
    db_main.connect('db_functions/database.conf')
    
    print("Getting dates")
    curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2012-06-17' and datetime < '2012-06-24' order by datetime;")
    #curs = db_main.execute("select distinct datetime from travel_times where datetime>= '2013-01-01' and datetime < '2013-01-02' order by datetime;")
    #curs = db_main.execute("select distinct datetime from travel_times;")

    
    
    dates = [date for (date,) in curs]
    print ("Found %d dates" % len(dates))
    
    compute_all_link_counts(dates, pool=pool)


    print("Loading Pace Data")
    data = load_pace_data(perc_data_threshold=.95, pool=pool)
    with open('tmp_vectors.pickle', 'w') as f:
        pickle.dump(data, f)
Example #28
from datetime import datetime, timedelta
from db_functions import db_main, db_trip

db_main.connect('db_functions/database.conf')

dates = [datetime(2012, 4, 18, h) for h in range(24)]
s = ""
for d in dates:
    end = d + timedelta(hours=1)
    trips = db_trip.find_pickup_dt(d, end)
    total_dur = sum([trip.time for trip in trips])
    avg_dur = float(total_dur) / len(trips)
    print(avg_dur)
    s = s + "%f," % avg_dur

print(s)
Example #29
from datetime import datetime, timedelta
from db_functions import db_main, db_trip

db_main.connect('db_functions/database.conf')


dates = [datetime(2012,4,18,h) for h in range(24)]
s = ""
for d in dates:
    end = d + timedelta(hours=1)
    trips = db_trip.find_pickup_dt(d,end)
    total_dur = sum([trip.time for trip in trips])
    avg_dur = float(total_dur) / len(trips)
    print(avg_dur)
    s = s + "%f,"%avg_dur


print(s)
Example #30
"""
Created on Sat Jan 10 21:12:41 2015

@author: brian
"""

from traffic_estimation.Trip import Trip
from db_functions import db_main, db_travel_times, db_trip
from routing.Map import Map
from datetime import datetime

from traffic_estimation.TrafficEstimation import estimate_travel_times, load_trips


# Connect to the database
db_main.connect("db_functions/database.conf")



db_travel_times.drop_travel_time_table()
db_travel_times.create_travel_time_table()


# Load map
print("Loading map")
road_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")

print("Loading trips")
trips = db_trip.find_pickup_dt('2010-01-02 00:00:00', '2010-01-02 01:00:00')
print (len(trips))
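The snippet above stops after loading the trips; a hedged sketch of the likely next steps, mirroring the run_chunk examples earlier on this page (this continuation is not part of the original file):

# Continuation sketch -- mirrors run_chunk above; not part of the original snippet
estimate_travel_times(road_map, trips, max_iter=20)
db_travel_times.save_travel_times(road_map, datetime(2010, 1, 2, 0))
db_main.close()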
Example #31
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    
    # Connect to the database and get the available dates
    logMsg ("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    # dates = db_travel_times.get_available_dates()
    dates = list(dateRange(datetime(2014,06,01), datetime(2014,07,01)))
    
    ''' Only Do Once for the whole dataset and store in link_counts_chicago table'''
    #logMsg ("Computing consistent link set")
    #compute_all_link_counts(dates, pool=pool)
    
    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    if len(consistent_link_set) == 0:
        logMsg("Find 0 consistent_links. Return.")
        return
    else:
        print("len of consistent_link_set", len(consistent_link_set))
    db_main.close()
    
    logMsg("Generating vectors")

    #Initialize dictionaries    
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)


    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates.  We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)
    
    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]
    
    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]
      
        
        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]
        
        #Save vector in the timeseries
        
        
        #save the vector into the group
        # pace_grouped[(weekday, hour)].append(vect)
        # weights_grouped[(weekday, hour)].append(weight)
        # dates_grouped[(weekday, hour)].append(just_date)

        # use constant as key for this moment
        # weekday = 0
        # hour = 0
        # print just_date
        pace_timeseries[(just_date, hour, weekday)] = vect
        # print "vect here =========", vect
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)


    
    # print pace_timeseries.keys()
    print len(pace_grouped[(0,0)]), len(pace_grouped[(0,0)][0])
    # Assign trip names based on node ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]
    
    # print "    len", len(pace_grouped.values())
    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
                trip_names, consistent_link_set)
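A sketch of unpacking the tuple returned by load_pace_data; the threshold and pool size are copied from the test() examples above:

# Usage sketch -- threshold and pool size taken from the test() examples on this page
from multiprocessing import Pool

data = load_pace_data(perc_data_threshold=0.6, pool=Pool(4))
(pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
 trip_names, consistent_link_set) = data
print(len(trip_names))                  # one name per consistent link
print(sorted(pace_grouped.keys())[:3])  # (weekday, hour) group keys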
def load_pace_data(perc_data_threshold=.95, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    
    # Connect to the database and get the available dates
    logMsg ("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    dates = db_travel_times.get_available_dates()
    #dates = list(dateRange(datetime(2012,10,21), datetime(2012,11,11))) 
    
    
    #logMsg ("Computing consistent link set")
    #compute_all_link_counts(dates, pool=pool)
    
    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    
    db_main.close()
    
    
    
    logMsg("Generating vectors")

    #Initialize dictionaries    
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)


    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates.  We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)
    
    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]
    
    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]
      
        
        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]
        
        #Save vector in the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect
        
        #save the vector into the group
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)
        
    
    
    # Assign trip names based on node ids
    trip_names = ["%d-->%d"%(start, end) for (start, end) in consistent_link_set]
            
    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
                trip_names, consistent_link_set)
Example #33
    data = "\n".join(csv_lines)
    
    
    #print "\n".join(csv_lines[:1000])

    p1 = Popen(['Rscript', 'traffic_estimation/plot_speeds_piped.R', filename, title, plot_type], stdout=PIPE, stdin=PIPE)
    _ = p1.communicate(data) # R output is discarded
    #print(_)
    del(_)
    

    #remove(filename + ".csv")

def plot_group_of_speeds((dts, pace_dicts), road_map, tmp_dir):
    road_map.unflatten()
    db_main.connect("db_functions/database.conf")
    for i in range(len(dts)):
        dt = dts[i]
        if(pace_dicts==None):
            pace_dict = None
        else:
            pace_dict = pace_dicts[i]
        
        out_file = path.join(tmp_dir, str(dt) + ".png")
        plot_speed(road_map, dt, out_file, pace_dict=pace_dict)
    db_main.close()


def plot_speeds_in_parallel(road_map, dts, speed_dicts=None, tmp_dir="analysis/tmp", pool=DefaultPool()):
    road_map.flatten()
    plt_speeds_fun = partial(plot_group_of_speeds, road_map=road_map, tmp_dir = tmp_dir)
Example #34
def run_test(region_size, preprocess=False):
	if preprocess:
		start = timeit.default_timer()
		ArcFlagsPreProcess.run(region_size)
		stop = timeit.default_timer()
		print "The time for preprocessing was " + str(stop-start)

	# arc_flags_map = Map.Map("nyc_map4/nodes.csv", "nyc_map4/links.csv",
	#                       lookup_kd_size=1, region_kd_size=region_size,
	#                       limit_bbox=Map.Map.reasonable_nyc_bbox)
	# arc_flags_map.assign_node_regions()

	arc_flags_map = cluster_kd.createMap(region_size)
	arc_flags_map.assign_link_arc_flags()

	###########################
	# arc_flags_map.save_as_csv("nodeRegions.csv", "linkRegions.csv")
	###########################

	print "loaded map"
	db_main.connect("db_functions/database.conf")
	d = datetime(2012,3,5,2)
	db_arc_flags.load_arc_flags(arc_flags_map, d)
	print "loaded arcflags"


	d = date(2011, 3, 2)
	t = time(19, 40)
	t1 = time(20, 00)
	dt1 = datetime.combine(d, t)
	dt2 = datetime.combine(d, t1)

	trips_arc = db_trip.find_pickup_dt(dt1, dt2)
	trips_arc = arc_flags_map.match_trips_to_nodes(trips_arc)

		
	trips_star = db_trip.find_pickup_dt(dt1, dt2)
	trips_star = arc_flags_map.match_trips_to_nodes(trips_star)

	trips_none = db_trip.find_pickup_dt(dt1, dt2)
	trips_none = arc_flags_map.match_trips_to_nodes(trips_none)

	trips_both = db_trip.find_pickup_dt(dt1, dt2)
	trips_both = arc_flags_map.match_trips_to_nodes(trips_both)

	db_main.close()

	same = True
	# for i in range(len(trips_star)):
	# 	if trips_star[i].fromLon != trips_arc[i].fromLon:
	# 		same = False
	# 	if trips_star[i].toLon != trips_arc[i].toLon:
	# 		same = False
	# print "the two trips are the same: " + str(same)

	print "got " + str(len(trips_arc)) + " trips"

	# start = timeit.default_timer()
	# arc_flags_map.routeTrips(trips_none)
	# stop = timeit.default_timer()
	# print "Computed trips using normal dijkstras in " + str(stop-start)

	start = timeit.default_timer()
	arc_flags_map.routeTrips(trips_star, astar_used=True)
	stop = timeit.default_timer()
	print "Computed trips using Astar in " + str(stop-start)

	start = timeit.default_timer()
	arc_flags_map.routeTrips(trips_arc, arcflags_used=True)
	stop = timeit.default_timer()
	print "Computed trips using arc_flags in " + str(stop-start)

	start = timeit.default_timer()
	arc_flags_map.routeTrips(trips_both, arcflags_used=True, astar_used=True)
	stop = timeit.default_timer()
	print "Computed trips using arc_flags and a_star in " + str(stop-start)

	failed_trips = []
	same = True
	for i in range(len(trips_arc)):
		if trips_none[i].path_links != trips_star[i].path_links:
			same = False
		if trips_none[i].path_links != trips_both[i].path_links:
			same = False
		if trips_none[i].path_links != trips_arc[i].path_links:
			time1 = 0
			time2 = 0
			draw_graphs(trips_none, trips_arc, arc_flags_map, i)
			for link in trips_none[i].path_links:
				time1 += link.time
			for link in trips_arc[i].path_links:
				time2 += link.time
			print "The time for none is: " + str(time1) + " The time for the arcs_flags is: " + str(time2)
			failed_trips.append(trips_none[i])
			same = False
	# print "The four trips are the same: " + str(same)
	print "\n\n\n\n"
	return failed_trips, arc_flags_map
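run_test returns any trips whose routes disagreed between the routing variants, together with the map; a sketch of feeding those back into run_independent from Example #24 (the region size is a placeholder):

# Usage sketch -- region size is a placeholder
failed_trips, arc_flags_map = run_test(250, preprocess=True)
for i, failed_trip in enumerate(failed_trips):
    run_independent(failed_trip, i)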
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect("db_functions/database.conf")
    # dates = db_travel_times.get_available_dates()
    dates = list(dateRange(datetime(2014, 06, 01), datetime(2014, 07, 01)))

    """ Only Do Once for the whole dataset and store in link_counts_chicago table"""
    # logMsg ("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    if len(consistent_link_set) == 0:
        logMsg("Find 0 consistent_links. Return.")
        return
    else:
        print ("len of consistent_link_set", len(consistent_link_set))
    db_main.close()

    logMsg("Generating vectors")

    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates.  We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save vector in the timeseries

        # save the vector into the group
        # pace_grouped[(weekday, hour)].append(vect)
        # weights_grouped[(weekday, hour)].append(weight)
        # dates_grouped[(weekday, hour)].append(just_date)

        # use constant as key for this moment
        # weekday = 0
        # hour = 0
        # print just_date
        pace_timeseries[(just_date, hour, weekday)] = vect
        # print "vect here =========", vect
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # print pace_timeseries.keys()
    print len(pace_grouped[(0, 0)]), len(pace_grouped[(0, 0)][0])
    # Assign trip names based on node ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    # print "    len", len(pace_grouped.values())
    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped, trip_names, consistent_link_set)