def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)

    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)
    print "keys", len(merged_count_obs.keys())

    # Save the averaged link counts to the database
    db_main.connect("db_functions/database.conf")
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # TODO: verify the number of arguments expected by save_link_counts_new()
    db_travel_times.save_link_counts_new(merged_count_obs)
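
# compute_all_link_counts() and load_pace_data() rely on a splitList() helper
# that is not shown in this section. The sketch below (given an illustrative
# name so it does not shadow the real helper) shows the assumed behaviour:
# partition a list into roughly equal contiguous chunks, one per worker
# process, so pool.map() can fan the work out. The actual implementation may differ.
def split_list_sketch(full_list, num_chunks):
    # Ceiling division so every element lands in some chunk; guard against empty input
    chunk_size = max(1, (len(full_list) + num_chunks - 1) // num_chunks)
    return [full_list[i:i + chunk_size]
            for i in xrange(0, len(full_list), chunk_size)]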
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                     "Saturday", "Sunday"]

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect("db_functions/database.conf")
    # dates = db_travel_times.get_available_dates()
    dates = list(dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)))

    # Only do this once for the whole dataset and store the result in the
    # link_counts_chicago table:
    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    if len(consistent_link_set) == 0:
        logMsg("Found 0 consistent links. Returning.")
        return
    else:
        print "len of consistent_link_set", len(consistent_link_set)
    db_main.close()

    logMsg("Generating vectors")
    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector into the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector into the group
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on link ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
def load_pace_data(perc_data_threshold=.95, pool=DefaultPool()):
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                     'Saturday', 'Sunday']

    # Connect to the database and get the available dates
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    dates = db_travel_times.get_available_dates()
    # dates = list(dateRange(datetime(2012, 10, 21), datetime(2012, 11, 11)))

    # logMsg("Computing consistent link set")
    # compute_all_link_counts(dates, pool=pool)

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    db_main.close()

    logMsg("Generating vectors")
    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates. We use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors,
                                           consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the vectors into one big list
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector will be created for each one
    for i in xrange(len(dates)):
        date = dates[i]
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector into the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector into the group
        pace_grouped[(weekday, hour)].append(vect)
        weights_grouped[(weekday, hour)].append(weight)
        dates_grouped[(weekday, hour)].append(just_date)

    # Assign trip names based on node ids
    trip_names = ["%d-->%d" % (start, end) for (start, end) in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
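
# A minimal usage sketch, not part of the original module. It assumes this file
# can be run as a script, that DefaultPool() is usable without extra setup, and
# that the 0.95 threshold mirrors the default declared above; adjust as needed.
if __name__ == "__main__":
    result = load_pace_data(perc_data_threshold=0.95, pool=DefaultPool())
    # load_pace_data() returns None when no consistent links are found
    if result is not None:
        (pace_timeseries, pace_grouped, weights_grouped,
         dates_grouped, trip_names, consistent_link_set) = result
        print "Loaded %d trip names across %d (weekday, hour) groups" % (
            len(trip_names), len(pace_grouped))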