def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)
    # print "1", num_obs_list[0]
    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)

    print "keys", len(merged_count_obs.keys())
    db_main.connect("db_functions/database.conf")
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # Issue of num of arguments
    db_travel_times.save_link_counts_new(merged_count_obs)
# Example no. 2
# 0
def compute_all_link_counts(dates, pool=DefaultPool()):
    # Split the list and compute the link counts of all slices in parallel
    it = splitList(dates, pool._processes)
    num_obs_list = pool.map(compute_link_counts, it)
    # print "1", num_obs_list[0]
    # Merge the outputs by summing each link count
    merged_count_obs = defaultdict(float)
    for num_appearances in num_obs_list:
        for key in num_appearances:
            merged_count_obs[key] += num_appearances[key]

    # Divide the sums by the total number of dates, in order to get the average
    for key in merged_count_obs:
        merged_count_obs[key] /= len(dates)
    
    print "keys", len(merged_count_obs.keys())
    db_main.connect('db_functions/database.conf')
    logMsg("Creating")
    db_travel_times.create_link_counts_table_new()
    logMsg("Saving")
    # Issue of num of arguments
    db_travel_times.save_link_counts_new(merged_count_obs)
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    """Load the pace vectors of the hard-coded date range and group them.

    The dates come from dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)).
    The consistent link set for these dates is loaded first, then the pace
    vectors are loaded in parallel through the pool and stored both as a time
    series and grouped by (weekday name, hour).

    Params:
        perc_data_threshold - forwarded to load_consistent_link_set()
        pool - object providing map() and a _processes attribute
    Returns:
        None if the consistent link set is empty, otherwise the tuple
        (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
        trip_names, consistent_link_set) where
            pace_timeseries - dict keyed by (date string, hour, weekday name)
            pace_grouped, weights_grouped, dates_grouped - defaultdicts of
                lists keyed by (weekday name, hour)
            trip_names - one "%d"-formatted string per consistent link
    """
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    # Connect to the database and get the dates to process
    logMsg("Getting relevant dates.")
    db_main.connect("db_functions/database.conf")
    try:
        # Alternative source of dates: db_travel_times.get_available_dates()
        # Plain decimal literals: the former 06/01 spelling is rejected by Python 3
        dates = list(dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)))

        # The link counts only have to be computed once for the whole dataset
        # (stored in the link_counts_chicago table).  To redo it, run
        # compute_all_link_counts(dates, pool=pool) at this point.

        logMsg("Loading consistent link set")
        consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    finally:
        # Close the connection on every path, including the early return below
        db_main.close()

    if len(consistent_link_set) == 0:
        logMsg("Find 0 consistent_links. Return.")
        return None
    print ("len of consistent_link_set", len(consistent_link_set))

    logMsg("Generating vectors")

    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates.  We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the per-slice (vectors, weights) pairs into two big lists
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector is expected for each one.  Indexing
    # (rather than zip) keeps the IndexError if fewer vectors than dates came back.
    for i, date in enumerate(dates):
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector in the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector, its weight and its date into the group
        group_key = (weekday, hour)
        pace_grouped[group_key].append(vect)
        weights_grouped[group_key].append(weight)
        dates_grouped[group_key].append(just_date)

    # A leftover debug print used to index pace_grouped[(0, 0)] here.  Groups
    # are keyed by (weekday name, hour), so that lookup created an empty list
    # in the defaultdict and then raised IndexError; it has been removed.

    # Assign trip names based on link ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped, trip_names, consistent_link_set)
def load_pace_data(perc_data_threshold=.95, pool=DefaultPool()):
    """Load the pace vectors of all available dates and group them.

    The dates are read with db_travel_times.get_available_dates() and the
    consistent link set is loaded for them.  The pace vectors are then loaded
    in parallel through the pool and stored both as a time series and grouped
    by (weekday name, hour).

    Params:
        perc_data_threshold - forwarded to load_consistent_link_set()
        pool - object providing map() and a _processes attribute
    Returns:
        the tuple (pace_timeseries, pace_grouped, weights_grouped,
        dates_grouped, trip_names, consistent_link_set) where
            pace_timeseries - dict keyed by (date string, hour, weekday name)
            pace_grouped, weights_grouped, dates_grouped - defaultdicts of
                lists keyed by (weekday name, hour)
            trip_names - one "start-->end" string per consistent link
    """
    day_labels = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')

    # The database connection is opened here and closed again as soon as the
    # dates and the consistent link set have been loaded
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    dates = db_travel_times.get_available_dates()

    logMsg("Loading consistent link set")
    consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    db_main.close()

    logMsg("Generating vectors")

    # Load the vectors chunk by chunk through the pool.  The partial pins
    # consistent_link_set so that every chunk is loaded for the same links.
    date_chunks = splitList(dates, pool._processes)
    load_chunk = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    chunk_results = pool.map(load_chunk, date_chunks)

    logMsg("Merging outputs.")
    # Concatenate the per-chunk (vectors, weights) pairs into two flat lists
    vects = []
    weights = []
    for chunk_vects, chunk_weights in chunk_results:
        vects.extend(chunk_vects)
        weights.extend(chunk_weights)

    # Output containers
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)

    # One vector and one weight are expected per date, in the same order
    for idx, date in enumerate(dates):
        vect = vects[idx]
        weight = weights[idx]

        # Break the timestamp into calendar date, hour and weekday name
        just_date = str(date.date())
        hour = date.hour
        weekday = day_labels[date.weekday()]

        # Record the vector in the time series ...
        pace_timeseries[(just_date, hour, weekday)] = vect

        # ... and in the group of its (weekday, hour) slot
        slot = (weekday, hour)
        pace_grouped[slot].append(vect)
        weights_grouped[slot].append(weight)
        dates_grouped[slot].append(just_date)

    # Name each trip after the two ids of its link
    trip_names = ["%d-->%d" % (origin, dest) for (origin, dest) in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)
# Example no. 5
# 0
def load_pace_data(perc_data_threshold, pool=DefaultPool()):
    """Load the pace vectors of the hard-coded date range and group them.

    The dates come from dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)).
    The consistent link set for these dates is loaded first, then the pace
    vectors are loaded in parallel through the pool and stored both as a time
    series and grouped by (weekday name, hour).

    Params:
        perc_data_threshold - forwarded to load_consistent_link_set()
        pool - object providing map() and a _processes attribute
    Returns:
        None if the consistent link set is empty, otherwise the tuple
        (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
        trip_names, consistent_link_set) where
            pace_timeseries - dict keyed by (date string, hour, weekday name)
            pace_grouped, weights_grouped, dates_grouped - defaultdicts of
                lists keyed by (weekday name, hour)
            trip_names - one "%d"-formatted string per consistent link
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Connect to the database and get the dates to process
    logMsg("Getting relevant dates.")
    db_main.connect('db_functions/database.conf')
    try:
        # Alternative source of dates: db_travel_times.get_available_dates()
        # Plain decimal literals: the former 06/01 spelling is rejected by Python 3
        dates = list(dateRange(datetime(2014, 6, 1), datetime(2014, 7, 1)))

        # The link counts only have to be computed once for the whole dataset
        # (stored in the link_counts_chicago table).  To redo it, run
        # compute_all_link_counts(dates, pool=pool) at this point.

        logMsg("Loading consistent link set")
        consistent_link_set = load_consistent_link_set(dates, perc_data_threshold)
    finally:
        # Close the connection on every path, including the early return below
        db_main.close()

    if len(consistent_link_set) == 0:
        logMsg("Find 0 consistent_links. Return.")
        return None
    print("len of consistent_link_set", len(consistent_link_set))

    logMsg("Generating vectors")

    # Initialize dictionaries
    pace_timeseries = {}
    pace_grouped = defaultdict(list)
    dates_grouped = defaultdict(list)
    weights_grouped = defaultdict(list)

    # Split the dates into several pieces and use parallel processing to load the
    # vectors for each of these dates.  We will use a partial function to hold the
    # consistent_link_set constant across all dates
    it = splitList(dates, pool._processes)
    load_pace_vectors_consistent = partial(load_pace_vectors, consistent_link_set=consistent_link_set)
    list_of_lists = pool.map(load_pace_vectors_consistent, it)

    logMsg("Merging outputs.")
    # Flatten the per-slice (vectors, weights) pairs into two big lists
    vects = [vect for vect_lst, weight_lst in list_of_lists for vect in vect_lst]
    weights = [weight for vect_lst, weight_lst in list_of_lists for weight in weight_lst]

    # Loop through all dates - one vector is expected for each one.  Indexing
    # (rather than zip) keeps the IndexError if fewer vectors than dates came back.
    for i, date in enumerate(dates):
        vect = vects[i]
        weight = weights[i]

        # Extract the date, hour of day, and day of week
        just_date = str(date.date())
        hour = date.hour
        weekday = weekday_names[date.weekday()]

        # Save the vector in the timeseries
        pace_timeseries[(just_date, hour, weekday)] = vect

        # Save the vector, its weight and its date into the group
        group_key = (weekday, hour)
        pace_grouped[group_key].append(vect)
        weights_grouped[group_key].append(weight)
        dates_grouped[group_key].append(just_date)

    # A leftover debug print used to index pace_grouped[(0,0)] here.  Groups
    # are keyed by (weekday name, hour), so that lookup created an empty list
    # in the defaultdict and then raised IndexError; it has been removed.

    # Assign trip names based on link ids
    trip_names = ["%d" % link_id for link_id in consistent_link_set]

    return (pace_timeseries, pace_grouped, weights_grouped, dates_grouped,
            trip_names, consistent_link_set)