def load_speeds_for_year(self, year, directory, recalc_field_lengths=False):
    """Load a full calendar year of 5-minute speeds for this station.

    For every day of ``year`` the day's traffic file (named by
    ``self.traffic_filename_from_date``) is opened from ``directory``,
    the 1-minute speeds of all detectors are averaged, gaps are imputed,
    and the result is reduced to 288 five-minute slots.

    A day is stored as 288 ``None`` values when the station has no
    detectors or when reading that day's file raises ``IOError``.

    Args:
        year: Calendar year to load.
        directory: Directory containing the daily traffic files.
        recalc_field_lengths: Passed through unchanged to each
            detector's ``load_speeds``.

    Returns:
        The object array also stored on ``self.speeds``, with one row
        per day of the year (365 or 366) and 288 columns.
    """
    slots_per_day = 288  # number of 5-minute slots in one day

    # The message is built as one parenthesised expression so the output is
    # the same whether ``print`` is the Python 2 statement or the function.
    if self._verbose:
        print(" ".join(("Loading speeds for station ", str(self.id))))

    first_day = date(year, 1, 1)
    # Inclusive day count: Dec 31 minus Jan 1 is one short of the number
    # of days in the year, which previously dropped Dec 31 entirely.
    n_days = (date(year, 12, 31) - first_day).days + 1

    # 2D array of speeds; dimensions: day of year, 5-minute timeslot
    self.speeds = empty((n_days, slots_per_day), dtype=object)

    for day in range(n_days):
        current_day = first_day + timedelta(days=day)
        if self._verbose:
            print(" ".join((" Loading speeds for ", str(current_day))))

        if not self.detector_list:
            # Without detectors there are no valid speeds.
            day_speeds = [None] * slots_per_day
        else:
            try:
                traffic_file = self.traffic_filename_from_date(current_day)
                tr = TrafficReader(path.join(directory, traffic_file))

                # average 1min speeds across detectors
                day_speeds = impute.average_multilist(
                    [detector.load_speeds(tr, recalc_field_lengths)
                     for detector in self.detector_list])

                # "short duration temporal linear regression": impute
                # gaps up to 3 slots long using adjacent values
                day_speeds = impute.impute_range(day_speeds,
                                                 impute_length=3,
                                                 input_length=3)

                # average 1min speeds to 5min speeds
                day_speeds = impute.average_list(day_speeds, 5)

                # "short duration temporal linear regression" again,
                # this time on the 5min values
                day_speeds = impute.impute_range(day_speeds,
                                                 impute_length=3,
                                                 input_length=3)

                # remove any single missing values by averaging
                # adjacent values
                day_speeds = impute.impute1(day_speeds)
            except IOError:
                # No readable file for this day: record invalid speeds.
                day_speeds = [None] * slots_per_day

        self.speeds[day, :] = day_speeds

    return self.speeds
def long_temporal_impute(self):
    """Impute longer gaps along the time axis, one station-day at a time.

    ``self.speeds`` is indexed as (station, day, timeslot).  Each
    station/day row is passed through ``impute.impute_range`` with
    ``impute_length=6`` and ``input_length=6`` and written back in
    place.  Nothing happens when the corridor has no stations.
    """
    # a corridor without stations has nothing to impute
    if not len(self.station_list):
        return

    grid = self.speeds
    for station_idx in range(grid.shape[0]):
        for day_idx in range(grid.shape[1]):
            # copy the day's timeslots into a plain list for the helper
            day_row = list(grid[station_idx, day_idx, :])
            filled = impute.impute_range(day_row,
                                         impute_length=6,
                                         input_length=6)
            grid[station_idx, day_idx, :] = filled
def load_speeds(self, traffic_reader, recalc_field_lengths=False):
    """Load this station's 5-minute speeds into ``self.speed_list``.

    The 1-minute speeds of all detectors are averaged, gaps are imputed
    with ``impute.impute_range``, the result is averaged down to
    5-minute values and single missing values are filled by
    ``impute.impute1``.

    A station without detectors gets 288 ``None`` values instead.

    Args:
        traffic_reader: Reader handed to each detector's ``load_speeds``.
        recalc_field_lengths: Forwarded to each detector's
            ``load_speeds`` in the first pass over ``self.detectors()``.

    Returns:
        None.  The result is stored on ``self.speed_list``.
    """
    if not self.detector_list:
        # No detectors: every slot is invalid.  The result must land on
        # ``speed_list`` like the normal path; previously only ``speeds``
        # was written, leaving ``speed_list`` unset for such stations.
        self.speed_list = [None] * 288
        # Still set ``speeds`` so any caller relying on the old
        # attribute keeps working.
        self.speeds = [None] * 288
        return

    # NOTE(review): every detector is loaded here and again in the list
    # comprehension below (there without ``recalc_field_lengths``).  This
    # looks redundant unless the first call has a side effect the second
    # depends on -- confirm against Detector.load_speeds before merging.
    for detector in self.detectors():
        detector.load_speeds(traffic_reader, recalc_field_lengths)

    # average 1min speeds across detectors
    speeds = impute.average_multilist(
        [detector.load_speeds(traffic_reader)
         for detector in self.detector_list])
    # impute short gaps in the 1min values
    speeds = impute.impute_range(speeds, impute_length=3, input_length=3)
    # average 1min speeds to 5min speeds
    speeds = impute.average_list(speeds, 5)
    # fill remaining single missing values
    self.speed_list = impute.impute1(speeds)
def spatial_impute(self):
    """Impute missing speeds across stations for each day and timeslot.

    ``self.speeds`` is indexed as (station, day, timeslot).  For every
    (day, timeslot) pair the values of all stations are passed through
    ``impute.impute_range`` with ``impute_length=4`` and
    ``input_length=1`` and written back in place.  Nothing happens when
    the corridor has no stations.
    """
    # a corridor without stations has nothing to impute
    if not len(self.station_list):
        return

    grid = self.speeds
    n_days = grid.shape[1]  # dimension 1 is day
    for day_idx in range(n_days):
        # dimension 2 is timeslot
        for slot_idx in range(grid.shape[2]):
            # values along the spatial axis (dimension 0) for this cell
            across_stations = list(grid[:, day_idx, slot_idx])
            filled = impute.impute_range(across_stations,
                                         impute_length=4,
                                         input_length=1)
            grid[:, day_idx, slot_idx] = filled
def weekly_impute(self):
    """Impute missing speeds from the same weekday in neighbouring weeks.

    ``self.speeds`` is indexed as (station, day, timeslot).  Starting
    from each of the first 7 days, every seventh day is taken for each
    station and timeslot, passed through ``impute.impute_range`` with
    ``impute_length=3`` and ``input_length=2``, and written back in
    place.  Nothing happens when the corridor has no stations.
    """
    # a corridor without stations has nothing to impute
    if not len(self.station_list):
        return

    grid = self.speeds
    n_stations = grid.shape[0]  # dimension 0 is station
    for weekday_offset in range(7):
        for station_idx in range(n_stations):
            # dimension 2 is timeslot
            for slot_idx in range(grid.shape[2]):
                # every seventh day (dimension 1) from this offset
                same_weekday = list(
                    grid[station_idx, weekday_offset::7, slot_idx])
                filled = impute.impute_range(same_weekday,
                                             impute_length=3,
                                             input_length=2)
                grid[station_idx, weekday_offset::7, slot_idx] = filled