def compute_filtered_data(self, dump_config, dump_structure):
    all_trips = self.get_all_trips(dump_structure)
    moving_dump_file_name = self.dump_directory + dump_config.__key__() + "_move_trips.dat"
    try:
        mv_n_mv_temp_store = custom_pickle.load_obj(moving_dump_file_name)
    except FileNotFoundError:
        move_trips_pairs = []
        non_move_trips_pairs = []
        # sort once by start time, then enumerate all ordered pairs (i < j)
        all_trips = sorted(all_trips, key=lambda _trip: _trip.start_s())
        for i, trip_1 in enumerate(all_trips):
            for trip_2 in all_trips[i + 1:]:
                if movable(trip_1, trip_2):
                    move_trips_pairs.append((trip_1, trip_2))
                else:
                    non_move_trips_pairs.append((trip_1, trip_2))
        mv_n_mv_temp_store = MoveNonMoveTempStore(move_trips_pairs, non_move_trips_pairs)
        custom_pickle.dump_obj(mv_n_mv_temp_store, moving_dump_file_name)
    return mv_n_mv_temp_store

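# Illustrative sketch (not part of the original module): the ordered-pair split above
# can also be written with itertools.combinations, assuming `movable` depends only on
# the two trips passed to it. `split_movable_pairs` is a hypothetical helper name.
def split_movable_pairs(trips, movable_fn):
    import itertools
    trips = sorted(trips, key=lambda _trip: _trip.start_s())
    move_pairs, non_move_pairs = [], []
    # combinations yields every i < j pair exactly once, in start-time order
    for trip_1, trip_2 in itertools.combinations(trips, 2):
        (move_pairs if movable_fn(trip_1, trip_2) else non_move_pairs).append((trip_1, trip_2))
    return move_pairs, non_move_pairs
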
def extract_trips():
    """
    Returns:
        list of operating/service trips from the GTFS dataset
    """
    try:
        operating_trips = custom_pickle.load_obj(gtfs_dump_file)
    except FileNotFoundError:
        shapes = fetch_shapes()
        trips = fetch_trips()
        stops = fetch_stops()
        trip_stop_times = fetch_stop_times()
        operating_trips = []
        for trip in trips:
            if trip.service_id == day_code.value:
                trip.set_shapes(shapes[trip.shape_id])
                stop_times = trip_stop_times[trip.trip_id]
                real_stops = []
                for stop_time in stop_times:
                    stop = stops[stop_time.stop_id]
                    real_stops.append(StopTimeEx(stop, stop_time))
                trip.set_start_and_end(real_stops[0], real_stops[-1])
                operating_trip = OperatingTrip(trip)
                # keep only trips that run for at least three minutes
                if operating_trip.duration.time_in_seconds >= 180:
                    operating_trip.add_locations(trip.get_lat_lon())
                    operating_trip.add_stops(real_stops)
                    operating_trips.append(operating_trip)
        custom_pickle.dump_obj(operating_trips, gtfs_dump_file)
    return operating_trips

def _load_filtered_data(self, dump_config):
    file_path = self.dump_directory + dump_config.__key__() + self.get_suffix() + self.dump_prefix + ".dat"
    self._make_dump_directory()
    if not os.path.exists(file_path):
        self.dump_data(dump_config, extract_trips_minimal())
    _filtered_trips = custom_pickle.load_obj(file_path)
    return _filtered_trips

def extract_moving_trips():
    """
    Returns:
        dictionary of basic moving-trip details keyed by
        (origin, destination, seconds since the start of the day)
    """
    import itertools
    start_end_points, routes = start_end_dump_generation()
    merge_json_dump()
    json_responses = json_response_dump_generation()
    __mov_trips = {}
    try:
        __mov_trips = custom_pickle.load_obj(move_trips_dump_file)
    except FileNotFoundError:
        time_stamp_day_code = get_time_stamp(day_code)
        start, end = get_range_values(agency_mode, day_code)
        time_stamp_diff = 3600  # hourly buckets
        time_stamps = [time_stamp_day_code + time_stamp_diff * i for i in range(start, end)]
        trip_id = 0
        missing_trip_energies = open(data_week_directory + "mov_trips_json.csv.missing", "w+")
        missing_write_once = False
        for time_stamp in time_stamps:
            for lat_lon_i, lat_lon_j in itertools.product(start_end_points.keys(), start_end_points.keys()):
                if lat_lon_i == lat_lon_j:
                    continue
                key = (lat_lon_i, lat_lon_j, time_stamp)
                if key in json_responses.keys():
                    parsed_json = json_responses[key]
                    gtfs_routes = parsed_json["routes"]
                    if len(gtfs_routes) > 0:
                        trip_id += 1
                        route = gtfs_routes[0]
                        legs = route["legs"]
                        if len(legs) > 1:
                            s_print_warn("Number of legs : {}".format(str(len(legs))))
                        leg = legs[0]
                        _distance = leg["distance"]["value"]
                        _duration = leg["duration"]["value"]
                        __mov_trips[(lat_lon_i, lat_lon_j, time_stamp - time_stamp_day_code)] = \
                            (_duration, _distance, trip_id)
                    else:
                        missing_write_once = True
                        missing_trip_energies.write(str((lat_lon_i, lat_lon_j, time_stamp)) + "\n")
                else:
                    trip_id += 1
                    __mov_trips[(lat_lon_i, lat_lon_j, time_stamp - time_stamp_day_code)] = \
                        (0, 0, trip_id)
        custom_pickle.dump_obj(__mov_trips, move_trips_dump_file)
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "mov_trips_json.csv.missing")
    return __mov_trips

def load_moving_trips():
    """
    Returns:
        dictionary of moving-trip details
    """
    if not os.path.exists(move_trips_mini_dump_file):
        clean_dumps()
        create_dumps()
    _mov_trips = custom_pickle.load_obj(move_trips_mini_dump_file)
    return _mov_trips

def fetch_routes():
    import pandas as pd
    routes_dump_file = input_dump_directory + "routes.dat"
    routes = []
    try:
        routes = custom_pickle.load_obj(routes_dump_file)
    except FileNotFoundError:
        # read routes.txt one row at a time to keep memory usage low
        for route_data in pd.read_csv(input_data_directory + "routes.txt", chunksize=1):
            route = BusRoute(route_data)
            routes.append(route)
        custom_pickle.dump_obj(routes, routes_dump_file)
    return routes

def fetch_trips():
    import pandas as pd
    trips_dump_file = input_dump_directory + "trips.dat"
    trips = []
    try:
        trips = custom_pickle.load_obj(trips_dump_file)
    except FileNotFoundError:
        for trip_data in pd.read_csv(input_data_directory + "trips.txt", chunksize=1):
            trip = BusTrip(trip_data)
            trips.append(trip)
        custom_pickle.dump_obj(trips, trips_dump_file)
    return trips

def fetch_stops():
    import pandas as pd
    stops_dump_file = input_dump_directory + "stops.dat"
    stops = {}
    try:
        stops = custom_pickle.load_obj(stops_dump_file)
    except FileNotFoundError:
        for stop_data in pd.read_csv(input_data_directory + "stops.txt", chunksize=1):
            stop = Stop(stop_data)
            stops[stop.__dict__["stop_id"]] = stop
        custom_pickle.dump_obj(stops, stops_dump_file)
    return stops

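# Hedged sketch (an assumption, not original code): fetch_routes, fetch_trips, and
# fetch_stops all follow the same "load pickle, else rebuild and dump" shape, which a
# generic helper could factor out. `_load_or_build` is a hypothetical name; it relies
# only on custom_pickle.load_obj/dump_obj as used throughout this module.
def _load_or_build(dump_file, build_fn):
    """Return the pickled object at dump_file, rebuilding it with build_fn on a cache miss."""
    try:
        return custom_pickle.load_obj(dump_file)
    except FileNotFoundError:
        obj = build_fn()
        custom_pickle.dump_obj(obj, dump_file)
        return obj
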
def merge_json_dump():
    """
    Merges all the JSON responses collected from the Directions API into a single file.
    """
    full_json_responses = {}
    _files = extract(f"{data_week_directory}/json_dumps/", ".dat")
    for _file in _files:
        try:
            json_responses = custom_pickle.load_obj(_file)
            full_json_responses.update(json_responses.copy())
        except FileNotFoundError:
            s_print_err(_file + " is missing !!!")
    _json_dump_file = f"{data_week_directory}/json_dump.dat"
    custom_pickle.dump_obj(full_json_responses, _json_dump_file)

def extract_moving_trip_minimal():
    """
    Returns:
        dictionary of basic moving-trip details with energy estimates
    """
    __moving_trips = {}
    try:
        __moving_trips = custom_pickle.load_obj(move_trips_mini_dump_file)
    except FileNotFoundError:
        electric_energy_consumptions = get_energy_values("MTrips_electric_predict.csv")
        gasoline_energy_consumptions = get_energy_values("MTrips_gas_predict.csv")
        missing_trip_energies = open(data_week_directory + "mov_trips.csv.missing", "w+")
        missing_write_once = False
        mov_trips = extract_moving_trips()
        for mov_trip_key in mov_trips.keys():
            lat_lon_i, lat_lon_j, time_in_sec = mov_trip_key
            _duration, _distance, _trip_id = mov_trips[mov_trip_key]
            lat_i, lon_i = lat_lon_i.split(",")
            lat_j, lon_j = lat_lon_j.split(",")
            energy_key = round(float(lat_i), 5), round(float(lon_i), 5), \
                round(float(lat_j), 5), round(float(lon_j), 5), time_in_sec
            electric = 0
            gasoline = 0
            missing = False
            if lat_lon_i != lat_lon_j:
                if energy_key in electric_energy_consumptions.keys():
                    electric = electric_energy_consumptions[energy_key]
                else:
                    missing = True
                if energy_key in gasoline_energy_consumptions.keys():
                    gasoline = gasoline_energy_consumptions[energy_key]
                else:
                    missing = True
            if missing:
                missing_trip_energies.write(str(energy_key) + "\n")
                missing_write_once = True
            __moving_trips[mov_trip_key] = (_duration, _distance, electric, gasoline, _trip_id)
        custom_pickle.dump_obj(__moving_trips, move_trips_mini_dump_file)
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "mov_trips.csv.missing")
    return __moving_trips

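# Illustrative helper (hypothetical, not in the original code): the energy lookups in
# extract_moving_trip_minimal build their key by rounding coordinates to 5 decimal
# places; a shared builder would make that convention explicit.
def _energy_key(lat_lon_i, lat_lon_j, time_in_sec):
    lat_i, lon_i = (round(float(v), 5) for v in lat_lon_i.split(","))
    lat_j, lon_j = (round(float(v), 5) for v in lat_lon_j.split(","))
    return lat_i, lon_i, lat_j, lon_j, time_in_sec
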
def json_response_dump_generation():
    """
    Returns:
        the dictionary of the JSON dump. This function is kept to preserve
        the original flow but is no longer used.
    """
    json_responses = {}
    try:
        json_responses = custom_pickle.load_obj(json_dump_file)
    except FileNotFoundError:
        # Functionality to handle this exception can be found in the ``fetch`` directory.
        s_print_err(json_dump_file + " file is missing !!!")
    return json_responses

def fetch_stop_times():
    import pandas as pd
    stop_times_dump_file = input_dump_directory + "stop_times.dat"
    trip_stop_times_dict = {}
    try:
        trip_stop_times_dict = custom_pickle.load_obj(stop_times_dump_file)
    except FileNotFoundError:
        for stop_time_data in pd.read_csv(input_data_directory + "stop_times.txt", chunksize=1):
            stop_time = StopTime(stop_time_data)
            key = stop_time.__dict__["trip_id"]
            # group stop times by trip_id
            if key in trip_stop_times_dict.keys():
                trip_stop_times_dict[key].append(stop_time)
            else:
                trip_stop_times_dict[key] = [stop_time]
        custom_pickle.dump_obj(trip_stop_times_dict, stop_times_dump_file)
    return trip_stop_times_dict

def fetch_shapes():
    import pandas as pd
    shapes_dump_file = input_dump_directory + "shapes.dat"
    shape_dict = {}
    try:
        shape_dict = custom_pickle.load_obj(shapes_dump_file)
    except FileNotFoundError:
        for shape_data in pd.read_csv(input_data_directory + "shapes.txt", chunksize=1):
            shape = Shape(shape_data)
            key_sh = shape.__dict__["shape_id"]
            # group shape points by shape_id
            if key_sh in shape_dict.keys():
                shape_dict[key_sh].append(shape)
            else:
                shape_dict[key_sh] = [shape]
        custom_pickle.dump_obj(shape_dict, shapes_dump_file)
    return shape_dict

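# Alternative sketch (not in the original code): the grouping loops in fetch_stop_times
# and fetch_shapes are equivalent to collections.defaultdict(list); a hypothetical
# shared helper, keyed by an attribute name:
def _group_by(records, key_attr):
    from collections import defaultdict
    grouped = defaultdict(list)
    for record in records:
        grouped[record.__dict__[key_attr]].append(record)
    return dict(grouped)
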
def start_end_dump_generation():
    """
    Returns:
        the start and end points of each route, which can be used to
        compute the moving trips
    """
    start_end_points = {}
    routes = {}
    try:
        start_end_points, routes = custom_pickle.load_obj(se_dump_file)
    except FileNotFoundError:
        trips = extract_trips_minimal()
        for trip in trips:
            start_lat_lon = trip.route.start_pos.lat_lon()
            end_lat_lon = trip.route.end_pos.lat_lon()
            if (start_lat_lon, end_lat_lon) not in routes.keys():
                routes[(start_lat_lon, end_lat_lon)] = trip
            if start_lat_lon not in start_end_points.keys():
                start_end_points[start_lat_lon] = trip.route.start_pos
            if end_lat_lon not in start_end_points.keys():
                start_end_points[end_lat_lon] = trip.route.end_pos
        custom_pickle.dump_obj((start_end_points, routes), se_dump_file)
    return start_end_points, routes

def extract_trips_minimal():
    """
    Returns:
        a minimal version of the operating trips; this reduces memory usage
        during dump generation, especially in the IP
    """
    try:
        operating_trips = custom_pickle.load_obj(gtfs_mini_dump_file)
    except FileNotFoundError:
        shapes = fetch_shapes()
        trips = fetch_trips()
        stops = fetch_stops()
        trip_stop_times = fetch_stop_times()
        electric_energy_consumptions = get_energy_values("Trips_electric_predict.csv")
        gasoline_energy_consumptions = get_energy_values("Trips_gas_predict.csv")
        missing_trip_energies = open(data_week_directory + "trips.csv.missing", "w+")
        operating_trips = []
        bus_line_trip_counts = {}
        bus_line_min_trip_time = {}
        missing_write_once = False
        for trip in trips:
            if trip.service_id != day_code.value:
                continue
            if trip.route_id in bus_line_trip_counts.keys():
                bus_line_trip_counts[trip.route_id] += 1
            else:
                bus_line_trip_counts[trip.route_id] = 1
            trip.set_shapes(shapes[trip.shape_id])
            stop_times = trip_stop_times[trip.trip_id]
            real_stops = []
            for stop_time in stop_times:
                real_stops.append(StopTimeEx(stops[stop_time.stop_id], stop_time))
            trip.set_start_and_end(real_stops[0], real_stops[-1])
            energy_key = trip.trip_key()
            operating_trip = OperatingTrip(trip)
            if operating_trip.duration.time_in_seconds > 200:
                duration_s = operating_trip.duration.time_in_seconds
                # track the shortest qualifying trip per bus line
                if trip.route_id in bus_line_min_trip_time.keys():
                    bus_line_min_trip_time[trip.route_id] = min(bus_line_min_trip_time[trip.route_id], duration_s)
                else:
                    bus_line_min_trip_time[trip.route_id] = duration_s
                missing = False
                if energy_key in electric_energy_consumptions.keys():
                    operating_trip.add_soc(electric_energy_consumptions[energy_key])
                else:
                    missing = True
                if energy_key in gasoline_energy_consumptions.keys():
                    operating_trip.add_gasoline_energy(gasoline_energy_consumptions[energy_key])
                else:
                    missing = True
                if missing:
                    missing_trip_energies.write(str(energy_key) + "\n")
                    missing_write_once = True
                operating_trip.set_distance(get_euclidean_distance(trip.get_lat_lon()))
                operating_trips.append(operating_trip)
        custom_pickle.dump_obj(operating_trips, gtfs_mini_dump_file)
        trip_count_file = open(trip_count_csv_file, "w+")
        trip_count_file.write("route_id,number_of_trips,min_duration\n")
        for route_id in bus_line_trip_counts.keys():
            min_trip_time = bus_line_min_trip_time.get(route_id, 0)
            trip_count_file.write("{},{},{}\n".format(route_id, bus_line_trip_counts[route_id], min_trip_time))
        trip_count_file.close()
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "trips.csv.missing")
    return operating_trips

def load(self, filename):
    # copy the pickled object's state onto this instance
    obj = custom_pickle.load_obj(filename)
    self.__dict__.update(obj.__dict__.copy())
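
# Usage sketch, assuming custom_pickle round-trips arbitrary objects and that some
# consumer class (here the hypothetical `Schedule`) provides this load method:
#
#   schedule = Schedule()
#   schedule.load("schedule.dat")  # the pickled object's attributes are copied onto self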