def compute_filtered_data(self, dump_config, dump_structure):
    all_trips = self.get_all_trips(dump_structure)
    moving_dump_file_name = (dump_directory + dump_config.__key__() +
                             "_move_trips.dat")
    try:
        mv_n_mv_temp_store = custom_pickle.load_obj(moving_dump_file_name)
    except FileNotFoundError:
        move_trips_pairs = []
        non_move_trips_pairs = []
        # Sort once by start time, then classify every ordered pair (i < j)
        # instead of re-sorting and calling list.index() inside the loops.
        all_trips = sorted(all_trips, key=lambda _trip: _trip.start_s())
        for i, trip_1 in enumerate(all_trips):
            for trip_2 in all_trips[i + 1:]:
                if movable(trip_1, trip_2):
                    move_trips_pairs.append((trip_1, trip_2))
                else:
                    non_move_trips_pairs.append((trip_1, trip_2))
        mv_n_mv_temp_store = MoveNonMoveTempStore(move_trips_pairs,
                                                  non_move_trips_pairs)
        custom_pickle.dump_obj(mv_n_mv_temp_store, moving_dump_file_name)
    return mv_n_mv_temp_store
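# The functions in this module share a load-or-compute caching pattern built
# on the custom_pickle helpers. That module is not shown in this excerpt; the
# sketch below is only an assumption of what such a wrapper might look like
# if it is a thin layer over the standard pickle module.
import pickle


def load_obj(file_name):
    # Raises FileNotFoundError when no cached dump exists, which the callers
    # above catch in order to recompute and re-dump the data.
    with open(file_name, "rb") as dump_file:
        return pickle.load(dump_file)


def dump_obj(obj, file_name):
    # Serialize the freshly computed object so later runs can skip the work.
    with open(file_name, "wb") as dump_file:
        pickle.dump(obj, dump_file, protocol=pickle.HIGHEST_PROTOCOL)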
def extract_trips():
    """
    Returns:
        a list of operating (service) trips built from the GTFS dataset
    """
    try:
        operating_trips = custom_pickle.load_obj(gtfs_dump_file)
    except FileNotFoundError:
        shapes = fetch_shapes()
        trips = fetch_trips()
        stops = fetch_stops()
        trip_stop_times = fetch_stop_times()
        operating_trips = []
        for trip in trips:
            if trip.service_id == day_code.value:
                trip.set_shapes(shapes[trip.shape_id])
                stop_times = trip_stop_times[trip.trip_id]
                real_stops = []
                for stop_time in stop_times:
                    stop_id = stop_time.stop_id
                    stop = stops[stop_id]
                    real_stops.append(StopTimeEx(stop, stop_time))
                trip.set_start_and_end(real_stops[0], real_stops[-1])
                operating_trip = OperatingTrip(trip)
                # Skip very short trips (under three minutes).
                if operating_trip.duration.time_in_seconds >= 180:
                    operating_trip.add_locations(trip.get_lat_lon())
                    operating_trip.add_stops(real_stops)
                    operating_trips.append(operating_trip)
        custom_pickle.dump_obj(operating_trips, gtfs_dump_file)
    return operating_trips
def _load_filtered_data(self, dump_config):
    file_path = (self.dump_directory + dump_config.__key__() +
                 self.get_suffix() + self.dump_prefix + ".dat")
    self._make_dump_directory()
    if not os.path.exists(file_path):
        self.dump_data(dump_config, extract_trips_minimal())
    _filtered_trips = custom_pickle.load_obj(file_path)
    return _filtered_trips
def extract_moving_trips():
    """
    Returns:
        returns dictionary basic moving trip details
    """
    import itertools
    start_end_points, routes = start_end_dump_generation()
    merge_json_dump()
    json_responses = json_response_dump_generation()
    __mov_trips = {}
    try:
        __mov_trips = custom_pickle.load_obj(move_trips_dump_file)
    except FileNotFoundError:
        time_stamp_day_code = get_time_stamp(day_code)
        start, end = get_range_values(agency_mode, day_code)
        time_stamp_diff = 3600
        time_stamps = [
            time_stamp_day_code + time_stamp_diff * i
            for i in range(start, end)
        ]
        trip_id = 0
        missing_trip_energies = open(
            data_week_directory + "mov_trips_json.csv.missing", "w+")
        missing_write_once = False
        for time_stamp in time_stamps:
            for (lat_lon_i,
                 lat_lon_j) in itertools.product(start_end_points.keys(),
                                                 start_end_points.keys()):
                if lat_lon_i != lat_lon_j:
                    key = (lat_lon_i, lat_lon_j, time_stamp)
                    if key in json_responses.keys():
                        parsed_json = json_responses[key]
                        gtfs_routes = parsed_json["routes"]
                        if len(gtfs_routes) > 0:
                            trip_id += 1
                            route = gtfs_routes[0]
                            legs = route["legs"]
                            if len(legs) > 1:
                                s_print_warn("Number of legs : {}".format(
                                    str(len(legs))))
                            leg = legs[0]
                            _distance = leg["distance"]["value"]
                            _duration = leg["duration"]["value"]
                            __mov_trips[(lat_lon_i, lat_lon_j, time_stamp - time_stamp_day_code)] = \
                                (_duration, _distance, trip_id)
                    else:
                        missing_write_once = True
                        missing_trip_energies.write(
                            str((lat_lon_i, lat_lon_j, time_stamp)) + "\n")
                else:
                    trip_id += 1
                    __mov_trips[(lat_lon_i, lat_lon_j, time_stamp - time_stamp_day_code)] = \
                        (0, 0, trip_id)
        custom_pickle.dump_obj(__mov_trips, move_trips_dump_file)
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "mov_trips_json.csv.missing")
    return __mov_trips
def load_moving_trips():
    """
    Returns:
        dictionary of moving trip details
    """
    if not os.path.exists(move_trips_mini_dump_file):
        clean_dumps()
        create_dumps()

    _mov_trips = custom_pickle.load_obj(move_trips_mini_dump_file)

    return _mov_trips
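# Example usage (an assumption, not part of the original code): the cached
# dictionary built by extract_moving_trip_minimal() maps keys of the form
# (origin "lat,lon", destination "lat,lon", time offset in seconds) to
# (duration, distance, electric, gasoline, trip_id) tuples.
def print_moving_trip_summary():
    mov_trips = load_moving_trips()
    for (origin, destination, offset_s), value in mov_trips.items():
        duration, distance, electric, gasoline, trip_id = value
        print(trip_id, origin, destination, offset_s, duration, distance)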
def fetch_routes():
    import pandas as pd
    routes_dump_file = input_dump_directory + "routes.dat"
    routes = []
    try:
        routes = custom_pickle.load_obj(routes_dump_file)
    except FileNotFoundError:
        # chunksize=1 yields one single-row DataFrame per GTFS record.
        for route_data in pd.read_csv(input_data_directory + "routes.txt",
                                      chunksize=1):
            route = BusRoute(route_data)
            routes.append(route)
        custom_pickle.dump_obj(routes, routes_dump_file)
    return routes
def fetch_trips():
    import pandas as pd
    trips_dump_file = input_dump_directory + "trips.dat"
    trips = []
    try:
        trips = custom_pickle.load_obj(trips_dump_file)
    except FileNotFoundError:
        for trip_data in pd.read_csv(input_data_directory + "trips.txt", chunksize=1):
            trip = BusTrip(trip_data)
            trips.append(trip)
        custom_pickle.dump_obj(trips, trips_dump_file)
    return trips
def fetch_stops():
    import pandas as pd
    stops_dump_file = input_dump_directory + "stops.dat"
    stops = {}
    try:
        stops = custom_pickle.load_obj(stops_dump_file)
    except FileNotFoundError:
        for stop_data in pd.read_csv(input_data_directory + "stops.txt", chunksize=1):
            stop = Stop(stop_data)
            key = stop.stop_id
            stops[key] = stop
        custom_pickle.dump_obj(stops, stops_dump_file)
    return stops
def merge_json_dump():
    """
        This function merge all the JSON responses collected from
        Direction API into a single file.
    """
    full_json_responses = {}
    _files = extract(f"{data_week_directory}/json_dumps/", ".dat")
    for _file in _files:
        try:
            json_responses = custom_pickle.load_obj(_file)
            full_json_responses.update(json_responses.copy())
        except FileNotFoundError:
            s_print_err(_file + " is missing !!!")
    _json_dump_file = f"{data_week_directory}/json_dump.dat"
    custom_pickle.dump_obj(full_json_responses, _json_dump_file)
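# merge_json_dump() relies on an extract() helper that is not shown in this
# excerpt. The sketch below is an assumption: it simply lists the files in a
# directory whose names end with the given extension.
import os


def extract(directory, extension):
    return [
        os.path.join(directory, file_name)
        for file_name in sorted(os.listdir(directory))
        if file_name.endswith(extension)
    ]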
def extract_moving_trip_minimal():
    """
    Returns:
        returns dictionary basic moving trip details
    """
    __moving_trips = {}
    try:
        __moving_trips = custom_pickle.load_obj(move_trips_mini_dump_file)
    except FileNotFoundError:
        electric_energy_consumptions = get_energy_values(
            "MTrips_electric_predict.csv")
        gasoline_energy_consumptions = get_energy_values(
            "MTrips_gas_predict.csv")

        missing_trip_energies = open(
            data_week_directory + "mov_trips.csv.missing", "w+")
        missing_write_once = False
        mov_trips = extract_moving_trips()
        for mov_trip_key in mov_trips.keys():
            lat_lon_i, lat_lon_j, time_in_sec = mov_trip_key
            (_duration, _distance, _trip_id) = mov_trips[mov_trip_key]
            lat_i, lon_i = lat_lon_i.split(",")
            lat_j, lon_j = lat_lon_j.split(",")
            energy_key = round(float(lat_i), 5), round(float(lon_i), 5), \
                         round(float(lat_j), 5), round(float(lon_j), 5), time_in_sec
            electric = 0
            gasoline = 0
            missing = False
            # Idle "trips" (same origin and destination) consume no energy;
            # for real moves, look up the predicted consumption values.
            if lat_lon_i != lat_lon_j:
                if energy_key in electric_energy_consumptions.keys():
                    electric = electric_energy_consumptions[energy_key]
                else:
                    missing = True
                if energy_key in gasoline_energy_consumptions.keys():
                    gasoline = gasoline_energy_consumptions[energy_key]
                else:
                    missing = True
            if missing:
                missing_trip_energies.write(str(energy_key) + "\n")
                missing_write_once = True
            __moving_trips[mov_trip_key] = (_duration, _distance, electric,
                                            gasoline, _trip_id)
        custom_pickle.dump_obj(__moving_trips, move_trips_mini_dump_file)
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "mov_trips.csv.missing")
    return __moving_trips
def json_response_dump_generation():
    """
    Returns:
        this will returns the dictionary of json dump.
        this function was kept as in same flow. but no longer used.
    """
    json_responses = {}
    try:
        json_responses = custom_pickle.load_obj(json_dump_file)
    except FileNotFoundError:
        # Functionality to handle this exception can be found in the
        # ``fetch`` directory.
        s_print_err(json_dump_file + " file is missing !!!")
    return json_responses
def fetch_stop_times():
    import pandas as pd
    stop_times_dump_file = input_dump_directory + "stop_times.dat"
    trip_stop_times_dict = {}
    try:
        trip_stop_times_dict = custom_pickle.load_obj(stop_times_dump_file)
    except FileNotFoundError:
        for stop_time_data in pd.read_csv(input_data_directory + "stop_times.txt", chunksize=1):
            stop_time = StopTime(stop_time_data)
            key = stop_time.trip_id
            if key in trip_stop_times_dict.keys():
                trip_stop_times_dict[key].append(stop_time)
            else:
                trip_stop_times_dict[key] = [stop_time]
        custom_pickle.dump_obj(trip_stop_times_dict, stop_times_dump_file)
    return trip_stop_times_dict
def fetch_shapes():
    import pandas as pd
    shapes_dump_file = input_dump_directory + "shapes.dat"
    shape_dict = {}
    try:
        shape_dict = custom_pickle.load_obj(shapes_dump_file)
    except FileNotFoundError:
        for shape_data in pd.read_csv(input_data_directory + "shapes.txt", chunksize=1):
            shape = Shape(shape_data)
            key_sh = shape.shape_id
            if key_sh in shape_dict.keys():
                shape_dict[key_sh].append(shape)
            else:
                shape_dict[key_sh] = [shape]
        custom_pickle.dump_obj(shape_dict, shapes_dump_file)
    return shape_dict
def start_end_dump_generation():
    """
    Returns:
        returns list of start and end points for route which can be used
        to compute the moving trips
    """
    start_end_points = {}
    routes = {}
    try:
        start_end_points, routes = custom_pickle.load_obj(se_dump_file)
    except FileNotFoundError:
        trips = extract_trips_minimal()
        for trip in trips:
            start_lat_lon = trip.route.start_pos.lat_lon()
            end_lat_lon = trip.route.end_pos.lat_lon()
            if (start_lat_lon, end_lat_lon) not in routes.keys():
                routes[(start_lat_lon, end_lat_lon)] = trip
                if start_lat_lon not in start_end_points.keys():
                    start_end_points[start_lat_lon] = trip.route.start_pos
                if end_lat_lon not in start_end_points.keys():
                    start_end_points[end_lat_lon] = trip.route.end_pos
        custom_pickle.dump_obj((start_end_points, routes), se_dump_file)
    return start_end_points, routes
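# Example usage (an assumption, not part of the original code): the keys of
# start_end_points are "lat,lon" strings and the values are the corresponding
# position objects, while routes maps (start, end) pairs to one representative
# trip per route.
def list_route_endpoints():
    start_end_points, routes = start_end_dump_generation()
    for lat_lon in start_end_points.keys():
        lat, lon = lat_lon.split(",")
        print(lat, lon)
    print("distinct start/end pairs:", len(routes))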
def extract_trips_minimal():
    """
    Returns:
        returns minimal version of operating trips,
        this will reduce the memory usage in dump generation especially in IP
    """
    try:
        operating_trips = custom_pickle.load_obj(gtfs_mini_dump_file)
    except FileNotFoundError:
        shapes = fetch_shapes()
        trips = fetch_trips()
        stops = fetch_stops()
        trip_stop_times = fetch_stop_times()
        electric_energy_consumptions = get_energy_values(
            "Trips_electric_predict.csv")
        gasoline_energy_consumptions = get_energy_values(
            "Trips_gas_predict.csv")
        missing_trip_energies = open(data_week_directory + "trips.csv.missing",
                                     "w+")
        operating_trips = []
        bus_line_trip_counts = {}
        bus_line_min_trip_time = {}
        missing_write_once = False
        for trip in trips:
            if trip.service_id == day_code.value:
                if trip.route_id in bus_line_trip_counts.keys():
                    bus_line_trip_counts[trip.route_id] += 1
                else:
                    bus_line_trip_counts[trip.route_id] = 1
                trip.set_shapes(shapes[trip.shape_id])
                stop_times = trip_stop_times[trip.trip_id]
                real_stops = []
                for stop_time in stop_times:
                    stop_id = stop_time.stop_id
                    stop = stops[stop_id]
                    real_stops.append(StopTimeEx(stop, stop_time))
                trip.set_start_and_end(real_stops[0], real_stops[-1])
                energy_key = trip.trip_key()
                operating_trip = OperatingTrip(trip)
                if operating_trip.duration.time_in_seconds > 200:
                    # Track the shortest observed trip duration per route.
                    if trip.route_id in bus_line_min_trip_time.keys():
                        min_duration = bus_line_min_trip_time[trip.route_id]
                        if operating_trip.duration.time_in_seconds < min_duration:
                            bus_line_min_trip_time[trip.route_id] = \
                                operating_trip.duration.time_in_seconds
                    else:
                        bus_line_min_trip_time[trip.route_id] = \
                            operating_trip.duration.time_in_seconds
                    missing = False
                    if energy_key in electric_energy_consumptions.keys():
                        electric_value = electric_energy_consumptions[
                            energy_key]
                        operating_trip.add_soc(electric_value)
                    else:
                        missing = True
                    if energy_key in gasoline_energy_consumptions.keys():
                        gasoline_value = gasoline_energy_consumptions[
                            energy_key]
                        operating_trip.add_gasoline_energy(gasoline_value)
                    else:
                        missing = True
                    if missing:
                        missing_trip_energies.write(str(energy_key) + "\n")
                        missing_write_once = True
                    operating_trip.set_distance(
                        get_euclidean_distance(trip.get_lat_lon()))
                    operating_trips.append(operating_trip)
        custom_pickle.dump_obj(operating_trips, gtfs_mini_dump_file)
        trip_count_file = open(trip_count_csv_file, "w+")
        trip_count_file.write("route_id,number_of_trips,min_duration\n")
        for route_id in bus_line_trip_counts.keys():
            min_trip_time = 0
            if route_id in bus_line_min_trip_time:
                min_trip_time = bus_line_min_trip_time[route_id]
            write_line = "{},{},{}\n".format(route_id,
                                             bus_line_trip_counts[route_id],
                                             str(min_trip_time))
            trip_count_file.write(write_line)
        trip_count_file.close()
        missing_trip_energies.close()
        if not missing_write_once:
            delete_file(data_week_directory + "trips.csv.missing")
    return operating_trips
def load(self, filename):
    obj = custom_pickle.load_obj(filename)
    self.__dict__.update(obj.__dict__.copy())
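# A minimal, self-contained sketch (hypothetical class and function names) of
# the state-restore pattern used by load() above: dump an object, then rebuild
# another instance in place by copying the unpickled object's attributes.
class CachedStore:
    def __init__(self, values=None):
        self.values = values or []

    def load(self, filename):
        obj = custom_pickle.load_obj(filename)
        self.__dict__.update(obj.__dict__.copy())


def restore_example(filename):
    store = CachedStore()
    store.load(filename)
    return store.values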