Example #2
    def _write_stats(self):
        G = GTFS(self.day_db_path)
        net = combined_stop_to_stop_transit_network(G)
        sections = net.edges(data=True)
        n_links = len(sections)
        section_lengths = []
        vehicle_kilometers_per_section = []
        for from_I, to_I, data in sections:
            section_lengths.append(data['d'])
            vehicle_kilometers_per_section.append(data['n_vehicles'] * data['d'] / 1000.)

        stats = {"n_stops": len(G.stops(require_reference_in_stop_times=True)),
                 "n_connections": len(G.get_transit_events()),
                 "n_links": n_links,
                 "network_length_m": sum(section_lengths),
                 "link_distance_avg_m": int(sum(section_lengths) / len(section_lengths)),
                 "vehicle_kilometers": sum(vehicle_kilometers_per_section),
                 "buffer_center_lat": self.lat,
                 "buffer_center_lon": self.lon,
                 "buffer_radius_km": self.buffer_distance,
                 "extract_start_date": self.get_weekly_extract_start_date().strftime("%Y-%m-%d")
                 }
        self.__verify_stats(stats)
        df = pandas.DataFrame.from_dict({key: [value] for key, value in stats.items()})
        df.to_csv(self.stats_fname, sep=";", columns=list(sorted(stats.keys())), index=False)
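The stats file written above is a single-row, ";"-separated CSV, so it can be read back directly with pandas. A minimal sketch; "stats.csv" stands in for the actual self.stats_fname path:

import pandas

# Read the one-row stats table written by _write_stats (";"-separated).
stats_df = pandas.read_csv("stats.csv", sep=";")
# Each column holds a single scalar, e.g. the total network length in metres:
network_length_m = stats_df["network_length_m"].iloc[0]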
Example #3
    def setUp(self):
        self.gtfs_source_dir = os.path.join(os.path.dirname(__file__),
                                            "test_data")
        self.gtfs_source_dir_filter_test = os.path.join(
            self.gtfs_source_dir, "filter_test_feed/")

        # self.G = GTFS.from_directory_as_inmemory_db(self.gtfs_source_dir)

        # some preparations:
        self.fname = os.path.join(self.gtfs_source_dir, "test_gtfs.sqlite")
        self.fname_copy = os.path.join(self.gtfs_source_dir, "test_gtfs_copy.sqlite")
        self.fname_filter = os.path.join(self.gtfs_source_dir, "test_gtfs_filter_test.sqlite")

        self._remove_temporary_files()
        self.assertFalse(os.path.exists(self.fname_copy))

        conn = sqlite3.connect(self.fname)
        import_gtfs(self.gtfs_source_dir,
                    conn,
                    preserve_connection=True,
                    print_progress=False)
        conn_filter = sqlite3.connect(self.fname_filter)
        import_gtfs(self.gtfs_source_dir_filter_test,
                    conn_filter,
                    preserve_connection=True,
                    print_progress=False)

        self.G = GTFS(conn)
        self.G_filter_test = GTFS(conn_filter)

        with open(self.fname, 'rb') as f:
            self.hash_orig = hashlib.md5(f.read()).hexdigest()
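A matching tearDown is not shown; a hypothetical counterpart that closes the connections and removes the temporary databases created above:

    def tearDown(self):
        # Hypothetical cleanup mirroring setUp; attribute names follow the snippet above.
        self.G.conn.close()
        self.G_filter_test.conn.close()
        for fname in (self.fname, self.fname_copy, self.fname_filter):
            if os.path.exists(fname):
                os.remove(fname)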
Example #4
    def test_shape_break_order_1(self, trip_I=73775):
        """This is to a bug related to shape alignment."""
        conn = GTFS('../scratch/db/hsl-2015-07-12.sqlite').conn
        cur = conn.cursor()

        cur.execute('''SELECT seq, lat, lon
                       FROM stop_times LEFT JOIN stops USING (stop_I)
                       WHERE trip_I=?
                       ORDER BY seq''', (trip_I, ))
        stop_points = [dict(seq=row[0], lat=row[1], lon=row[2]) for row in cur]

        # Get the shape points
        shape_id = cur.execute('''SELECT shape_id FROM trips WHERE trip_I=?''',
                               (trip_I, )).fetchone()[0]
        shape_points = shapes.get_shape_points(cur, shape_id)
        breakpoints, badness = shapes.find_segments(stop_points, shape_points)
        print(badness)
        if badness > 30:
            print("bad shape fit: %s (%s, %s)" % (badness, trip_I, shape_id))

        for b1, b2 in zip(breakpoints, sorted(breakpoints)):
            self.assertEqual(b1, b2)
Example #5
    def test_frequencyLoader(self):
        import_gtfs(self.fdict, self.conn, preserve_connection=True)
        # "\nfrequency_route, freq_service, freq_trip, going north, freq_name, shape_es1" \
        keys = ["trip_I", "start_time", "end_time", "headway_secs", "exact_times", "start_time_ds", "end_time_ds"]
        self.setDictConn()
        rows = self.conn.execute("SELECT * FROM frequencies").fetchall()
        for key in keys:
            row = rows[0]
            assert key in row
        for row in rows:
            if row["start_time_ds"] == 14 * 3600:
                self.assertEqual(row["exact_times"], 1)
        # there should be twelve trips with service_id 'freq_service'
        count = self.conn.execute("SELECT count(*) AS count FROM trips JOIN calendar "
                                  "USING(service_I) WHERE service_id='freq_service'").fetchone()['count']

        assert count == 12, count
        rows = self.conn.execute("SELECT trip_I FROM trips JOIN calendar "
                                 "USING(service_I) WHERE service_id='freq_service'").fetchall()
        for row in rows:
            trip_I = row['trip_I']
            res = self.conn.execute("SELECT * FROM stop_times WHERE trip_I=?",
                                    (trip_I,)).fetchall()
            assert len(res) > 1, res
        self.setRowConn()
        g = GTFS(self.conn)
        print("Stop times: \n\n ", g.get_table("stop_times"))
        print("Frequencies: \n\n ", g.get_table("frequencies"))
Example #6
    def _correct_coordinates_for_raw_db(self):
        g = GTFS(self.raw_db_path)
        df = self.coordinate_corrections
        for feed in self.feeds:
            feed_df = df.loc[df["feed"] == feed]
            print("Updating", len(feed_df.index), "known stop coordinate errors")
            g.update_stop_coordinates(feed_df)
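update_stop_coordinates presumably expects one row per corrected stop. A hypothetical corrections table; the real column names come from self.coordinate_corrections and are not shown in this snippet:

import pandas

# Hypothetical layout; only the "feed" column is actually referenced above.
coordinate_corrections = pandas.DataFrame({
    "feed": ["hsl", "hsl"],
    "stop_id": ["1010101", "1010102"],
    "lat": [60.1699, 60.1708],
    "lon": [24.9384, 24.9402],
})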
Example #7
    def __init__(self, gtfssource, gtfs, verbose=True):
        """
        Parameters
        ----------
        gtfs_sources: list, string, dict
            list of paths to the strings, or a dictionary directly containing the gtfs data directly
        gtfs: gtfspy.gtfs.GTFS, or path to a relevant .sqlite GTFS database
        verbose: bool
            Whether or not to print warnings on-the-fly.
        """
        if isinstance(gtfssource, string_types + (dict, )):
            self.gtfs_sources = [gtfssource]
        else:
            assert isinstance(gtfssource, list)
            self.gtfs_sources = gtfssource
        assert len(
            self.gtfs_sources
        ) > 0, "There needs to be some source files for validating an import"

        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs

        self.location = self.gtfs.get_location_name()
        self.warnings_container = WarningsContainer()
        self.verbose = verbose
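A typical call pattern, assuming this is gtfspy's ImportValidator; the method names below are assumptions inferred from the class, not confirmed by the snippet:

# Hypothetical usage; validate_and_get_warnings/write_summary are assumed names.
validator = ImportValidator(["data/feed_dir/"], "data/feed.sqlite")
warnings = validator.validate_and_get_warnings()
warnings.write_summary()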
Example #8
def validate_day_start_ut(conn):
    """This validates the day_start_ut of the days table."""
    G = GTFS(conn)
    cur = conn.execute('SELECT date, day_start_ut FROM days')
    for date, day_start_ut in cur:
        #print date, day_start_ut
        assert day_start_ut == G.get_day_start_ut(date)
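In gtfspy, a date's day_start_ut is anchored to local noon minus 12 hours rather than midnight, so daylight-saving transitions do not shift it. A sketch of that convention, assuming pytz is available and a Helsinki feed:

import datetime
import pytz

def day_start_ut_sketch(date_str, tz_name="Europe/Helsinki"):
    """Unix time of local noon minus 12 hours on the given date."""
    tz = pytz.timezone(tz_name)
    noon = tz.localize(datetime.datetime.strptime(date_str, "%Y-%m-%d").replace(hour=12))
    return int((noon - datetime.timedelta(hours=12)).timestamp())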
Example #9
def get_stop_I_by_stop_id(stop_id):
    from gtfspy.gtfs import GTFS
    g = GTFS(IMPORTED_DATABASE_PATH)
    query = "SELECT stop_I FROM stops WHERE stop_id='" + str(stop_id) + "';"
    print(stop_id)
    stop_I = g.execute_custom_query(query).fetchone()[0]
    return stop_I
Example #10
    def test_filter_spatially(self):
        # test that the db is split by a given spatial boundary
        FilterExtract(self.G,
                      self.fname_copy,
                      buffer_lat=36.914893,
                      buffer_lon=-116.76821,
                      buffer_distance_km=50).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)

        stops_table = G_copy.get_table("stops")
        self.assertNotIn("FUR_CREEK_RES", stops_table['stop_id'].values)
        self.assertIn("AMV", stops_table['stop_id'].values)
        self.assertEqual(len(stops_table['stop_id'].values), 8)

        conn_copy = sqlite3.connect(self.fname_copy)
        stop_ids_df = pandas.read_sql(
            'SELECT stop_id from stop_times '
            'left join stops '
            'on stops.stop_I = stop_times.stop_I', conn_copy)
        stop_ids = stop_ids_df["stop_id"].values

        self.assertNotIn("FUR_CREEK_RES", stop_ids)
        self.assertIn("AMV", stop_ids)

        trips_table = G_copy.get_table("trips")
        self.assertNotIn("BFC1", trips_table['trip_id'].values)

        routes_table = G_copy.get_table("routes")
        self.assertNotIn("BFC", routes_table['route_id'].values)
Example #11
    def __init__(self, journey_db_path, gtfs_path):
        assert os.path.isfile(journey_db_path)
        assert os.path.isfile(gtfs_path)
        self.conn = sqlite3.connect(journey_db_path)
        self.g = GTFS(gtfs_path)
        self.gtfs_path = gtfs_path
        self.conn = attach_database(self.conn, self.gtfs_path)
Example #12
    def test_filter_end_date_not_included(self):
        # the end date should not be included:
        FilterExtract(self.G,
                      self.fname_copy,
                      start_date="2007-01-02",
                      end_date="2010-12-31").create_filtered_copy()

        with open(self.fname_copy, 'rb') as f:
            hash_copy = hashlib.md5(f.read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(
            dsut_end, dsut_end + 24 * 3600)
        self.assertEqual(len(dsut_to_trip_I), 0)

        calendar_copy = G_copy.get_table("calendar")
        max_date_calendar = max([
            datetime.datetime.strptime(el, "%Y-%m-%d")
            for el in calendar_copy["end_date"].values
        ])
        min_date_calendar = min([
            datetime.datetime.strptime(el, "%Y-%m-%d")
            for el in calendar_copy["start_date"].values
        ])
        end_date_not_included = datetime.datetime.strptime(
            "2010-12-31", "%Y-%m-%d")
        start_date_not_included = datetime.datetime.strptime(
            "2007-01-01", "%Y-%m-%d")
        self.assertLess(max_date_calendar,
                        end_date_not_included,
                        msg="the last date should not be included in calendar")
        self.assertLess(start_date_not_included, min_date_calendar)
        os.remove(self.fname_copy)
Example #13
    def __init__(self, gtfssource, gtfs):
        """
        Parameters
        ----------
        gtfs_sources: list of strings
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        """
        self.df_freq_dict = {}
        if isinstance(gtfssource, string_types + (dict, )):
            self.gtfs_sources = [gtfssource]
        else:
            assert isinstance(gtfssource, list)
            self.gtfs_sources = gtfssource
        assert len(
            self.gtfs_sources
        ) > 0, "There needs to be some source files for validating an import"

        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs

        self.location = self.gtfs.get_location_name()
        self.warnings_container = WarningsContainer()
Example #14
def add_extra_locations_to_stops_table():
    g = GTFS(IMPORTED_DATABASE_PATH)
    for location in EXTRA_LOCATIONS:
        stop_id = location['id']
        lat = location['lat']
        lon = location['lon']
        g.add_stop(stop_id, "", stop_id.replace("ADDED_", ""), "", lat, lon)
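EXTRA_LOCATIONS is defined outside this snippet; a hypothetical value matching the keys read above:

# Hypothetical; only the 'id', 'lat' and 'lon' keys are used by the loop.
EXTRA_LOCATIONS = [
    {"id": "ADDED_central_square", "lat": 60.1699, "lon": 24.9384},
    {"id": "ADDED_harbour", "lat": 60.1650, "lon": 24.9500},
]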
Example #15
def main_make_views(gtfs_fname):
    """Re-create all views.
    """
    print("creating views")
    conn = GTFS(fname_or_conn=gtfs_fname).conn
    for L in Loaders:
        L(None).make_views(conn)
    conn.commit()
Example #16
    def __init__(self, gtfs_name):
        if isinstance(gtfs_name, str):
            self.gtfs = GTFS(FEED_DICT[gtfs_name]["gtfs_dir"])
        else:
            self.gtfs = gtfs_name
        self.bunching_value = 99
        self.line_spacing = 0.0001
        self.shapes = False
        self.crs_wgs = {'init': 'epsg:4326'}
Example #17
    def __init__(self,
                 gtfs_path,
                 journey_db_path,
                 routing_params=None,
                 multitarget_routing=False,
                 track_vehicle_legs=True,
                 track_route=False):
        """
        :param gtfs: GTFS object
        :param list_of_stop_profiles: dict of NodeProfileMultiObjective
        :param multitarget_routing: bool
        """
        self.multitarget_routing = multitarget_routing
        self.track_route = track_route
        self.track_vehicle_legs = track_vehicle_legs
        self.gtfs_path = gtfs_path
        self.gtfs = GTFS(self.gtfs_path)
        self.gtfs_meta = self.gtfs.meta
        self.gtfs._dont_close = True
        self.od_pairs = None
        self._targets = None
        self._origins = None
        self.diff_conn = None

        assert os.path.exists(journey_db_path) or routing_params is not None
        if not routing_params:
            routing_params = dict()
        self.routing_params_input = routing_params

        journey_db_pre_exists = os.path.isfile(journey_db_path)

        # use a generous timeout to avoid "database is locked" errors:
        timeout = 100
        self.conn = sqlite3.connect(journey_db_path, timeout)
        if not journey_db_pre_exists:
            self.initialize_database()

        self.routing_parameters = Parameters(self.conn)
        self._assert_journey_computation_paramaters_match()

        self.journey_properties = {
            "journey_duration": (_T_WALK_STR, _T_WALK_STR)
        }
        if routing_params.get('track_vehicle_legs', False) or \
                self.routing_parameters.get('track_vehicle_legs', False):
            self.journey_properties["n_boardings"] = (float("inf"), 0)
        if self.track_route:
            additional_journey_parameters = {
                "in_vehicle_duration": (float('inf'), 0),
                "transfer_wait_duration": (float('inf'), 0),
                "walking_duration": (_T_WALK_STR, _T_WALK_STR),
                "pre_journey_wait_fp": (float('inf'), 0)
            }
            self.journey_properties.update(additional_journey_parameters)
        self.travel_impedance_measure_names = list(
            self.journey_properties.keys())
        self.travel_impedance_measure_names += ["temporal_distance"]
Example #18
def add_swimming_halls_to_stops_table():
    g = GTFS(IMPORTED_DATABASE_PATH)
    halls = get_swimming_hall_data()
    for hall in halls:
        lat = hall['latitude']
        lon = hall['longitude']
        name = hall['name_en'].replace(" ", "_")
        stop_id = SWIMMING_HALL_ID_PREFIX + name + "_" + str(hall['id'])
        g.add_stop(stop_id, "NULL", name, "NULL", lat, lon)
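get_swimming_hall_data is likewise external to this snippet; judging from the keys accessed, each record looks roughly like the following (hypothetical values):

# Hypothetical record; only these four keys are read by the loop above.
halls = [
    {"id": 42, "name_en": "Central Swimming Hall",
     "latitude": 60.1870, "longitude": 24.9210},
]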
Example #19
    def __init__(self, gtfs_path, before_db_path, after_db_path, output_db):
        self.gtfs = GTFS(gtfs_path)
        print(output_db)
        self._create_indecies(before_db_path)
        self._create_indecies(after_db_path)

        self.conn = sqlite3.connect(output_db)
        self.conn = attach_database(self.conn, before_db_path, name="before")
        self.conn = attach_database(self.conn, after_db_path, name="after")
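attach_database is imported from elsewhere; a plausible implementation on top of SQLite's ATTACH. Schema names cannot be bound as parameters, so the name is interpolated while the path is bound normally:

import sqlite3

def attach_database(conn, db_path, name="other"):
    """Expose the tables of db_path as name.<table> on conn (a sketch)."""
    conn.execute("ATTACH DATABASE ? AS {name}".format(name=name), (db_path,))
    return conn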
Example #20
def stops_to_exclude(return_sqlite_list=False):
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
    areas_to_remove = gtfs_lm.execute_custom_query_pandas(
        "SELECT * FROM stops "
        "WHERE CASE WHEN substr(stop_id, 1, 5) = '__b__' "
        "      THEN CAST(substr(stop_id, 6, 1) AS integer) "
        "      ELSE CAST(substr(stop_id, 1, 1) AS integer) END > 4"
    )
    if return_sqlite_list:
        return "(" + ",".join(
            [str(x) for x in areas_to_remove["stop_I"].tolist()]) + ")"
    return areas_to_remove
Example #21
def _export_transfers(conn, fname):
    conn = GTFS(conn).conn
    cur = conn.cursor()
    cur.execute('SELECT S1.lat, S1.lon, S2.lat, S2.lon, SD.d '
                'FROM stop_distances SD '
                '  LEFT JOIN stops S1 ON (SD.from_stop_I=S1.stop_I) '
                '  LEFT JOIN stops S2 ON (SD.to_stop_I  =S2.stop_I)')
    with open(fname, 'w') as f:
        for row in cur:
            print(' '.join(str(x) for x in row), file=f)
Example #22
    def post_import2(self, conn):
        # TODO: multiple feeds are possible, but currently only one row is selected for all feeds
        G = GTFS(conn)
        for name in [
                'feed_publisher_name', 'feed_publisher_url', 'feed_lang',
                'feed_start_date', 'feed_end_date', 'feed_version'
        ]:
            value = conn.execute('SELECT %s FROM feed_info' % name).fetchone()[0]
            if value:
                G.meta['feed_info_' + name] = value
Example #23
    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS database
            A GTFS object
        buffer_params: dict, optional
            parameters of the spatial buffer to validate against
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()
Example #24
    def __create_temporal_extract_from_main_db(self, days, output_db_path):
        if os.path.isfile(output_db_path):
            os.remove(output_db_path)
        main_G = GTFS(self.main_db_path)
        assert isinstance(main_G, GTFS)
        day_extract_date_start = self.get_weekly_extract_start_date()
        start_date_ut = main_G.get_day_start_ut(day_extract_date_start)
        three_am_seconds = 3 * 3600
        fe = filter.FilterExtract(main_G,
                                  output_db_path,
                                  update_metadata=True,
                                  trip_earliest_start_time_ut=start_date_ut + three_am_seconds,  # inclusive
                                  trip_latest_start_time_ut=start_date_ut + three_am_seconds + days * 24 * 3600)  # exclusive
        fe.create_filtered_copy()
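The extract window thus runs from 03:00 on the start date (inclusive) to 03:00 after the given number of days (exclusive), so trips departing shortly after midnight stay with the previous service day. The boundary arithmetic as a sketch, with a hypothetical day start:

day_start_ut = 1500000000  # hypothetical unix time of the extract start date
three_am_seconds = 3 * 3600
days = 7

window_start_ut = day_start_ut + three_am_seconds                   # inclusive
window_end_ut = day_start_ut + three_am_seconds + days * 24 * 3600  # exclusive
assert window_end_ut - window_start_ut == days * 24 * 3600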
Example #25
    def test_filter_by_start_and_end_full_range(self):
        # untested tables with filtering: stops, shapes
        # test filtering by start and end time, copy full range
        FilterExtract(self.G,
                      self.fname_copy,
                      start_date=u"2007-01-01",
                      end_date=u"2011-01-01",
                      update_metadata=False).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(
            dsut_end, dsut_end + 24 * 3600)
        self.assertGreater(len(dsut_to_trip_I), 0)
        os.remove(self.fname_copy)
Example #26
    def test_get_main_database_path(self):
        self.assertEqual(self.gtfs.get_main_database_path(),  "", "path of an in-memory database should equal ''")

        from gtfspy.import_gtfs import import_gtfs
        try:
            fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
            if os.path.exists(fname) and os.path.isfile(fname):
                os.remove(fname)
            conn = sqlite3.connect(fname)
            import_gtfs(self.gtfs_source_dir, conn, preserve_connection=True, print_progress=False)
            G = GTFS(conn)
            self.assertTrue(os.path.exists(G.get_main_database_path()))
            self.assertIn(u"/test_gtfs.sqlite", G.get_main_database_path(), "path should be correct")
        finally:
            if os.path.exists(fname) and os.path.isfile(fname):
                os.remove(fname)
Example #27
def plot_city_figs(cities=None, axes=None, save_figure=True):
    if cities is None:
        cities = sorted(ALL_CITIES)
    for i, city in enumerate(cities):
        print("Plotting " + city)
        if axes is not None:
            ax = axes[i]
        else:
            fig = plt.figure(figsize=(6., 4.))
            ax = fig.add_subplot(111)
            fig.subplots_adjust(left=0.0, right=1.0, top=1.0, bottom=0.0)
        to_publish_csv = get_to_publish_csv()
        city_data = to_publish_csv[to_publish_csv["id"] == city].iloc[0]
        feeds = get_feeds_from_to_publish_tuple(city_data)
        pipeline = ExtractPipeline(city_data, feeds)
        try:
            day_G = GTFS(pipeline.day_db_path)
            ax = plot_route_network_from_gtfs(day_G,
                                              map_style="dark_all",
                                              ax=ax)
        except FileNotFoundError:
            print("File " + pipeline.day_db_path + " was not found")
            continue
        if save_figure:
            fig_path = os.path.join(FIG_PATH_DIR, city + ".pdf")
            ax.figure.savefig(fig_path)
            print("Figure saved to: \n" + fig_path)
class GenericJourneyDataAnalysisPipeline:
    def __init__(self):
        self.G = GTFS(GTFS_DATA_BASEDIR)
        self.day_start_ut = self.G.get_suitable_date_for_daily_extract(ut=True) + 3600
        self.start_time = self.day_start_ut + 8 * 3600
        self.end_time = self.day_start_ut + 11 * 3600
        self.profiles = {}
        self.journey_analyzer = None
        # self.analysis_start_time
        # self.analysis_end_time


    def script(self):

        journey_analyzer = JourneyDataAnalyzer(JOURNEY_DATA_DIR, GTFS_DATA_BASEDIR)
        if False:
            gdf = journey_analyzer.get_transfer_stops()
            gdf.to_file(shapefile_dir('transfer_stops'), driver='ESRI Shapefile')
            gdf = journey_analyzer.get_transfer_walks()
            gdf.to_file(shapefile_dir('transfer_walks'), driver='ESRI Shapefile')
            gdf = journey_analyzer.journeys_per_section()
            gdf.to_file(shapefile_dir('journeys_per_section'), driver='ESRI Shapefile')
            gdf = journey_analyzer.journey_alternatives_per_stop()
            gdf.to_file(shapefile_dir('journeys_per_stop'), driver='ESRI Shapefile')
        journey_analyzer.n_route_alternatives()
Example #29
    def get_weekly_extract_start_date(self):
        """
        Returns
        -------
        datetime.datetime
        """
        print("Weekly extract start date")
        if isinstance(self.extract_start_date, str):
            assert len(self.extract_start_date) == 10
            print("Obtained from to_publish.csv")
            return datetime.datetime.strptime(self.extract_start_date, "%Y-%m-%d")
        else:
            main_G = GTFS(self.main_db_path)
            print("Automatically computed based on database")
            assert isinstance(main_G, GTFS)
            day_extract_date_start = main_G.get_weekly_extract_start_date()
            return day_extract_date_start
Example #30
    def test_filter_by_agency(self):
        FilterExtract(self.G, self.fname_copy,
                      agency_ids_to_preserve=['DTA']).create_filtered_copy()
        with open(self.fname_copy, 'rb') as f:
            hash_copy = hashlib.md5(f.read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        agency_table = G_copy.get_table("agencies")
        assert "EXA" not in agency_table['agency_id'].values, "EXA agency should not be preserved"
        assert "DTA" in agency_table['agency_id'].values, "DTA agency should be preserved"
        routes_table = G_copy.get_table("routes")
        assert "EXR1" not in routes_table['route_id'].values, "EXR1 route_id should not be preserved"
        assert "AB" in routes_table['route_id'].values, "AB route_id should be preserved"
        trips_table = G_copy.get_table("trips")
        assert "EXT1" not in trips_table['trip_id'].values, "EXT1 trip_id should not be preserved"
        assert "AB1" in trips_table['trip_id'].values, "AB1 trip_id should be preserved"
        calendar_table = G_copy.get_table("calendar")
        assert "FULLW" in calendar_table['service_id'].values, "FULLW service_id should be preserved"
        # stop_times
        stop_times_table = G_copy.get_table("stop_times")
        # 01:32:45 corresponds to 3600 + (32 * 60) + 45 [in day seconds]
        assert 3600 + (32 * 60) + 45 not in stop_times_table['arr_time'].values
        os.remove(self.fname_copy)