def test_agencyLoader(self):
    """After import, the agencies table should contain exactly one agency, 'ag1'."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.conn.commit()
    db_cursor = self.conn.cursor()
    agency_rows = db_cursor.execute("SELECT agency_id FROM agencies").fetchall()
    assert len(agency_rows) == 1
    assert agency_rows[0][0] == u'ag1', agency_rows[0][0]
def test_metaDataLoader(self):
    """The import should create a `metadata` table in the database.

    Bug fix: selecting from a missing table raises sqlite3.OperationalError,
    not AssertionError, so the original `except AssertionError` clause could
    never fire and the test passed vacuously even with no metadata table.
    """
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    try:
        self.conn.execute("SELECT * FROM metadata")
    except sqlite3.OperationalError:
        assert False, "The database should have a table named metadata"
def setUp(self):
    """Import the test GTFS feeds into on-disk sqlite databases for each test.

    Builds two GTFS objects: one from the plain test feed and one from the
    filter-test feed. Also records an md5 hash of the freshly imported source
    database file so later tests can verify the file is left unmodified.
    """
    self.gtfs_source_dir = os.path.join(os.path.dirname(__file__), "test_data")
    self.gtfs_source_dir_filter_test = os.path.join(
        self.gtfs_source_dir, "filter_test_feed/")

    # self.G = GTFS.from_directory_as_inmemory_db(self.gtfs_source_dir)

    # some preparations:
    # target database files for the plain feed, its copy, and the filter feed
    self.fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
    self.fname_copy = self.gtfs_source_dir + "/test_gtfs_copy.sqlite"
    self.fname_filter = self.gtfs_source_dir + "/test_gtfs_filter_test.sqlite"
    # remove any leftovers from previous runs before importing
    self._remove_temporary_files()
    self.assertFalse(os.path.exists(self.fname_copy))
    conn = sqlite3.connect(self.fname)
    import_gtfs(self.gtfs_source_dir, conn, preserve_connection=True, print_progress=False)
    conn_filter = sqlite3.connect(self.fname_filter)
    import_gtfs(self.gtfs_source_dir_filter_test, conn_filter, preserve_connection=True, print_progress=False)
    self.G = GTFS(conn)
    self.G_filter_test = GTFS(conn_filter)
    # hash of the original database file, used to detect unwanted modification
    self.hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()
def test_frequencyLoader(self): import_gtfs(self.fdict, self.conn, preserve_connection=True) # "\nfrequency_route, freq_service, freq_trip, going north, freq_name, shape_es1" \ keys = ["trip_I", "start_time", "end_time", "headway_secs", "exact_times", "start_time_ds", "end_time_ds"] self.setDictConn() rows = self.conn.execute("SELECT * FROM frequencies").fetchall() for key in keys: row = rows[0] assert key in row for row in rows: if row["start_time_ds"] == 14 * 3600: self.assertEqual(row["exact_times"], 1) # there should be twelve trips with service_I freq count = self.conn.execute("SELECT count(*) AS count FROM trips JOIN calendar " "USING(service_I) WHERE service_id='freq_service'").fetchone()['count'] assert count == 12, count rows = self.conn.execute("SELECT trip_I FROM trips JOIN calendar " "USING(service_I) WHERE service_id='freq_service'").fetchall() for row in rows: trip_I = row['trip_I'] res = self.conn.execute("SELECT * FROM stop_times WHERE trip_I={trip_I}".format(trip_I=trip_I)).fetchall() assert len(res) > 1, res self.setRowConn() g = GTFS(self.conn) print("Stop times: \n\n ", g.get_table("stop_times")) print("Frequencies: \n\n ", g.get_table("frequencies"))
def test_calendarLoader(self):
    """The calendar table should expose all eleven expected columns."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()
    calendar_rows = self.conn.execute("SELECT * FROM calendar").fetchall()
    first_row = calendar_rows[0]
    assert len(first_row) == 11
    expected_columns = ['m', 't', 'w', 'th', 'f', 's', 'su',
                        'start_date', 'end_date', 'service_id', 'service_I']
    for column in expected_columns:
        assert column in first_row, 'no key ' + column
def test_dayLoader(self): import_gtfs(self.fdict, self.conn, preserve_connection=True) # Now, there should be # a regular trip according to calendar dates without any exceptions: self.setDictConn() query1 = "SELECT trip_I " \ "FROM days " \ "JOIN trips " \ "USING(trip_I) " \ "JOIN calendar " \ "USING(service_I) " \ "WHERE date='2016-03-21'" \ "AND service_id='service1'" res = self.conn.execute(query1).fetchall() assert len(res) == 1 trip_I_service_1 = res[0]['trip_I'] print(trip_I_service_1) query2 = "SELECT * FROM days WHERE trip_I=%s" % trip_I_service_1 self.assertEqual( len(self.conn.execute(query2).fetchall()), 6, "There should be 6 days with the trip_I " "corresponding to service_id service1") query3 = "SELECT * " \ "FROM days " \ "JOIN trips " \ "USING(trip_I) " \ "JOIN calendar " \ "USING(service_I) " \ "WHERE date='2016-03-22'" \ "AND service_id='service1'" self.assertEqual( len(self.conn.execute(query3).fetchall()), 0, "There should be no trip on date 2016-03-22" "for service1 due to calendar_dates") query4 = "SELECT date " \ "FROM days " \ "JOIN trips " \ "USING(trip_I) " \ "JOIN calendar " \ "USING(service_I) " \ "WHERE service_id='service2'" self.assertEqual(len(self.conn.execute(query4).fetchall()), 1, "There should be only one trip for service 2") self.assertEqual( self.conn.execute(query4).fetchone()['date'], "2016-03-22", "and the date should be 2016-03-22") query6 = "SELECT * " \ "FROM days " \ "JOIN trips " \ "USING(trip_I) " \ "JOIN calendar " \ "USING(service_I) " \ "WHERE service_id='phantom_service'" res = self.conn.execute(query6).fetchall() self.assertEqual( len(res), 0, "there should be no phantom trips due to phantom service" "even though phantom service is in calendar")
def test_transfersLoader(self):
    """
    First tests that the basic import to the transfers table is correct,
    and then checks that the information from transfers.txt is also
    flows to the stop_distances table.
    """
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    keys = [
        "from_stop_I",
        "to_stop_I",
        "transfer_type",
        "min_transfer_time"
    ]
    self.setDictConn()
    transfers = self.conn.execute("SELECT * FROM transfers").fetchall()
    # every expected column must be present in the first transfer row
    for key in keys:
        transfer = transfers[0]
        assert key in transfer
    # stop pairs collected below per GTFS transfer_type:
    #   3 = transfer not possible, 2 = requires min_transfer_time, 1 = timed
    from_stop_I_no_transfer = None
    to_stop_I_no_transfer = None
    from_stop_I_timed_transfer = None
    to_stop_I_timed_transfer = None
    from_stop_I_min_transfer = None
    to_stop_I_min_transfer = None
    min_transfer_time_min_transfer = None
    for transfer in transfers:
        transfer_type = transfer["transfer_type"]
        from_stop_I = transfer['from_stop_I']
        to_stop_I = transfer['to_stop_I']
        min_transfer_time = transfer["min_transfer_time"]
        # IDs and type codes are stored as integers; min_transfer_time may be NULL
        assert isinstance(from_stop_I, int)
        assert isinstance(to_stop_I, int)
        assert isinstance(transfer_type, int)
        assert isinstance(min_transfer_time, int) or (min_transfer_time is None)
        if transfer["transfer_type"] == 3:  # no transfer
            from_stop_I_no_transfer = from_stop_I
            to_stop_I_no_transfer = to_stop_I
        elif transfer["transfer_type"] == 2:
            from_stop_I_min_transfer = from_stop_I
            to_stop_I_min_transfer = to_stop_I
            min_transfer_time_min_transfer = min_transfer_time
        elif transfer["transfer_type"] == 1:
            from_stop_I_timed_transfer = from_stop_I
            to_stop_I_timed_transfer = to_stop_I
    base_query = "SELECT * FROM stop_distances WHERE from_stop_I=? and to_stop_I=?"
    # no_transfer: a forbidden transfer pair must not appear in stop_distances
    no_transfer_rows = self.conn.execute(
        base_query,
        (from_stop_I_no_transfer, to_stop_I_no_transfer)).fetchall()
    assert len(no_transfer_rows) == 0
    # a timed transfer is recorded with zero minimum transfer time
    timed_transfer_rows = \
        self.conn.execute(base_query,
                          (from_stop_I_timed_transfer,
                           to_stop_I_timed_transfer)).fetchall()
    assert len(timed_transfer_rows) == 1
    assert timed_transfer_rows[0]['min_transfer_time'] == 0
    # a min-time transfer carries its min_transfer_time into stop_distances
    min_transfer_rows = \
        self.conn.execute(base_query,
                          (from_stop_I_min_transfer,
                           to_stop_I_min_transfer)).fetchall()
    assert len(min_transfer_rows) == 1
    assert min_transfer_rows[0][
               'min_transfer_time'] == min_transfer_time_min_transfer
def import_database(force=False):
    """Create the sqlite database from the raw GTFS zip unless it already exists.

    Pass force=True to re-import even when the database file is present.
    """
    if not force and os.path.exists(IMPORTED_DATABASE_PATH):
        return
    import_gtfs.import_gtfs(
        [RAW_GTFS_ZIP_PATH],     # input: list of GTFS zip files (or directories)
        IMPORTED_DATABASE_PATH,  # output: where to create the new sqlite3 database
        print_progress=True,     # whether to print progress when importing data
        location_name="Helsinki")
def test_feedInfoLoader(self):
    """All expected feed_info columns should be present after import."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()
    feed_info_rows = self.conn.execute("SELECT * FROM feed_info").fetchall()
    first_row = feed_info_rows[0]
    for column in ("feed_publisher_name", "feed_publisher_url", "feed_lang",
                   "feed_start_date", "feed_end_date"):
        assert column in first_row
def test_shapeLoader(self):
    """Shapes import with cumulative distances and the expected columns."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()
    shape_rows = self.conn.execute("SELECT * FROM shapes").fetchall()
    # the second shape point must already have accumulated distance
    assert shape_rows[1]['d'] > 0, "distance traveled should be > 0"
    for column in ('shape_id', 'lat', 'lon', 'seq', 'd'):
        assert column in shape_rows[0], "key " + column + " not in shapes table"
def setUp(self):
    """Import the bundled test GTFS data into an on-disk sqlite database
    and expose it via a GTFS object."""
    self.gtfs_source_dir = os.path.join(os.path.dirname(__file__), "test_data")
    self.fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
    db_connection = sqlite3.connect(self.fname)
    import_gtfs(self.gtfs_source_dir, db_connection,
                preserve_connection=True, print_progress=False)
    self.G = GTFS(db_connection)
def test_stopDistancesLoader(self):
    """Every pair in stop_distances must have a non-negative distance."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    # set dictionary like row connection:
    self.setDictConn()
    distance_rows = self.conn.execute("SELECT * FROM stop_distances").fetchall()
    assert len(distance_rows) > 0
    for distance_row in distance_rows:
        print(distance_row)
        assert distance_row['d'] >= 0, "distance should be defined for all pairs in the stop_distances table"
def test_stopTimesLoader(self):
    """stop_times rows carry the expected columns and derived values."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()
    stop_time_rows = self.conn.execute("SELECT * FROM stop_times").fetchall()
    first_row = stop_time_rows[0]
    for column in ('stop_I', 'shape_break', 'trip_I', 'arr_time',
                   'dep_time', 'seq', 'arr_time_ds', 'dep_time_ds'):
        assert column in first_row
    # departure time in seconds-of-day, and shape break indices from the fixture
    assert first_row['dep_time_ds'] == 370
    assert first_row['shape_break'] == 0
    assert stop_time_rows[1]['shape_break'] == 3
def test_calendarDatesLoader(self):
    """calendar_dates rows get integer service_I keys, and services that
    exist only as calendar_dates exceptions ("phantom" services) are given
    a dummy row in the calendar table.

    Fix: removed a dead `SELECT * FROM calendar` whose result was
    immediately overwritten and never used.
    """
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()
    rows = self.conn.execute("SELECT * FROM calendar_dates").fetchall()
    for row in rows:
        assert isinstance(row['service_I'], int)
    # calendar table should be increased by two dummy row
    rows = self.conn.execute("SELECT * "
                             "FROM calendar "
                             "WHERE service_id='phantom_service'").fetchall()
    # Whether this should be the case is negotiable, though
    self.assertEqual(len(rows), 1, "phantom service should be present in the calendar")
def test_write_gtfs(self):
    """Round-trip test: export the GTFS data, then re-import the export.

    Runs once with a plain directory as output and once with a .zip file.
    Fixes: dropped the no-op single-argument os.path.join() around the
    sqlite filename and the unused assignment of import_gtfs's return value.
    """
    for ending in ["", ".zip"]:
        from gtfspy.import_gtfs import import_gtfs
        UUID = "36167f3012fe11e793ae92361f002671"
        sqlite_fname = "test_" + UUID + ".sqlite"
        test_output_dir = "./test_output_dir_" + UUID
        # clean up any leftovers from a previously crashed run
        try:
            shutil.rmtree(test_output_dir)
        except FileNotFoundError:
            pass
        try:
            exports.write_gtfs(self.gtfs, test_output_dir + ending)
            self.assertTrue(os.path.exists(test_output_dir + ending))
            try:
                # the exported feed must be importable again
                import_gtfs(test_output_dir + ending, sqlite_fname)
                self.assertTrue(os.path.exists(sqlite_fname))
            finally:
                os.remove(sqlite_fname)
        finally:
            # remove the export artifact regardless of test outcome
            if ending == "":
                shutil.rmtree(test_output_dir + ending)
            else:
                os.remove(test_output_dir + ending)
def test_get_main_database_path(self):
    """In-memory databases report '' as their path; on-disk ones report the file."""
    self.assertEqual(self.gtfs.get_main_database_path(), "",
                     "path of an in-memory database should equal ''")
    from gtfspy.import_gtfs import import_gtfs
    try:
        fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
        # start from a clean slate: drop any stale database file
        if os.path.exists(fname) and os.path.isfile(fname):
            os.remove(fname)
        db_conn = sqlite3.connect(fname)
        import_gtfs(self.gtfs_source_dir, db_conn,
                    preserve_connection=True, print_progress=False)
        on_disk_gtfs = GTFS(db_conn)
        reported_path = on_disk_gtfs.get_main_database_path()
        self.assertTrue(os.path.exists(reported_path))
        self.assertIn(u"/test_gtfs.sqlite", reported_path,
                      "path should be correct")
    finally:
        # always remove the temporary database file
        if os.path.exists(fname) and os.path.isfile(fname):
            os.remove(fname)
def import_from_zips():
    """Import, day-filter, and validate each configured GTFS zip (skipping 'old')."""
    for name, zip_path, date in zip(names, [gtfs_a17_zip, gtfs_lm_zip], dates):
        if name == 'old':
            continue
        zip_full_path = os.path.join(GTFS_DB_WORK_DIR, zip_path)
        all_db_path = os.path.join(GTFS_DB_WORK_DIR, name + '_all.sqlite')
        daily_db_path = os.path.join(GTFS_DB_WORK_DIR, name + '_daily.sqlite')
        import_gtfs(zip_full_path, all_db_path, location_name='helsinki')
        # extract a single-day slice of the freshly imported feed
        gtfs = GTFS(all_db_path)
        extractor = FilterExtract(gtfs, daily_db_path, date=date)
        extractor.create_filtered_copy()
        # validate the full import against the original zip and summarize warnings
        gtfs = GTFS(all_db_path)
        validator = ImportValidator(zip_full_path, gtfs)
        validator.validate_and_get_warnings().write_summary()
def load_or_import_example_gtfs(verbose=False):
    """Import the Kuopio example GTFS zip (if not already imported), add OSM
    walking distances, and return a GTFS object for the resulting database.

    Parameters
    ----------
    verbose : bool
        If True, print import progress and a summary of the loaded data.

    Returns
    -------
    GTFS
        Interface object for the imported sqlite database.
    """
    imported_database_path = "test_db_kuopio.sqlite"
    if not os.path.exists(
            imported_database_path
    ):  # reimport only if the imported database does not already exist
        print("Importing gtfs zip file")
        import_gtfs.import_gtfs(
            ["data/gtfs_kuopio_finland.zip"
             ],  # input: list of GTFS zip files (or directories)
            imported_database_path,  # output: where to create the new sqlite3 database
            print_progress=
            verbose,  # whether to print progress when importing data
            location_name="Kuopio")

        # Not this is an optional step, which is not necessary for many things.
        print("Computing walking paths using OSM")
        G = gtfs.GTFS(imported_database_path)
        G.meta['download_date'] = "2017-03-15"

        osm_path = "data/kuopio_extract_mapzen_2017_03_15.osm.pbf"

        # when using with the Kuopio test data set,
        # this should raise a warning due to no nearby OSM nodes for one of the stops.
        osm_transfers.add_walk_distances_to_db_python(imported_database_path,
                                                      osm_path)

        print(
            "Note: for large cities we have also a faster option for computing footpaths that uses Java.)"
        )
        dir_path = os.path.dirname(os.path.realpath(__file__))
        java_path = os.path.join(dir_path, "../java_routing/")
        print("Please see the contents of " + java_path + " for more details.")

    # Now you can access the imported database using a GTFS-object as an interface:
    G = gtfs.GTFS(imported_database_path)

    if verbose:
        print("Location name:" + G.get_location_name())  # should print Kuopio
        print("Time span of the data in unixtime: " +
              str(G.get_approximate_schedule_time_span_in_ut()))
        # prints the time span in unix time
    return G
def test_resequencing_stop_times(self):
    """Gappy stop_sequence values (0,10 and 1,123) should be renumbered
    to contiguous 1,2 per trip during import."""
    gtfs_source = self.fdict.copy()
    gtfs_source.pop('stop_times.txt')
    # Replace stop_times with sequences that are monotonic but not
    # contiguous; the importer is expected to resequence them.
    # (The chained assignment also stores the text on self.stopTimesText.)
    gtfs_source['stop_times.txt'] = \
        self.stopTimesText = \
        "trip_id, arrival_time, departure_time, stop_sequence, stop_id" \
        "\nservice1_trip1,0:06:10,0:06:10,0,SID1" \
        "\nservice1_trip1,0:06:15,0:06:16,10,SID2" \
        "\nfreq_trip_scheduled,0:00:00,0:00:00,1,SID1" \
        "\nfreq_trip_scheduled,0:02:00,0:02:00,123,SID2"
    import_gtfs(gtfs_source, self.conn, preserve_connection=True)

    rows = self.conn.execute("SELECT seq FROM stop_times ORDER BY trip_I, seq").fetchall()
    for row in rows:
        print(row)
    # both trips should now be numbered 1, 2
    self.assertEqual(rows[0][0], 1)
    self.assertEqual(rows[1][0], 2)
    self.assertEqual(rows[2][0], 1)
    self.assertEqual(rows[3][0], 2)
def test_stopLoader(self):
    """Stops import with quoted names preserved and parent-station links resolved."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    self.setDictConn()  # sqlite returns now list of dicts
    rows = self.conn.execute("SELECT * FROM stops").fetchall()
    assert len(rows) > 4  # some data should be imported
    assert rows[0]['stop_I'] == 1
    # Store quotes in names:
    parent_index = next(
        (i for i, row in enumerate(rows) if row['name'] == '"Parent-Stop-Name"'),
        None)
    assert parent_index is not None
    parent_stop_I = rows[parent_index]['stop_I']
    boring_index = next(
        (i for i, row in enumerate(rows) if row['name'] == "Boring Stop Name"),
        None)
    assert boring_index is not None
    # the child stop must point at its parent station
    assert rows[boring_index]['parent_I'] == parent_stop_I
    assert rows[boring_index]['self_or_parent_I'] == parent_stop_I
    # a stop without a parent is its own self_or_parent_I
    assert rows[3]['self_or_parent_I'] == 3
def test_sources_required_multiple(self):
    """Import must fail when any one of several sources lacks a required file."""
    incomplete_source = dict(self.fdict)
    del incomplete_source["stops.txt"]
    with self.assertRaises(AssertionError):
        import_gtfs([self.fdict, incomplete_source], self.conn)
def test_importMultiple(self):
    """Importing multiple feeds: mismatched timezones must raise ValueError,
    and importing the same feed twice must create distinct internal IDs.

    Fixes: the duplicate-ID checks used `self.assertIs(row[0], 1)`, which
    compares object identity and only passes because CPython caches small
    integers; `assertEqual` states the intended value comparison. The six
    copy-pasted per-table checks are also collapsed into one loop.
    """
    gtfs_source_dir = os.path.join(os.path.dirname(__file__), "test_data")
    gtfs_sources = [gtfs_source_dir, self.fdict]
    error_raised = False
    try:
        import_gtfs(gtfs_sources, self.conn, preserve_connection=True)
    except ValueError:
        error_raised = True
    assert error_raised, "different timezones in multiple feeds should raise an error"

    # mod_agencyText = \
    #     'agency_id, agency_name, agency_timezone, agency_url' \
    #     '\nag1, CompNet, America/Los_Angeles, www.example.com'
    # self.fdict['agency.txt'] = mod_agencyText

    # test that if trip_id:s (or stop_id:s etc. ) are the same in two feeds,
    # they get different trip_Is in the database created
    self.tearDown()
    self.setUp()

    # assert if importing two of the same feed will create the double number of trips
    gtfs_source = [self.fdict]
    import_gtfs(gtfs_source, self.conn, preserve_connection=True)
    n_rows_ref = self.conn.execute("SELECT count(*) FROM trips").fetchone()[0]
    self.tearDown()
    self.setUp()
    gtfs_sources = [self.fdict, self.fdict]
    import_gtfs(gtfs_sources, self.conn, preserve_connection=True)
    n_rows_double = self.conn.execute("SELECT count(*) FROM trips").fetchone()[0]
    self.assertEqual(n_rows_double, 2 * n_rows_ref)

    # every internal ID must be unique across the merged feeds
    id_columns_by_table = [("trips", "trip_I"),
                           ("calendar", "service_I"),
                           ("calendar_dates", "service_I"),
                           ("routes", "route_I"),
                           ("agencies", "agency_I"),
                           ("stops", "stop_I")]
    for table, id_column in id_columns_by_table:
        rows = self.conn.execute(
            "SELECT count(*) FROM {table} GROUP BY {id_column}".format(
                table=table, id_column=id_column)).fetchall()
        for row in rows:
            self.assertEqual(row[0], 1)
def test_sources_required(self):
    """A feed missing stops.txt must be rejected during import."""
    del self.fdict["stops.txt"]
    with self.assertRaises(AssertionError):
        import_gtfs(self.fdict, self.conn)
def test_testDataImport(self):
    """Smoke test: the bundled test_data directory imports without errors."""
    source_directory = os.path.join(os.path.dirname(__file__), "test_data")
    import_gtfs(source_directory, self.conn, preserve_connection=True)
def test_zipImport(self):
    """Smoke test: a zipped GTFS feed imports without errors."""
    source_zip = os.path.join(os.path.dirname(__file__), "test_data/test_gtfs.zip")
    import_gtfs(source_zip, self.conn, preserve_connection=True)
def test_agencyLoaderTwoTimeZonesFail(self):
    """Two agencies with different timezones in one feed must raise ValueError."""
    self.fdict['agency.txt'] = (
        self.agencyText +
        "\n123, AgencyFromDifferentTZ, Europe/Helsinki, www.buahaha.com")
    with self.assertRaises(ValueError):
        import_gtfs(self.fdict, self.conn, preserve_connection=True)
def test_routeLoader(self):
    """At least one route should be present after import."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    route_rows = self.conn.execute("SELECT * FROM routes").fetchall()
    assert len(route_rows) > 0
def _import_sample_gtfs_db(self):
    """Import the bundled sample GTFS zip into self.gtfs_path."""
    sample_zip = os.path.join(os.path.dirname(__file__),
                              "../../test/test_data/test_gtfs.zip")
    import_gtfs([sample_zip], self.gtfs_path)
def import_process(self):
    """Run the GTFS import using filenames taken from the dialog's line edits."""
    target_db = self.dlg.lineEdit_2.text()
    # the source field holds one or more filenames separated by "; "
    source_files = self.dlg.lineEdit.text().split("; ")
    import_gtfs(source_files, target_db)
def test_tripLoader(self):
    """At least one trip should be present after import."""
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    trip_rows = self.conn.execute("SELECT * FROM trips").fetchall()
    self.assertGreaterEqual(len(trip_rows), 1)