def test_wrong_folder_name(self):
    """A folder with an invalid name inside the dataset must raise ValueError."""
    data_root = os.path.join("tests", "data", "geolife")
    bad_folder = os.path.join(data_root, "123 - invalid folder ()%")

    # create the offending folder only for the duration of this test
    os.mkdir(bad_folder)
    try:
        with pytest.raises(ValueError):
            read_geolife(data_root)
    finally:
        # always clean up so that other tests see the unmodified data folder
        os.rmdir(bad_folder)
def test_print_progress_flag(self, capsys):
    """Test if the print_progress flag controls the printing behavior.

    With ``print_progress=True`` reading must write something to stderr,
    with ``print_progress=False`` stderr must stay empty.
    """
    g_path = os.path.join("tests", "data", "geolife")

    read_geolife(g_path, print_progress=True)
    # readouterr() returns what was captured so far and resets the buffers,
    # so the second check below only sees output of the second call
    captured_print = capsys.readouterr()
    assert captured_print.err != ""

    read_geolife(g_path, print_progress=False)
    captured_noprint = capsys.readouterr()
    assert captured_noprint.err == ""
def test_check_overlap(self):
    """Check that staypoints and triplegs of a user do not overlap in time.

    Triplegs and staypoints should not overlap when generated using the
    default extract triplegs method. This test extracts triplegs and
    staypoints from positionfixes and stores them in a single dataframe.
    The dataframe is sorted by date, then we check if the staypoint/tripleg
    from the row before was finished when the next one started.
    """
    pfs = read_geolife(os.path.join('tests', 'data', 'geolife_long'))
    spts = pfs.as_positionfixes.extract_staypoints(method='sliding',
                                                   dist_threshold=25,
                                                   time_threshold=5 * 60)
    tpls = pfs.as_positionfixes.extract_triplegs(spts)

    cols = ['started_at', 'finished_at', 'user_id']
    # pd.concat instead of DataFrame.append, which was removed in pandas 2.0
    spts_tpls = pd.concat([spts[cols], tpls[cols]])
    spts_tpls.sort_values(by=['user_id', 'started_at'], inplace=True)

    for user_id_this in spts['user_id'].unique():
        spts_tpls_this = spts_tpls[spts_tpls['user_id'] == user_id_this]
        # time between the end of the previous row and the start of this row
        diff = spts_tpls_this['started_at'] - spts_tpls_this['finished_at'].shift(1)
        # transform to numpy array and drop the first value
        # (always missing due to the shift operation)
        diff = diff.values[1:]

        # all values have to be greater or equal to zero,
        # otherwise there is an overlap
        assert all(diff >= np.timedelta64(datetime.timedelta()))
def test_loop_read(self):
    """Write positionfixes to a csv file, read them back and compare the latitudes."""
    pfs = read_geolife(os.path.join('tests', 'data', 'geolife'))
    tmp_file = os.path.join('tests', 'data', 'positionfixes_test.csv')
    try:
        pfs.as_positionfixes.to_csv(tmp_file)
        pfs2 = ti.read_positionfixes_csv(tmp_file, index_col='id')[pfs.columns]
    finally:
        # remove the temporary file even if writing or re-reading failed;
        # the existence check avoids masking the original error
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    assert np.isclose(0, (pfs.lat - pfs2.lat).abs().sum())
def test_general_generation(self):
    """Compare trips generated from the example data with the stored example trips."""
    data_dir = os.path.join('tests', 'data', 'geolife_long')

    # reference trips: parse the timestamps and align the column names
    expected_trips = pd.read_csv(os.path.join(data_dir, 'trips.csv'))
    for time_col in ('started_at', 'finished_at'):
        expected_trips[time_col] = pd.to_datetime(expected_trips[time_col])
    expected_trips = expected_trips.rename(columns={
        'origin': 'origin_staypoint_id',
        'destination': 'destination_staypoint_id'
    })

    # positionfixes -> staypoints (with activity flag) -> triplegs -> trips
    positionfixes = read_geolife(data_dir)
    staypoints = positionfixes.as_positionfixes.extract_staypoints(
        method='sliding', dist_threshold=25, time_threshold=5 * 60)
    staypoints = staypoints.as_staypoints.create_activity_flag()
    triplegs = positionfixes.as_positionfixes.extract_triplegs(staypoints)
    _, _, generated_trips = generate_trips(staypoints, triplegs,
                                           gap_threshold=15, id_offset=0)

    pd.testing.assert_frame_equal(expected_trips, generated_trips)
def read_geolife_with_modes():
    """Build triplegs from the ``geolife_modes`` test data.

    Returns the result of ``geolife_add_modes_to_triplegs`` applied to the
    generated triplegs and the labels returned by ``read_geolife``.
    """
    data_dir = os.path.join("tests", "data", "geolife_modes")
    positionfixes, mode_labels = read_geolife(data_dir)

    positionfixes, staypoints = positionfixes.as_positionfixes.generate_staypoints(
        method="sliding",
        dist_threshold=25,
        time_threshold=5,
    )
    # only the second return value (the triplegs) is needed here
    _, triplegs = positionfixes.as_positionfixes.generate_triplegs(
        staypoints,
        method="between_staypoints",
    )
    return geolife_add_modes_to_triplegs(triplegs, mode_labels)
def test_loop_read(self):
    """Use read_geolife reader, store posfix as .csv, load them again."""
    pfs, _ = read_geolife(os.path.join("tests", "data", "geolife"))
    tmp_file = os.path.join("tests", "data", "positionfixes_test.csv")
    try:
        pfs.as_positionfixes.to_csv(tmp_file)
        pfs2 = ti.read_positionfixes_csv(tmp_file, index_col="id")[pfs.columns]
    finally:
        # remove the temporary file even if writing or re-reading failed;
        # the existence check avoids masking the original error
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    # the latitudes must survive the round trip unchanged
    assert np.isclose(0, (pfs.lat - pfs2.lat).abs().sum())
def read_geolife_with_modes():
    """Build triplegs from the ``geolife_modes`` test data.

    Returns the result of ``geolife_add_modes_to_triplegs`` applied to the
    generated triplegs and the labels returned by ``read_geolife``.
    """
    data_dir = os.path.join('tests', 'data', 'geolife_modes')
    positionfixes, mode_labels = read_geolife(data_dir)

    positionfixes, staypoints = positionfixes.as_positionfixes.generate_staypoints(
        method='sliding',
        dist_threshold=25,
        time_threshold=5 * 60,
    )
    # only the second return value (the triplegs) is needed here
    _, triplegs = positionfixes.as_positionfixes.generate_triplegs(
        staypoints,
        method='between_staypoints',
    )
    return geolife_add_modes_to_triplegs(triplegs, mode_labels)
def test_loop_read(self):
    """Use read_geolife reader, store posfix as .csv, load them again."""
    pfs, _ = read_geolife(os.path.join("tests", "data", "geolife"), print_progress=True)
    saved_file = os.path.join("tests", "data", "positionfixes_test.csv")
    try:
        pfs.as_positionfixes.to_csv(saved_file)
        pfs_reread = ti.read_positionfixes_csv(saved_file, index_col="id", crs="epsg:4326")
    finally:
        # remove the temporary file even if writing or re-reading failed;
        # the existence check avoids masking the original error
        if os.path.exists(saved_file):
            os.remove(saved_file)
    # the geometries must survive the round trip unchanged
    assert_geoseries_equal(pfs.geometry, pfs_reread.geometry)
def test_unavailable_label_reading(self):
    """Test data types of the labels returned by read_geolife from a directory without label files."""
    _, labels = read_geolife(os.path.join("tests", "data", "geolife_long"))

    # the output is a dictionary
    assert isinstance(labels, dict)

    # the values are pandas dataframes (the keys are not checked here)
    for value in labels.values():
        assert isinstance(value, pd.DataFrame)
def test_label_reading(self):
    """Test data types of the labels returned by read_geolife."""
    _, labels = read_geolife(os.path.join("tests", "data", "geolife_modes"))

    # the output is a dictionary
    assert isinstance(labels, dict)

    # every key has to be one of the users 10, 20 or 178,
    # the values are pandas dataframes
    expected_users = {10, 20, 178}
    for key, value in labels.items():
        assert key in expected_users
        assert isinstance(value, pd.DataFrame)
def get_geolife_triplegs_with_modes():
    """Generate triplegs for the ``geolife_modes`` test data.

    The triplegs and the labels returned by ``read_geolife`` are passed
    through ``geolife_add_modes_to_triplegs`` and the result is returned.
    """
    modes_dir = os.path.join('tests', 'data', 'geolife_modes')
    positionfixes, mode_labels = read_geolife(modes_dir)

    staypoint_kwargs = {
        'method': 'sliding',
        'dist_threshold': 25,
        'time_threshold': 5 * 60,
    }
    positionfixes, staypoints = positionfixes.as_positionfixes.generate_staypoints(
        **staypoint_kwargs)

    # only the second return value (the triplegs) is used
    _, triplegs = positionfixes.as_positionfixes.generate_triplegs(
        staypoints, method='between_staypoints')

    return geolife_add_modes_to_triplegs(triplegs, mode_labels)
def get_geolife_triplegs_with_modes():
    """Generate triplegs for the ``geolife_modes`` test data.

    The triplegs and the labels returned by ``read_geolife`` are passed
    through ``geolife_add_modes_to_triplegs`` and the result is returned.
    """
    modes_dir = os.path.join("tests", "data", "geolife_modes")
    positionfixes, mode_labels = read_geolife(modes_dir)

    staypoint_kwargs = {
        "method": "sliding",
        "dist_threshold": 25,
        "time_threshold": 5,
    }
    positionfixes, staypoints = positionfixes.as_positionfixes.generate_staypoints(
        **staypoint_kwargs)

    # only the second return value (the triplegs) is used
    _, triplegs = positionfixes.as_positionfixes.generate_triplegs(
        staypoints, method="between_staypoints")

    return geolife_add_modes_to_triplegs(triplegs, mode_labels)
def test_wrong_folder_name(self):
    """Check if invalid folder names raise an exception"""
    geolife_path = os.path.join('tests', 'data', 'geolife')
    temp_dir = os.path.join(geolife_path, '123 - invalid folder ()%')
    os.mkdir(temp_dir)
    try:
        with pytest.raises(ValueError):
            # deliberately no tuple unpacking of the result: a failed unpack
            # raises ValueError as well and would let this test pass even if
            # read_geolife itself did not raise
            read_geolife(geolife_path)
    finally:
        # always remove the temporary folder again
        os.rmdir(temp_dir)
def test_general_trip_generation(self):
    """Compare generated trips and the joint staypoint/tripleg table with stored references."""
    gap_threshold = 15
    data_dir = os.path.join('tests', 'data', 'geolife_long')

    # load pregenerated trips and parse their timestamps
    expected_trips = pd.read_csv(os.path.join(data_dir, 'trips.csv'),
                                 index_col='id')
    for time_col in ('started_at', 'finished_at'):
        expected_trips[time_col] = pd.to_datetime(expected_trips[time_col])

    # load the pregenerated joint staypoint/tripleg table and parse its timestamps
    expected_spts_tpls = pd.read_csv(os.path.join(data_dir, 'tpls_spts.csv'),
                                     index_col='id')
    for time_col in ('started_at', 'started_at_next', 'finished_at'):
        expected_spts_tpls[time_col] = pd.to_datetime(expected_spts_tpls[time_col])

    # create staypoints and triplegs from geolife (based on positionfixes)
    positionfixes = read_geolife(data_dir)
    staypoints = positionfixes.as_positionfixes.extract_staypoints(
        method='sliding', dist_threshold=25, time_threshold=5 * 60)
    staypoints = staypoints.as_staypoints.create_activity_flag()
    triplegs = positionfixes.as_positionfixes.extract_triplegs(staypoints)

    # temporary fix ID bug (issue #56) so that we work with valid
    # staypoint/tripleg files
    staypoints = staypoints.set_index('id')
    triplegs = triplegs.set_index('id')

    # generate trips and a joint staypoint/triplegs dataframe
    staypoints, triplegs, generated_trips = generate_trips(
        staypoints, triplegs, gap_threshold=gap_threshold, id_offset=0)
    generated_spts_tpls = create_debug_spts_tpls_data(
        staypoints, triplegs, gap_threshold=gap_threshold)

    # test if generated trips are equal
    pd.testing.assert_frame_equal(expected_trips, generated_trips)

    # test if generated staypoints/triplegs are equal
    # (especially important for trip ids)
    pd.testing.assert_frame_equal(expected_spts_tpls, generated_spts_tpls,
                                  check_dtype=False)
def test_extract_triplegs_global(self):
    """Compare triplegs extracted from raw data with pregenerated test triplegs.

    Both sets must have the same (non-zero) number of triplegs and the sum of
    the pairwise distances between their geometries must be almost zero.
    """
    # generate triplegs from raw-data
    pfs = read_geolife(os.path.join('tests', 'data', 'geolife'))
    spts = pfs.as_positionfixes.extract_staypoints(method='sliding',
                                                   dist_threshold=25,
                                                   time_threshold=5 * 60)
    tpls = pfs.as_positionfixes.extract_triplegs(spts)

    # load pregenerated test-triplegs
    tpls_test = ti.read_triplegs_csv(
        os.path.join('tests', 'data', 'geolife', 'geolife_triplegs_short.csv'))

    assert len(tpls) > 0
    # both sets need the same size, otherwise the pairwise comparison below
    # would silently skip triplegs
    assert len(tpls) == len(tpls_test)

    # compare the geometries row by row (positional order)
    distance_sum = sum(
        generated.distance(reference)
        for generated, reference in zip(tpls.geom, tpls_test.geom)
    )
    assert_almost_equal(distance_sum, 0.0)
def read_geolife_modes():
    """Return the output of ``read_geolife`` for the ``geolife_modes`` test data."""
    modes_dir = os.path.join('tests', 'data', 'geolife_modes')
    return read_geolife(modes_dir)
def test_no_user_folders(self):
    """Reading a path that contains no user folders must raise FileNotFoundError."""
    trajectory_dir = os.path.join("tests", "data", "geolife", "000", "Trajectory")
    with pytest.raises(FileNotFoundError):
        read_geolife(trajectory_dir)
def read_geolife_modes():
    """Return the output of ``read_geolife`` for the ``geolife_modes`` test data."""
    modes_dir = os.path.join("tests", "data", "geolife_modes")
    return read_geolife(modes_dir)