def test_duplicate_columns(self, example_triplegs):
    """Check that re-running generate_trips on its own output keeps the column sets stable.

    The second call is made on data that already went through generate_trips
    and is expected to emit a UserWarning instead of failing.
    """
    staypoints, triplegs = example_triplegs

    # first pass on the raw fixture data
    sp_first, tpls_first, _ = generate_trips(staypoints, triplegs, gap_threshold=15)

    # second pass on the already processed frames has to warn
    with pytest.warns(UserWarning):
        sp_second, tpls_second, _ = generate_trips(sp_first, tpls_first, gap_threshold=15)

    # column order is irrelevant, only the set of names is compared
    assert set(tpls_first.columns) == set(tpls_second.columns)
    assert set(sp_first.columns) == set(sp_second.columns)
def test_trip_wo_geom(self, example_triplegs_higher_gap_threshold):
    """Check that add_geometry=False yields the same trips, just without the "geom" column."""
    sp, tpls = example_triplegs_higher_gap_threshold

    # reference: trips generated with geometry, geometry column stripped afterwards
    _, _, trips_with_geom = generate_trips(sp, tpls, gap_threshold=15)
    expected = pd.DataFrame(trips_with_geom.drop(["geom"], axis=1))

    # candidate: trips generated directly without geometry
    _, _, trips_wo_geom = generate_trips(sp, tpls, gap_threshold=15, add_geometry=False)

    assert_frame_equal(trips_wo_geom, expected)
def test_general_generation(self):
    """Regenerate trips from the geolife_long sample and compare them with the stored trips.csv."""
    data_dir = os.path.join('tests', 'data', 'geolife_long')

    # stored reference trips
    trips_loaded = pd.read_csv(os.path.join(data_dir, 'trips.csv'))
    for time_col in ('started_at', 'finished_at'):
        trips_loaded[time_col] = pd.to_datetime(trips_loaded[time_col])
    trips_loaded = trips_loaded.rename(columns={
        'origin': 'origin_staypoint_id',
        'destination': 'destination_staypoint_id'
    })

    # positionfixes -> staypoints (with activity flag) -> triplegs -> trips
    pfs = read_geolife(data_dir)
    spts = pfs.as_positionfixes.extract_staypoints(method='sliding',
                                                   dist_threshold=25,
                                                   time_threshold=5 * 60)
    spts = spts.as_staypoints.create_activity_flag()
    tpls = pfs.as_positionfixes.extract_triplegs(spts)
    spts, tpls, trips = generate_trips(spts, tpls, gap_threshold=15, id_offset=0)

    pd.testing.assert_frame_equal(trips_loaded, trips)
def test_only_staypoints_in_trip(self):
    """Check that a trip candidate made up solely of non-activity staypoints is not kept.

    The synthetic sequence holds one tripleg; exactly one trip (id 0) is
    expected, and only the non-activity staypoint next to that tripleg is
    expected to carry a trip id.
    """
    start = pd.Timestamp("2021-07-11 8:00:00")
    h = pd.to_timedelta("1h")

    # (activity flag, record type) in chronological order, one hour each
    layout = [
        (True, "staypoint"),
        (False, "staypoint"),
        (True, "staypoint"),
        (False, "tripleg"),
        (False, "staypoint"),
        (True, "staypoint"),
    ]
    records = []
    for pos, (is_activity, kind) in enumerate(layout):
        begin = start + pos * h
        records.append(
            {
                "activity": is_activity,
                "type": kind,
                "user_id": 0,
                "started_at": begin,
                "finished_at": begin + h,
            }
        )
    sp_tpls = pd.DataFrame(records)

    is_staypoint = sp_tpls["type"] == "staypoint"
    sp = sp_tpls[is_staypoint]
    tpls = sp_tpls[sp_tpls["type"] == "tripleg"]

    sp_, tpls_, trips = generate_trips(sp, tpls, add_geometry=False)

    # align the expectation with the returned index so only values are compared
    trip_id_truth = pd.Series([None, None, None, 0, None], dtype="Int64")
    trip_id_truth.index = sp_.index

    assert_series_equal(sp_["trip_id"], trip_id_truth, check_names=False)
    assert (tpls_["trip_id"] == 0).all()
    assert len(trips) == 1
def test_generate_trips_gap_detection(self):
    """Check trip generation against stored ground truth for several gap layouts.

    Covered cases:
    - activity - tripleg - activity [gap] activity - tripleg - activity
    - activity - tripleg - [gap] - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - activity [gap] - tripleg - tripleg - tripleg - activity
    - tripleg - [gap] - tripleg - tripleg - [gap] - tripleg
    """
    gap_threshold = 15
    data_dir = os.path.join("tests", "data", "trips")

    # both input files share the same csv layout
    csv_options = dict(
        sep=";",
        index_col="id",
        parse_dates=[0, 1],
        infer_datetime_format=True,
        dayfirst=True,
    )

    # staypoints with a dummy point geometry
    sp_in = pd.read_csv(os.path.join(data_dir, "staypoints_gaps.csv"), **csv_options)
    sp_in["geom"] = Point(1, 1)
    sp_in = gpd.GeoDataFrame(sp_in, geometry="geom")
    sp_in = ti.io.read_staypoints_gpd(sp_in, tz="utc")

    # triplegs with a dummy line geometry
    tpls_in = pd.read_csv(os.path.join(data_dir, "triplegs_gaps.csv"), **csv_options)
    tpls_in["geom"] = LineString([[1, 1], [2, 2]])
    tpls_in = gpd.GeoDataFrame(tpls_in, geometry="geom")
    tpls_in = ti.io.read_triplegs_gpd(tpls_in, tz="utc")

    # ground truth: trips and the joint staypoint/tripleg debug table
    trips_loaded = ti.read_trips_csv(os.path.join(data_dir, "trips_gaps.csv"), index_col="id", tz="utc")
    sp_tpls_loaded = pd.read_csv(os.path.join(data_dir, "sp_tpls_gaps.csv"), index_col="id")
    for time_col in ("started_at", "started_at_next", "finished_at"):
        sp_tpls_loaded[time_col] = pd.to_datetime(sp_tpls_loaded[time_col], utc=True)

    # run the function under test and build the comparable debug table
    sp_proc, tpls_proc, trips = generate_trips(sp_in, tpls_in, gap_threshold=gap_threshold, add_geometry=False)
    sp_tpls = _create_debug_sp_tpls_data(sp_proc, tpls_proc, gap_threshold=gap_threshold)

    # trips have to match exactly
    pd.testing.assert_frame_equal(trips_loaded, trips)

    # the joint table carries the trip ids; dtypes are not compared
    assert_frame_equal(sp_tpls_loaded, sp_tpls, check_dtype=False)
def test_generate_trips_index_start(self, example_triplegs):
    """Test that the generated trips index is exactly 0, 1, ..., len(trips) - 1.

    The previous assertion used ``.any()``, which already passes when a single
    index position happens to match ``np.arange``; the contract being tested
    requires every position to match, hence ``.all()``.
    """
    sp, tpls = example_triplegs

    # only the trips frame is needed for the index check
    _, _, trips = generate_trips(sp, tpls, gap_threshold=15)

    # guard against a vacuous pass: ``.all()`` on an empty comparison is True
    assert len(trips) > 0
    assert (trips.index == np.arange(len(trips))).all()
def test_generate_trips_missing_link(self, example_triplegs):
    """Check that each staypoint trip-link column contains at least one missing value."""
    sp, tpls = example_triplegs

    sp, tpls, _ = generate_trips(sp, tpls, gap_threshold=15)

    # not every staypoint is linked to a trip, so every link column must contain NA
    assert sp["trip_id"].isna().any()
    assert sp["prev_trip_id"].isna().any()
    assert sp["next_trip_id"].isna().any()
def test_general_trip_generation(self):
    """Regenerate trips and the joint staypoint/tripleg table from geolife_long and compare with stored files."""
    gap_threshold = 15
    data_dir = os.path.join('tests', 'data', 'geolife_long')

    # stored reference trips
    trips_loaded = pd.read_csv(os.path.join(data_dir, 'trips.csv'), index_col='id')
    for time_col in ('started_at', 'finished_at'):
        trips_loaded[time_col] = pd.to_datetime(trips_loaded[time_col])

    # stored reference joint staypoint/tripleg table
    spts_tpls_loaded = pd.read_csv(os.path.join(data_dir, 'tpls_spts.csv'), index_col='id')
    for time_col in ('started_at', 'started_at_next', 'finished_at'):
        spts_tpls_loaded[time_col] = pd.to_datetime(spts_tpls_loaded[time_col])

    # positionfixes -> staypoints (with activity flag) -> triplegs
    pfs = read_geolife(data_dir)
    spts = pfs.as_positionfixes.extract_staypoints(method='sliding',
                                                   dist_threshold=25,
                                                   time_threshold=5 * 60)
    spts = spts.as_staypoints.create_activity_flag()
    tpls = pfs.as_positionfixes.extract_triplegs(spts)

    # temporary fix ID bug (issue #56) so that we work with valid staypoint/tripleg files
    spts = spts.set_index('id')
    tpls = tpls.set_index('id')

    # run the function under test and build the comparable debug table
    spts, tpls, trips = generate_trips(spts, tpls, gap_threshold=gap_threshold, id_offset=0)
    spts_tpls = create_debug_spts_tpls_data(spts, tpls, gap_threshold=gap_threshold)

    # trips have to match exactly
    pd.testing.assert_frame_equal(trips_loaded, trips)

    # the joint table carries the trip ids; dtypes are not compared
    pd.testing.assert_frame_equal(spts_tpls_loaded, spts_tpls, check_dtype=False)
def test_compare_to_old_trip_function(self, example_triplegs):
    """Compare the output of generate_trips with the legacy _generate_trips_old implementation."""
    sp, tpls = example_triplegs

    # current implementation
    sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)

    # legacy implementation
    # NOTE(review): it receives the frames returned by generate_trips above, not
    # the raw fixture data -- confirm this is intended.
    sp_, tpls_, trips_ = _generate_trips_old(sp, tpls, gap_threshold=15)

    # drop the geometry column before comparing with the legacy trips
    trips.drop(columns=["geom"], inplace=True)

    # column order and index dtype are ignored in all three comparisons
    compare_options = dict(check_like=True, check_index_type=False)
    assert_frame_equal(trips, trips_, **compare_options)
    assert_frame_equal(sp, sp_, **compare_options)
    assert_frame_equal(tpls, tpls_, **compare_options)
def test_generate_trips_dtype_consistent(self, example_triplegs):
    """Check the dtypes of the index and id columns produced by generate_trips."""
    sp, tpls = example_triplegs

    sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)

    # user ids keep the same dtype in staypoints and trips
    assert sp["user_id"].dtype == trips["user_id"].dtype
    # trips index is a plain integer index
    assert trips.index.dtype == "int64"

    # trip links are nullable integers
    for link_col in ("trip_id", "prev_trip_id", "next_trip_id"):
        assert sp[link_col].dtype == "Int64"
    assert tpls["trip_id"].dtype == "Int64"
def test_generate_trips(self, example_triplegs_higher_gap_threshold):
    """Compare generated trips with the stored geolife_long trips.csv read via ti.read_trips_csv."""
    # stored reference trips
    path = os.path.join("tests", "data", "geolife_long", "trips.csv")
    trips_loaded = ti.read_trips_csv(path, index_col="id", geom_col="geom", crs=None)

    # staypoints and triplegs come from the fixture
    sp, tpls = example_triplegs_higher_gap_threshold

    sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)

    # restrict the result to the columns being compared, in this order
    compared_columns = [
        "user_id",
        "started_at",
        "finished_at",
        "origin_staypoint_id",
        "destination_staypoint_id",
        "geom",
    ]
    trips = trips[compared_columns]

    assert_geodataframe_equal(trips_loaded, trips)
def test_generate_trips_id_management(self, example_triplegs_higher_gap_threshold):
    """Compare the joint staypoint/tripleg debug table with the stored geolife_long sp_tpls.csv."""
    gap_threshold = 15

    # stored reference table with parsed timestamps
    reference_path = os.path.join("tests", "data", "geolife_long", "sp_tpls.csv")
    sp_tpls_loaded = pd.read_csv(reference_path, index_col="id")
    for time_col in ("started_at", "started_at_next", "finished_at"):
        sp_tpls_loaded[time_col] = pd.to_datetime(sp_tpls_loaded[time_col])

    sp, tpls = example_triplegs_higher_gap_threshold

    # run the function under test and build the comparable debug table
    sp, tpls, _ = generate_trips(sp, tpls, gap_threshold=gap_threshold)
    sp_tpls = _create_debug_sp_tpls_data(sp, tpls, gap_threshold=gap_threshold)

    # the joint table carries the trip ids; dtypes are not compared
    assert_frame_equal(sp_tpls_loaded, sp_tpls, check_dtype=False)
def test_sp_tpls_index(self):
    """Check that generate_trips returns staypoints and triplegs with the index they came in with."""
    start = pd.Timestamp("2021-07-11 8:00:00")
    h = pd.to_timedelta("1h")

    # (activity flag, record type) in chronological order, one hour each
    layout = [
        (True, "staypoint"),
        (False, "tripleg"),
        (False, "staypoint"),
        (False, "tripleg"),
        (True, "staypoint"),
    ]
    records = []
    for pos, (is_activity, kind) in enumerate(layout):
        begin = start + pos * h
        records.append(
            {
                "activity": is_activity,
                "type": kind,
                "user_id": 0,
                "started_at": begin,
                "finished_at": begin + h,
            }
        )
    sp_tpls = pd.DataFrame(records)

    is_staypoint = sp_tpls["type"] == "staypoint"
    sp = sp_tpls[is_staypoint]
    tpls = sp_tpls[sp_tpls["type"] != "staypoint"]

    # unusual index names, which assert_index_equal also compares
    tpls.index.name = "something_long_and_obscure"
    sp.index.name = "even_obscurer"

    sp_, tpls_, _ = generate_trips(sp, tpls, add_geometry=False)

    assert_index_equal(tpls.index, tpls_.index)
    assert_index_equal(sp.index, sp_.index)
def test_gap_detection(self):
    """Check trip generation against stored ground truth for several gap layouts.

    Covered cases:
    - activity - tripleg - activity [gap] activity - tripleg - activity
    - activity - tripleg - [gap] - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - activity [gap] - tripleg - tripleg - tripleg - activity
    - tripleg - [gap] - tripleg - tripleg - [gap] - tripleg
    """
    gap_threshold = 15
    data_dir = os.path.join('.', 'tests', 'data', 'trips')

    # both input files share the same csv layout
    csv_options = dict(sep=';',
                       index_col='id',
                       parse_dates=[0, 1],
                       infer_datetime_format=True,
                       dayfirst=True)

    # staypoints with a dummy point geometry; touching the accessor checks the frame
    spts_in = pd.read_csv(os.path.join(data_dir, 'staypoints_gaps.csv'), **csv_options)
    spts_in['geom'] = Point(1, 1)
    spts_in = gpd.GeoDataFrame(spts_in, geometry='geom')
    assert spts_in.as_staypoints

    # triplegs with a dummy line geometry; touching the accessor checks the frame
    tpls_in = pd.read_csv(os.path.join(data_dir, 'triplegs_gaps.csv'), **csv_options)
    tpls_in['geom'] = LineString([[1, 1], [2, 2]])
    tpls_in = gpd.GeoDataFrame(tpls_in, geometry='geom')
    assert tpls_in.as_triplegs

    # ground truth trips
    trips_loaded = pd.read_csv(os.path.join(data_dir, 'trips_gaps.csv'), index_col='id')
    for time_col in ('started_at', 'finished_at'):
        trips_loaded[time_col] = pd.to_datetime(trips_loaded[time_col])

    # ground truth joint staypoint/tripleg table
    spts_tpls_loaded = pd.read_csv(os.path.join(data_dir, 'stps_tpls_gaps.csv'), index_col='id')
    for time_col in ('started_at', 'started_at_next', 'finished_at'):
        spts_tpls_loaded[time_col] = pd.to_datetime(spts_tpls_loaded[time_col])

    # run the function under test and build the comparable debug table
    spts_proc, tpls_proc, trips = generate_trips(spts_in, tpls_in, gap_threshold=gap_threshold, id_offset=0)
    spts_tpls = create_debug_spts_tpls_data(spts_proc, tpls_proc, gap_threshold=gap_threshold)

    # trips have to match exactly
    pd.testing.assert_frame_equal(trips_loaded, trips)

    # the joint table carries the trip ids; dtypes are not compared
    pd.testing.assert_frame_equal(spts_tpls_loaded, spts_tpls, check_dtype=False)