def test_from_to_csv(self):
    """Test basic reading and writing functions.

    Reads the reference trips file and a variant with renamed columns and
    checks that both yield the same table. Then writes the trips back to
    disk and compares the output byte-for-byte with the reference file.
    The temporary output file is removed even when a check fails.
    """

    def _to_iso_z(timestamp):
        # the reference file spells UTC as "Z" rather than "+00:00"
        return timestamp.isoformat().replace("+00:00", "Z")

    orig_file = os.path.join("tests", "data", "trips.csv")
    mod_file = os.path.join("tests", "data", "trips_mod_columns.csv")
    tmp_file = os.path.join("tests", "data", "trips_test_1.csv")

    trips = ti.read_trips_csv(orig_file, sep=";", index_col="id")
    column_mapping = {
        "orig_stp": "origin_staypoint_id",
        "dest_stp": "destination_staypoint_id",
    }
    mod_trips = ti.read_trips_csv(mod_file, columns=column_mapping, sep=";", index_col="id")
    # compare without the "geom" column, which is dropped from mod_trips here
    mod_trips_wo_geom = pd.DataFrame(mod_trips.drop(columns=["geom"]))
    assert mod_trips_wo_geom.equals(trips)

    for time_col in ("started_at", "finished_at"):
        trips[time_col] = trips[time_col].apply(_to_iso_z)

    columns = [
        "user_id",
        "started_at",
        "finished_at",
        "origin_staypoint_id",
        "destination_staypoint_id",
    ]
    try:
        trips.as_trips.to_csv(tmp_file, sep=";", columns=columns)
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # do not leave the temporary file behind when the write or the
        # comparison fails
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_trips_csv_index_col(self):
    """Check that `index_col` determines the name of the resulting index."""
    trips_path = os.path.join('tests', 'data', 'trips.csv')

    # an explicit index column gives the index that column's name
    indexed = ti.read_trips_csv(trips_path, sep=";", index_col='id')
    assert indexed.index.name == 'id'

    # with index_col=None the index is expected to be unnamed
    unindexed = ti.read_trips_csv(trips_path, sep=";", index_col=None)
    assert unindexed.index.name is None
def test_set_index(self):
    """Verify the index name produced for a given and for a missing `index_col`."""
    csv_path = os.path.join("tests", "data", "trips.csv")
    expected_name = "id"

    with_index = ti.read_trips_csv(csv_path, sep=";", index_col=expected_name)
    assert with_index.index.name == expected_name

    without_index = ti.read_trips_csv(csv_path, sep=";", index_col=None)
    assert without_index.index.name is None
def test_trips_from_to_csv(self):
    """Round-trip trips through CSV and compare against the reference file.

    Also checks that a file with renamed columns, read with a column
    mapping, yields the same table as the reference file. The temporary
    output file is removed even when a check fails.
    """

    def _to_iso_z(timestamp):
        # the reference file spells UTC as "Z" rather than "+00:00"
        return timestamp.isoformat().replace('+00:00', 'Z')

    orig_file = os.path.join('tests', 'data', 'trips.csv')
    mod_file = os.path.join('tests', 'data', 'trips_mod_columns.csv')
    tmp_file = os.path.join('tests', 'data', 'trips_test.csv')

    trips = ti.read_trips_csv(orig_file, sep=';', index_col="id")
    column_mapping = {'orig_stp': 'origin_staypoint_id', 'dest_stp': 'destination_staypoint_id'}
    mod_trips = ti.read_trips_csv(mod_file, columns=column_mapping, sep=';', index_col="id")
    assert mod_trips.equals(trips)

    for time_col in ('started_at', 'finished_at'):
        trips[time_col] = trips[time_col].apply(_to_iso_z)

    columns = ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']
    try:
        trips.as_trips.to_csv(tmp_file, sep=';', columns=columns)
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # do not leave the temporary file behind when the write or the
        # comparison fails
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_accessor(self):
    """Test if the accessor leads to the same results as the explicit function."""
    # prepare data: positionfixes -> staypoints (with activity flag) -> triplegs
    pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife_long"))
    pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5)
    stps = stps.as_staypoints.create_activity_flag(time_threshold=15)
    pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps)

    # generate trips using the explicitly imported function
    stps_expl, tpls_expl, trips_expl = ti.preprocessing.triplegs.generate_trips(stps, tpls, gap_threshold=15)

    # generate trips using the accessor
    stps_acc, tpls_acc, trips_acc = tpls.as_triplegs.generate_trips(stps, gap_threshold=15)

    # all three outputs must agree between the two call styles
    pd.testing.assert_frame_equal(trips_expl, trips_acc)
    assert_geodataframe_equal(stps_expl, stps_acc)
    assert_geodataframe_equal(tpls_expl, tpls_acc)
def test_duplicate_columns(self):
    """Test that running trip generation twice does not add duplicate columns.

    The outputs of a first run are fed into a second run; the second run
    must complete without raising and must return staypoints and triplegs
    with the same set of columns as the first run.
    """
    # create staypoints and triplegs from geolife (based on positionfixes)
    pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife_long"))
    pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5)
    stps = stps.as_staypoints.create_activity_flag(time_threshold=15)
    pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps)

    # first run on the fresh data, second run on the first run's output
    stps_run_1, tpls_run_1, _ = ti.preprocessing.triplegs.generate_trips(stps, tpls, gap_threshold=15)
    stps_run_2, tpls_run_2, _ = ti.preprocessing.triplegs.generate_trips(
        stps_run_1, tpls_run_1, gap_threshold=15
    )

    assert set(tpls_run_1.columns) == set(tpls_run_2.columns)
    assert set(stps_run_1.columns) == set(stps_run_2.columns)
def test_accessor_arguments(self):
    """Test if the accessor is robust to different ways to receive arguments."""
    # prepare data: positionfixes -> staypoints (with activity flag) -> triplegs
    pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife_long"))
    pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5)
    stps = stps.as_staypoints.create_activity_flag(time_threshold=15)
    pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps)

    # accessor with only positional arguments (not allowed)
    with pytest.raises(AssertionError):
        tpls.as_triplegs.generate_trips(stps, 15)

    # accessor with only keywords
    stps_1, tpls_1, trips_1 = tpls.as_triplegs.generate_trips(stps_input=stps, gap_threshold=15)

    # accessor with mixed arguments/keywords
    stps_2, tpls_2, trips_2 = tpls.as_triplegs.generate_trips(stps, gap_threshold=15)

    # both allowed call styles must produce the same results
    assert_geodataframe_equal(stps_1, stps_2)
    assert_geodataframe_equal(tpls_1, tpls_2)
    pd.testing.assert_frame_equal(trips_1, trips_2)
def test_generate_trips_gap_detection(self):
    """Check trip generation on hand-made data containing temporal gaps.

    Gap cases (line breaks reconstructed from the original listing):
    - activity - tripleg - activity [gap] activity - tripleg - activity
    - activity - tripleg - [gap] - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - [gap] activity - tripleg - activity
    - activity - tripleg - activity [gap] - tripleg - tripleg - tripleg - activity
    - tripleg - [gap] - tripleg - tripleg - [gap] - tripleg
    """
    gap_threshold = 15
    data_dir = os.path.join("tests", "data", "trips")
    # shared options for the two raw input tables
    read_options = dict(
        sep=";",
        index_col="id",
        parse_dates=[0, 1],
        infer_datetime_format=True,
        dayfirst=True,
    )

    # staypoints: load and attach a dummy point geometry
    raw_sp = pd.read_csv(os.path.join(data_dir, "staypoints_gaps.csv"), **read_options)
    raw_sp["geom"] = Point(1, 1)
    sp_in = ti.io.read_staypoints_gpd(gpd.GeoDataFrame(raw_sp, geometry="geom"), tz="utc")

    # triplegs: load and attach a dummy line geometry
    raw_tpls = pd.read_csv(os.path.join(data_dir, "triplegs_gaps.csv"), **read_options)
    raw_tpls["geom"] = LineString([[1, 1], [2, 2]])
    tpls_in = ti.io.read_triplegs_gpd(gpd.GeoDataFrame(raw_tpls, geometry="geom"), tz="utc")

    # ground truth: expected trips and expected joint staypoint/tripleg table
    trips_loaded = ti.read_trips_csv(os.path.join(data_dir, "trips_gaps.csv"), index_col="id", tz="utc")
    sp_tpls_loaded = pd.read_csv(os.path.join(data_dir, "sp_tpls_gaps.csv"), index_col="id")
    for time_col in ("started_at", "started_at_next", "finished_at"):
        sp_tpls_loaded[time_col] = pd.to_datetime(sp_tpls_loaded[time_col], utc=True)

    # generate trips and the joint staypoint/tripleg debugging table
    sp_proc, tpls_proc, trips = generate_trips(sp_in, tpls_in, gap_threshold=gap_threshold, add_geometry=False)
    sp_tpls = _create_debug_sp_tpls_data(sp_proc, tpls_proc, gap_threshold=gap_threshold)

    # generated trips must match the ground truth
    pd.testing.assert_frame_equal(trips_loaded, trips)

    # generated staypoints/triplegs must match too (especially important for trip ids)
    assert_frame_equal(sp_tpls_loaded, sp_tpls, check_dtype=False)
def test_trips_from_gpd(self):
    """Compare trips built from a plain DataFrame with trips read from csv."""
    trips_file = os.path.join('tests', 'data', 'trips.csv')

    # both readers start from the same file; only the entry point differs
    raw_table = pd.read_csv(trips_file, sep=';').set_index('id')
    trips_from_gpd = ti.io.from_geopandas.trips_from_gpd(raw_table, tz='utc')
    trips_from_csv = ti.read_trips_csv(trips_file, sep=';', tz='utc', index_col='id')

    pd.testing.assert_frame_equal(trips_from_gpd, trips_from_csv, check_exact=False)
def test_accessor(self):
    """Check that the `as_trips` accessor rejects data lacking a required column."""
    trips_path = os.path.join("tests", "data", "geolife_long", "trips.csv")
    trips = ti.read_trips_csv(trips_path, index_col="id")

    # the complete table is accepted by the accessor
    assert trips.as_trips

    # without "user_id" accessing the accessor must raise
    with pytest.raises(AttributeError):
        trips.drop(columns=["user_id"]).as_trips
def test_set_datatime_tz(self):
    """Test setting the timezone information when reading.

    Covers three situations: the reference file yields a timezone-aware
    `started_at` column; a file written from timezone-naive data becomes
    timezone-aware again when read with `tz="utc"`; and reading that file
    without `tz` raises a `UserWarning`. The temporary file is removed
    even when a check fails.
    """
    trips_file = os.path.join("tests", "data", "trips.csv")
    tmp_file = os.path.join("tests", "data", "trips_test_2.csv")

    # check if tz is added to the datetime column
    trips = ti.read_trips_csv(trips_file, sep=";", index_col="id")
    assert isinstance(trips["started_at"].dtype, pd.DatetimeTZDtype)

    # manually delete the timezone before writing
    trips["started_at"] = trips["started_at"].dt.tz_localize(None)
    assert not isinstance(trips["started_at"].dtype, pd.DatetimeTZDtype)

    try:
        trips.as_trips.to_csv(tmp_file, sep=";")

        # check if a timezone will be set when `tz` is given
        trips = ti.read_trips_csv(tmp_file, sep=";", index_col="id", tz="utc")
        assert isinstance(trips["started_at"].dtype, pd.DatetimeTZDtype)

        # check if a warning is raised if 'tz' is not provided
        with pytest.warns(UserWarning):
            ti.read_trips_csv(tmp_file, sep=";", index_col="id")
    finally:
        # do not leave the temporary file behind when a step above fails
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_csv(self):
    """Check that reading trips from a DataFrame and from csv gives the same result."""
    trips_file = os.path.join("tests", "data", "trips.csv")

    # route 1: plain pandas table handed to the gpd reader
    raw_table = pd.read_csv(trips_file, sep=";").set_index("id")
    trips_from_gpd = read_trips_gpd(raw_table, tz="utc")

    # route 2: the csv reader directly
    trips_from_csv = ti.read_trips_csv(trips_file, sep=";", tz="utc", index_col="id")

    assert_frame_equal(trips_from_gpd, trips_from_csv, check_exact=False)
def test_generate_trips(self, example_triplegs_higher_gap_threshold):
    """Check that generated trips reproduce the stored example trips."""
    compared_columns = [
        "user_id",
        "started_at",
        "finished_at",
        "origin_staypoint_id",
        "destination_staypoint_id",
        "geom",
    ]

    # stored reference trips
    reference_file = os.path.join("tests", "data", "geolife_long", "trips.csv")
    trips_loaded = ti.read_trips_csv(reference_file, index_col="id", geom_col="geom", crs=None)

    # staypoints and triplegs come from the fixture
    # (per the original comment: created with gap_threshold 1e6)
    staypoints, triplegs = example_triplegs_higher_gap_threshold

    # only the trips output is checked here
    _, _, generated = generate_trips(staypoints, triplegs, gap_threshold=15)

    assert_geodataframe_equal(trips_loaded, generated[compared_columns])
def test_trips_from_to_csv(self):
    """Round-trip trips through CSV and compare against the reference file.

    The trips are read, their timestamps are rendered with a "Z" suffix,
    and the table is written back; the output must be byte-identical to
    the input. The temporary output file is removed even when the write
    or the comparison fails.
    """

    def _to_iso_z(timestamp):
        # the reference file spells UTC as "Z" rather than "+00:00"
        return timestamp.isoformat().replace('+00:00', 'Z')

    orig_file = 'tests/data/trips.csv'
    tmp_file = 'tests/data/trips_test.csv'

    trips = ti.read_trips_csv(orig_file, sep=';')
    for time_col in ('started_at', 'finished_at'):
        trips[time_col] = trips[time_col].apply(_to_iso_z)

    columns = ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']
    try:
        trips.as_trips.to_csv(tmp_file, sep=';', columns=columns)
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # do not leave the temporary file behind when a step above fails
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_generate_trips(self):
    """Check that trips generated from the example data match the stored trips."""
    geolife_dir = os.path.join("tests", "data", "geolife_long")

    # stored reference trips
    trips_loaded = ti.read_trips_csv(os.path.join("tests", "data", "geolife_long", "trips.csv"), index_col="id")

    # build staypoints and triplegs from the geolife positionfixes
    pfs, _ = ti.io.dataset_reader.read_geolife(geolife_dir)
    pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5)
    stps = stps.as_staypoints.create_activity_flag(time_threshold=15)
    pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps)

    # only the trips output is compared
    _, _, trips = ti.preprocessing.triplegs.generate_trips(stps, tpls, gap_threshold=15)

    pd.testing.assert_frame_equal(trips_loaded, trips)
def test_generate_trips(self):
    """Check that trips generated from the example data match the stored trips."""
    gap_threshold = 15
    geolife_dir = os.path.join('tests', 'data', 'geolife_long')

    # stored reference trips
    trips_loaded = ti.read_trips_csv(os.path.join('tests', 'data', 'geolife_long', 'trips.csv'), index_col='id')

    # build staypoints and triplegs from the geolife positionfixes
    pfs, _ = ti.io.dataset_reader.read_geolife(geolife_dir)
    pfs, stps = pfs.as_positionfixes.generate_staypoints(
        method='sliding', dist_threshold=25, time_threshold=5 * 60
    )
    stps = stps.as_staypoints.create_activity_flag()
    pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps)

    # only the trips output is compared
    _, _, trips = ti.preprocessing.triplegs.generate_trips(
        stps, tpls, gap_threshold=gap_threshold, id_offset=0
    )

    pd.testing.assert_frame_equal(trips_loaded, trips)
def testdata_trips():
    """Load the geolife_long trips test data, indexed by "id" with geometry column "geom"."""
    return ti.read_trips_csv(
        os.path.join("tests", "data", "geolife_long", "trips.csv"),
        index_col="id",
        geom_col="geom",
    )
def test_trips_csv_index_warning(self):
    """Check that reading without an `index_col` argument raises a UserWarning."""
    trips_path = os.path.join('tests', 'data', 'trips.csv')
    with pytest.warns(UserWarning):
        # index_col is deliberately left out
        ti.read_trips_csv(trips_path, sep=';')
def test_set_index_warning(self):
    """Verify that a UserWarning is emitted when `index_col` is not passed."""
    csv_path = os.path.join("tests", "data", "trips.csv")
    # the call omits index_col on purpose
    with pytest.warns(UserWarning):
        ti.read_trips_csv(csv_path, sep=";")