def test_triplegs_read_csv_crs_parameter(self):
    """Check that `crs` is None by default and equals the value passed to the reader."""
    csv_path = os.path.join('tests', 'data', 'triplegs.csv')
    expected_crs = "EPSG:2056"
    shared_kwargs = dict(sep=';', tz='utc', index_col="id")

    # No crs argument: the result reports no crs.
    without_crs = ti.read_triplegs_csv(csv_path, **shared_kwargs)
    assert without_crs.crs is None

    # Explicit crs argument: the result reports exactly that value.
    with_crs = ti.read_triplegs_csv(csv_path, crs=expected_crs, **shared_kwargs)
    assert with_crs.crs == expected_crs
def test_from_to_csv(self):
    """Test basic reading and writing functions.

    Reads the reference CSV, checks that a file with renamed columns yields
    an equal frame, then writes the triplegs back out and compares the
    written file with the reference file.
    """
    orig_file = os.path.join("tests", "data", "triplegs.csv")
    mod_file = os.path.join("tests", "data", "triplegs_mod_columns.csv")
    tmp_file = os.path.join("tests", "data", "triplegs_test_1.csv")

    tpls = ti.read_triplegs_csv(orig_file, sep=";", tz="utc", index_col="id")

    # Reading with a column mapping must give the same result as the reference file.
    column_mapping = {
        "start_time": "started_at",
        "end_time": "finished_at",
        "tripleg": "geom",
    }
    mod_tpls = ti.read_triplegs_csv(mod_file, sep=";", columns=column_mapping, index_col="id")
    assert mod_tpls.equals(tpls)

    # Render timestamps as ISO strings with a "Z" suffix instead of "+00:00".
    tpls["started_at"] = tpls["started_at"].apply(
        lambda d: d.isoformat().replace("+00:00", "Z"))
    tpls["finished_at"] = tpls["finished_at"].apply(
        lambda d: d.isoformat().replace("+00:00", "Z"))

    columns = ["user_id", "started_at", "finished_at", "geom"]
    try:
        tpls.as_triplegs.to_csv(tmp_file, sep=";", columns=columns)
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # Always clean up so a failing comparison does not leave the file behind.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_triplegs_csv_index_col(self):
    """Check that `index_col` controls the name of the resulting index."""
    csv_path = os.path.join('tests', 'data', 'triplegs.csv')
    index_name = 'id'

    # A named index column becomes the index name.
    indexed = ti.read_triplegs_csv(csv_path, sep=";", index_col=index_name)
    assert indexed.index.name == index_name

    # With index_col=None the index has no name.
    unindexed = ti.read_triplegs_csv(csv_path, sep=";", index_col=None)
    assert unindexed.index.name is None
def test_set_index(self):
    """Verify the index name follows the `index_col` argument of the reader."""
    source = os.path.join("tests", "data", "triplegs.csv")
    wanted = "id"

    with_index = ti.read_triplegs_csv(source, sep=";", index_col=wanted)
    assert with_index.index.name == wanted

    # Passing None leaves the index unnamed.
    plain = ti.read_triplegs_csv(source, sep=";", index_col=None)
    assert plain.index.name is None
def test_set_crs(self):
    """Verify the crs is None unless one is passed to the reader."""
    source = os.path.join("tests", "data", "triplegs.csv")
    target_crs = "EPSG:2056"
    reader_args = dict(sep=";", tz="utc", index_col="id")

    default_read = ti.read_triplegs_csv(source, **reader_args)
    assert default_read.crs is None

    # The crs handed to the reader must be reported back unchanged.
    crs_read = ti.read_triplegs_csv(source, crs=target_crs, **reader_args)
    assert crs_read.crs == target_crs
def test_test_Douglas_Peucker_Algorithm_has_no_side_effects(self):
    """Check that `smoothen_triplegs` leaves the triplegs passed to it unchanged."""
    orig_file = 'tests/data/triplegs_with_too_many_points_test.csv'
    tpls = ti.read_triplegs_csv(orig_file, sep=';')

    # Snapshot taken before smoothing, compared against the input afterwards.
    tpls_copy = copy.deepcopy(tpls)

    # Only the effect on the input matters here, so the return value is discarded.
    smoothen_triplegs(tpls, epsilon=0.0001)

    assert np.all(tpls == tpls_copy)
def test_filter_triplegs(self):
    """Test `spatial_filter` on triplegs with the within/intersects/crosses methods.

    The expected counts for "within" and "intersects" are hard-coded reference
    values (noted below as obtained from ArcGIS); "crosses" is expected to be
    the difference between the two.
    """
    # read triplegs and area file
    tpls_file = os.path.join('tests', 'data', 'geolife', 'geolife_triplegs.csv')
    tpls = ti.read_triplegs_csv(tpls_file, tz='utc', index_col='id')
    extent = gpd.read_file(os.path.join('tests', 'data', 'area', 'tsinghua.geojson'))

    # the projection needs to be defined: WGS84
    tpls.crs = 'epsg:4326'

    within_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="within", re_project=True)
    intersects_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="intersects", re_project=True)
    crosses_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="crosses", re_project=True)

    # the result obtained from ArcGIS
    gis_within_num = 9
    gis_intersects_num = 20

    # Messages are written as adjacent literals with explicit separating spaces.
    assert len(within_tl) == gis_within_num, (
        "The within tripleg number should be the same as "
        "the one from the result with ArcGIS"
    )
    assert len(intersects_tl) == gis_intersects_num, (
        "The intersects tripleg number should be "
        "the same as the one from the result with ArcGIS"
    )
    assert len(crosses_tl) == len(intersects_tl) - len(within_tl), (
        "The crosses tripleg number "
        "should equal the number of intersect triplegs minus the number of within triplegs"
    )
def test_filter_triplegs(self):
    """Test if spatial_filter works for triplegs.

    The expected counts for "within" and "intersects" are hard-coded reference
    values (noted below as obtained from ArcGIS); "crosses" is expected to be
    the difference between the two.
    """
    # read triplegs and area file
    tpls_file = os.path.join("tests", "data", "geolife", "geolife_triplegs.csv")
    tpls = ti.read_triplegs_csv(tpls_file, tz="utc", index_col="id")
    extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"))

    # the projection needs to be defined: WGS84
    tpls.crs = "epsg:4326"

    within_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="within", re_project=True)
    intersects_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="intersects", re_project=True)
    crosses_tl = tpls.as_triplegs.spatial_filter(areas=extent, method="crosses", re_project=True)

    # the result obtained from ArcGIS
    gis_within_num = 9
    gis_intersects_num = 20

    # Messages are written as adjacent literals with explicit separating spaces.
    assert len(within_tl) == gis_within_num, (
        "The within tripleg number should be the same as "
        "the one from the result with ArcGIS"
    )
    assert len(intersects_tl) == gis_intersects_num, (
        "The intersects tripleg number should be "
        "the same as the one from the result with ArcGIS"
    )
    assert len(crosses_tl) == len(intersects_tl) - len(within_tl), (
        "The crosses tripleg number "
        "should equal the number of intersect triplegs minus the number of within triplegs"
    )
def test_Douglas_Peucker_Algorithm_reduces_triplet_length(self):
    """Check that `smoothen_triplegs` drops coordinates but keeps each line's length.

    For the first two triplegs of the fixture file the smoothed geometry must
    have the same `length` as the original while its coordinate count shrinks
    from 10 to 4 and from 7 to 3 respectively.
    """
    orig_file = 'tests/data/triplegs_with_too_many_points_test.csv'
    tpls = ti.read_triplegs_csv(orig_file, sep=';')
    tpls_smoothed = smoothen_triplegs(tpls, epsilon=0.0001)

    line1 = tpls.iloc[0].geom
    line1_smoothed = tpls_smoothed.iloc[0].geom
    line2 = tpls.iloc[1].geom
    line2_smoothed = tpls_smoothed.iloc[1].geom

    # Smoothing must not change the length of either line.
    assert line1.length == line1_smoothed.length
    assert line2.length == line2_smoothed.length

    # Coordinate counts of the originals, as present in the fixture file.
    assert len(line1.coords) == 10
    assert len(line2.coords) == 7

    # Coordinate counts after smoothing with epsilon=0.0001.
    assert len(line1_smoothed.coords) == 4
    assert len(line2_smoothed.coords) == 3
def test_triplegs_from_to_csv(self):
    """Round-trip triplegs through CSV and compare the output with the reference file.

    Also checks that a file with renamed columns, read with a column mapping,
    yields a frame equal to the one read from the reference file.
    """
    orig_file = os.path.join('tests', 'data', 'triplegs.csv')
    mod_file = os.path.join('tests', 'data', 'triplegs_mod_columns.csv')
    tmp_file = os.path.join('tests', 'data', 'triplegs_test.csv')

    tpls = ti.read_triplegs_csv(orig_file, sep=';', tz='utc', index_col="id")

    column_mapping = {'start_time': 'started_at', 'end_time': 'finished_at', 'tripleg': 'geom'}
    mod_tpls = ti.read_triplegs_csv(mod_file, sep=';', columns=column_mapping, index_col="id")
    assert mod_tpls.equals(tpls)

    # Render timestamps as ISO strings with a 'Z' suffix instead of '+00:00'.
    tpls['started_at'] = tpls['started_at'].apply(lambda d: d.isoformat().replace('+00:00', 'Z'))
    tpls['finished_at'] = tpls['finished_at'].apply(lambda d: d.isoformat().replace('+00:00', 'Z'))

    columns = ['user_id', 'started_at', 'finished_at', 'geom']
    try:
        tpls.as_triplegs.to_csv(tmp_file, sep=';', columns=columns)
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # Always clean up so a failing comparison does not leave the file behind.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_simple_coarse_identification_geographic(self):
    """Expect a UserWarning when predicting modes on data reprojected to EPSG:4269."""
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    # Declare the data as EPSG:4326 first, then reproject to EPSG:4269.
    reprojected = triplegs.set_crs(epsg=4326).to_crs(epsg=4269)

    with pytest.warns(UserWarning):
        reprojected.as_triplegs.predict_transport_mode(method='simple-coarse')
def test_check_empty_dataframe(self):
    """Expect an AssertionError when predicting transport modes on a frame with no rows."""
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    # An empty slice keeps the columns but drops every row.
    no_rows = triplegs[0:0]

    with pytest.raises(AssertionError):
        no_rows.as_triplegs.predict_transport_mode(method='simple-coarse')
def test_check_empty_dataframe(self):
    """Verify that mode prediction on a row-less frame raises AssertionError."""
    source = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv")
    loaded = ti.read_triplegs_csv(source, sep=";", index_col="id")
    rowless = loaded[0:0]  # same columns, zero rows

    with pytest.raises(AssertionError):
        rowless.as_triplegs.predict_transport_mode(method="simple-coarse")
def test_simple_coarse_identification_geographic(self):
    """Expect a UserWarning to be raised for data reprojected to EPSG:4269."""
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    # Declare the data as EPSG:4326 first, then reproject to EPSG:4269.
    reprojected = triplegs.set_crs(epsg=4326).to_crs(epsg=4269)

    # The UserWarning is expected as a raised exception whose text matches this pattern.
    expected_message = 'Your data is in a geographic coordinate system, length calculation fails'
    with pytest.raises(UserWarning, match=expected_message):
        reprojected.as_triplegs.predict_transport_mode(method='simple-coarse')
def test_triplegs_from_gpd(self):
    """Compare triplegs built from a GeoDataFrame with those read from CSV."""
    geojson_path = os.path.join('tests', 'data', 'triplegs.geojson')
    csv_path = os.path.join('tests', 'data', 'triplegs.csv')

    # Build triplegs from the GeoJSON file, indexed by its 'id' column.
    frame = gpd.read_file(geojson_path)
    frame.set_index('id', inplace=True)
    from_gpd = ti.io.from_geopandas.triplegs_from_gpd(
        frame, user_id='User', geom='geometry', tz='utc')

    from_csv = ti.read_triplegs_csv(csv_path, sep=';', tz='utc', index_col='id')

    pd.testing.assert_frame_equal(from_gpd, from_csv, check_exact=False)
def test_triplegs_plot(self):
    """Plot triplegs to a PNG file and check that the file exists afterwards."""
    png_path = 'tests/data/triplegs_plot.png'
    positionfixes = ti.read_positionfixes_csv('tests/data/positionfixes.csv', sep=';')
    triplegs = ti.read_triplegs_csv('tests/data/triplegs.csv', sep=';')

    triplegs.as_triplegs.plot(
        out_filename=png_path,
        positionfixes=positionfixes,
        plot_osm=False,
    )

    assert os.path.exists(png_path)
    # Remove the generated image again.
    os.remove(png_path)
def test_trajectory_distance_via_accessor_x(self):
    """Check that the 'dtw' similarity of four triplegs is the same for n_jobs=1 and n_jobs=4."""
    csv_path = os.path.join('tests', 'data', 'geolife', 'geolife_triplegs.csv')
    triplegs = ti.read_triplegs_csv(csv_path)

    # Same input slice and metric for both runs; only the job count differs.
    serial, parallel = (
        triplegs.iloc[0:4].as_triplegs.similarity(dist_metric='dtw', n_jobs=jobs)
        for jobs in (1, 4)
    )

    deviation = np.sum(np.abs(serial - parallel))
    assert np.isclose(deviation, 0)
def test_simple_coarse_identification_projected(self):
    """Check the predicted modes of the first three triplegs after reprojecting to EPSG:2056."""
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    # Declare the data as EPSG:4326 first, then reproject to EPSG:2056.
    projected = triplegs.set_crs(epsg=4326).to_crs(epsg=2056)
    predicted = projected.as_triplegs.predict_transport_mode(method='simple-coarse')

    expected_modes = ['slow_mobility', 'motorized_mobility', 'fast_mobility']
    for row, mode in enumerate(expected_modes):
        assert predicted.iloc[row]['mode'] == mode
def test_check_categories(self):
    """Check that one category dict is accepted and another makes prediction raise ValueError."""
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    accepted = {2: 'cat1', 7: 'cat2', np.inf: 'cat3'}
    # Same categories, but with the numeric keys 10 and 5 in descending order.
    rejected = {10: 'cat1', 5: 'cat2', np.inf: 'cat3'}

    assert ti.analysis.transport_mode_identification.check_categories(accepted)

    with pytest.raises(ValueError):
        triplegs.as_triplegs.predict_transport_mode(method='simple-coarse', categories=rejected)
def test_trajectory_distance(self):
    """Check that the 'dtw' distance matrix of four triplegs is the same for n_jobs=1 and n_jobs=4."""
    csv_path = os.path.join('tests', 'data', 'geolife', 'geolife_triplegs.csv')
    triplegs = ti.read_triplegs_csv(csv_path)

    # Same input slice and metric for both runs; only the job count differs.
    serial, parallel = (
        calculate_distance_matrix(X=triplegs.iloc[0:4], dist_metric='dtw', n_jobs=jobs)
        for jobs in (1, 4)
    )

    deviation = np.sum(np.abs(serial - parallel))
    assert np.isclose(deviation, 0)
def test_check_categories(self):
    """Verify one category dict passes `_check_categories` and another makes prediction raise."""
    source = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv")
    loaded = ti.read_triplegs_csv(source, sep=";", index_col="id")

    good_categories = {2: "cat1", 7: "cat2", np.inf: "cat3"}
    # Same categories, but with the numeric keys 10 and 5 in descending order.
    bad_categories = {10: "cat1", 5: "cat2", np.inf: "cat3"}

    assert _check_categories(good_categories)

    with pytest.raises(ValueError):
        loaded.as_triplegs.predict_transport_mode(
            method="simple-coarse",
            categories=bad_categories,
        )
def test_trajectory_distance_via_accessor_xy(self):
    """Check that 'dtw' similarity between two tripleg slices is the same for n_jobs=1 and n_jobs=4."""
    csv_path = os.path.join('tests', 'data', 'geolife', 'geolife_triplegs.csv')
    triplegs = ti.read_triplegs_csv(csv_path, tz='utc', index_col='id')

    # Two different row ranges: rows 0-1 against rows 4-7.
    left = triplegs.iloc[0:2]
    right = triplegs.iloc[4:8]

    serial, parallel = (
        left.as_triplegs.similarity(Y=right, dist_metric='dtw', n_jobs=jobs)
        for jobs in (1, 4)
    )

    deviation = np.sum(np.abs(serial - parallel))
    assert np.isclose(deviation, 0)
def test_simple_coarse_identification_wgs_84(self):
    """Check the predicted modes of the first three triplegs with the crs set to EPSG:4326."""
    source = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv")
    loaded = ti.read_triplegs_csv(source, sep=";", index_col="id")

    wgs84 = loaded.set_crs(epsg=4326)
    predicted = wgs84.as_triplegs.predict_transport_mode(method="simple-coarse")

    expected_modes = ["slow_mobility", "motorized_mobility", "fast_mobility"]
    for row, mode in enumerate(expected_modes):
        assert predicted.iloc[row]["mode"] == mode
def test_triplegs_plot(self):
    """Plot triplegs together with positionfixes and check that the output file exists."""
    data_dir = ('tests', 'data')
    png_path = os.path.join(*data_dir, 'triplegs_plot.png')
    pfs_path = os.path.join(*data_dir, 'positionfixes.csv')
    tpls_path = os.path.join(*data_dir, 'triplegs.csv')

    # Both inputs are read with the same separator, index column and crs.
    reader_args = dict(sep=';', index_col='id', crs='EPSG:4326')
    positionfixes = ti.read_positionfixes_csv(pfs_path, **reader_args)
    triplegs = ti.read_triplegs_csv(tpls_path, **reader_args)

    triplegs.as_triplegs.plot(out_filename=png_path, positionfixes=positionfixes, plot_osm=False)

    assert os.path.exists(png_path)
    # Remove the generated image again.
    os.remove(png_path)
def test_triplegs_from_to_csv(self):
    """Round-trip triplegs through CSV and compare the output with the reference file."""
    orig_file = 'tests/data/triplegs.csv'
    tmp_file = 'tests/data/triplegs_test.csv'

    tpls = ti.read_triplegs_csv(orig_file, sep=';')

    # Render timestamps as ISO strings with a 'Z' suffix instead of '+00:00'.
    tpls['started_at'] = tpls['started_at'].apply(
        lambda d: d.isoformat().replace('+00:00', 'Z'))
    tpls['finished_at'] = tpls['finished_at'].apply(
        lambda d: d.isoformat().replace('+00:00', 'Z'))

    try:
        tpls.as_triplegs.to_csv(
            tmp_file, sep=';',
            columns=['user_id', 'started_at', 'finished_at', 'geom'])
        assert filecmp.cmp(orig_file, tmp_file, shallow=False)
    finally:
        # Always clean up so a failing comparison does not leave the file behind.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_smoothen_triplegs(self):
    """Check that smoothing keeps each line's length while reducing its coordinate count."""
    source = os.path.join("tests", "data", "triplegs_with_too_many_points_test.csv")
    raw = ti.read_triplegs_csv(source, sep=";", index_col=None)
    smoothed = ti.preprocessing.triplegs.smoothen_triplegs(raw, tolerance=0.0001)

    # Geometries of the first two triplegs, before and after smoothing.
    rows = (0, 1)
    raw_lines = [raw.iloc[i].geom for i in rows]
    smoothed_lines = [smoothed.iloc[i].geom for i in rows]

    for before, after in zip(raw_lines, smoothed_lines):
        assert before.length == after.length

    assert [len(line.coords) for line in raw_lines] == [10, 7]
    assert [len(line.coords) for line in smoothed_lines] == [4, 3]
def test_set_datatime_tz(self):
    """Test setting the timezone information when reading.

    Covers three cases: the reference file yields a tz-aware ``started_at``
    column; a file written with tz-naive timestamps becomes tz-aware again
    when ``tz`` is passed; and reading that file without ``tz`` emits a
    UserWarning.
    """
    # check if tz is added to the datetime column
    file = os.path.join("tests", "data", "triplegs.csv")
    tpls = ti.read_triplegs_csv(file, sep=";", index_col="id")
    assert isinstance(tpls["started_at"].dtype, pd.DatetimeTZDtype)

    # check if a timezone will be set after manually deleting the timezone
    tpls["started_at"] = tpls["started_at"].dt.tz_localize(None)
    assert not isinstance(tpls["started_at"].dtype, pd.DatetimeTZDtype)

    tmp_file = os.path.join("tests", "data", "triplegs_test_2.csv")
    try:
        tpls.as_triplegs.to_csv(tmp_file, sep=";")
        tpls = ti.read_triplegs_csv(tmp_file, sep=";", index_col="id", tz="utc")
        assert isinstance(tpls["started_at"].dtype, pd.DatetimeTZDtype)

        # check if a warning is raised if 'tz' is not provided
        with pytest.warns(UserWarning):
            ti.read_triplegs_csv(tmp_file, sep=";", index_col="id")
    finally:
        # Always clean up so a failing check does not leave the file behind.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def test_simple_coarse_identification_no_crs(self):
    """Check the warning and the predicted modes when no crs is set on the triplegs.

    The prediction must emit a UserWarning matching the expected message and
    still label the first three triplegs with the expected modes.
    """
    csv_path = os.path.join('tests', 'data', 'triplegs_transport_mode_identification.csv')
    triplegs = ti.read_triplegs_csv(csv_path, sep=';', index_col="id")

    expected_warning = ('Your data is not projected. WGS84 is assumed and for length ' +
                        'calculation the haversine distance is used')
    with pytest.warns(UserWarning, match=expected_warning):
        predicted = triplegs.as_triplegs.predict_transport_mode(method='simple-coarse')

    expected_modes = ['slow_mobility', 'motorized_mobility', 'fast_mobility']
    for row, mode in enumerate(expected_modes):
        assert predicted.iloc[row]['mode'] == mode
def test_read_triplegs_gpd(self):
    """Verify that triplegs read from a GeoDataFrame match those read from CSV."""
    geojson_path = os.path.join("tests", "data", "triplegs.geojson")
    csv_path = os.path.join("tests", "data", "triplegs.csv")

    # Build triplegs from the GeoJSON file, indexed by its "id" column.
    frame = gpd.read_file(geojson_path)
    frame.set_index("id", inplace=True)
    from_gpd = ti.io.from_geopandas.read_triplegs_gpd(
        frame, user_id="User", geom="geometry", tz="utc")

    from_csv = ti.read_triplegs_csv(csv_path, sep=";", tz="utc", index_col="id")

    pd.testing.assert_frame_equal(from_gpd, from_csv, check_exact=False)
def test_simple_coarse_identification_no_crs(self):
    """Verify the warning and the predicted modes when no crs is set on the triplegs.

    The prediction must emit a UserWarning matching "Your data is not projected."
    and still label the first three triplegs with the expected modes.
    """
    source = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv")
    loaded = ti.read_triplegs_csv(source, sep=";", index_col="id")

    with pytest.warns(UserWarning, match="Your data is not projected."):
        predicted = loaded.as_triplegs.predict_transport_mode(method="simple-coarse")

    expected_modes = ["slow_mobility", "motorized_mobility", "fast_mobility"]
    for row, mode in enumerate(expected_modes):
        assert predicted.iloc[row]["mode"] == mode