Ejemplo n.º 1
0
    def test_duplicate_columns(self, example_triplegs):
        """Check that re-running trip generation on its own output keeps the column sets unchanged.

        The second call receives the staypoints/triplegs returned by the first call and is
        expected to emit a ``UserWarning``.
        """
        staypoints, triplegs = example_triplegs

        # first pass on the fixture data
        sp_first, tpls_first, _ = generate_trips(staypoints, triplegs, gap_threshold=15)

        # second pass on the frames produced by the first pass
        with pytest.warns(UserWarning):
            sp_second, tpls_second, _ = generate_trips(sp_first, tpls_first, gap_threshold=15)

        # column names must be identical (order is not checked)
        assert set(tpls_first.columns) == set(tpls_second.columns)
        assert set(sp_first.columns) == set(sp_second.columns)
Ejemplo n.º 2
0
    def test_trip_wo_geom(self, example_triplegs_higher_gap_threshold):
        """Check that ``add_geometry=False`` yields the default trips minus the ``geom`` column."""
        staypoints, triplegs = example_triplegs_higher_gap_threshold

        # reference result: default call, geometry column removed, wrapped in a plain DataFrame
        _, _, trips_with_geom = generate_trips(staypoints, triplegs, gap_threshold=15)
        expected = pd.DataFrame(trips_with_geom.drop(columns=["geom"]))

        # result under test: geometry creation switched off
        _, _, trips_wo_geom = generate_trips(staypoints, triplegs, gap_threshold=15, add_geometry=False)

        assert_frame_equal(trips_wo_geom, expected)
    def test_general_generation(self):
        """Compare trips generated from the geolife example data with the stored trips.csv."""
        geolife_dir = os.path.join('tests', 'data', 'geolife_long')

        # expected trips: read from disk, parse timestamps, align column names
        trips_loaded = pd.read_csv(os.path.join(geolife_dir, 'trips.csv'))
        for time_col in ('started_at', 'finished_at'):
            trips_loaded[time_col] = pd.to_datetime(trips_loaded[time_col])
        trips_loaded = trips_loaded.rename(columns={
            'origin': 'origin_staypoint_id',
            'destination': 'destination_staypoint_id'
        })

        # actual trips: positionfixes -> staypoints (with activity flag) -> triplegs -> trips
        pfs = read_geolife(geolife_dir)
        spts = pfs.as_positionfixes.extract_staypoints(
            method='sliding', dist_threshold=25, time_threshold=5 * 60)
        spts = spts.as_staypoints.create_activity_flag()
        tpls = pfs.as_positionfixes.extract_triplegs(spts)

        spts, tpls, trips = generate_trips(spts, tpls, gap_threshold=15, id_offset=0)

        pd.testing.assert_frame_equal(trips_loaded, trips)
Ejemplo n.º 4
0
 def test_only_staypoints_in_trip(self):
     """Check that a trip made up only of non-activity staypoints is not created.

     Six consecutive one-hour records are built for a single user; only the stretch that
     contains the tripleg is expected to become a trip (id 0).
     """
     t0 = pd.Timestamp("2021-07-11 8:00:00")
     hour = pd.to_timedelta("1h")
     # (activity flag, record type) in chronological order
     layout = [
         (True, "staypoint"),
         (False, "staypoint"),
         (True, "staypoint"),
         (False, "tripleg"),
         (False, "staypoint"),
         (True, "staypoint"),
     ]
     records = [
         {
             "activity": is_activity,
             "type": kind,
             "user_id": 0,
             "started_at": t0 + pos * hour,
             "finished_at": t0 + pos * hour + hour,
         }
         for pos, (is_activity, kind) in enumerate(layout)
     ]
     combined = pd.DataFrame(records)
     is_staypoint = combined["type"] == "staypoint"
     sp = combined[is_staypoint]
     tpls = combined[combined["type"] == "tripleg"]

     sp_, tpls_, trips = generate_trips(sp, tpls, add_geometry=False)

     # expected trip ids per staypoint; the index is copied over so it is not part of the check
     trip_id_truth = pd.Series([None, None, None, 0, None], dtype="Int64", index=sp_.index)
     assert_series_equal(sp_["trip_id"], trip_id_truth, check_names=False)
     assert (tpls_["trip_id"] == 0).all()
     assert len(trips) == 1
Ejemplo n.º 5
0
    def test_generate_trips_gap_detection(self):
        """Compare generated trips and trip ids with stored ground truth for data containing gaps.

        The gap cases listed for the input files are:
        - activity - tripleg - activity [gap] activity - tripleg - activity
        - activity - tripleg -  [gap]  - tripleg - activity
        - activity - tripleg -  [gap]  activity - tripleg - activity
        - activity - tripleg -  [gap]  activity - tripleg - activity
        - activity - tripleg - activity [gap] - tripleg - tripleg - tripleg - activity
        - tripleg - [gap] - tripleg - tripleg - [gap] - tripleg
        """
        gap_threshold = 15
        data_dir = os.path.join("tests", "data", "trips")

        def read_semicolon_csv(file_name):
            # NOTE(review): `infer_datetime_format` is deprecated since pandas 2.0 -- confirm whether it can go
            return pd.read_csv(
                os.path.join(data_dir, file_name),
                sep=";",
                index_col="id",
                parse_dates=[0, 1],
                infer_datetime_format=True,
                dayfirst=True,
            )

        # staypoints input with a dummy point geometry
        sp_in = read_semicolon_csv("staypoints_gaps.csv")
        sp_in["geom"] = Point(1, 1)
        sp_in = ti.io.read_staypoints_gpd(gpd.GeoDataFrame(sp_in, geometry="geom"), tz="utc")

        # triplegs input with a dummy line geometry
        tpls_in = read_semicolon_csv("triplegs_gaps.csv")
        tpls_in["geom"] = LineString([[1, 1], [2, 2]])
        tpls_in = ti.io.read_triplegs_gpd(gpd.GeoDataFrame(tpls_in, geometry="geom"), tz="utc")

        # ground truth: trips and the joint staypoint/tripleg table
        trips_loaded = ti.read_trips_csv(os.path.join(data_dir, "trips_gaps.csv"), index_col="id", tz="utc")
        sp_tpls_loaded = pd.read_csv(os.path.join(data_dir, "sp_tpls_gaps.csv"), index_col="id")
        for time_col in ("started_at", "started_at_next", "finished_at"):
            sp_tpls_loaded[time_col] = pd.to_datetime(sp_tpls_loaded[time_col], utc=True)

        # run the generation and build the joint debug table from its output
        sp_proc, tpls_proc, trips = generate_trips(sp_in, tpls_in, gap_threshold=gap_threshold, add_geometry=False)
        sp_tpls = _create_debug_sp_tpls_data(sp_proc, tpls_proc, gap_threshold=gap_threshold)

        # trips must match exactly
        pd.testing.assert_frame_equal(trips_loaded, trips)

        # joint table must match up to dtypes (this is where the trip ids are verified)
        assert_frame_equal(sp_tpls_loaded, sp_tpls, check_dtype=False)
Ejemplo n.º 6
0
    def test_generate_trips_index_start(self, example_triplegs):
        """Test that the generated trips index is the consecutive range 0, 1, ..., n-1."""
        sp, tpls = example_triplegs

        # only the trips frame is needed here
        _, _, trips = generate_trips(sp, tpls, gap_threshold=15)

        # guard against a vacuous pass on an empty result
        assert len(trips) > 0
        # every position has to match; `.any()` would already pass on a single coincidental hit
        assert (trips.index == np.arange(len(trips))).all()
Ejemplo n.º 7
0
    def test_generate_trips_missing_link(self, example_triplegs):
        """Check that each trip link column of the staypoints contains at least one missing value."""
        sp, tpls = example_triplegs

        # only the processed staypoints are inspected below
        sp, tpls, _ = generate_trips(sp, tpls, gap_threshold=15)

        for link_col in ("trip_id", "prev_trip_id", "next_trip_id"):
            assert sp[link_col].isna().any()
Ejemplo n.º 8
0
    def test_general_trip_generation(self):
        """Compare trips and the joint staypoint/tripleg table generated from geolife data with stored files."""
        gap_threshold = 15
        geolife_dir = os.path.join('tests', 'data', 'geolife_long')

        # expected trips
        trips_loaded = pd.read_csv(os.path.join(geolife_dir, 'trips.csv'), index_col='id')
        for time_col in ('started_at', 'finished_at'):
            trips_loaded[time_col] = pd.to_datetime(trips_loaded[time_col])

        # expected joint staypoint/tripleg table
        spts_tpls_loaded = pd.read_csv(os.path.join(geolife_dir, 'tpls_spts.csv'), index_col='id')
        for time_col in ('started_at', 'started_at_next', 'finished_at'):
            spts_tpls_loaded[time_col] = pd.to_datetime(spts_tpls_loaded[time_col])

        # positionfixes -> staypoints (with activity flag) -> triplegs
        pfs = read_geolife(geolife_dir)
        spts = pfs.as_positionfixes.extract_staypoints(
            method='sliding', dist_threshold=25, time_threshold=5 * 60)
        spts = spts.as_staypoints.create_activity_flag()
        tpls = pfs.as_positionfixes.extract_triplegs(spts)

        # temporary workaround for the ID bug (issue #56): use the 'id' column as index
        spts = spts.set_index('id')
        tpls = tpls.set_index('id')

        # trips plus the joint debug table built from the processed frames
        spts, tpls, trips = generate_trips(spts, tpls, gap_threshold=gap_threshold, id_offset=0)
        spts_tpls = create_debug_spts_tpls_data(spts, tpls, gap_threshold=gap_threshold)

        # trips must match exactly
        pd.testing.assert_frame_equal(trips_loaded, trips)
        # joint table must match up to dtypes (this is where the trip ids are verified)
        pd.testing.assert_frame_equal(spts_tpls_loaded, spts_tpls, check_dtype=False)
Ejemplo n.º 9
0
    def test_compare_to_old_trip_function(self, example_triplegs):
        """Check that ``generate_trips`` and ``_generate_trips_old`` produce matching frames.

        The old implementation is run on the staypoints/triplegs returned by the new one.
        """
        sp, tpls = example_triplegs

        # new implementation first, then the old one on its output
        sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)
        sp_, tpls_, trips_ = _generate_trips_old(sp, tpls, gap_threshold=15)
        # the geometry column is removed from the new trips before comparing
        trips = trips.drop(columns=["geom"])

        # column order and index dtype are ignored in all three comparisons
        for new_frame, old_frame in ((trips, trips_), (sp, sp_), (tpls, tpls_)):
            assert_frame_equal(new_frame, old_frame, check_like=True, check_index_type=False)
Ejemplo n.º 10
0
    def test_generate_trips_dtype_consistent(self, example_triplegs):
        """Check the dtypes of the user id, the trips index and the generated trip id columns."""
        sp, tpls = example_triplegs

        sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)

        # user ids keep the same dtype in trips as in staypoints; the trips index is plain int64
        assert sp["user_id"].dtype == trips["user_id"].dtype
        assert trips.index.dtype == "int64"

        # trip id columns use the nullable integer dtype
        for sp_col in ("trip_id", "prev_trip_id", "next_trip_id"):
            assert sp[sp_col].dtype == "Int64"
        assert tpls["trip_id"].dtype == "Int64"
Ejemplo n.º 11
0
    def test_generate_trips(self, example_triplegs_higher_gap_threshold):
        """Compare trips generated from the fixture data with the stored geolife trips.csv."""
        # expected trips, read including their geometry column
        trips_loaded = ti.read_trips_csv(
            os.path.join("tests", "data", "geolife_long", "trips.csv"),
            index_col="id",
            geom_col="geom",
            crs=None,
        )

        sp, tpls = example_triplegs_higher_gap_threshold
        sp, tpls, trips = generate_trips(sp, tpls, gap_threshold=15)

        # restrict and order the generated columns before comparing
        compared_columns = [
            "user_id",
            "started_at",
            "finished_at",
            "origin_staypoint_id",
            "destination_staypoint_id",
            "geom",
        ]
        trips = trips[compared_columns]

        assert_geodataframe_equal(trips_loaded, trips)
Ejemplo n.º 12
0
    def test_generate_trips_id_management(self, example_triplegs_higher_gap_threshold):
        """Compare the joint staypoint/tripleg debug table (incl. trip ids) with the stored sp_tpls.csv."""
        gap_threshold = 15

        # expected joint table with parsed timestamps
        sp_tpls_loaded = pd.read_csv(os.path.join("tests", "data", "geolife_long", "sp_tpls.csv"), index_col="id")
        for time_col in ("started_at", "started_at_next", "finished_at"):
            sp_tpls_loaded[time_col] = pd.to_datetime(sp_tpls_loaded[time_col])

        sp, tpls = example_triplegs_higher_gap_threshold

        # generate trips, then rebuild the joint table from the processed frames
        sp, tpls, _ = generate_trips(sp, tpls, gap_threshold=gap_threshold)
        sp_tpls = _create_debug_sp_tpls_data(sp, tpls, gap_threshold=gap_threshold)

        # values must match; dtypes are not compared
        assert_frame_equal(sp_tpls_loaded, sp_tpls, check_dtype=False)
Ejemplo n.º 13
0
    def test_sp_tpls_index(self):
        """Check that staypoint and tripleg indexes (including their names) survive trip generation."""
        t0 = pd.Timestamp("2021-07-11 8:00:00")
        hour = pd.to_timedelta("1h")
        # (activity flag, record type) in chronological order, one hour each
        layout = [
            (True, "staypoint"),
            (False, "tripleg"),
            (False, "staypoint"),
            (False, "tripleg"),
            (True, "staypoint"),
        ]
        records = [
            {
                "activity": is_activity,
                "type": kind,
                "user_id": 0,
                "started_at": t0 + pos * hour,
                "finished_at": t0 + pos * hour + hour,
            }
            for pos, (is_activity, kind) in enumerate(layout)
        ]
        combined = pd.DataFrame(records)

        sp = combined[combined["type"] == "staypoint"]
        tpls = combined[combined["type"] != "staypoint"]
        # unusual index names make it visible if an index gets rebuilt or renamed
        tpls.index.name = "something_long_and_obscure"
        sp.index.name = "even_obscurer"

        sp_, tpls_, _ = generate_trips(sp, tpls, add_geometry=False)

        assert_index_equal(tpls.index, tpls_.index)
        assert_index_equal(sp.index, sp_.index)
Ejemplo n.º 14
0
    def test_gap_detection(self):
        """Compare generated trips and trip ids with stored ground truth for data containing gaps.

        The gap cases listed for the input files are:
        - activity - tripleg - activity [gap] activity - tripleg - activity
        - activity - tripleg -  [gap]  - tripleg - activity
        - activity - tripleg -  [gap]  activity - tripleg - activity
        - activity - tripleg -  [gap]  activity - tripleg - activity
        - activity - tripleg - activity [gap] - tripleg - tripleg - tripleg - activity
        - tripleg - [gap] - tripleg - tripleg - [gap] - tripleg
        """
        gap_threshold = 15
        data_dir = os.path.join('.', 'tests', 'data', 'trips')

        def read_semicolon_csv(file_name):
            # NOTE(review): `infer_datetime_format` is deprecated since pandas 2.0 -- confirm whether it can go
            return pd.read_csv(os.path.join(data_dir, file_name),
                               sep=';',
                               index_col='id',
                               parse_dates=[0, 1],
                               infer_datetime_format=True,
                               dayfirst=True)

        def read_with_timestamps(file_name, time_cols):
            # plain comma-separated ground truth file whose time columns are parsed afterwards
            frame = pd.read_csv(os.path.join(data_dir, file_name), index_col='id')
            for time_col in time_cols:
                frame[time_col] = pd.to_datetime(frame[time_col])
            return frame

        # staypoints input with a dummy point geometry
        spts_in = read_semicolon_csv('staypoints_gaps.csv')
        spts_in['geom'] = Point(1, 1)
        spts_in = gpd.GeoDataFrame(spts_in, geometry='geom')
        assert spts_in.as_staypoints

        # triplegs input with a dummy line geometry
        tpls_in = read_semicolon_csv('triplegs_gaps.csv')
        tpls_in['geom'] = LineString([[1, 1], [2, 2]])
        tpls_in = gpd.GeoDataFrame(tpls_in, geometry='geom')
        assert tpls_in.as_triplegs

        # ground truth: trips and the joint staypoint/tripleg table
        trips_loaded = read_with_timestamps('trips_gaps.csv', ('started_at', 'finished_at'))
        spts_tpls_loaded = read_with_timestamps(
            'stps_tpls_gaps.csv', ('started_at', 'started_at_next', 'finished_at'))

        # run the generation and build the joint debug table from its output
        spts_proc, tpls_proc, trips = generate_trips(spts_in,
                                                     tpls_in,
                                                     gap_threshold=gap_threshold,
                                                     id_offset=0)
        spts_tpls = create_debug_spts_tpls_data(spts_proc,
                                                tpls_proc,
                                                gap_threshold=gap_threshold)

        # trips must match exactly
        pd.testing.assert_frame_equal(trips_loaded, trips)

        # joint table must match up to dtypes (this is where the trip ids are verified)
        pd.testing.assert_frame_equal(spts_tpls_loaded, spts_tpls, check_dtype=False)