def test_unit_minutes(images):
    """Check the rows returned for a 30-minute interval.

    Calls ``remove_duplicates`` with ``interval=30`` and ``unit="minutes"``
    and compares the whole returned frame (values, dtypes and index)
    against a hand-written expectation.
    """
    anteater = "Myrmecophaga tridactyla"
    dove = "Leptotila verreauxi"

    # Columns are declared in the order the expected frame must have them.
    deployment_ids = ["001"] * 5 + ["002"] * 2
    species = [anteater] * 4 + [dove] + [anteater] * 2
    # NOTE(review): timestamps are compared as plain strings here, which
    # presumably matches the dtype of the `images` fixture -- confirm.
    timestamps = [
        "2020-12-01 10:13:13",
        "2020-12-01 14:22:38",
        "2020-12-03 08:15:57",
        "2020-12-15 06:12:32",
        "2020-12-01 10:14:04",
        "2020-12-01 14:08:21",
        "2020-12-01 19:21:34",
    ]
    expected = pd.DataFrame(
        {
            "deployment_id": deployment_ids,
            "scientific_name": species,
            "timestamp": timestamps,
        }
    )

    result = remove_duplicates(
        images,
        interval=30,
        unit="minutes",
        species_col="scientific_name",
    )

    pd.testing.assert_frame_equal(result, expected)
def test_intact_input(images):
    """Check that ``remove_duplicates`` leaves its input frame unmodified."""
    # Snapshot taken before the call; the return value is deliberately
    # discarded because only the state of the input matters here.
    snapshot = images.copy()

    remove_duplicates(images, species_col="scientific_name")

    pd.testing.assert_frame_equal(snapshot, images)
def test_keep_index(images):
    """Check the index of the result when ``reset_index=False`` is passed."""
    # Presumably these are the fixture's original row labels for the rows
    # that survive de-duplication -- verify against the `images` fixture.
    surviving_labels = pd.Index([0, 3, 4, 5, 6, 7, 9], dtype="int64")

    deduplicated = remove_duplicates(
        images,
        reset_index=False,
        species_col="scientific_name",
    )

    pd.testing.assert_index_equal(deduplicated.index, surviving_labels)