def test_get_metadata_for_raw_files_discovered_after_datetime(self):
    """Exercises the discovery-time lower bound of the raw file lookup.

    Three raw files are registered at three frozen instants: two tagged
    'file_tag' (the earlier one is also registered with the other-region
    manager) and one tagged 'other_tag'. Querying 'file_tag' with no lower
    bound must return both 'file_tag' rows for this region; querying with
    a bound that falls between the two discovery times must return only
    the later row.
    """
    raw_type = GcsfsDirectIngestFileType.RAW_DATA

    # Earliest 'file_tag' file; registered for both regions.
    with freeze_time('2015-01-02T03:05:05'):
        first_path = self._make_unprocessed_path(
            'bucket/file_tag.csv', raw_type,
            dt=datetime.datetime.utcnow())
        self.metadata_manager.register_new_file(first_path)
        self.metadata_manager_other_region.register_new_file(first_path)

    # A file with a different tag; it is absent from the expected results.
    with freeze_time('2015-01-02T03:06:06'):
        other_tag_path = self._make_unprocessed_path(
            'bucket/other_tag.csv', raw_type,
            dt=datetime.datetime.utcnow())
        self.metadata_manager.register_new_file(other_tag_path)

    # Second 'file_tag' file, registered two minutes after the first.
    with freeze_time('2015-01-02T03:07:07'):
        second_path = self._make_unprocessed_path(
            'bucket/file_tag.csv', raw_type,
            dt=datetime.datetime.utcnow())
        self.metadata_manager.register_new_file(second_path)

    def build_expected(timestamp, file_name):
        # Discovery time and the contained-datetimes upper bound are the
        # same instant for every expected row in this test.
        return DirectIngestRawFileMetadata.new_with_defaults(
            region_code=self.metadata_manager.region_code,
            file_tag='file_tag',
            discovery_time=timestamp,
            normalized_file_name=file_name,
            datetimes_contained_upper_bound_inclusive=timestamp)

    earlier_metadata = build_expected(
        datetime.datetime(2015, 1, 2, 3, 5, 5),
        'unprocessed_2015-01-02T03:05:05:000000_raw_file_tag.csv')
    later_metadata = build_expected(
        datetime.datetime(2015, 1, 2, 3, 7, 7),
        'unprocessed_2015-01-02T03:07:07:000000_raw_file_tag.csv')

    # No lower bound: both 'file_tag' rows for this region come back.
    unbounded_results = (
        self.metadata_manager
        .get_metadata_for_raw_files_discovered_after_datetime(
            'file_tag', discovery_time_lower_bound_exclusive=None))
    self.assertEqual([earlier_metadata, later_metadata], unbounded_results)

    # Bound at 03:07:00 lies after the first discovery and before the
    # second, so only the later row is expected.
    cutoff = datetime.datetime(2015, 1, 2, 3, 7, 0)
    bounded_results = (
        self.metadata_manager
        .get_metadata_for_raw_files_discovered_after_datetime(
            'file_tag', discovery_time_lower_bound_exclusive=cutoff))
    self.assertEqual([later_metadata], bounded_results)
def test_get_raw_file_metadata_unique_to_state(self):
    """Checks that raw file metadata is looked up per region.

    The same raw path is marked as discovered first by the other-region
    manager and then by the manager under test. The metadata fetched
    through the manager under test must carry that manager's region code
    and the discovery time recorded by its own call.
    """
    # Arrange
    raw_unprocessed_path = self._make_unprocessed_path(
        'bucket/file_tag.csv', GcsfsDirectIngestFileType.RAW_DATA)

    # NOTE(review): the sibling test registers files through
    # register_new_file(); confirm both entry points exist on the manager.
    self.metadata_manager_other_region.mark_file_as_discovered(
        raw_unprocessed_path)

    # Act
    # Pin the clock for the call under test: the assertion below expects a
    # fixed discovery_time, which differs from the timestamp embedded in
    # the file name and so cannot be derived from the path itself.
    with freeze_time('2015-01-02T03:04:06'):
        self.metadata_manager.mark_file_as_discovered(raw_unprocessed_path)
    metadata = self.metadata_manager.get_file_metadata(
        raw_unprocessed_path)

    # Assert
    # The file-name timestamp (03:03:03.000003) is presumably the default
    # used by _make_unprocessed_path when no dt is passed - TODO confirm.
    expected_metadata = DirectIngestRawFileMetadata.new_with_defaults(
        region_code=self.metadata_manager.region_code,
        file_tag='file_tag',
        discovery_time=datetime.datetime(2015, 1, 2, 3, 4, 6),
        normalized_file_name=
        'unprocessed_2015-01-02T03:03:03:000003_raw_file_tag.csv',
        processed_time=None,
        datetimes_contained_upper_bound_inclusive=datetime.datetime(
            2015, 1, 2, 3, 3, 3, 3))

    self.assertIsInstance(metadata, DirectIngestRawFileMetadata)
    self.assertIsNotNone(metadata.file_id)
    self.assertEqual(expected_metadata, metadata)