Example #1
0
    def test_get_metadata_for_raw_files_discovered_after_datetime(self):
        """Checks the discovery-time lower bound when listing raw file metadata.

        Three raw files are registered at three frozen instants: two at
        'bucket/file_tag.csv' and one at 'bucket/other_tag.csv'. The first
        one is additionally registered with the other region's manager. The
        query for 'file_tag' on this region's manager must return both
        'file_tag' entries when no lower bound is given, and only the later
        one when the bound falls between the two discovery times.
        """
        primary = self.metadata_manager
        other_region = self.metadata_manager_other_region

        # (frozen clock, bucket path, managers registering the file, in order)
        registrations = (
            ('2015-01-02T03:05:05', 'bucket/file_tag.csv',
             (primary, other_region)),
            ('2015-01-02T03:06:06', 'bucket/other_tag.csv', (primary,)),
            ('2015-01-02T03:07:07', 'bucket/file_tag.csv', (primary,)),
        )
        for frozen_instant, bucket_path, managers in registrations:
            with freeze_time(frozen_instant):
                unprocessed_path = self._make_unprocessed_path(
                    bucket_path,
                    GcsfsDirectIngestFileType.RAW_DATA,
                    dt=datetime.datetime.utcnow())
                for manager in managers:
                    manager.register_new_file(unprocessed_path)

        # (discovery time, normalized file name) of each expected entry.
        expected_rows = (
            (datetime.datetime(2015, 1, 2, 3, 5, 5),
             'unprocessed_2015-01-02T03:05:05:000000_raw_file_tag.csv'),
            (datetime.datetime(2015, 1, 2, 3, 7, 7),
             'unprocessed_2015-01-02T03:07:07:000000_raw_file_tag.csv'),
        )
        expected_list = [
            DirectIngestRawFileMetadata.new_with_defaults(
                region_code=primary.region_code,
                file_tag='file_tag',
                discovery_time=discovered_at,
                normalized_file_name=file_name,
                datetimes_contained_upper_bound_inclusive=discovered_at)
            for discovered_at, file_name in expected_rows
        ]

        fetch_discovered_after = (
            primary.get_metadata_for_raw_files_discovered_after_datetime)

        # No lower bound: both 'file_tag' entries are expected back.
        self.assertEqual(
            expected_list,
            fetch_discovered_after(
                'file_tag', discovery_time_lower_bound_exclusive=None))

        # A bound between the two discovery times keeps only the later entry.
        lower_bound = datetime.datetime(2015, 1, 2, 3, 7, 0)
        self.assertEqual(
            expected_list[-1:],
            fetch_discovered_after(
                'file_tag',
                discovery_time_lower_bound_exclusive=lower_bound))

    def test_get_raw_file_metadata_unique_to_state(self):
        """Checks that fetched raw file metadata belongs to this manager's region.

        The same path is marked as discovered through the other region's
        manager first and then through this one; the metadata fetched from
        this manager must carry this manager's region code.
        """
        # Arrange
        raw_path = self._make_unprocessed_path(
            'bucket/file_tag.csv', GcsfsDirectIngestFileType.RAW_DATA)
        self.metadata_manager_other_region.mark_file_as_discovered(raw_path)

        # Act
        self.metadata_manager.mark_file_as_discovered(raw_path)
        fetched = self.metadata_manager.get_file_metadata(raw_path)

        # Assert
        # NOTE(review): the expected discovery time and the timestamp embedded
        # in the file name are not set in this method -- presumably they come
        # from a frozen clock / helper default defined elsewhere; confirm.
        contained_upper_bound = datetime.datetime(2015, 1, 2, 3, 3, 3, 3)
        expected = DirectIngestRawFileMetadata.new_with_defaults(
            region_code=self.metadata_manager.region_code,
            file_tag='file_tag',
            discovery_time=datetime.datetime(2015, 1, 2, 3, 4, 6),
            normalized_file_name=
            'unprocessed_2015-01-02T03:03:03:000003_raw_file_tag.csv',
            processed_time=None,
            datetimes_contained_upper_bound_inclusive=contained_upper_bound)

        self.assertIsInstance(fetched, DirectIngestRawFileMetadata)
        self.assertIsNotNone(fetched.file_id)

        self.assertEqual(expected, fetched)