Exemple #1
0
    def test_process_file_that_needs_splitting(self):
        """Checks the split files produced when the split line limit is 1.

        After processing all fixture files with ``file_split_line_limit`` set
        to 1, exactly one file tag ('tagC') is expected to have paths in the
        split-file storage subdirectory: two of them, with the filename
        suffixes '00001_file_split_size1' and '00002_file_split_size1'.
        """
        controller = build_gcsfs_controller_for_tests(
            StateTestGcsfsDirectIngestController,
            self.FIXTURE_PATH_PREFIX,
            run_async=True)

        # Set the split line limit to 1.
        controller.file_split_line_limit = 1

        # pylint:disable=protected-access
        sorted_tags = sorted(controller._get_file_tag_rank_list())

        add_paths_with_tags_and_process(
            self, controller, sorted_tags, pre_normalize_filename=True)

        # Group every path in the split-file storage subdir by its file tag.
        split_paths_by_tag = defaultdict(list)
        for fs_path in controller.fs.all_paths:
            in_split_subdir = self._path_in_split_file_storage_subdir(
                fs_path, controller)
            if not in_split_subdir:
                continue
            tag = filename_parts_from_path(fs_path).file_tag
            split_paths_by_tag[tag].append(fs_path)

        self.assertEqual(1, len(split_paths_by_tag))
        tag_c_paths = split_paths_by_tag['tagC']
        self.assertEqual(2, len(tag_c_paths))

        suffixes = set()
        for split_path in tag_c_paths:
            suffixes.add(filename_parts_from_path(split_path).filename_suffix)

        self.assertEqual(
            suffixes,
            {'00001_file_split_size1', '00002_file_split_size1'})
Exemple #2
0
    def test_state_unexpected_tag(self):
        """Processes a batch of tagged files that includes 'Unexpected_Tag',
        passing that tag to the helper as an unexpected tag."""
        unexpected_tags = ['Unexpected_Tag']
        all_tags = ['tagA', *unexpected_tags, 'tagB', 'tagC']

        controller = build_controller_for_tests(
            StateTestGcsfsDirectIngestController,
            self.FIXTURE_PATH_PREFIX,
            run_async=True,
        )

        add_paths_with_tags_and_process(
            self, controller, all_tags, unexpected_tags)
Exemple #3
0
    def run_async_file_order_test_for_controller_cls(self, controller_cls):
        """Adds every expected file to the mock fs and runs the controller,
        checking that all jobs complete in the proper order.

        The file tags are handed to the helper in reverse-sorted order.
        """
        controller = build_controller_for_tests(
            controller_cls, self.FIXTURE_PATH_PREFIX, run_async=True)

        # pylint:disable=protected-access
        descending_tags = sorted(controller._get_file_tag_rank_list())
        descending_tags.reverse()

        add_paths_with_tags_and_process(self, controller, descending_tags)
Exemple #4
0
    def test_process_already_normalized_paths(self):
        """Processes the fixture files for every file tag (in sorted order)
        with ``pre_normalize_filename=True``."""
        controller = build_gcsfs_controller_for_tests(
            StateTestGcsfsDirectIngestController,
            self.FIXTURE_PATH_PREFIX,
            run_async=True,
        )

        # pylint:disable=protected-access
        sorted_tags = sorted(controller._get_file_tag_rank_list())

        add_paths_with_tags_and_process(
            self, controller, sorted_tags, pre_normalize_filename=True)
Exemple #5
0
    def run_async_file_order_test_for_controller_cls(self, controller_cls):
        """Adds every expected file to the mock fs and runs the controller,
        checking that all jobs complete in the proper order.

        The file tags are handed to the helper in reverse-sorted order.
        Afterwards the controller must be a
        BaseTestCsvGcsfsDirectIngestController that reports no temp paths
        left on disk.
        """
        controller = build_gcsfs_controller_for_tests(
            controller_cls, self.FIXTURE_PATH_PREFIX, run_async=True)

        # pylint:disable=protected-access
        descending_tags = sorted(controller._get_file_tag_rank_list())
        descending_tags.reverse()

        add_paths_with_tags_and_process(self, controller, descending_tags)

        self.assertIsInstance(
            controller, BaseTestCsvGcsfsDirectIngestController)
        self.assertFalse(controller.has_temp_paths_in_disk())
Exemple #6
0
    def test_move_files_from_previous_days_to_storage(self):
        """Checks that a processed file from a previous day is moved to
        storage under that day's directory.

        A processed 'tagB' file and an 'Unexpected_Tag' file, both dated
        2019-09-15, are added to the test file system before the regular
        fixture files are processed. Afterwards exactly one path is expected
        under the storage directory for that date, and it must be the 'tagB'
        file.
        """
        controller = build_gcsfs_controller_for_tests(
            StateTestGcsfsDirectIngestController,
            self.FIXTURE_PATH_PREFIX,
            run_async=False)

        previous_date = '2019-09-15'
        previous_dt = datetime.datetime.fromisoformat(previous_date)

        file_path_from_prev_day = path_for_fixture_file(
            controller,
            'tagB.csv',
            should_normalize=True,
            dt=previous_dt)

        # pylint:disable=protected-access
        processed_file_from_prev_day = \
            controller.fs._to_processed_file_path(file_path_from_prev_day)

        unexpected_file_path_from_prev_day = path_for_fixture_file(
            controller,
            'Unexpected_Tag.csv',
            should_normalize=True,
            dt=previous_dt)

        controller.fs.test_add_path(processed_file_from_prev_day)
        controller.fs.test_add_path(unexpected_file_path_from_prev_day)

        # pylint:disable=protected-access
        file_tags = list(sorted(controller._get_file_tag_rank_list()))

        # This will test that all paths get moved to storage,
        # except the unexpected tag.
        add_paths_with_tags_and_process(self,
                                        controller,
                                        file_tags,
                                        unexpected_tags=['Unexpected_Tag'])

        # The storage prefix is the same for every path, so build it once.
        expected_storage_dir_str = os.path.join(
            controller.storage_directory_path.abs_path(), previous_date)
        paths_from_prev_date = [
            path for path in controller.fs.all_paths
            if path.abs_path().startswith(expected_storage_dir_str)
        ]

        # assertEqual, not assertTrue: assertTrue(len(...), 1) treats the 1 as
        # the failure message and passes for any non-empty list.
        self.assertEqual(1, len(paths_from_prev_date))
        self.assertIn('tagB', paths_from_prev_date[0].abs_path())
Exemple #7
0
    def test_state_unexpected_tag(self):
        """Processes a batch of tagged files that includes 'Unexpected_Tag'
        and checks that the file system ends up with no split files."""
        unexpected_tags = ['Unexpected_Tag']
        all_tags = ['tagA', *unexpected_tags, 'tagB', 'tagC']

        controller = build_gcsfs_controller_for_tests(
            StateTestGcsfsDirectIngestController,
            self.FIXTURE_PATH_PREFIX,
            run_async=True,
        )

        add_paths_with_tags_and_process(
            self, controller, all_tags, unexpected_tags)

        # Collect every path that the file system reports as a split file.
        fs = controller.fs
        split_paths = set()
        for candidate in fs.all_paths:
            if fs.is_split_file(candidate):
                split_paths.add(candidate)

        self.assertFalse(split_paths)