def test_register_file_count(self):
        """
        Verify that a file count registered on a location tracker is the value
        found on the tracker's location record afterwards.
        :return:
        """
        file_count = 123

        tracker = LocationTracker(
            location_path="/tmp/testing/test_dir", data_store=self.data_store
        )
        tracker.register_file_count(file_count=file_count)

        self.assertEqual(file_count, tracker.location.location_file_count)
    def test_location_tracker_no_data_store(self):
        """
        Testing that constructing a LocationTracker without a data store raises an
        error whose message states that the data store is not set.
        :return:
        """

        with self.assertRaises(Exception) as context:
            LocationTracker(location_name="testing", location_path="fake path")

        # assertIn reports both the expected fragment and the actual message on
        # failure, which assertTrue(... in ...) does not.
        self.assertIn("Data store is not set.", str(context.exception))
    def test_determine_location_name_duplicate_name_local(self):
        """
        Testing that if two different local filesystem locations produce the same
        location name that the second location will append a number to ensure
        uniqueness.
        :return:
        """
        expected_result = "local - test_dir - 1"

        # The first tracker is never inspected; it is created so that a location
        # whose last directory is "test_dir" is already registered in the data
        # store before the second, differently-rooted path is registered.
        location = LocationTracker(
            location_path="/tmp/duplicate_testing/test_dir/file.txt",
            data_store=self.data_store,
        )

        dupe_location = LocationTracker(
            location_path="/tmp/duplicate_testing_another/test_dir/file.txt",
            data_store=self.data_store,
        )

        given_result = dupe_location.location.location_name

        self.assertEqual(expected_result, given_result)
    def test_determine_location_name_duplicate_name_s3(self):
        """
        Verify that when two distinct s3 paths in the same bucket end in the same
        directory, the second tracker's location name carries a numeric suffix.
        :return:
        """
        bucket_url = "https://duplicate-test.s3.amazonaws.com"

        # Registered first; only the second tracker's name is asserted on.
        location = LocationTracker(
            data_store=self.data_store,
            location_path=bucket_url + "/this/is/a/test/dir/file.txt",
        )

        dupe_location = LocationTracker(
            data_store=self.data_store,
            location_path=bucket_url + "/this/is/another/test/dir/file.txt",
        )

        self.assertEqual(
            "s3 duplicate-test - dir - 1", dupe_location.location.location_name
        )
    def test_derive_location_type_s3(self):
        """
        Verify that an s3:// location path yields a location type named "s3".
        :return:
        """
        tracker = LocationTracker(
            location_path="s3://tmp/testing/test_dir", data_store=self.data_store
        )

        self.assertEqual("s3", tracker.location_type.location_type_name)
    def test_derive_location_type_local(self):
        """
        Verify that a plain filesystem path yields a location type named
        "local filesystem".
        :return:
        """
        tracker = LocationTracker(
            location_path="/tmp/testing/test_dir", data_store=self.data_store
        )

        self.assertEqual(
            "local filesystem", tracker.location_type.location_type_name
        )
    def test_derive_location_name_s3(self):
        """
        Verify that, with no location name given, an s3:// path with a trailing
        slash produces a name made of the s3 prefix, the bucket and the last
        directory.
        :return:
        """
        tracker = LocationTracker(
            location_path="s3://tmp/testing/test_dir/", data_store=self.data_store
        )

        self.assertEqual("s3 tmp - test_dir", tracker.location_name)
    def test_derive_location_name_none(self):
        """
        Verify that, with no location name given, a local path with a trailing
        slash produces a name made of the "local" prefix and the last directory.
        :return:
        """
        tracker = LocationTracker(
            location_path="/tmp/testing/test_dir/", data_store=self.data_store
        )

        self.assertEqual("local - test_dir", tracker.location_name)
    def test_derive_location_name_no_trailing_slash_s3(self):
        """
        Verify that, with no location name given, an s3:// path without a trailing
        slash still uses its last directory in the derived location name.
        :return:
        """
        tracker = LocationTracker(
            location_path="s3://tmp/testing/test_dir", data_store=self.data_store
        )

        self.assertEqual("s3 tmp - test_dir", tracker.location_name)
    def test_determine_location_name_file_not_part_local(self):
        """
        Verify that a trailing filename on a local path is left out of the
        location name, which uses the containing directory instead.
        :return:
        """
        tracker = LocationTracker(
            data_store=self.data_store, location_path="/local/dir/path/text.txt"
        )

        self.assertEqual("local - path", tracker.location.location_name)
    def test_determine_location_bucket_name_local(self):
        """
        Verify that a local (non-s3) location path leaves the location's bucket
        name unset.
        :return:
        """
        tracker = LocationTracker(
            data_store=self.data_store, location_path="/local/dir/path/text.txt"
        )
        bucket_name = tracker.location.location_bucket_name

        self.assertEqual(None, bucket_name)
    def test_determine_location_name_file_not_part_s3(self):
        """
        Verify that a trailing filename on an s3 URL is left out of the location
        name, which uses the bucket and the containing directory instead.
        :return:
        """
        s3_url = "https://test-bucket.s3.amazonaws.com/this/is/a/test/dir/file.txt"

        tracker = LocationTracker(location_path=s3_url, data_store=self.data_store)

        self.assertEqual("s3 test-bucket - dir", tracker.location.location_name)
    def test_determine_location_bucket_name_s3(self):
        """
        Verify that an s3 URL results in the bucket name being recorded on the
        location.
        :return:
        """
        s3_url = "https://test-bucket.s3.amazonaws.com/this/is/a/test/dir/file.txt"

        tracker = LocationTracker(location_path=s3_url, data_store=self.data_store)
        bucket_name = tracker.location.location_bucket_name

        self.assertEqual("test-bucket", bucket_name)
    def initialize_extract_tracker(self):
        """
        Initialize extract_tracker object based on values passed on call.

        If ``self.extract_id`` is set, the matching Extract record is looked up
        (``create=False``) and the tracker's attributes are repopulated from it;
        nothing is committed on that path.

        Otherwise a new extract is registered through
        ``data_store.get_or_create_item`` from the filename, location,
        compression type and filetype held on the tracker.  Dataset types and
        sources found on ``self.process_run`` are then associated with it, the
        status and file size are applied, and the session is committed.

        :raises Exception: if neither an extract id nor a filename is provided, if
                           neither a location object nor a location path is
                           provided, or if the lookup of the provided compression
                           type or filetype fails.
        :return:
        """

        if self.extract_id is not None:
            self.logger.info(
                "Extract id provided.  Attempting to reconstruct.")

            self.extract = self.data_store.get_or_create_item(
                model=Extract, extract_id=self.extract_id, create=False)
            self.filename = self.extract.extract_filename
            self.location = self.extract.locations
            self.compression_type = self.extract.compression_type
            if self.compression_type is None:
                self.compression_type_id = None
            else:
                # NOTE(review): the registration path below reads
                # `extract_compression_type_id` from the compression type, while
                # this path reads `compression_type_id` -- confirm which
                # attribute the model actually exposes.
                self.compression_type_id = self.compression_type.compression_type_id
            self.filetype = self.extract.extract_filetype
            self.full_filename = self.get_full_filename()
            self.dataset_types = self.get_dataset_types()
            self.extract_process = self.retrieve_extract_process()
            self.sources = self.extract.extract_sources

            # The size is only rebuilt as "<size> <unit>" when both parts exist.
            if (self.extract.extract_filesize is not None
                    and self.extract.extract_filesize_type is not None):
                self.file_size = "%s %s" % (
                    self.extract.extract_filesize,
                    self.extract.extract_filesize_type,
                )
            else:
                self.file_size = None

            return

        # No extract id: register a new extract from the provided values.
        if self.filename is None:
            error_msg = "Filename must be provided."
            self.logger.error(error_msg)
            raise Exception(error_msg)

        if self.location is not None:
            # An already-built location object is used as is.
            self.logger.info("Location object provided.")
        elif self.location_path is not None:
            self.logger.info(
                "Location path provided.  Creating Location object.")
            self.location = LocationTracker(
                location_name=self.location_name,
                location_path=self.location_path,
                data_store=self.data_store,
            )
        else:
            raise Exception(
                "A location object or location_path must be provided.")

        if self.compression_type is not None:
            self.logger.info("Finding compression type.")
            try:
                self.compression_type = self.data_store.get_or_create_item(
                    model=ExtractCompressionType,
                    create=False,
                    extract_compression_type=self.compression_type,
                )
            except Exception as err:
                # The assignment above did not happen, so self.compression_type
                # still holds the value supplied by the caller.
                error_msg = ("%s is not a valid compression type." %
                             self.compression_type)
                self.logger.error(error_msg)
                raise Exception(error_msg) from err

            self.compression_type_id = (
                self.compression_type.extract_compression_type_id)

        if self.filetype is not None:
            self.logger.info(
                "File type provided.  Verifying it is a valid filetype.")
            try:
                self.filetype = self.data_store.get_or_create_item(
                    model=ExtractFileType,
                    create=False,
                    extract_filetype=self.filetype,
                )
            except Exception as err:
                error_msg = "%s is not a valid file type." % self.filetype
                self.logger.error(error_msg)
                raise Exception(error_msg) from err
        else:
            # Need to try to determine the filetype based on the extension of the filename.
            file_extension = os.path.splitext(self.filename)[1]
            file_extension = file_extension.replace(".", "")
            self.logger.info(
                "Trying to find record for file extension: %s", file_extension)
            self.filetype = self.data_store.get_or_create_item(
                model=ExtractFileType,
                create=False,
                extract_filetype_code=file_extension,
            )

        self.logger.info("Registering extract.")

        self.extract = self.data_store.get_or_create_item(
            model=Extract,
            extract_filename=self.filename,
            extract_location_id=self.location.location.location_id,
            extract_compression_type_id=self.compression_type_id,
            extract_filetype_id=self.filetype.extract_filetype_id,
        )

        # NOTE(review): self.location_path may still be None here when a location
        # object was supplied instead of a path -- confirm get_full_filename
        # handles that.
        self.full_filename = self.get_full_filename(
            location_path=self.location_path)

        self.extract_process = self.retrieve_extract_process()

        if self.process_run.dataset_types is not None:
            self.logger.info("Associating dataset type(s) with extract.")
            self.dataset_types = self.register_extract_dataset_types(
                dataset_types=self.process_run.dataset_types)

        # Source objects take precedence over plain sources.
        if self.process_run.source_objects is not None:
            self.logger.info(
                "Associating source system(s) object(s) with extract and location."
            )
            self.source_objects = self.register_extract_sources(
                source_objects=self.process_run.source_objects)
            self.sources = self.source_objects

        elif self.process_run.process.sources is not None:
            self.logger.info(
                "Associating source system(s) with extract and location.")

            # NOTE(review): the condition checks process_run.process.sources but
            # process_run.sources is what gets passed -- confirm this is intended.
            self.sources = self.register_extract_sources(
                sources=self.process_run.sources)

        else:
            self.logger.info("No source system(s) to associate to.")

        if self.status is not None:
            self.logger.info("Status was provided by user.")
            self.change_extract_status(new_status=self.status)
        else:
            self.logger.info("Status was not provided.  Initializing.")
            self.extract.extract_status_id = self.extract_status_initializing

        if self.file_size is not None:
            split_filesize = self.file_size_splitter(
                file_size=self.file_size)

            # file_size_splitter's result is used as (size, unit).
            self.extract.extract_filesize = split_filesize[0]
            self.extract.extract_filesize_type = split_filesize[1]

        self.session.commit()