コード例 #1
0
class S3IntegrationTest(BaseIntegrationTest.BaseTestCases):
    """
    Integration tests for S3 that run against a real bucket.

    Every test works below a unique ``s3://<S3_TEST_BUCKET>/<uuid>`` prefix
    created in ``setUp``; the ``test`` directory under that prefix is removed
    again in ``tearDown``.

    In order to run the tests, the following environment variables must be
    set: AWS_TEST_ACCESS_KEY_ID, AWS_TEST_SECRET_ACCESS_KEY and
    S3_TEST_BUCKET. If any of them is missing, the tests are skipped.
    (Earlier notes list AWS_DEFAULT_REGION as optional; it is not read by
    this class.)
    """

    # Upper bound, in seconds, on how long a test polls S3 for a deletion to
    # become visible before it gives up and lets the assertion fail.
    CONSISTENCY_TIMEOUT = 60

    def setUp(self):
        """Skip without credentials, else configure stor and the test paths."""
        super(S3IntegrationTest, self).setUp()

        if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID')
                and os.environ.get('AWS_TEST_SECRET_ACCESS_KEY')
                and os.environ.get('S3_TEST_BUCKET')):
            raise unittest.SkipTest(
                'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET '
                ' env vars not set. Skipping integration test')

        # Disable loggers so nose output is clean
        logging.getLogger('botocore').setLevel(logging.CRITICAL)
        test_bucket = os.environ['S3_TEST_BUCKET']
        # A fresh uuid prefix keeps separate runs from touching each other's
        # objects inside the shared bucket
        self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format(
            test_bucket=test_bucket, uuid=uuid.uuid4()))
        self.test_dir = self.test_bucket / 'test'
        stor.settings.update({
            's3': {
                'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'],
                'aws_secret_access_key':
                os.environ['AWS_TEST_SECRET_ACCESS_KEY']
            }
        })

    def tearDown(self):
        """Remove everything the test uploaded below ``self.test_dir``."""
        super(S3IntegrationTest, self).tearDown()
        self.test_dir.rmtree()

    def test_over_1000_files(self):
        """Listing and downloading work for more than 1000 objects."""
        num_test_objs = 1234
        min_obj_size = 0

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        self.assertEqual(num_test_objs, len(self.test_dir.list()))
        self.assertEqual(1200, len(self.test_dir.list(limit=1200)))
        self.assertTrue(self.test_dir.isdir())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('./')
            self.assertEqual(num_test_objs, len(os.listdir(tmp_d)))

    def test_list_methods(self):
        """Exercise list() and listdir() on missing and populated paths."""
        fake_bucket = Path('s3://stor-test-bucket2')
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.list()
        fake_folder = self.test_bucket / 'not_a_dir'
        self.assertEqual([], fake_folder.list())

        with NamedTemporaryDirectory(change_dir=True):
            open('file1.txt', 'w').close()
            open('file2.txt', 'w').close()
            os.mkdir('nested_dir')
            os.mkdir('nested_dir/dir')
            open('nested_dir/dir/file3.txt', 'w').close()
            self.test_dir.upload(['.'])

        file_list = self.test_dir.list()
        starts_with_list = self.test_bucket.list(starts_with='test')
        self.assertEqual(set(file_list), set(starts_with_list))
        self.assertEqual(
            set(file_list),
            {
                self.test_dir / 'file1.txt', self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/dir/file3.txt'
            })

        dir_list = self.test_dir.listdir()
        self.assertEqual(
            set(dir_list),
            {
                self.test_dir / 'file1.txt', self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/'
            })

        # A trailing slash on the directory must not change the listing
        self.assertEqual(self.test_dir.listdir(),
                         (self.test_dir + '/').listdir())

    def test_is_methods(self):
        """
        Tests is methods, exists(), and getsize().
        getsize() integration test may be moved to a different test
        depending on whether other metadata methods (such as stat())
        are implemented.
        """
        self.assertTrue(self.test_bucket.exists())
        self.assertTrue(self.test_bucket.isdir())
        self.assertFalse(self.test_bucket.isfile())
        self.assertEqual(self.test_bucket.getsize(), 0)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10)
            self.test_dir.upload(['.'])
            # Measure the local file while the temporary directory still exists
            correct_size = os.path.getsize('0')

        self.assertTrue(self.test_dir.exists())
        self.assertTrue(self.test_dir.isdir())
        self.assertFalse(self.test_dir.isfile())
        self.assertEqual(self.test_dir.getsize(), 0)

        test_file = self.test_dir / '0'
        self.assertTrue(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertTrue(test_file.isfile())
        self.assertEqual(test_file.getsize(), correct_size)

        test_file.remove()
        self.assertFalse(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertFalse(test_file.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            test_file.getsize()

        fake_bucket = self.test_bucket + '2'
        self.assertFalse(fake_bucket.exists())
        self.assertFalse(fake_bucket.isdir())
        self.assertFalse(fake_bucket.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.getsize()

    def test_upload_download_remove(self):
        """Round-trip a small dataset, then remove each object again."""
        num_test_objs = 10
        min_obj_size = 50
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        for which_obj in self.get_dataset_obj_names(num_test_objs):
            self.assertTrue((self.test_dir / which_obj).exists())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download(tmp_d)
            for which_obj in self.get_dataset_obj_names(num_test_objs):
                self.assertCorrectObjectContents(which_obj, which_obj,
                                                 min_obj_size)
                remote_obj = self.test_dir / which_obj
                remote_obj.remove()

                # consistency check: poll until the deletion is visible, but
                # stop at a deadline so a stuck object fails the assertion
                # below instead of hanging the whole test run
                deadline = time.time() + self.CONSISTENCY_TIMEOUT
                while remote_obj.exists() and time.time() < deadline:
                    time.sleep(.5)
                self.assertFalse(remote_obj.exists())

    def test_upload_w_headers(self):
        """Headers passed to upload() show up in the object's stat()."""
        test_file = self.test_dir / 'a.txt'
        with NamedTemporaryDirectory(change_dir=True):
            open('a.txt', 'w').close()
            self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'})

        self.assertTrue(test_file.exists())
        self.assertEqual(test_file.stat()['ContentLanguage'], 'en')

    def test_download(self):
        """download() raises OSError when a local file blocks a directory."""
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('dir')
            os.mkdir('dir/a')
            open('dir/a/a.txt', 'w').close()
            self.test_dir.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True):
            # Regular files named like the remote directories get in the way
            open('dir', 'w').close()
            open('a', 'w').close()
            with self.assertRaises(OSError):
                self.test_dir.download('.')
            with self.assertRaises(OSError):
                (self.test_dir / 'dir').download('.')

    def test_condition(self):
        """list() enforces the ``condition`` callable on its results."""
        num_test_objs = 20
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(self.test_dir)

        # Verify a ConditionNotMet exception is thrown when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }

        with self.assertRaises(exceptions.ConditionNotMetError):
            self.test_dir.list(
                condition=lambda results: expected_objs == set(results))

        # Verify that the condition passes when excluding the non-extant file
        correct_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = self.test_dir.list(
            condition=lambda results: correct_objs == set(results))
        self.assertEqual(correct_objs, set(objs))

    def test_dir_markers(self):
        """Empty directories survive an upload/list/download round trip."""
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('empty')
            os.mkdir('dir')
            open('a.txt', 'w').close()
            open('dir/b.txt', 'w').close()
            self.test_dir.upload(['.'])

        self.assertEqual(
            set(self.test_dir.list()), {
                self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt',
                self.test_dir / 'empty/'
            })
        self.assertEqual(
            set(self.test_dir.list(ignore_dir_markers=True)),
            {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'})
        self.assertTrue((self.test_dir / 'empty').isdir())

        with NamedTemporaryDirectory(change_dir=True):
            self.test_dir.download('.')
            self.assertTrue(os.path.isdir('empty'))
            self.assertTrue(os.path.exists('dir/b.txt'))
            self.assertTrue(os.path.exists('a.txt'))

    def test_copytree_to_from_dir_w_manifest(self):
        """copytree with ``use_manifest=True`` validates both directions."""
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree('.', self.test_dir, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(
                ['my_dir/empty_file', 'my_dir/empty_dir/'])
            expected_contents = [Path('test') / c for c in expected_contents]
            self.assertEqual(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree('test', use_manifest=True)

            # Now delete one of the objects from s3. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir/').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree('test',
                                             use_manifest=True,
                                             num_retries=0)

    def test_multipart_transfer(self):
        """A 10 MiB object is transferred in multiple parts both ways."""
        import re

        log_stream = six.StringIO()
        handler = logging.StreamHandler(log_stream)
        handler.setLevel(logging.DEBUG)
        botocore_logger = logging.getLogger('botocore')
        previous_level = botocore_logger.level
        botocore_logger.setLevel(logging.DEBUG)
        botocore_logger.addHandler(handler)
        # Restore the logger even when an assertion below fails, so the
        # capture handler and DEBUG level do not leak into later tests
        self.addCleanup(botocore_logger.setLevel, previous_level)
        self.addCleanup(botocore_logger.removeHandler, handler)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10 * 1024 * 1024)
            self.test_dir.upload(['.'])

        self.assertEqual(1, len(self.test_dir.listdir()))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('.')
            self.assertEqual(1, len(Path('.').listdir()))
        self.assertIn("CompleteMultipartUploadResult", log_stream.getvalue())

        # Check for multipart download by checking for multiple 206 GET requests
        # to the object. The pattern is derived from the real object path
        # because the bucket comes from S3_TEST_BUCKET and the key carries the
        # per-test uuid prefix; the bucket segment stays optional as before.
        remote_obj = str(self.test_dir / '0')
        bucket, _, key = remote_obj[len('s3://'):].partition('/')
        six.assertRegex(
            self,
            log_stream.getvalue(),
            '"GET (/{bucket})?/{key} HTTP/1.1" 206'.format(
                bucket=re.escape(bucket), key=re.escape(key)))
コード例 #2
0
def copy(source, dest, swift_retry_options=None):
    """Copy a single source file to a destination file or directory.

    Works with OBS, posix and windows paths created with ``stor.Path``.
    Exactly one side may be an OBS path: copying OBS to OBS is rejected.

    Args:
        source (path|str): The source file to copy from
        dest (path|str): The destination file or directory.
        swift_retry_options (dict): Optional retry arguments forwarded to the
            swift upload or download call. View the
            `swift module-level documentation <swiftretry>` for more
            information on retry arguments

    Raises:
        ValueError: if both paths are OBS paths, if an OBS destination is
            ambiguous (neither a file with extension nor a directory with a
            trailing slash), or if a swift destination has no container.

    Examples:
        Copying a swift file to a local path behaves as follows::

            >>> import stor
            >>> swift_p = 'swift://tenant/container/dir/file.txt'
            >>> # file.txt will be copied to other_dir/other_file.txt
            >>> stor.copy(swift_p, 'other_dir/other_file.txt')

        Copying from a local path to swift behaves as follows::

            >>> from stor import Path
            >>> local_p = Path('my/local/file.txt')
            >>> # File will be uploaded to swift://tenant/container/dir/my_file.txt
            >>> local_p.copy('swift://tenant/container/dir/')

        Because of the ambiguity in whether a remote target is a file or directory, copy()
        will error on ambiguous paths.

            >>> local_p.copy('swift://tenant/container/dir')
            Traceback (most recent call last):
            ...
            ValueError: OBS destination must be file with extension or directory with slash
    """
    from stor import Path
    from stor.obs import OBSUploadObject

    source = Path(source)
    dest = Path(dest)
    retry_kwargs = swift_retry_options if swift_retry_options else {}

    # Reject combinations that cannot be copied before touching any storage
    if is_obs_path(source) and is_obs_path(dest):
        raise ValueError('cannot copy one OBS path to another OBS path')
    if is_obs_path(dest) and dest.is_ambiguous():
        raise ValueError('OBS destination must be file with extension or directory with slash')

    if is_filesystem_path(dest):
        # Local destination: make sure the parent directory exists first
        dest.parent.makedirs_p()
        if not is_obs_path(source):
            shutil.copy(source, dest)
            return
        # Downloading into an existing directory keeps the source file name
        if dest.isdir():
            local_target = dest / source.name
        else:
            local_target = dest
        source.download_object(local_target, **retry_kwargs)
        return

    # Remote destination: a trailing slash means "directory", so the object
    # is named after the source file
    if dest.endswith('/'):
        remote_target = dest / source.name
    else:
        remote_target = dest
    if is_swift_path(dest) and not remote_target.parent.container:
        raise ValueError(
            'cannot copy to tenant "%s" and file "%s"'
            % (remote_target.parent, remote_target.name))

    # Upload through the parent, naming the object relative to the container
    object_name = Path(remote_target.parent.resource or '') / remote_target.name
    remote_target.parent.upload([OBSUploadObject(source, object_name)],
                                **retry_kwargs)
コード例 #3
0
def copytree(source,
             dest,
             copy_cmd=None,
             use_manifest=False,
             headers=None,
             condition=None,
             **kwargs):
    """Copies a source directory to a destination directory. Assumes that
    paths are capable of being copied to/from.

    Note that this function uses shutil.copytree by default, meaning
    that a posix or windows destination must not exist beforehand.

    For example, assume the following file hierarchy::

        a/
        - b/
        - - 1.txt

    Doing a copytree from ``a`` to a new posix destination of ``c`` is
    performed with::

        Path('a').copytree('c')

    The end result for c looks like::

        c/
        - b/
        - - 1.txt

    Note that the user can override which copy command is used for posix
    copies via ``copy_cmd``, and it is up to them to ensure that their code
    abides by the semantics of the provided copy command. This function has
    been tested in production using ``cp -r`` and ``mcp -r`` as copy commands.

    Using OBS source and destinations work in a similar manner. Assume
    the destination is a swift path and we upload the same ``a`` folder::

        Path('a').copytree('swift://tenant/container/folder')

    The end swift result will have one object::

        Path('swift://tenant/container/folder/b/1.txt')

    Similarly one can do::

        Path('swift://tenant/container/folder/').copytree('c')

    The end result for c looks the same as the above posix example::

        c/
        - b/
        - - 1.txt

    Args:
        source (path|str): The source directory to copy from
        dest (path|str): The directory to copy to. Must not exist if
            its a posix directory
        copy_cmd (str): If copying to / from posix or windows, this command is
            used instead of shutil.copytree. The expanded absolute source
            and destination paths are appended as its last two arguments.
        use_manifest (bool, default False): See `SwiftPath.upload` and
            `SwiftPath.download`.
        condition (function(results) -> bool): See `SwiftPath.upload` and
            `SwiftPath.download`.
        headers (List[str]): See `SwiftPath.upload`. Only used when
            uploading to a non-filesystem destination.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            underlying ``copytree`` (DX to DX), ``download`` or ``upload``
            call.

    Raises:
        ValueError: if two OBS paths are specified, or if an OBS source is
            copied to a windows destination
        OSError: if destination is a posix path and it already exists
        TargetExistsError: if the destination is a DX path and the resolved
            target folder already exists
    """
    from stor import Path

    source = Path(source)
    dest = Path(dest)
    # DX to DX copies are delegated to the DX path implementation
    if is_dx_path(source) and is_dx_path(dest):
        return source.copytree(dest, **kwargs)
    if is_obs_path(source) and is_obs_path(dest):
        raise ValueError('cannot copy one OBS path to another OBS path')
    from stor.windows import WindowsPath
    if is_obs_path(source) and isinstance(dest, WindowsPath):
        raise ValueError('OBS copytree to windows is not supported')

    if is_filesystem_path(dest):
        dest.expand().abspath().parent.makedirs_p()
        if is_obs_path(source):
            source.download(dest,
                            use_manifest=use_manifest,
                            condition=condition,
                            **kwargs)
        else:
            if copy_cmd:
                copy_cmd = shlex.split(copy_cmd)
                # Expand before making the path absolute: once the working
                # directory is prepended, a leading "~" is no longer leading
                # and "$VAR" values end up glued onto the cwd. This matches
                # the expand().abspath() order used for the parent above.
                copy_cmd.extend([
                    str(source.expand().abspath()),
                    str(dest.expand().abspath())
                ])
                logger.info('performing copy with command - %s', copy_cmd)
                check_call(copy_cmd)
            else:
                shutil.copytree(source, dest)
    else:
        if is_dx_path(dest) and (dest.isdir() or dest.endswith('/')):
            # Copying into an existing DX folder (or one written with a
            # trailing slash) nests the source under its own name
            dest = dest / remove_trailing_slash(source).name
            if dest.isdir():
                raise exceptions.TargetExistsError(
                    'Destination path ({}) already exists, will not cause '
                    'duplicate folders to exist. Remove the original first'.
                    format(dest))
        # Entering the source path as a context manager lets '.' refer to the
        # source directory for the upload (presumably by changing the working
        # directory; the context manager is defined outside this function)
        with source:
            dest.upload(['.'],
                        use_manifest=use_manifest,
                        headers=headers,
                        condition=condition,
                        **kwargs)