def test_makedirs(self):
        with NamedTemporaryDirectory() as tmpdir:
            d = Path(tmpdir)

            # Placeholder file so that when removedirs() is called,
            # it doesn't remove the temporary directory itself.
            tempf = d / 'temp.txt'
            with tempf.open('w') as fp:
                fp.write('blah')
            try:
                foo = d / 'foo'
                boz = foo / 'bar' / 'baz' / 'boz'
                boz.makedirs()
                try:
                    assert boz.isdir()
                finally:
                    foo.rmtree()
                assert not foo.exists()
                assert d.exists()

                foo.mkdir(0o750)
                boz.makedirs(0o700)
                try:
                    assert boz.isdir()
                finally:
                    foo.rmtree()
                assert not foo.exists()
                assert d.exists()
            finally:
                os.remove(tempf)
Example #2
def is_writeable(path, swift_retry_options=None):
    """
    Determine whether we have permission to write to path.

    Behavior of this method is slightly different for different storage types when the
    directory doesn't exist:
    1. For local file systems, this function will return True if the target directory
       exists and a file can be written to it.
    2. For AWS S3, this function will return True only if the target bucket is already
       present and we have write access to the bucket.
    3. For Swift, this function will return True only if the target tenant is already
       present and we have write access to the tenant and container. The container doesn't
       have to be present.

    This function is useful because `stor.stat()` will succeed if we only have
    read permissions to `path`, even though the eventual attempt to upload would fail.

    Secondly, `path` might not exist yet. If the caller intends to create it,
    `stor.stat()` will fail, even though the eventual upload attempt would succeed.

    Args:
        path (stor.Path|str): The path to check.
        swift_retry_options (dict): Optional retry arguments to use for swift
            upload or download. View the
            `swift module-level documentation <swiftretry>` for more
            information on retry arguments. If the goal is to not use
            exponential backoff, pass ``{'num_retries': 0}`` here.

    Returns:
        bool: Whether ``path`` is writeable or not.
    """
    from stor import basename
    from stor import join
    from stor import Path
    from stor import remove
    from stor.swift import ConflictError
    from stor.swift import SwiftPath
    from stor.swift import UnauthorizedError
    from stor.swift import UnavailableError

    path = with_trailing_slash(Path(path))

    if is_filesystem_path(path):
        return os.access(path, os.W_OK)

    container_path = None
    container_existed = None
    if is_swift_path(path):
        # We want this function to behave as a no-op with regards to the underlying
        # container structure. Therefore we need to remove any containers created by this
        # function that were not present when it was called. The `container_existed`
        # defined below will store whether the container that we're checking existed when
        # calling this function, so that we know if it should be removed at the end.
        container_path = Path('{}{}/{}/'.format(
            SwiftPath.drive,
            path.tenant,
            path.container
        ))
        container_existed = container_path.exists()

    with tempfile.NamedTemporaryFile() as tmpfile:
        try:
            # Attempt to create a file in the `path`.
            copy(tmpfile.name, path, swift_retry_options=swift_retry_options)
            # Remove the file that was created.
            remove(join(path, basename(tmpfile.name)))
            answer = True
        except (UnauthorizedError, UnavailableError, IOError, OSError, exceptions.FailedUploadError):  # nopep8
            answer = False

    # Remove the Swift container if it didn't exist when calling this function, but exists
    # now. This way this function remains a no-op with regards to container structure.
    if container_existed is False and container_path.exists():
        try:
            container_path.remove_container()
        except ConflictError:
            # Ignore if some other thread/user created the container in the meantime.
            pass

    return answer
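
# A minimal usage sketch for is_writeable() (not part of the original example).
# The local directory, Swift tenant/container, and retry settings below are
# hypothetical placeholders.

# Local filesystem: True when the target directory exists and is writable.
if is_writeable('/tmp/output'):
    print('can write to /tmp/output')

# Swift: pass {'num_retries': 0} to skip exponential backoff, as suggested in
# the docstring above. The container does not have to exist yet.
if is_writeable('swift://AUTH_example_tenant/example-container/',
                swift_retry_options={'num_retries': 0}):
    print('can write to the Swift container')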
Example #3
class SwiftIntegrationTest(BaseIntegrationTest.BaseTestCases):
    def setUp(self):
        super(SwiftIntegrationTest, self).setUp()

        if not os.environ.get('SWIFT_TEST_USERNAME'):
            raise unittest.SkipTest(
                'SWIFT_TEST_USERNAME env var not set. Skipping integration test')

        # Disable loggers so nose output won't be cluttered
        logging.getLogger('requests').setLevel(logging.CRITICAL)
        logging.getLogger('swiftclient').setLevel(logging.CRITICAL)
        logging.getLogger('keystoneclient').setLevel(logging.CRITICAL)

        settings.update({
            'swift': {
                'username': os.environ.get('SWIFT_TEST_USERNAME'),
                'password': os.environ.get('SWIFT_TEST_PASSWORD'),
                'num_retries': 5
            }})
        # Fall back to the SwiftStack auth convention for the tenant name
        tenant = os.environ.get('SWIFT_TEST_TENANT', 'AUTH_%s' % os.environ['SWIFT_TEST_USERNAME'])

        self.test_container = Path('swift://%s/%s' % (tenant, uuid.uuid4()))
        if self.test_container.exists():
            raise ValueError('test container %s already exists.' % self.test_container)

        try:
            self.test_container.post()
        except BaseException:
            self.test_container.rmtree()
            raise

        self.test_dir = self.test_container / 'test'

    def tearDown(self):
        super(SwiftIntegrationTest, self).tearDown()
        self.test_container.rmtree()

    def test_cached_auth_and_auth_invalidation(self):
        from swiftclient.client import get_auth_keystone as real_get_keystone
        swift._clear_cached_auth_credentials()
        tenant = self.test_container.tenant
        with mock.patch('swiftclient.client.get_auth_keystone', autospec=True) as mock_get_ks:
            mock_get_ks.side_effect = real_get_keystone
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 1)

            # The keystone auth should not be called on another stat
            mock_get_ks.reset_mock()
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 0)

            # Set the auth cache to something bad. The auth keystone should
            # be called twice on another stat. It's first called by the swiftclient
            # when retrying auth (with the bad token) and then called by us without
            # a token after the swiftclient raises an authorization error.
            mock_get_ks.reset_mock()
            swift._cached_auth_token_map[tenant]['creds']['os_auth_token'] = 'bad_auth'
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 2)
            # Note that the auth_token is passed into the keystone client but then popped
            # from the kwargs. Assert that an auth token is no longer part of the retry calls
            self.assertTrue('auth_token' not in mock_get_ks.call_args_list[0][0][3])
            self.assertTrue('auth_token' not in mock_get_ks.call_args_list[1][0][3])

            # Now make the auth always be invalid and verify that an auth error is thrown
            # This also tests that keystone auth errors are propagated as swift
            # AuthenticationErrors
            mock_get_ks.reset_mock()
            swift._clear_cached_auth_credentials()
            with mock.patch('keystoneclient.v2_0.client.Client') as mock_ks_client:
                from keystoneclient.exceptions import Unauthorized
                mock_ks_client.side_effect = Unauthorized
                with self.assertRaises(swift.AuthenticationError):
                    Path(self.test_container).stat()

                # Verify that getting the auth was called two more times because of retry
                # logic
                self.assertEquals(len(mock_get_ks.call_args_list), 2)

    def test_static_large_obj_copy_and_segment_container(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            segment_size = 1048576
            obj_size = segment_size * 4 + 100
            self.create_dataset(tmp_d, 1, obj_size)
            obj_path = stor.join(tmp_d,
                                 self.get_dataset_obj_names(1)[0])
            options = {'swift:upload': {'segment_size': segment_size}}
            with settings.use(options):
                obj_path.copy(self.test_container / 'large_object.txt')

            # Verify there is a segment container and that it can be ignored when listing a dir
            segment_container = Path(self.test_container.parent) / ('.segments_%s' % self.test_container.name)  # noqa
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=False)
            self.assertTrue(segment_container in containers)
            self.assertTrue(self.test_container in containers)
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=True)
            self.assertFalse(segment_container in containers)
            self.assertTrue(self.test_container in containers)

            # Verify there are five segments
            objs = set(segment_container.list(condition=lambda results: len(results) == 5))
            self.assertEquals(len(objs), 5)

            # Copy back the large object and verify its contents
            obj_path = Path(tmp_d) / 'large_object.txt'
            Path(self.test_container / 'large_object.txt').copy(obj_path)
            self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size)

    @unittest.skipIf(not os.environ.get('OS_TEMP_URL_KEY'), 'No temp url key set')
    def test_temp_url(self):
        basic_file = 'test.txt'
        complex_file = 'my test?file=special_chars.txt'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            nested_tmp_dir = stor.join(tmp_d, 'tmp')
            os.mkdir(nested_tmp_dir)
            basic_file_p = stor.join(nested_tmp_dir, basic_file)
            complex_file_p = stor.join(nested_tmp_dir, 'my test?file=special_chars.txt')

            with stor.open(basic_file_p, 'w') as f:
                f.write('basic test')
            with stor.open(complex_file_p, 'w') as f:
                f.write('complex test')

            self.test_container.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            basic_obj = stor.Path(
                stor.join(self.test_container, 'tmp', basic_file))
            basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file)
            r = requests.get(basic_temp_url)
            self.assertEquals(r.content, 'basic test')
            self.assertEquals(r.headers['Content-Disposition'],
                              'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt')

            complex_obj = stor.Path(
                stor.join(self.test_container, 'tmp', complex_file))
            complex_temp_url = complex_obj.temp_url(inline=False, filename=complex_file)
            r = requests.get(complex_temp_url)
            self.assertEquals(r.content, 'complex test')
            self.assertEquals(r.headers['Content-Disposition'],
                              'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt')  # noqa

    def test_condition_failures(self):
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        # Verify a ConditionNotMetError is raised when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }

        num_retries = settings.get()['swift']['num_retries']
        with mock.patch('time.sleep') as mock_sleep:
            with self.assertRaises(swift.ConditionNotMetError):
                test_dir.list(condition=lambda results: expected_objs == set(results))
            self.assertTrue(num_retries > 0)
            self.assertEquals(len(mock_sleep.call_args_list), num_retries)

        # Verify that the condition passes when excluding the nonexistent file
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = test_dir.list(condition=lambda results: expected_objs == set(results))
        self.assertEquals(expected_objs, set(objs))

    def test_list_glob(self):
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        objs = set(test_dir.list(condition=lambda results: len(results) == num_test_objs))
        expected_objs = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs)
        }
        self.assertEquals(len(objs), num_test_objs)
        self.assertEquals(objs, expected_objs)

        expected_glob = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs) if obj_name.startswith('1')
        }
        self.assertTrue(len(expected_glob) > 1)
        globbed_objs = set(
            test_dir.glob('1*', condition=lambda results: len(results) == len(expected_glob)))
        self.assertEquals(globbed_objs, expected_glob)

    def test_copytree_w_headers(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            open(tmp_d / 'test_obj', 'w').close()
            stor.copytree(
                '.',
                self.test_container,
                headers=['X-Delete-After:1000'])

        obj = stor.join(self.test_container, 'test_obj')
        stat_results = obj.stat()
        self.assertTrue('x-delete-at' in stat_results['headers'])

    def test_rmtree(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Make a couple empty test files and nested files
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'dir_file1', 'w').close()
            open(tmp_d / 'my_dir' / 'dir_file2', 'w').close()
            open(tmp_d / 'base_file1', 'w').close()
            open(tmp_d / 'base_file2', 'w').close()

            stor.copytree(
                '.',
                self.test_container,
                use_manifest=True)

            swift_dir = self.test_container / 'my_dir'
            self.assertEquals(len(swift_dir.list()), 2)
            swift_dir.rmtree()
            self.assertEquals(len(swift_dir.list()), 0)

            base_contents = self.test_container.list()
            self.assertTrue((self.test_container / 'base_file1') in base_contents)
            self.assertTrue((self.test_container / 'base_file2') in base_contents)

            self.test_container.rmtree()

            # TODO figure out a better way to test that the container no longer exists.
            with self.assertRaises(swift.NotFoundError):
                # Replication may have not happened yet for container deletion. Keep
                # listing in intervals until a NotFoundError is thrown
                for i in (0, 1, 3):
                    time.sleep(i)
                    self.test_container.list()

    def test_is_methods(self):
        container = self.test_container
        file_with_prefix = stor.join(container, 'analysis.txt')

        # ensure container is created but empty
        container.post()
        self.assertTrue(stor.isdir(container))
        self.assertFalse(stor.isfile(container))
        self.assertTrue(stor.exists(container))
        self.assertFalse(stor.listdir(container))

        folder = stor.join(container, 'analysis')
        subfolder = stor.join(container, 'analysis', 'alignments')
        file_in_folder = stor.join(container, 'analysis', 'alignments',
                                   'bam.bam')
        self.assertFalse(stor.exists(file_in_folder))
        self.assertFalse(stor.isdir(folder))
        self.assertFalse(stor.isdir(folder + '/'))
        with stor.open(file_with_prefix, 'w') as fp:
            fp.write('data\n')
        self.assertFalse(stor.isdir(folder))
        self.assertTrue(stor.isfile(file_with_prefix))

        with stor.open(file_in_folder, 'w') as fp:
            fp.write('blah.txt\n')

        self.assertTrue(stor.isdir(folder))
        self.assertFalse(stor.isfile(folder))
        self.assertTrue(stor.isdir(subfolder))

    def test_metadata_pulling(self):
        file_in_folder = stor.join(self.test_container,
                                   'somefile.svg')
        with stor.open(file_in_folder, 'w') as fp:
            fp.write('12345\n')

        self.assertEqual(stor.getsize(file_in_folder), 6)
        stat_data = stor.Path(file_in_folder).stat()
        self.assertIn('Content-Type', stat_data)
        self.assertEqual(stat_data['Content-Type'], 'image/svg+xml')

    def test_push_metadata(self):
        if self.test_container.tenant != 'AUTH_swft_test':
            raise unittest.SkipTest('test only works with admin rights')
        obj = self.test_container / 'object.txt'
        with obj.open('w') as fp:
            fp.write('a\n')
        obj.post({'header': ['X-Object-Meta-Custom:text']})
        stat_data = obj.stat()
        # TODO(jtratner): consider validating x-object-meta vs.
        # x-container-meta (otherwise headers won't take)
        self.assertIn('x-object-meta-custom', stat_data['headers'])
        self.assertEqual(stat_data['headers']['x-object-meta-custom'], 'text')
        self.test_container.post({'header': ['X-Container-Meta-Exciting:value'],
                                  'read_acl': '.r:*'})
        stat_data = self.test_container.stat()
        self.assertEqual(stat_data['Read-ACL'], '.r:*')
        self.assertIn('x-container-meta-exciting', stat_data['headers'])
        self.assertEqual(stat_data['headers']['x-container-meta-exciting'], 'value')
        self.test_container.post({'read_acl': '.r:example.com'})
        self.assertEqual(self.test_container.stat()['Read-ACL'],
                         '.r:example.com')

    def test_copytree_to_from_dir_w_manifest(self):
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree(
                '.',
                self.test_dir,
                use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(['my_dir/empty_file',
                                      'my_dir/empty_dir'])
            expected_contents = [Path('test') / c for c in expected_contents]
            self.assertEquals(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree(
                'test',
                use_manifest=True)

            # Now delete one of the objects from swift. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree(
                    'test',
                    use_manifest=True,
                    num_retries=0)

    def test_all_segment_container_types_are_deleted(self):
        segment_containers = [stor.join('swift://' + self.test_container.tenant,
                                        fmt % self.test_container.name)
                              for fmt in ('.segments_%s', '%s+segments', '%s_segments')]
        all_containers = segment_containers + [self.test_container]

        test_files = [stor.join(c, 'test_file_tbdeleted.txt') for c in all_containers]
        for t in test_files:
            with stor.open(t, 'w') as fp:
                fp.write('testtxt\n')
        assert all(t.exists() for t in test_files)
        stor.rmtree(self.test_container)
        for t in test_files:
            assert not t.exists(), 'Did not delete %s' % t

    def test_upload_multiple_dirs(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            num_test_objs = 10
            tmp_d = Path(tmp_d)

            # Create files filled with random data.
            path1 = tmp_d / 'dir1'
            os.mkdir(path1)
            self.create_dataset(path1, num_test_objs, 10)

            # Create empty dir and file.
            path2 = tmp_d / 'dir2'
            os.mkdir(path2)
            os.mkdir(path2 / 'my_dir')
            open(path2 / 'my_dir' / 'included_file', 'w').close()
            open(path2 / 'my_dir' / 'excluded_file', 'w').close()
            os.mkdir(path2 / 'my_dir' / 'included_dir')
            os.mkdir(path2 / 'my_dir' / 'excluded_dir')

            # Create file in the top level directory.
            open(tmp_d / 'top_level_file', 'w').close()

            to_upload = [
                'dir1',
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ]
            with tmp_d:
                swift_path = self.test_dir / 'subdir'
                swift_path.upload(to_upload, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(swift_path)
            expected_contents = [
                Path('dir1') / name
                for name in self.get_dataset_obj_names(num_test_objs)
            ]
            expected_contents.extend([
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ])

            expected_contents = [Path('test/subdir') / c for c in expected_contents]
            self.assertEquals(set(manifest_contents), set(expected_contents))
Example #4
class S3IntegrationTest(BaseIntegrationTest.BaseTestCases):
    """
    Integration tests for S3. Note that for now, while upload/download/remove
    methods are not implemented, tests will use the existing stor-test-bucket
    bucket on S3.

    In order to run the tests, you must have valid AWS S3 credentials set in the
    following environment variables: AWS_TEST_ACCESS_KEY_ID,
    AWS_TEST_SECRET_ACCESS_KEY (and optionally AWS_DEFAULT_REGION).
    """
    def setUp(self):
        super(S3IntegrationTest, self).setUp()

        if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID')
                and os.environ.get('AWS_TEST_SECRET_ACCESS_KEY')
                and os.environ.get('S3_TEST_BUCKET')):
            raise unittest.SkipTest(
                'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET '
                'env vars not set. Skipping integration test')

        # Disable loggers so nose output is clean
        logging.getLogger('botocore').setLevel(logging.CRITICAL)
        test_bucket = os.environ['S3_TEST_BUCKET']
        self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format(
            test_bucket=test_bucket, uuid=uuid.uuid4()))
        self.test_dir = self.test_bucket / 'test'
        stor.settings.update({
            's3': {
                'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'],
                'aws_secret_access_key':
                os.environ['AWS_TEST_SECRET_ACCESS_KEY']
            }
        })

    def tearDown(self):
        super(S3IntegrationTest, self).tearDown()
        self.test_dir.rmtree()

    def test_over_1000_files(self):
        num_test_objs = 1234
        min_obj_size = 0

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        self.assertEquals(1234, len(self.test_dir.list()))
        self.assertEquals(1200, len(self.test_dir.list(limit=1200)))
        self.assertTrue(self.test_dir.isdir())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('./')
            self.assertEquals(1234, len(os.listdir(tmp_d)))

    def test_list_methods(self):
        fake_bucket = Path('s3://stor-test-bucket2')
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.list()
        fake_folder = self.test_bucket / 'not_a_dir'
        self.assertEquals([], fake_folder.list())

        with NamedTemporaryDirectory(change_dir=True):
            open('file1.txt', 'w').close()
            open('file2.txt', 'w').close()
            os.mkdir('nested_dir')
            os.mkdir('nested_dir/dir')
            open('nested_dir/dir/file3.txt', 'w').close()
            self.test_dir.upload(['.'])

        file_list = self.test_dir.list()
        starts_with_list = self.test_bucket.list(starts_with='test')
        self.assertEquals(set(file_list), set(starts_with_list))
        self.assertEquals(
            set(file_list),
            set([
                self.test_dir / 'file1.txt', self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/dir/file3.txt'
            ]))

        dir_list = self.test_dir.listdir()
        self.assertEquals(
            set(dir_list),
            set([
                self.test_dir / 'file1.txt', self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/'
            ]))

        self.assertTrue(self.test_dir.listdir() == (self.test_dir +
                                                    '/').listdir())

    def test_is_methods(self):
        """
        Tests is methods, exists(), and getsize().
        getsize() integration test may be moved to a different test
        depending on whether other metadata methods (such as stat())
        are implemented.
        """
        self.assertTrue(self.test_bucket.exists())
        self.assertTrue(self.test_bucket.isdir())
        self.assertFalse(self.test_bucket.isfile())
        self.assertEquals(self.test_bucket.getsize(), 0)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10)
            self.test_dir.upload(['.'])
            correct_size = os.path.getsize('0')

        self.assertTrue(self.test_dir.exists())
        self.assertTrue(self.test_dir.isdir())
        self.assertFalse(self.test_dir.isfile())
        self.assertEquals(self.test_dir.getsize(), 0)

        test_file = self.test_dir / '0'
        self.assertTrue(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertTrue(test_file.isfile())
        self.assertEquals(test_file.getsize(), correct_size)

        test_file.remove()
        self.assertFalse(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertFalse(test_file.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            test_file.getsize()

        fake_bucket = self.test_bucket + '2'
        self.assertFalse(fake_bucket.exists())
        self.assertFalse(fake_bucket.isdir())
        self.assertFalse(fake_bucket.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.getsize()

    def test_upload_download_remove(self):
        num_test_objs = 10
        min_obj_size = 50
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        for which_obj in self.get_dataset_obj_names(num_test_objs):
            self.assertTrue((self.test_dir / which_obj).exists())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download(tmp_d)
            for which_obj in self.get_dataset_obj_names(num_test_objs):
                self.assertCorrectObjectContents(which_obj, which_obj,
                                                 min_obj_size)
                (self.test_dir / which_obj).remove()

                # consistency check
                while (self.test_dir / which_obj).exists():
                    time.sleep(.5)
                self.assertFalse((self.test_dir / which_obj).exists())

    def test_upload_w_headers(self):
        test_file = self.test_dir / 'a.txt'
        with NamedTemporaryDirectory(change_dir=True):
            open('a.txt', 'w').close()
            self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'})

        self.assertTrue(test_file.exists())
        self.assertEquals(test_file.stat()['ContentLanguage'], 'en')

    def test_download(self):
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('dir')
            os.mkdir('dir/a')
            open('dir/a/a.txt', 'w').close()
            self.test_dir.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True):
            open('dir', 'w').close()
            open('a', 'w').close()
            with self.assertRaises(OSError):
                self.test_dir.download('.')
            with self.assertRaises(OSError):
                (self.test_dir / 'dir').download('.')

    def test_condition(self):
        num_test_objs = 20
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(self.test_dir)

        # Verify a ConditionNotMetError is raised when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }

        with self.assertRaises(exceptions.ConditionNotMetError):
            self.test_dir.list(
                condition=lambda results: expected_objs == set(results))

        # Verify that the condition passes when excluding the nonexistent file
        correct_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = self.test_dir.list(
            condition=lambda results: correct_objs == set(results))
        self.assertEquals(correct_objs, set(objs))

    def test_dir_markers(self):
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('empty')
            os.mkdir('dir')
            open('a.txt', 'w').close()
            open('dir/b.txt', 'w').close()
            self.test_dir.upload(['.'])

        self.assertEquals(
            set(self.test_dir.list()), {
                self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt',
                self.test_dir / 'empty/'
            })
        self.assertEquals(
            set(self.test_dir.list(ignore_dir_markers=True)),
            {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'})
        self.assertTrue((self.test_dir / 'empty').isdir())

        with NamedTemporaryDirectory(change_dir=True):
            self.test_dir.download('.')
            self.assertTrue(os.path.isdir('empty'))
            self.assertTrue(os.path.exists('dir/b.txt'))
            self.assertTrue(os.path.exists('a.txt'))

    def test_copytree_to_from_dir_w_manifest(self):
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree('.', self.test_dir, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(
                ['my_dir/empty_file', 'my_dir/empty_dir/'])
            expected_contents = [Path('test') / c for c in expected_contents]
            self.assertEquals(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree('test', use_manifest=True)

            # Now delete one of the objects from s3. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir/').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree('test',
                                             use_manifest=True,
                                             num_retries=0)

    def test_multipart_transfer(self):
        logger = six.StringIO()
        handler = logging.StreamHandler(logger)
        logging.getLogger('botocore').setLevel(logging.DEBUG)
        logging.getLogger('botocore').addHandler(handler)
        handler.setLevel(logging.DEBUG)
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10 * 1024 * 1024)
            self.test_dir.upload(['.'])

        self.assertEquals(1, len(self.test_dir.listdir()))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('.')
            self.assertEquals(1, len(Path('.').listdir()))
        self.assertIn("CompleteMultipartUploadResult", logger.getvalue())
        # Check for multipart download by checking for multiple 206 GET requests
        # to the object
        self.assertRegexpMatches(
            logger.getvalue(),
            '"GET (/stor-test-bucket)?/test/0 HTTP/1.1" 206')