def test_makedirs(self):
    with NamedTemporaryDirectory() as tmpdir:
        base = Path(tmpdir)
        # Placeholder file so that when removedirs() is called,
        # it doesn't remove the temporary directory itself.
        placeholder = base / 'temp.txt'
        with placeholder.open('w') as fp:
            fp.write('blah')
        try:
            root = base / 'foo'
            leaf = root / 'bar' / 'baz' / 'boz'

            # First pass: recursive creation with default permissions.
            leaf.makedirs()
            try:
                assert leaf.isdir()
            finally:
                root.rmtree()
            assert not root.exists()
            assert base.exists()

            # Second pass: explicit modes on both mkdir() and makedirs().
            root.mkdir(0o750)
            leaf.makedirs(0o700)
            try:
                assert leaf.isdir()
            finally:
                root.rmtree()
            assert not root.exists()
            assert base.exists()
        finally:
            os.remove(placeholder)
def is_writeable(path, swift_retry_options=None):
    """
    Determine whether we have permission to write to path.

    Behavior of this method is slightly different for different storage types when the
    directory doesn't exist:
    1. For local file systems, this function will return True if the target directory
       exists and a file written to it.
    2. For AWS S3, this function will return True only if the target bucket is already
       present and we have write access to the bucket.
    3. For Swift, this function will return True, only if the target tenant is already
       present and we have write access to the tenant and container. The container
       doesn't have to be present.

    This function is useful, because `stor.stat()` will succeed if we have read-only
    permissions to `path`, but the eventual attempt to upload will fail. Secondly,
    `path` might not exist yet. If the intent of the caller is to create it,
    stor.stat() will fail, however the eventual upload attempt would succeed.

    Args:
        path (stor.Path|str): The path to check.
        swift_retry_options (dict): Optional retry arguments to use for swift
            upload or download. View the `swift module-level documentation
            <swiftretry>` for more information on retry arguments. If the goal is
            to not use exponential backoff, pass ``{'num_retries': 0}`` here.

    Returns:
        bool: Whether ``path`` is writeable or not.
    """
    # Imported lazily to avoid circular imports between stor modules.
    from stor import basename
    from stor import join
    from stor import Path
    from stor import remove
    from stor.swift import ConflictError
    from stor.swift import SwiftPath
    from stor.swift import UnauthorizedError
    from stor.swift import UnavailableError

    path = with_trailing_slash(Path(path))

    if is_filesystem_path(path):
        # Local paths can be answered without a round trip.
        return os.access(path, os.W_OK)

    container_path = None
    container_existed = None
    if is_swift_path(path):
        # We want this function to behave as a no-op with regards to the underlying
        # container structure. Therefore we need to remove any containers created by
        # this function that were not present when it was called. The
        # `container_existed` defined below will store whether the container that
        # we're checking existed when calling this function, so that we know if it
        # should be removed at the end.
        container_path = Path('{}{}/{}/'.format(
            SwiftPath.drive,
            path.tenant,
            path.container
        ))
        container_existed = container_path.exists()

    with tempfile.NamedTemporaryFile() as tmpfile:
        try:
            # Attempt to create a file in the `path`.
            copy(tmpfile.name, path, swift_retry_options=swift_retry_options)
            # Remove the file that was created.
            remove(join(path, basename(tmpfile.name)))
            answer = True
        except (UnauthorizedError, UnavailableError, IOError, OSError,
                exceptions.FailedUploadError):  # nopep8
            answer = False

    # Remove the Swift container if it didn't exist when calling this function, but
    # exists now. This way this function remains a no-op with regards to container
    # structure. Note `is False` (not falsy): None means "not a Swift path".
    if container_existed is False and container_path.exists():
        try:
            container_path.remove_container()
        except ConflictError:
            # Ignore if some other thread/user created the container in the meantime.
            pass

    return answer
class SwiftIntegrationTest(BaseIntegrationTest.BaseTestCases):
    """Integration tests for Swift storage.

    Requires SWIFT_TEST_USERNAME / SWIFT_TEST_PASSWORD (and optionally
    SWIFT_TEST_TENANT) env vars; skipped otherwise. Each test runs against a
    uniquely-named container that is created in setUp and removed in tearDown.
    """
    def setUp(self):
        super(SwiftIntegrationTest, self).setUp()

        if not os.environ.get('SWIFT_TEST_USERNAME'):
            raise unittest.SkipTest(
                'SWIFT_TEST_USERNAME env var not set. Skipping integration test')

        # Disable loggers so nose output wont be trashed
        logging.getLogger('requests').setLevel(logging.CRITICAL)
        logging.getLogger('swiftclient').setLevel(logging.CRITICAL)
        logging.getLogger('keystoneclient').setLevel(logging.CRITICAL)

        settings.update({
            'swift': {
                'username': os.environ.get('SWIFT_TEST_USERNAME'),
                'password': os.environ.get('SWIFT_TEST_PASSWORD'),
                'num_retries': 5
            }})

        # fall back on to swiftstack auth for tenant
        tenant = os.environ.get('SWIFT_TEST_TENANT',
                                'AUTH_%s' % os.environ['SWIFT_TEST_USERNAME'])
        self.test_container = Path('swift://%s/%s' % (tenant, uuid.uuid4()))
        if self.test_container.exists():
            raise ValueError('test container %s already exists.' % self.test_container)

        try:
            self.test_container.post()
        except BaseException:
            self.test_container.rmtree()
            raise

        self.test_dir = self.test_container / 'test'

    def tearDown(self):
        super(SwiftIntegrationTest, self).tearDown()
        self.test_container.rmtree()

    def test_cached_auth_and_auth_invalidation(self):
        from swiftclient.client import get_auth_keystone as real_get_keystone
        swift._clear_cached_auth_credentials()
        tenant = self.test_container.tenant
        with mock.patch('swiftclient.client.get_auth_keystone', autospec=True) as mock_get_ks:
            mock_get_ks.side_effect = real_get_keystone
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 1)

            # The keystone auth should not be called on another stat
            mock_get_ks.reset_mock()
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 0)

            # Set the auth cache to something bad. The auth keystone should
            # be called twice on another stat. It's first called by the swiftclient
            # when retrying auth (with the bad token) and then called by us without
            # a token after the swiftclient raises an authorization error.
            mock_get_ks.reset_mock()
            swift._cached_auth_token_map[tenant]['creds']['os_auth_token'] = 'bad_auth'
            s = Path(self.test_container).stat()
            self.assertEquals(s['Account'], tenant)
            self.assertEquals(len(mock_get_ks.call_args_list), 2)
            # Note that the auth_token is passed into the keystone client but then popped
            # from the kwargs. Assert that an auth token is no longer part of the retry calls
            self.assertTrue('auth_token' not in mock_get_ks.call_args_list[0][0][3])
            self.assertTrue('auth_token' not in mock_get_ks.call_args_list[1][0][3])

            # Now make the auth always be invalid and verify that an auth error is thrown
            # This also tests that keystone auth errors are propagated as swift
            # AuthenticationErrors
            mock_get_ks.reset_mock()
            swift._clear_cached_auth_credentials()
            with mock.patch('keystoneclient.v2_0.client.Client') as mock_ks_client:
                from keystoneclient.exceptions import Unauthorized
                mock_ks_client.side_effect = Unauthorized
                with self.assertRaises(swift.AuthenticationError):
                    Path(self.test_container).stat()

                # Verify that getting the auth was called two more times because of retry
                # logic
                self.assertEquals(len(mock_get_ks.call_args_list), 2)

    def test_static_large_obj_copy_and_segment_container(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            segment_size = 1048576
            obj_size = segment_size * 4 + 100
            self.create_dataset(tmp_d, 1, obj_size)
            obj_path = stor.join(tmp_d, self.get_dataset_obj_names(1)[0])
            options = {'swift:upload': {'segment_size': segment_size}}
            with settings.use(options):
                obj_path.copy(self.test_container / 'large_object.txt')

            # Verify there is a segment container and that it can be ignored when listing a dir
            segment_container = Path(self.test_container.parent) / ('.segments_%s' % self.test_container.name)  # noqa
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=False)
            self.assertTrue(segment_container in containers)
            self.assertTrue(self.test_container in containers)
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=True)
            self.assertFalse(segment_container in containers)
            self.assertTrue(self.test_container in containers)

            # Verify there are five segments
            objs = set(segment_container.list(condition=lambda results: len(results) == 5))
            self.assertEquals(len(objs), 5)

            # Copy back the large object and verify its contents
            obj_path = Path(tmp_d) / 'large_object.txt'
            Path(self.test_container / 'large_object.txt').copy(obj_path)
            self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size)

    @unittest.skipIf(not os.environ.get('OS_TEMP_URL_KEY'), 'No temp url key set')
    def test_temp_url(self):
        basic_file = 'test.txt'
        complex_file = 'my test?file=special_chars.txt'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            nested_tmp_dir = stor.join(tmp_d, 'tmp')
            os.mkdir(nested_tmp_dir)
            basic_file_p = stor.join(nested_tmp_dir, basic_file)
            complex_file_p = stor.join(nested_tmp_dir, 'my test?file=special_chars.txt')
            with stor.open(basic_file_p, 'w') as f:
                f.write('basic test')
            with stor.open(complex_file_p, 'w') as f:
                f.write('complex test')
            self.test_container.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            basic_obj = stor.Path(
                stor.join(self.test_container, 'tmp', basic_file))
            basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file)
            r = requests.get(basic_temp_url)
            self.assertEquals(r.content, 'basic test')
            self.assertEquals(r.headers['Content-Disposition'],
                              'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt')

            complex_obj = stor.Path(
                stor.join(self.test_container, 'tmp', complex_file))
            complex_temp_url = complex_obj.temp_url(inline=False, filename=complex_file)
            r = requests.get(complex_temp_url)
            self.assertEquals(r.content, 'complex test')
            self.assertEquals(r.headers['Content-Disposition'],
                              'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt')  # noqa

    def test_condition_failures(self):
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        # Verify a ConditionNotMet exception is thrown when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }
        num_retries = settings.get()['swift']['num_retries']
        with mock.patch('time.sleep') as mock_sleep:
            with self.assertRaises(swift.ConditionNotMetError):
                test_dir.list(condition=lambda results: expected_objs == set(results))
            self.assertTrue(num_retries > 0)
            self.assertEquals(len(mock_sleep.call_args_list), num_retries)

        # Verify that the condition passes when excluding the non-extant file
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = test_dir.list(condition=lambda results: expected_objs == set(results))
        self.assertEquals(expected_objs, set(objs))

    def test_list_glob(self):
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        objs = set(test_dir.list(condition=lambda results: len(results) == num_test_objs))
        expected_objs = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs)
        }
        self.assertEquals(len(objs), num_test_objs)
        self.assertEquals(objs, expected_objs)

        expected_glob = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs)
            if obj_name.startswith('1')
        }
        self.assertTrue(len(expected_glob) > 1)
        globbed_objs = set(
            test_dir.glob('1*', condition=lambda results: len(results) == len(expected_glob)))
        self.assertEquals(globbed_objs, expected_glob)

    def test_copytree_w_headers(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            open(tmp_d / 'test_obj', 'w').close()
            stor.copytree(
                '.',
                self.test_container,
                headers=['X-Delete-After:1000'])

        obj = stor.join(self.test_container, 'test_obj')
        stat_results = obj.stat()
        self.assertTrue('x-delete-at' in stat_results['headers'])

    def test_rmtree(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Make a couple empty test files and nested files
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'dir_file1', 'w').close()
            open(tmp_d / 'my_dir' / 'dir_file2', 'w').close()
            open(tmp_d / 'base_file1', 'w').close()
            open(tmp_d / 'base_file2', 'w').close()

            stor.copytree(
                '.',
                self.test_container,
                use_manifest=True)

            swift_dir = self.test_container / 'my_dir'
            self.assertEquals(len(swift_dir.list()), 2)
            swift_dir.rmtree()
            self.assertEquals(len(swift_dir.list()), 0)

            base_contents = self.test_container.list()
            self.assertTrue((self.test_container / 'base_file1') in base_contents)
            # Fixed copy-paste bug: previously asserted 'base_file1' twice and
            # never verified 'base_file2'.
            self.assertTrue((self.test_container / 'base_file2') in base_contents)

            self.test_container.rmtree()

            # TODO figure out a better way to test that the container no longer exists.
            with self.assertRaises(swift.NotFoundError):
                # Replication may have not happened yet for container deletion. Keep
                # listing in intervals until a NotFoundError is thrown
                for i in (0, 1, 3):
                    time.sleep(i)
                    self.test_container.list()

    def test_is_methods(self):
        # Removed a duplicated `container = self.test_container` assignment.
        container = self.test_container
        file_with_prefix = stor.join(container, 'analysis.txt')

        # ensure container is created but empty
        container.post()
        self.assertTrue(stor.isdir(container))
        self.assertFalse(stor.isfile(container))
        self.assertTrue(stor.exists(container))
        self.assertFalse(stor.listdir(container))

        folder = stor.join(container, 'analysis')
        subfolder = stor.join(container, 'analysis', 'alignments')
        file_in_folder = stor.join(container, 'analysis', 'alignments', 'bam.bam')
        self.assertFalse(stor.exists(file_in_folder))
        self.assertFalse(stor.isdir(folder))
        self.assertFalse(stor.isdir(folder + '/'))

        with stor.open(file_with_prefix, 'w') as fp:
            fp.write('data\n')
        self.assertFalse(stor.isdir(folder))
        self.assertTrue(stor.isfile(file_with_prefix))

        with stor.open(file_in_folder, 'w') as fp:
            fp.write('blah.txt\n')
        self.assertTrue(stor.isdir(folder))
        self.assertFalse(stor.isfile(folder))
        self.assertTrue(stor.isdir(subfolder))

    def test_metadata_pulling(self):
        file_in_folder = stor.join(self.test_container, 'somefile.svg')
        with stor.open(file_in_folder, 'w') as fp:
            fp.write('12345\n')

        self.assertEqual(stor.getsize(file_in_folder), 6)
        stat_data = stor.Path(file_in_folder).stat()
        self.assertIn('Content-Type', stat_data)
        self.assertEqual(stat_data['Content-Type'], 'image/svg+xml')

    def test_push_metadata(self):
        if self.test_container.tenant != 'AUTH_swft_test':
            raise unittest.SkipTest('test only works with admin rights')
        obj = self.test_container / 'object.txt'
        with obj.open('w') as fp:
            fp.write('a\n')
        obj.post({'header': ['X-Object-Meta-Custom:text']})
        stat_data = obj.stat()
        # TODO(jtratner): consider validating x-object-meta vs.
        # x-container-meta (otherwise headers won't take)
        self.assertIn('x-object-meta-custom', stat_data['headers'])
        self.assertEqual(stat_data['headers']['x-object-meta-custom'], 'text')
        self.test_container.post({'header': ['X-Container-Meta-Exciting:value'],
                                  'read_acl': '.r:*'})
        stat_data = self.test_container.stat()
        self.assertEqual(stat_data['Read-ACL'], '.r:*')
        self.assertIn('x-container-meta-exciting', stat_data['headers'])
        self.assertEqual(stat_data['headers']['x-container-meta-exciting'], 'value')
        self.test_container.post({'read_acl': '.r:example.com'})
        self.assertEqual(self.test_container.stat()['Read-ACL'], '.r:example.com')

    def test_copytree_to_from_dir_w_manifest(self):
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree(
                '.',
                self.test_dir,
                use_manifest=True)

        # Validate the contents of the manifest file
        manifest_contents = utils.get_data_manifest_contents(self.test_dir)
        expected_contents = self.get_dataset_obj_names(num_test_objs)
        expected_contents.extend(['my_dir/empty_file', 'my_dir/empty_dir'])
        expected_contents = [Path('test') / c for c in expected_contents]
        self.assertEquals(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree(
                'test',
                use_manifest=True)

            # Now delete one of the objects from swift. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree(
                    'test',
                    use_manifest=True,
                    num_retries=0)

    def test_all_segment_container_types_are_deleted(self):
        segment_containers = [stor.join('swift://' + self.test_container.tenant,
                                        fmt % self.test_container.name)
                              for fmt in ('.segments_%s', '%s+segments', '%s_segments')]
        all_containers = segment_containers + [self.test_container]
        test_files = [stor.join(c, 'test_file_tbdeleted.txt') for c in all_containers]
        for t in test_files:
            with stor.open(t, 'w') as fp:
                fp.write('testtxt\n')
        assert all(t.exists() for t in test_files)
        stor.rmtree(self.test_container)
        for t in test_files:
            assert not t.exists(), 'Did not delete %s' % t

    def test_upload_multiple_dirs(self):
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            num_test_objs = 10
            tmp_d = Path(tmp_d)
            # Create files filled with random data.
            path1 = tmp_d / 'dir1'
            os.mkdir(path1)
            self.create_dataset(path1, num_test_objs, 10)
            # Create empty dir and file.
            path2 = tmp_d / 'dir2'
            os.mkdir(path2)
            os.mkdir(path2 / 'my_dir')
            open(path2 / 'my_dir' / 'included_file', 'w').close()
            open(path2 / 'my_dir' / 'excluded_file', 'w').close()
            os.mkdir(path2 / 'my_dir' / 'included_dir')
            os.mkdir(path2 / 'my_dir' / 'excluded_dir')
            # Create file in the top level directory.
            open(tmp_d / 'top_level_file', 'w').close()

            to_upload = [
                'dir1',
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ]
            with tmp_d:
                swift_path = self.test_dir / 'subdir'
                swift_path.upload(to_upload, use_manifest=True)

        # Validate the contents of the manifest file
        manifest_contents = utils.get_data_manifest_contents(swift_path)
        expected_contents = [
            Path('dir1') / name
            for name in self.get_dataset_obj_names(num_test_objs)
        ]
        expected_contents.extend([
            'dir2/my_dir/included_file',
            'dir2/my_dir/included_dir',
            'top_level_file',
        ])
        expected_contents = [Path('test/subdir') / c for c in expected_contents]
        self.assertEquals(set(manifest_contents), set(expected_contents))
class S3IntegrationTest(BaseIntegrationTest.BaseTestCases):
    """
    Integration tests for S3. Note that for now, while upload/download/remove
    methods are not implemented, tests will use the existing stor-test-bucket
    bucket on S3.

    In order to run the tests, you must have valid AWS S3 credentials set in the
    following environment variables: AWS_TEST_ACCESS_KEY_ID,
    AWS_TEST_SECRET_ACCESS_KEY (and optionally AWS_DEFAULT_REGION).
    """
    def setUp(self):
        # Skip the whole suite unless S3 credentials and a test bucket are configured.
        super(S3IntegrationTest, self).setUp()
        if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID') and
                os.environ.get('AWS_TEST_SECRET_ACCESS_KEY') and
                os.environ.get('S3_TEST_BUCKET')):
            raise unittest.SkipTest(
                'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET '
                ' env vars not set. Skipping integration test')
        # Disable loggers so nose output is clean
        logging.getLogger('botocore').setLevel(logging.CRITICAL)

        # Use a uuid-scoped prefix inside the shared test bucket so concurrent
        # runs don't collide.
        test_bucket = os.environ['S3_TEST_BUCKET']
        self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format(
            test_bucket=test_bucket, uuid=uuid.uuid4()))
        self.test_dir = self.test_bucket / 'test'

        stor.settings.update({
            's3': {
                'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'],
                'aws_secret_access_key': os.environ['AWS_TEST_SECRET_ACCESS_KEY']
            }
        })

    def tearDown(self):
        # Remove everything this test created under the uuid-scoped prefix.
        super(S3IntegrationTest, self).tearDown()
        self.test_dir.rmtree()

    def test_over_1000_files(self):
        # 1234 objects exceeds S3's 1000-key page size, exercising pagination
        # in list() and download().
        num_test_objs = 1234
        min_obj_size = 0
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        self.assertEquals(1234, len(self.test_dir.list()))
        self.assertEquals(1200, len(self.test_dir.list(limit=1200)))
        self.assertTrue(self.test_dir.isdir())
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('./')
            self.assertEquals(1234, len(os.listdir(tmp_d)))

    def test_list_methods(self):
        # Listing a non-existent bucket raises; listing a non-existent prefix
        # within an existing bucket returns an empty result.
        fake_bucket = Path('s3://stor-test-bucket2')
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.list()
        fake_folder = self.test_bucket / 'not_a_dir'
        self.assertEquals([], fake_folder.list())

        with NamedTemporaryDirectory(change_dir=True):
            open('file1.txt', 'w').close()
            open('file2.txt', 'w').close()
            os.mkdir('nested_dir')
            os.mkdir('nested_dir/dir')
            open('nested_dir/dir/file3.txt', 'w').close()
            self.test_dir.upload(['.'])

        # list() is recursive; starts_with filters by key prefix.
        file_list = self.test_dir.list()
        starts_with_list = self.test_bucket.list(starts_with='test')
        self.assertEquals(set(file_list), set(starts_with_list))
        self.assertEquals(
            set(file_list),
            set([
                self.test_dir / 'file1.txt',
                self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/dir/file3.txt'
            ]))

        # listdir() is one level deep and reports sub-prefixes with a trailing slash.
        dir_list = self.test_dir.listdir()
        self.assertEquals(
            set(dir_list),
            set([
                self.test_dir / 'file1.txt',
                self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/'
            ]))

        # A trailing slash on the directory path must not change the result.
        self.assertTrue(self.test_dir.listdir() == (self.test_dir + '/').listdir())

    def test_is_methods(self):
        """
        Tests is methods, exists(), and getsize(). getsize() integration test
        may be moved to a different test depending on whether other metadata
        methods (such as stat()) are implemented.
        """
        # Bucket-level prefix: a directory, not a file, size 0.
        self.assertTrue(self.test_bucket.exists())
        self.assertTrue(self.test_bucket.isdir())
        self.assertFalse(self.test_bucket.isfile())
        self.assertEquals(self.test_bucket.getsize(), 0)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10)
            self.test_dir.upload(['.'])
            correct_size = os.path.getsize('0')

        # Prefixes behave as directories (size 0); objects as files.
        self.assertTrue(self.test_dir.exists())
        self.assertTrue(self.test_dir.isdir())
        self.assertFalse(self.test_dir.isfile())
        self.assertEquals(self.test_dir.getsize(), 0)

        test_file = self.test_dir / '0'
        self.assertTrue(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertTrue(test_file.isfile())
        self.assertEquals(test_file.getsize(), correct_size)

        # After removal, all predicates flip to False and getsize() raises.
        test_file.remove()
        self.assertFalse(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertFalse(test_file.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            test_file.getsize()

        fake_bucket = self.test_bucket + '2'
        self.assertFalse(fake_bucket.exists())
        self.assertFalse(fake_bucket.isdir())
        self.assertFalse(fake_bucket.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.getsize()

    def test_upload_download_remove(self):
        num_test_objs = 10
        min_obj_size = 50
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])
        for which_obj in self.get_dataset_obj_names(num_test_objs):
            self.assertTrue((self.test_dir / which_obj).exists())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download(tmp_d)
            for which_obj in self.get_dataset_obj_names(num_test_objs):
                self.assertCorrectObjectContents(which_obj, which_obj, min_obj_size)
                (self.test_dir / which_obj).remove()

                # consistency check: poll until the deletion is visible
                while (self.test_dir / which_obj).exists():
                    time.sleep(.5)

                self.assertFalse((self.test_dir / which_obj).exists())

    def test_upload_w_headers(self):
        test_file = self.test_dir / 'a.txt'
        with NamedTemporaryDirectory(change_dir=True):
            open('a.txt', 'w').close()
            self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'})
        self.assertTrue(test_file.exists())
        # The uploaded header must be visible via stat().
        self.assertEquals(test_file.stat()['ContentLanguage'], 'en')

    def test_download(self):
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('dir')
            os.mkdir('dir/a')
            open('dir/a/a.txt', 'w').close()
            self.test_dir.upload(['.'])

        # Downloading onto local paths occupied by plain files (where
        # directories are needed) must raise OSError.
        with NamedTemporaryDirectory(change_dir=True):
            open('dir', 'w').close()
            open('a', 'w').close()
            with self.assertRaises(OSError):
                self.test_dir.download('.')
            with self.assertRaises(OSError):
                (self.test_dir / 'dir').download('.')

    def test_condition(self):
        num_test_objs = 20
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(self.test_dir)

        # Verify a ConditionNotMet exception is thrown when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }
        with self.assertRaises(exceptions.ConditionNotMetError):
            self.test_dir.list(
                condition=lambda results: expected_objs == set(results))

        # Verify that the condition passes when excluding the non-extant file
        correct_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = self.test_dir.list(
            condition=lambda results: correct_objs == set(results))
        self.assertEquals(correct_objs, set(objs))

    def test_dir_markers(self):
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('empty')
            os.mkdir('dir')
            open('a.txt', 'w').close()
            open('dir/b.txt', 'w').close()
            self.test_dir.upload(['.'])

        # Empty local dirs are represented remotely by 'name/' marker objects,
        # which list() includes by default and can exclude on request.
        self.assertEquals(
            set(self.test_dir.list()),
            {
                self.test_dir / 'a.txt',
                self.test_dir / 'dir/b.txt',
                self.test_dir / 'empty/'
            })
        self.assertEquals(
            set(self.test_dir.list(ignore_dir_markers=True)),
            {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'})
        self.assertTrue((self.test_dir / 'empty').isdir())

        # Markers must round-trip back into empty local directories on download.
        with NamedTemporaryDirectory(change_dir=True):
            self.test_dir.download('.')
            self.assertTrue(os.path.isdir('empty'))
            self.assertTrue(os.path.exists('dir/b.txt'))
            self.assertTrue(os.path.exists('a.txt'))

    def test_copytree_to_from_dir_w_manifest(self):
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree('.', self.test_dir, use_manifest=True)

        # Validate the contents of the manifest file
        manifest_contents = utils.get_data_manifest_contents(self.test_dir)
        expected_contents = self.get_dataset_obj_names(num_test_objs)
        # note: the empty dir appears with a trailing slash (dir marker)
        expected_contents.extend(
            ['my_dir/empty_file', 'my_dir/empty_dir/'])
        expected_contents = [Path('test') / c for c in expected_contents]
        self.assertEquals(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree('test', use_manifest=True)

            # Now delete one of the objects from s3. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir/').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree('test', use_manifest=True,
                                             num_retries=0)

    def test_multipart_transfer(self):
        # Capture botocore debug logs to detect multipart behavior on the wire.
        logger = six.StringIO()
        handler = logging.StreamHandler(logger)
        logging.getLogger('botocore').setLevel(logging.DEBUG)
        logging.getLogger('botocore').addHandler(handler)
        handler.setLevel(logging.DEBUG)

        # 10 MB object — large enough to trigger multipart upload/download.
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10 * 1024 * 1024)
            self.test_dir.upload(['.'])
            self.assertEquals(1, len(self.test_dir.listdir()))
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('.')
            self.assertEquals(1, len(Path('.').listdir()))
        self.assertIn("CompleteMultipartUploadResult", logger.getvalue())
        # Check for multipart download by checking for multiple 206 GET requests
        # to the object
        self.assertRegexpMatches(
            logger.getvalue(),
            '"GET (/stor-test-bucket)?/test/0 HTTP/1.1" 206')