class S3IntegrationTest(BaseIntegrationTest.BaseTestCases):
    """
    Integration tests for S3.

    Every test works under a unique ``s3://<S3_TEST_BUCKET>/<uuid>`` prefix
    that is computed in ``setUp``; the ``test`` directory below that prefix
    is removed again in ``tearDown``.

    In order to run the tests, you must have valid AWS S3 credentials and a
    test bucket set in the following environment variables:
    AWS_TEST_ACCESS_KEY_ID, AWS_TEST_SECRET_ACCESS_KEY and S3_TEST_BUCKET
    (and optionally AWS_DEFAULT_REGION). The tests are skipped when any of
    the three required variables is missing.
    """

    def setUp(self):
        """Skip without credentials, then point stor at a fresh test prefix."""
        super(S3IntegrationTest, self).setUp()

        if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID') and
                os.environ.get('AWS_TEST_SECRET_ACCESS_KEY') and
                os.environ.get('S3_TEST_BUCKET')):
            raise unittest.SkipTest(
                'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET '
                ' env vars not set. Skipping integration test')

        # Disable loggers so nose output is clean
        logging.getLogger('botocore').setLevel(logging.CRITICAL)

        test_bucket = os.environ['S3_TEST_BUCKET']
        # A uuid per test keeps the objects of different tests/runs apart
        self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format(
            test_bucket=test_bucket, uuid=uuid.uuid4()))
        self.test_dir = self.test_bucket / 'test'

        stor.settings.update({
            's3': {
                'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'],
                'aws_secret_access_key': os.environ['AWS_TEST_SECRET_ACCESS_KEY']
            }
        })

    def tearDown(self):
        """Remove everything the test uploaded below ``self.test_dir``."""
        super(S3IntegrationTest, self).tearDown()
        self.test_dir.rmtree()

    def test_over_1000_files(self):
        """Upload, list (with and without limit) and download 1234 objects."""
        num_test_objs = 1234
        min_obj_size = 0
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        self.assertEqual(num_test_objs, len(self.test_dir.list()))
        self.assertEqual(1200, len(self.test_dir.list(limit=1200)))
        self.assertTrue(self.test_dir.isdir())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('./')
            self.assertEqual(num_test_objs, len(os.listdir(tmp_d)))

    def test_list_methods(self):
        """Exercise list(), list(starts_with=...) and listdir()."""
        fake_bucket = Path('s3://stor-test-bucket2')
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.list()
        fake_folder = self.test_bucket / 'not_a_dir'
        self.assertEqual([], fake_folder.list())

        with NamedTemporaryDirectory(change_dir=True):
            open('file1.txt', 'w').close()
            open('file2.txt', 'w').close()
            os.mkdir('nested_dir')
            os.mkdir('nested_dir/dir')
            open('nested_dir/dir/file3.txt', 'w').close()
            self.test_dir.upload(['.'])

        file_list = self.test_dir.list()
        starts_with_list = self.test_bucket.list(starts_with='test')
        self.assertEqual(set(file_list), set(starts_with_list))
        self.assertEqual(
            set(file_list),
            {
                self.test_dir / 'file1.txt',
                self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/dir/file3.txt'
            })

        dir_list = self.test_dir.listdir()
        self.assertEqual(
            set(dir_list),
            {
                self.test_dir / 'file1.txt',
                self.test_dir / 'file2.txt',
                self.test_dir / 'nested_dir/'
            })

        # A trailing slash on the directory must not change the listing
        self.assertTrue(self.test_dir.listdir() == (self.test_dir + '/').listdir())

    def test_is_methods(self):
        """
        Tests is methods, exists(), and getsize().
        getsize() integration test may be moved to a different test
        depending on whether other metadata methods (such as stat())
        are implemented.
        """
        self.assertTrue(self.test_bucket.exists())
        self.assertTrue(self.test_bucket.isdir())
        self.assertFalse(self.test_bucket.isfile())
        self.assertEqual(self.test_bucket.getsize(), 0)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10)
            self.test_dir.upload(['.'])
            # Must be read while the temporary directory still exists
            correct_size = os.path.getsize('0')

        self.assertTrue(self.test_dir.exists())
        self.assertTrue(self.test_dir.isdir())
        self.assertFalse(self.test_dir.isfile())
        self.assertEqual(self.test_dir.getsize(), 0)

        test_file = self.test_dir / '0'
        self.assertTrue(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertTrue(test_file.isfile())
        self.assertEqual(test_file.getsize(), correct_size)

        test_file.remove()
        self.assertFalse(test_file.exists())
        self.assertFalse(test_file.isdir())
        self.assertFalse(test_file.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            test_file.getsize()

        fake_bucket = self.test_bucket + '2'
        self.assertFalse(fake_bucket.exists())
        self.assertFalse(fake_bucket.isdir())
        self.assertFalse(fake_bucket.isfile())
        with self.assertRaises(exceptions.NotFoundError):
            fake_bucket.getsize()

    def test_upload_download_remove(self):
        """Round-trip a small dataset and remove every object afterwards."""
        num_test_objs = 10
        min_obj_size = 50
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, min_obj_size)
            self.test_dir.upload(['.'])

        for which_obj in self.get_dataset_obj_names(num_test_objs):
            self.assertTrue((self.test_dir / which_obj).exists())

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download(tmp_d)
            for which_obj in self.get_dataset_obj_names(num_test_objs):
                self.assertCorrectObjectContents(which_obj, which_obj, min_obj_size)
                (self.test_dir / which_obj).remove()

                # consistency check: poll until the removal is visible, but
                # stop at a deadline so that an object that never disappears
                # fails the assertion below instead of hanging the test run
                deadline = time.time() + 60
                while ((self.test_dir / which_obj).exists() and
                       time.time() < deadline):
                    time.sleep(.5)
                self.assertFalse((self.test_dir / which_obj).exists())

    def test_upload_w_headers(self):
        """Headers passed to upload() must show up in the object's stat()."""
        test_file = self.test_dir / 'a.txt'
        with NamedTemporaryDirectory(change_dir=True):
            open('a.txt', 'w').close()
            self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'})

        self.assertTrue(test_file.exists())
        self.assertEqual(test_file.stat()['ContentLanguage'], 'en')

    def test_download(self):
        """download() raises OSError when local files block remote dirs."""
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('dir')
            os.mkdir('dir/a')
            open('dir/a/a.txt', 'w').close()
            self.test_dir.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True):
            # Local *files* named like the remote directories
            open('dir', 'w').close()
            open('a', 'w').close()
            with self.assertRaises(OSError):
                self.test_dir.download('.')
            with self.assertRaises(OSError):
                (self.test_dir / 'dir').download('.')

    def test_condition(self):
        """list(condition=...) raises when the condition is not satisfied."""
        num_test_objs = 20
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(self.test_dir)

        # Verify a ConditionNotMet exception is thrown when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }
        with self.assertRaises(exceptions.ConditionNotMetError):
            self.test_dir.list(
                condition=lambda results: expected_objs == set(results))

        # Verify that the condition passes when excluding the non-extant file
        correct_objs = {
            self.test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = self.test_dir.list(
            condition=lambda results: correct_objs == set(results))
        self.assertEqual(correct_objs, set(objs))

    def test_dir_markers(self):
        """Empty directories survive an upload/list/download round trip."""
        with NamedTemporaryDirectory(change_dir=True):
            os.mkdir('empty')
            os.mkdir('dir')
            open('a.txt', 'w').close()
            open('dir/b.txt', 'w').close()
            self.test_dir.upload(['.'])

        self.assertEqual(
            set(self.test_dir.list()),
            {
                self.test_dir / 'a.txt',
                self.test_dir / 'dir/b.txt',
                self.test_dir / 'empty/'
            })
        self.assertEqual(
            set(self.test_dir.list(ignore_dir_markers=True)),
            {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'})
        self.assertTrue((self.test_dir / 'empty').isdir())

        with NamedTemporaryDirectory(change_dir=True):
            self.test_dir.download('.')
            self.assertTrue(os.path.isdir('empty'))
            self.assertTrue(os.path.exists('dir/b.txt'))
            self.assertTrue(os.path.exists('a.txt'))

    def test_copytree_to_from_dir_w_manifest(self):
        """copytree with use_manifest=True validates uploads and downloads."""
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree('.', self.test_dir, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(
                ['my_dir/empty_file', 'my_dir/empty_dir/'])
            expected_contents = [Path('test') / c for c in expected_contents]
            self.assertEqual(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Download the results successfully
            Path(self.test_dir).copytree('test', use_manifest=True)

            # Now delete one of the objects from s3. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir/').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree('test', use_manifest=True,
                                             num_retries=0)

    def test_multipart_transfer(self):
        """A 10 MiB object must be uploaded and downloaded in multiple parts."""
        log_stream = six.StringIO()
        handler = logging.StreamHandler(log_stream)
        handler.setLevel(logging.DEBUG)

        botocore_logger = logging.getLogger('botocore')
        previous_level = botocore_logger.level
        botocore_logger.setLevel(logging.DEBUG)
        botocore_logger.addHandler(handler)
        # Undo the logger changes even when the test fails, so the debug
        # handler does not keep capturing the output of later tests
        self.addCleanup(botocore_logger.setLevel, previous_level)
        self.addCleanup(botocore_logger.removeHandler, handler)

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, 1, 10 * 1024 * 1024)
            self.test_dir.upload(['.'])

        self.assertEqual(1, len(self.test_dir.listdir()))

        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.test_dir.download('.')
            self.assertEqual(1, len(Path('.').listdir()))

        self.assertIn("CompleteMultipartUploadResult", log_stream.getvalue())
        # Check for multipart download by checking for multiple 206 GET requests
        # to the object. The bucket name comes from S3_TEST_BUCKET and the
        # object lives below a per-test uuid prefix, so any leading path
        # segments are accepted in front of /test/0.
        # NOTE(review): assertRegexpMatches is a deprecated alias of
        # assertRegex; kept because assertRegex does not exist on Python 2 -
        # confirm the supported interpreter versions before renaming.
        self.assertRegexpMatches(
            log_stream.getvalue(),
            '"GET (/[^/ ]+)*/test/0 HTTP/1.1" 206')
def copy(source, dest, swift_retry_options=None):
    """Copy a single source file to a destination file or directory.

    Works for any combination of OBS, posix and windows paths created with
    ``stor.Path``, except that source and destination may not both be OBS
    paths.

    Args:
        source (path|str): The source file to copy from.
        dest (path|str): The destination file or directory.
        swift_retry_options (dict): Optional retry arguments, passed as
            keyword arguments to the underlying download or upload call.
            View the `swift module-level documentation <swiftretry>` for
            more information on retry arguments.

    Raises:
        ValueError: if both paths are OBS paths, if an OBS destination is
            ambiguous (neither a file with extension nor a directory with
            trailing slash), or if a swift destination has no container.

    Examples:
        Copying a swift file to a local path behaves as follows::

            >>> import stor
            >>> swift_p = 'swift://tenant/container/dir/file.txt'
            >>> # file.txt will be copied to other_dir/other_file.txt
            >>> stor.copy(swift_p, 'other_dir/other_file.txt')

        Copying from a local path to swift behaves as follows::

            >>> from stor import Path
            >>> local_p = Path('my/local/file.txt')
            >>> # File will be uploaded to swift://tenant/container/dir/my_file.txt
            >>> local_p.copy('swift://tenant/container/dir/')

        Because of the ambiguity in whether a remote target is a file or
        directory, copy() will error on ambiguous paths.

            >>> local_p.copy('swift://tenant/container/dir')
            Traceback (most recent call last):
            ...
            ValueError: OBS destination must be file with extension or directory with slash
    """
    from stor import Path
    from stor.obs import OBSUploadObject

    source = Path(source)
    dest = Path(dest)
    retry_kwargs = swift_retry_options or {}

    if is_obs_path(source) and is_obs_path(dest):
        raise ValueError('cannot copy one OBS path to another OBS path')
    if is_obs_path(dest) and dest.is_ambiguous():
        raise ValueError('OBS destination must be file with extension or directory with slash')

    if not is_filesystem_path(dest):
        # Upload: a trailing slash means "put the file inside this directory"
        target = dest / source.name if dest.endswith('/') else dest
        target_dir = target.parent
        if is_swift_path(dest) and not target_dir.container:
            raise ValueError((
                'cannot copy to tenant "%s" and file '
                '"%s"' % (target_dir, target.name)
            ))
        object_name = Path(target_dir.resource or '') / target.name
        target_dir.upload([OBSUploadObject(source, object_name)], **retry_kwargs)
        return

    # Local destination: make sure the directory to write into exists
    dest.parent.makedirs_p()
    if not is_obs_path(source):
        shutil.copy(source, dest)
        return

    # Download: an existing local directory receives the file under its own name
    local_target = dest / source.name if dest.isdir() else dest
    source.download_object(local_target, **retry_kwargs)
def copytree(source, dest, copy_cmd=None, use_manifest=False, headers=None,
             condition=None, **kwargs):
    """Copy a source directory tree to a destination directory.

    The paths are assumed to be capable of being copied to/from. Local
    copies use ``shutil.copytree`` unless ``copy_cmd`` is given, which means
    a posix or windows destination must not exist beforehand.

    For example, assume the following file hierarchy::

        a/
        - b/
        - - 1.txt

    Doing a copytree from ``a`` to a new posix destination of ``c`` is
    performed with::

        Path('a').copytree('c')

    The end result for c looks like::

        c/
        - b/
        - - 1.txt

    The user can override which copy command is used for posix copies, and
    it is up to them to ensure that their code abides by the semantics of
    the provided copy command. This function has been tested in production
    using ``cp -r`` and using ``mcp -r``.

    OBS sources and destinations work in a similar manner. Assume the
    destination is a swift path and we upload the same ``a`` folder::

        Path('a').copytree('swift://tenant/container/folder')

    The end swift result will have one object::

        Path('swift://tenant/container/folder/b/1.txt')

    Similarly one can do::

        Path('swift://tenant/container/folder/').copytree('c')

    The end result for c looks the same as the above posix example::

        c/
        - b/
        - - 1.txt

    Args:
        source (path|str): The source directory to copy from
        dest (path|str): The directory to copy to. Must not exist if
            it is a posix directory
        copy_cmd (str): If copying to / from posix or windows, this command
            is used instead of shutil.copytree
        use_manifest (bool, default False): See `SwiftPath.upload` and
            `SwiftPath.download`.
        condition (function(results) -> bool): See `SwiftPath.upload` and
            `SwiftPath.download`.
        headers (List[str]): See `SwiftPath.upload`.
        **kwargs: Passed through to the underlying DX copytree, OBS
            download or OBS upload call.

    Raises:
        ValueError: if two OBS paths are specified, or if an OBS source is
            copied to a windows path
        OSError: if destination is a posix path and it already exists
        TargetExistsError: if the resolved DX destination folder already
            exists
    """
    from stor import Path

    source = Path(source)
    dest = Path(dest)

    # DX-to-DX copies are delegated to the DX path implementation
    if is_dx_path(source) and is_dx_path(dest):
        return source.copytree(dest, **kwargs)
    if is_obs_path(source) and is_obs_path(dest):
        raise ValueError('cannot copy one OBS path to another OBS path')

    from stor.windows import WindowsPath
    if is_obs_path(source) and isinstance(dest, WindowsPath):
        raise ValueError('OBS copytree to windows is not supported')

    if not is_filesystem_path(dest):
        # Upload branch
        if is_dx_path(dest) and (dest.isdir() or dest.endswith('/')):
            # Copy *into* the DX folder, under the source directory's name
            dest = dest / remove_trailing_slash(source).name
            # NOTE(review): the existence check is taken to apply only to
            # the re-targeted DX destination - confirm against the original
            # indentation of this function.
            if dest.isdir():
                raise exceptions.TargetExistsError(
                    'Destination path ({}) already exists, will not cause '
                    'duplicate folders to exist. Remove the original first'.
                    format(dest))
        # The source path is used as a context manager around the upload
        # of '.'
        with source:
            dest.upload(['.'], use_manifest=use_manifest, headers=headers,
                        condition=condition, **kwargs)
        return

    # Local destination: only its parent directory is created up front
    dest.expand().abspath().parent.makedirs_p()
    if is_obs_path(source):
        source.download(dest, use_manifest=use_manifest, condition=condition,
                        **kwargs)
    elif copy_cmd:
        argv = shlex.split(copy_cmd)
        argv.extend([
            str(source.abspath().expand()),
            str(dest.abspath().expand())
        ])
        logger.info('performing copy with command - %s', argv)
        check_call(argv)
    else:
        shutil.copytree(source, dest)