def upload(self, to_upload, **kwargs): """Upload a list of files and directories to a directory. This is not a batch level operation. If some file errors, the files uploaded before will remain present. Args: to_upload (List[Union[str, OBSUploadObject]]): A list of posix file names, directory names, or OBSUploadObject objects to upload. Raises: ValueError: When source path is not a directory TargetExistsError: When destination directory already exists """ dx_upload_objects = [ name for name in to_upload if isinstance(name, OBSUploadObject) ] all_files_to_upload = utils.walk_files_and_dirs([ name for name in to_upload if not isinstance(name, OBSUploadObject) ]) dx_upload_objects.extend([ OBSUploadObject( f, object_name=('/' + self.resource if self.resource else Path('')) / utils.file_name_to_object_name(f)) for f in all_files_to_upload ]) for upload_obj in dx_upload_objects: upload_obj.object_name = Path(upload_obj.object_name) upload_obj.source = Path(upload_obj.source) dest_file = Path('{drive}{project}:{path}'.format( drive=self.drive, project=self.canonical_project, path=upload_obj.object_name)) if upload_obj.source.isfile(): dest_is_file = dest_file.isfile() if dest_is_file: # only occurs if upload is called directly with existing objects logger.warning( 'Destination path ({}) already exists, will not cause ' 'duplicate file objects on the platform. Skipping...'. format(dest_file)) else: with _wrap_dx_calls(): dxpy.upload_local_file( filename=upload_obj.source, project=self.canonical_project, folder='/' + (dest_file.parent.resource or ''), parents=True, name=dest_file.name) elif upload_obj.source.isdir(): dest_file.makedirs_p() else: raise stor_exceptions.NotFoundError( 'Source path ({}) does not exist. Please provide a valid source' .format(upload_obj.source))
class S3IntegrationTest(BaseIntegrationTest.BaseTestCases): """ Integration tests for S3. Note that for now, while upload/download/remove methods are not implemented, tests will use the existing stor-test-bucket bucket on S3. In order to run the tests, you must have valid AWS S3 credentials set in the following environment variables: AWS_TEST_ACCESS_KEY_ID, AWS_TEST_SECRET_ACCESS_KEY (and optionally AWS_DEFAULT_REGION). """ def setUp(self): super(S3IntegrationTest, self).setUp() if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID') and os.environ.get('AWS_TEST_SECRET_ACCESS_KEY') and os.environ.get('S3_TEST_BUCKET')): raise unittest.SkipTest( 'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET ' ' env vars not set. Skipping integration test') # Disable loggers so nose output is clean logging.getLogger('botocore').setLevel(logging.CRITICAL) test_bucket = os.environ['S3_TEST_BUCKET'] self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format( test_bucket=test_bucket, uuid=uuid.uuid4())) self.test_dir = self.test_bucket / 'test' stor.settings.update({ 's3': { 'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'], 'aws_secret_access_key': os.environ['AWS_TEST_SECRET_ACCESS_KEY'] } }) def tearDown(self): super(S3IntegrationTest, self).tearDown() self.test_dir.rmtree() def test_over_1000_files(self): num_test_objs = 1234 min_obj_size = 0 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, min_obj_size) self.test_dir.upload(['.']) self.assertEquals(1234, len(self.test_dir.list())) self.assertEquals(1200, len(self.test_dir.list(limit=1200))) self.assertTrue(self.test_dir.isdir()) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download('./') self.assertEquals(1234, len(os.listdir(tmp_d))) def test_list_methods(self): fake_bucket = Path('s3://stor-test-bucket2') with self.assertRaises(exceptions.NotFoundError): fake_bucket.list() fake_folder = self.test_bucket / 'not_a_dir' self.assertEquals([], fake_folder.list()) with NamedTemporaryDirectory(change_dir=True): open('file1.txt', 'w').close() open('file2.txt', 'w').close() os.mkdir('nested_dir') os.mkdir('nested_dir/dir') open('nested_dir/dir/file3.txt', 'w').close() self.test_dir.upload(['.']) file_list = self.test_dir.list() starts_with_list = self.test_bucket.list(starts_with='test') self.assertEquals(set(file_list), set(starts_with_list)) self.assertEquals( set(file_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/dir/file3.txt' ])) dir_list = self.test_dir.listdir() self.assertEquals( set(dir_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/' ])) self.assertTrue(self.test_dir.listdir() == (self.test_dir + '/').listdir()) def test_is_methods(self): """ Tests is methods, exists(), and getsize(). getsize() integration test may be moved to a different test depending on whether other metadata methods (such as stat()) are implemented. """ self.assertTrue(self.test_bucket.exists()) self.assertTrue(self.test_bucket.isdir()) self.assertFalse(self.test_bucket.isfile()) self.assertEquals(self.test_bucket.getsize(), 0) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, 1, 10) self.test_dir.upload(['.']) correct_size = os.path.getsize('0') self.assertTrue(self.test_dir.exists()) self.assertTrue(self.test_dir.isdir()) self.assertFalse(self.test_dir.isfile()) self.assertEquals(self.test_dir.getsize(), 0) test_file = self.test_dir / '0' self.assertTrue(test_file.exists()) self.assertFalse(test_file.isdir()) self.assertTrue(test_file.isfile()) self.assertEquals(test_file.getsize(), correct_size) test_file.remove() self.assertFalse(test_file.exists()) self.assertFalse(test_file.isdir()) self.assertFalse(test_file.isfile()) with self.assertRaises(exceptions.NotFoundError): test_file.getsize() fake_bucket = self.test_bucket + '2' self.assertFalse(fake_bucket.exists()) self.assertFalse(fake_bucket.isdir()) self.assertFalse(fake_bucket.isfile()) with self.assertRaises(exceptions.NotFoundError): fake_bucket.getsize() def test_upload_download_remove(self): num_test_objs = 10 min_obj_size = 50 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, min_obj_size) self.test_dir.upload(['.']) for which_obj in self.get_dataset_obj_names(num_test_objs): self.assertTrue((self.test_dir / which_obj).exists()) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download(tmp_d) for which_obj in self.get_dataset_obj_names(num_test_objs): self.assertCorrectObjectContents(which_obj, which_obj, min_obj_size) (self.test_dir / which_obj).remove() # consistency check while (self.test_dir / which_obj).exists(): time.sleep(.5) self.assertFalse((self.test_dir / which_obj).exists()) def test_upload_w_headers(self): test_file = self.test_dir / 'a.txt' with NamedTemporaryDirectory(change_dir=True): open('a.txt', 'w').close() self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'}) self.assertTrue(test_file.exists()) self.assertEquals(test_file.stat()['ContentLanguage'], 'en') def test_download(self): with NamedTemporaryDirectory(change_dir=True): os.mkdir('dir') os.mkdir('dir/a') open('dir/a/a.txt', 'w').close() self.test_dir.upload(['.']) with NamedTemporaryDirectory(change_dir=True): open('dir', 'w').close() open('a', 'w').close() with self.assertRaises(OSError): self.test_dir.download('.') with self.assertRaises(OSError): (self.test_dir / 'dir').download('.') def test_condition(self): num_test_objs = 20 test_obj_size = 100 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) Path('.').copytree(self.test_dir) # Verify a ConditionNotMet exception is thrown when attempting to list # a file that hasn't been uploaded expected_objs = { self.test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs + 1) } with self.assertRaises(exceptions.ConditionNotMetError): self.test_dir.list( condition=lambda results: expected_objs == set(results)) # Verify that the condition passes when excluding the non-extant file correct_objs = { self.test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs) } objs = self.test_dir.list( condition=lambda results: correct_objs == set(results)) self.assertEquals(correct_objs, set(objs)) def test_dir_markers(self): with NamedTemporaryDirectory(change_dir=True): os.mkdir('empty') os.mkdir('dir') open('a.txt', 'w').close() open('dir/b.txt', 'w').close() self.test_dir.upload(['.']) self.assertEquals( set(self.test_dir.list()), { self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt', self.test_dir / 'empty/' }) self.assertEquals( set(self.test_dir.list(ignore_dir_markers=True)), {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'}) self.assertTrue((self.test_dir / 'empty').isdir()) with NamedTemporaryDirectory(change_dir=True): self.test_dir.download('.') self.assertTrue(os.path.isdir('empty')) self.assertTrue(os.path.exists('dir/b.txt')) self.assertTrue(os.path.exists('a.txt')) def test_copytree_to_from_dir_w_manifest(self): num_test_objs = 10 test_obj_size = 100 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) # Make a nested file and an empty directory for testing purposes tmp_d = Path(tmp_d) os.mkdir(tmp_d / 'my_dir') open(tmp_d / 'my_dir' / 'empty_file', 'w').close() os.mkdir(tmp_d / 'my_dir' / 'empty_dir') stor.copytree('.', self.test_dir, use_manifest=True) # Validate the contents of the manifest file manifest_contents = utils.get_data_manifest_contents(self.test_dir) expected_contents = self.get_dataset_obj_names(num_test_objs) expected_contents.extend( ['my_dir/empty_file', 'my_dir/empty_dir/']) expected_contents = [Path('test') / c for c in expected_contents] self.assertEquals(set(manifest_contents), set(expected_contents)) with NamedTemporaryDirectory(change_dir=True) as tmp_d: # Download the results successfully Path(self.test_dir).copytree('test', use_manifest=True) # Now delete one of the objects from s3. A second download # will fail with a condition error Path(self.test_dir / 'my_dir' / 'empty_dir/').remove() with self.assertRaises(exceptions.ConditionNotMetError): Path(self.test_dir).copytree('test', use_manifest=True, num_retries=0) def test_multipart_transfer(self): logger = six.StringIO() handler = logging.StreamHandler(logger) logging.getLogger('botocore').setLevel(logging.DEBUG) logging.getLogger('botocore').addHandler(handler) handler.setLevel(logging.DEBUG) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, 1, 10 * 1024 * 1024) self.test_dir.upload(['.']) self.assertEquals(1, len(self.test_dir.listdir())) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download('.') self.assertEquals(1, len(Path('.').listdir())) self.assertIn("CompleteMultipartUploadResult", logger.getvalue()) # Check for multipart download by checking for multiple 206 GET requests # to the object self.assertRegexpMatches( logger.getvalue(), '"GET (/stor-test-bucket)?/test/0 HTTP/1.1" 206')