def test_temp_url(self):
    basic_file = 'test.txt'
    complex_file = 'my test?file=special_chars.txt'
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        nested_tmp_dir = stor.join(tmp_d, 'tmp')
        os.mkdir(nested_tmp_dir)
        basic_file_p = stor.join(nested_tmp_dir, basic_file)
        complex_file_p = stor.join(nested_tmp_dir, complex_file)
        with stor.open(basic_file_p, 'w') as f:
            f.write('basic test')
        with stor.open(complex_file_p, 'w') as f:
            f.write('complex test')
        self.test_container.upload(['.'])
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        basic_obj = stor.Path(
            stor.join(self.test_container, 'tmp', basic_file))
        basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file)
        r = requests.get(basic_temp_url)
        self.assertEquals(r.content, 'basic test')
        self.assertEquals(
            r.headers['Content-Disposition'],
            'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt')
        complex_obj = stor.Path(
            stor.join(self.test_container, 'tmp', complex_file))
        complex_temp_url = complex_obj.temp_url(inline=False,
                                                filename=complex_file)
        r = requests.get(complex_temp_url)
        self.assertEquals(r.content, 'complex test')
        self.assertEquals(
            r.headers['Content-Disposition'],
            'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt')  # noqa
def test_is_methods(self):
    container = self.test_container
    file_with_prefix = stor.join(container, 'analysis.txt')
    # ensure container is created but empty
    container.post()
    self.assertTrue(stor.isdir(container))
    self.assertFalse(stor.isfile(container))
    self.assertTrue(stor.exists(container))
    self.assertFalse(stor.listdir(container))
    folder = stor.join(container, 'analysis')
    subfolder = stor.join(container, 'analysis', 'alignments')
    file_in_folder = stor.join(container, 'analysis', 'alignments', 'bam.bam')
    self.assertFalse(stor.exists(file_in_folder))
    self.assertFalse(stor.isdir(folder))
    self.assertFalse(stor.isdir(folder + '/'))
    with stor.open(file_with_prefix, 'w') as fp:
        fp.write('data\n')
    self.assertFalse(stor.isdir(folder))
    self.assertTrue(stor.isfile(file_with_prefix))
    with stor.open(file_in_folder, 'w') as fp:
        fp.write('blah.txt\n')
    self.assertTrue(stor.isdir(folder))
    self.assertFalse(stor.isfile(folder))
    self.assertTrue(stor.isdir(subfolder))
def test_non_empty_directory_errors(self):
    example_dir = stor.join(self.test_dir, 'example')
    assert not example_dir.exists()
    other_dir = stor.join(self.test_dir, 'otherdir')
    self.create_dataset(other_dir, 1, 1)
    self.create_dataset(example_dir, 1, 1)
    example_dir.makedirs_p()
    with self.assertRaisesRegexp(FileExistsError, '.*File exists'):
        try:
            stor.copytree(other_dir, example_dir)
        except FileExistsError as e:
            assert e.errno == 17  # EEXIST
            raise
def test_on_del_no_writes(self, mock_read_object, mock_write_object):
    fileobj = stor.open(stor.join(self.drive, 'B/C/obj'), 'w')
    del fileobj
    gc.collect()
    self.assertFalse(mock_read_object.called)
    self.assertFalse(mock_write_object.called)
    fileobj = stor.open(stor.join(self.drive, 'B/C/obj'), 'r')
    del fileobj
    gc.collect()
    self.assertFalse(mock_read_object.called)
    self.assertFalse(mock_write_object.called)
def test_all_segment_container_types_are_deleted(self):
    segment_containers = [
        stor.join('swift://' + self.test_container.tenant,
                  fmt % self.test_container.name)
        for fmt in ('.segments_%s', '%s+segments', '%s_segments')]
    all_containers = segment_containers + [self.test_container]
    test_files = [stor.join(c, 'test_file_tbdeleted.txt')
                  for c in all_containers]
    for t in test_files:
        with stor.open(t, 'w') as fp:
            fp.write('testtxt\n')
    assert all(t.exists() for t in test_files)
    stor.rmtree(self.test_container)
    for t in test_files:
        assert not t.exists(), 'Did not delete %s' % t
def test_static_large_obj_copy_and_segment_container(self):
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        segment_size = 1048576
        obj_size = segment_size * 4 + 100
        self.create_dataset(tmp_d, 1, obj_size)
        obj_path = stor.join(tmp_d, self.get_dataset_obj_names(1)[0])
        options = {'swift:upload': {'segment_size': segment_size}}
        with settings.use(options):
            obj_path.copy(self.test_container / 'large_object.txt')

        # Verify there is a segment container and that it can be ignored
        # when listing a dir
        segment_container = Path(self.test_container.parent) / (
            '.segments_%s' % self.test_container.name)
        containers = Path(self.test_container.parent).listdir(
            ignore_segment_containers=False)
        self.assertTrue(segment_container in containers)
        self.assertTrue(self.test_container in containers)
        containers = Path(self.test_container.parent).listdir(
            ignore_segment_containers=True)
        self.assertFalse(segment_container in containers)
        self.assertTrue(self.test_container in containers)

        # Verify there are five segments
        objs = set(segment_container.list(
            condition=lambda results: len(results) == 5))
        self.assertEquals(len(objs), 5)

        # Copy back the large object and verify its contents
        obj_path = Path(tmp_d) / 'large_object.txt'
        Path(self.test_container / 'large_object.txt').copy(obj_path)
        self.assertCorrectObjectContents(obj_path,
                                         self.get_dataset_obj_names(1)[0],
                                         obj_size)
def test_w_swift_component(self):
    p = posix.PosixPath('my/path') / swift.SwiftPath('swift://t/c/name').name
    self.assertEquals(p, posix.PosixPath('my/path/name'))
    self.assertEquals(
        stor.join('my/path', swift.SwiftPath('swift://t/c/name').name),
        p)
def s3_to_swift(s3_path):
    """S3 Cloud-Sync style path to SwiftStack path.

    Args:
        s3_path (str|Path): path to convert

    Returns:
        SwiftPath: the converted path
    """
    # Drop the first resource component (the Cloud-Sync hash prefix); the
    # remaining components are tenant/container/resource.
    return stor.join('swift://',
                     *stor.Path(s3_path).resource.split('/')[1:])
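# A minimal usage sketch (doctest-style, not from the original source). The
# bucket name, hash prefix, tenant, and container below are hypothetical;
# Cloud-Sync lays objects out as s3://<bucket>/<prefix>/<tenant>/<container>/...,
# and s3_to_swift drops the <prefix> component to recover the Swift path:
#
# >>> str(s3_to_swift('s3://my-bucket/0a1b2c/AUTH_test/my-container/dir/obj.txt'))
# 'swift://AUTH_test/my-container/dir/obj.txt'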
def test_metadata_pulling(self):
    file_in_folder = stor.join(self.test_container, 'somefile.svg')
    with stor.open(file_in_folder, 'w') as fp:
        fp.write('12345\n')
    self.assertEqual(stor.getsize(file_in_folder), 6)
    stat_data = stor.Path(file_in_folder).stat()
    self.assertIn('Content-Type', stat_data)
    self.assertEqual(stat_data['Content-Type'], 'image/svg+xml')
def test_gzip_on_remote(self):
    local_gzip = os.path.join(os.path.dirname(__file__),
                              'file_data/s_3_2126.bcl.gz')
    remote_gzip = stor.join(self.test_dir, stor.basename(local_gzip))
    stor.copy(local_gzip, remote_gzip)
    with stor.open(remote_gzip) as fp:
        with gzip.GzipFile(fileobj=fp) as remote_gzip_fp:
            with gzip.open(local_gzip) as local_gzip_fp:
                assert_same_data(remote_gzip_fp, local_gzip_fp)
def test_copytree_w_headers(self):
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        open(tmp_d / 'test_obj', 'w').close()
        stor.copytree('.', self.test_container,
                      headers=['X-Delete-After:1000'])
        obj = stor.join(self.test_container, 'test_obj')
        stat_results = obj.stat()
        self.assertTrue('x-delete-at' in stat_results['headers'])
def test_empty_buffer_no_writes(self, mock_read_object, mock_write_object):
    # NOTE: this tests that the current behavior (only non-empty buffers
    # are uploaded) is enshrined.
    fileobj = stor.open(stor.join(self.drive, 'B/C/obj'), 'w')
    fileobj.flush()
    self.assertFalse(fileobj._buffer)
    fileobj.write('')
    fileobj.flush()
    fileobj.close()
    self.assertFalse(mock_read_object.called)
    self.assertFalse(mock_write_object.called)
def test_readable_writable_seekable(self):
    pth = self.normal_path
    read_obj = pth.open(mode='r')
    self.assertTrue(read_obj.readable())
    self.assertFalse(read_obj.writable())
    self.assertTrue(read_obj.seekable())
    write_obj = stor.open(stor.join(self.drive, 'B/C/obj'), 'w')
    self.assertFalse(write_obj.readable())
    self.assertTrue(write_obj.writable())
    self.assertTrue(write_obj.seekable())
def test_copy_to_from_dir(self):
    num_test_objs = 5
    min_obj_size = 100
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        self.create_dataset(tmp_d, num_test_objs, min_obj_size)
        for which_obj in self.get_dataset_obj_names(num_test_objs):
            obj_path = stor.join(self.test_dir, '%s.txt' % which_obj)
            stor.copy(which_obj, obj_path)
            stor.copy(obj_path, 'copied_file')
            self.assertCorrectObjectContents('copied_file', which_obj,
                                             min_obj_size)
def test_works_with_gzip(self, mock_read_object):
    gzip_path = stor.join(stor.dirname(__file__), 'file_data',
                          's_3_2126.bcl.gz')
    text = stor.open(gzip_path, 'rb').read()
    mock_read_object.return_value = text
    fileobj = stor.open(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'rb')
    with fileobj:
        with gzip.GzipFile(fileobj=fileobj) as fp:
            with gzip.open(gzip_path) as gzip_fp:
                assert_same_data(fp, gzip_fp)
    fileobj = stor.open(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'rb')
    with fileobj:
        with gzip.GzipFile(fileobj=fileobj) as fp:
            with gzip.open(gzip_path) as gzip_fp:
                # after seeking, the data should still match
                fp.seek(3)
                gzip_fp.seek(3)
                assert_same_data(fp, gzip_fp)
def swift_to_s3(swift_path, bucket):
    """SwiftStack Swift path to S3 Cloud-Sync style path.

    Args:
        swift_path (str|Path): path to convert
        bucket (str): name of S3 bucket

    Returns:
        S3Path: the converted path

    See https://www.swiftstack.com/docs/admin/cluster_management/cloud_sync.html#swift-object-representation-in-s3
    for details.
    """  # noqa
    if not bucket:
        raise TypeError('bucket is required')
    swift_path = stor.Path(swift_path)
    # Cloud-Sync prefixes each object with the md5 of "tenant/container",
    # reduced mod 16**6 to a short hex string.
    h = hashlib.md5(
        (u'%s/%s' % (swift_path.tenant, swift_path.container)).encode('utf8')
    ).hexdigest()
    # rstrip('L') drops the Python 2 long-literal suffix
    prefix = hex(int(h, 16) % 16**6).lstrip('0x').rstrip('L')
    pth = stor.join('s3://%s' % bucket, prefix,
                    swift_path.tenant, swift_path.container)
    if swift_path.resource:
        pth = stor.join(pth, swift_path.resource)
    return pth
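# A minimal usage sketch (doctest-style, not from the original source); the
# bucket, tenant, and container are hypothetical, and <prefix> stands in for
# the short hex hash computed above from md5('AUTH_test/my-container'):
#
# >>> str(swift_to_s3('swift://AUTH_test/my-container/dir/obj.txt', 'my-bucket'))
# 's3://my-bucket/<prefix>/AUTH_test/my-container/dir/obj.txt'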
def test_invalid_open(self):
    pth = stor.join(self.drive, 'B/C/D')
    with self.assertRaisesRegexp(ValueError, 'mode'):
        # keep reference here
        f = stor.open(pth, 'invalid')  # noqa
        assert False, 'should error before this'  # pragma: no cover
    with self.assertRaisesRegexp(ValueError, 'mode'):
        with stor.open(pth, 'invalid'):
            assert False, 'should error before this'  # pragma: no cover
    with self.assertRaisesRegexp(ValueError, 'mode'):
        with stor.Path(pth).open('invalid'):
            assert False, 'should error before this'  # pragma: no cover
def test_gzip_on_remote(self):
    self._skip_if_filesystem_python3(self.test_dir)
    local_gzip = os.path.join(os.path.dirname(__file__),
                              'file_data/s_3_2126.bcl.gz')
    remote_gzip = stor.join(self.test_dir, stor.basename(local_gzip))
    stor.copy(local_gzip, remote_gzip)
    file_h = dxpy.DXFile(dxid=remote_gzip.canonical_resource,
                         project=remote_gzip.canonical_project)
    file_h.wait_on_close(20)  # wait for file to go to closed state
    with stor.open(remote_gzip, mode='rb') as fp:
        with gzip.GzipFile(fileobj=fp) as remote_gzip_fp:
            with gzip.open(local_gzip) as local_gzip_fp:
                assert_same_data(remote_gzip_fp, local_gzip_fp)
def test_copy_to_from_dir(self):
    num_test_objs = 5
    min_obj_size = 100
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        self.create_dataset(tmp_d, num_test_objs, min_obj_size)
        for which_obj in self.get_dataset_obj_names(num_test_objs):
            obj_path = stor.join(self.test_dir, '%s.txt' % which_obj)
            stor.copy(which_obj, obj_path)
            file_h = dxpy.DXFile(dxid=obj_path.canonical_resource,
                                 project=obj_path.canonical_project)
            file_h.wait_on_close(20)  # wait for file to go to closed state
            stor.copy(obj_path, 'copied_file')
            self.assertCorrectObjectContents('copied_file', which_obj,
                                             min_obj_size)
def test_walkfiles(self):
    with NamedTemporaryDirectory(change_dir=True):
        # Make a dataset with files that will match a particular pattern
        # (*.sh) and also empty directories that should be ignored when
        # calling walkfiles
        open('aabc.sh', 'w').close()
        open('aabc', 'w').close()
        os.mkdir('b')
        open('b/c.sh', 'w').close()
        os.mkdir('empty')
        open('b/d', 'w').close()
        open('b/abbbc', 'w').close()
        Path('.').copytree(self.test_dir)

        unfiltered_files = list(self.test_dir.walkfiles())
        self.assertEquals(
            set(unfiltered_files),
            set([
                stor.join(self.test_dir, 'aabc.sh'),
                stor.join(self.test_dir, 'aabc'),
                stor.join(self.test_dir, 'b/c.sh'),
                stor.join(self.test_dir, 'b/d'),
                stor.join(self.test_dir, 'b/abbbc'),
            ]))
        prefix_files = list(self.test_dir.walkfiles('*.sh'))
        self.assertEquals(
            set(prefix_files),
            set([
                stor.join(self.test_dir, 'aabc.sh'),
                stor.join(self.test_dir, 'b/c.sh'),
            ]))
        double_infix_files = list(self.test_dir.walkfiles('a*b*c'))
        self.assertEquals(
            set(double_infix_files),
            set([
                stor.join(self.test_dir, 'aabc'),
                stor.join(self.test_dir, 'b/abbbc'),
            ]))
        suffix_files = list(self.test_dir.walkfiles('a*'))
        self.assertEquals(
            set(suffix_files),
            set([
                stor.join(self.test_dir, 'aabc.sh'),
                stor.join(self.test_dir, 'aabc'),
                stor.join(self.test_dir, 'b/abbbc'),
            ]))
        # should still *make* an empty directory
        assert stor.exists(stor.join(self.test_dir, 'empty'))
def setUp(self):
    super(FilesystemIntegrationTest, self).setUp()
    ntp_obj = stor.NamedTemporaryDirectory()
    # ensure that it's empty and does not exist to start
    self.test_dir = stor.join(ntp_obj.__enter__(), 'parent')
    self.addCleanup(ntp_obj.__exit__, None, None, None)
def test_w_s3_component(self):
    p = posix.PosixPath('my/path') / s3.S3Path('s3://b/name').name
    self.assertEquals(p, posix.PosixPath('my/path/name'))
    self.assertEquals(stor.join('my/path', s3.S3Path('s3://b/name').name), p)
def test_success(self):
    p = posix.PosixPath('my/path') / 'other/path'
    self.assertEquals(p, posix.PosixPath('my/path/other/path'))
    self.assertEquals(p, stor.join('my/path', 'other/path'))
def is_writeable(path, swift_retry_options=None):
    """
    Determine whether we have permission to write to ``path``.

    Behavior of this method differs slightly across storage types when the
    directory doesn't exist:

    1. For local file systems, this function returns True only if the target
       directory exists and a file can be written to it.
    2. For AWS S3, this function returns True only if the target bucket is
       already present and we have write access to the bucket.
    3. For Swift, this function returns True only if the target tenant is
       already present and we have write access to the tenant and container.
       The container doesn't have to be present.

    This function is useful because `stor.stat()` will succeed if we have
    read-only permissions to ``path``, but the eventual attempt to upload
    will fail. Secondly, ``path`` might not exist yet. If the intent of the
    caller is to create it, `stor.stat()` will fail, whereas the eventual
    upload attempt would succeed.

    Args:
        path (stor.Path|str): The path to check.
        swift_retry_options (dict): Optional retry arguments to use for swift
            upload or download. View the `swift module-level documentation
            <swiftretry>` for more information on retry arguments. If the
            goal is to not use exponential backoff, pass
            ``{'num_retries': 0}`` here.

    Returns:
        bool: Whether ``path`` is writeable or not.
    """
    from stor import basename
    from stor import join
    from stor import Path
    from stor import remove
    from stor.swift import ConflictError
    from stor.swift import SwiftPath
    from stor.swift import UnauthorizedError
    from stor.swift import UnavailableError

    path = with_trailing_slash(Path(path))

    if is_filesystem_path(path):
        return os.access(path, os.W_OK)

    container_path = None
    container_existed = None
    if is_swift_path(path):
        # We want this function to behave as a no-op with regards to the
        # underlying container structure. Therefore we need to remove any
        # containers created by this function that were not present when it
        # was called. The `container_existed` defined below will store
        # whether the container that we're checking existed when calling
        # this function, so that we know if it should be removed at the end.
        container_path = Path('{}{}/{}/'.format(
            SwiftPath.drive,
            path.tenant,
            path.container
        ))
        container_existed = container_path.exists()

    with tempfile.NamedTemporaryFile() as tmpfile:
        try:
            # Attempt to create a file in the `path`.
            copy(tmpfile.name, path, swift_retry_options=swift_retry_options)
            # Remove the file that was created.
            remove(join(path, basename(tmpfile.name)))
            answer = True
        except (UnauthorizedError, UnavailableError, IOError, OSError,
                exceptions.FailedUploadError):
            answer = False

    # Remove the Swift container if it didn't exist when calling this
    # function, but exists now. This way this function remains a no-op with
    # regards to container structure.
    if container_existed is False and container_path.exists():
        try:
            container_path.remove_container()
        except ConflictError:
            # Ignore if some other thread/user created the container in the
            # meantime.
            pass

    return answer
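# A minimal usage sketch (doctest-style, not from the original source); the
# paths are hypothetical. Note that is_writeable probes by copying in and
# then removing a small temporary file, so it is not a pure read-only check:
#
# >>> is_writeable('/tmp')
# True
# >>> is_writeable('swift://AUTH_test/some-container/',
# ...              swift_retry_options={'num_retries': 0})
# False  # e.g. with read-only credentials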