Beispiel #1
0
    def test_temp_url(self):
        """Upload two objects and download each through a temporary URL.

        Checks the downloaded body and the ``Content-Disposition`` header for a
        plain file name and for a name containing a space, ``?`` and ``=``.
        """
        basic_file = 'test.txt'
        complex_file = 'my test?file=special_chars.txt'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            nested_tmp_dir = stor.join(tmp_d, 'tmp')
            os.mkdir(nested_tmp_dir)
            basic_file_p = stor.join(nested_tmp_dir, basic_file)
            # Reuse the name defined above so the uploaded object and the object
            # requested below cannot drift apart.
            complex_file_p = stor.join(nested_tmp_dir, complex_file)

            with stor.open(basic_file_p, 'w') as f:
                f.write('basic test')
            with stor.open(complex_file_p, 'w') as f:
                f.write('complex test')

            # Upload relative to the temp dir (the cwd), so objects land under 'tmp/'.
            self.test_container.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True):
            basic_obj = stor.Path(
                stor.join(self.test_container, 'tmp', basic_file))
            basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file)
            r = requests.get(basic_temp_url)
            self.assertEqual(r.content, 'basic test')
            self.assertEqual(r.headers['Content-Disposition'],
                             'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt')

            complex_obj = stor.Path(
                stor.join(self.test_container, 'tmp', complex_file))
            complex_temp_url = complex_obj.temp_url(inline=False, filename=complex_file)
            r = requests.get(complex_temp_url)
            self.assertEqual(r.content, 'complex test')
            self.assertEqual(r.headers['Content-Disposition'],
                             'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt')  # noqa
Beispiel #2
0
    def test_is_methods(self):
        """Exercise ``isdir``/``isfile``/``exists``/``listdir`` on a container.

        A file named ``analysis.txt`` shares a prefix with the ``analysis``
        folder; writing it must not make ``analysis`` look like a directory.
        """
        container = self.test_container
        file_with_prefix = stor.join(container, 'analysis.txt')

        # ensure container is created but empty
        container.post()
        self.assertTrue(stor.isdir(container))
        self.assertFalse(stor.isfile(container))
        self.assertTrue(stor.exists(container))
        self.assertFalse(stor.listdir(container))

        folder = stor.join(container, 'analysis')
        subfolder = stor.join(container, 'analysis', 'alignments')
        file_in_folder = stor.join(container, 'analysis', 'alignments',
                                   'bam.bam')
        self.assertFalse(stor.exists(file_in_folder))
        self.assertFalse(stor.isdir(folder))
        self.assertFalse(stor.isdir(folder + '/'))
        with stor.open(file_with_prefix, 'w') as fp:
            fp.write('data\n')
        # 'analysis.txt' only shares the prefix; 'analysis' is still not a dir.
        self.assertFalse(stor.isdir(folder))
        self.assertTrue(stor.isfile(file_with_prefix))

        with stor.open(file_in_folder, 'w') as fp:
            fp.write('blah.txt\n')

        # Once an object exists below them, both folder levels report as dirs.
        self.assertTrue(stor.isdir(folder))
        self.assertFalse(stor.isfile(folder))
        self.assertTrue(stor.isdir(subfolder))
Beispiel #3
0
 def test_non_empty_directory_errors(self):
     """``copytree`` into an existing, populated directory raises errno 17."""
     target_dir = stor.join(self.test_dir, 'example')
     assert not target_dir.exists()
     source_dir = stor.join(self.test_dir, 'otherdir')
     self.create_dataset(source_dir, 1, 1)
     self.create_dataset(target_dir, 1, 1)
     target_dir.makedirs_p()
     with self.assertRaisesRegexp(FileExistsError, '.*File exists') as caught:
         stor.copytree(source_dir, target_dir)
     # The error must carry the errno value 17 as well as the message.
     assert caught.exception.errno == 17
Beispiel #4
0
    def test_on_del_no_writes(self, mock_read_object, mock_write_object):
        """Dropping an untouched handle must neither read nor write the object."""
        # Same check for a write handle first, then a read handle.
        for mode in ('w', 'r'):
            handle = stor.open(stor.join(self.drive, 'B/C/obj'), mode)
            del handle
            gc.collect()

            self.assertFalse(mock_read_object.called)
            self.assertFalse(mock_write_object.called)
Beispiel #5
0
    def test_all_segment_container_types_are_deleted(self):
        """``rmtree`` on the container also empties every segment-container variant."""
        all_containers = []
        for fmt in ('.segments_%s', '%s+segments', '%s_segments'):
            tenant_root = 'swift://' + self.test_container.tenant
            all_containers.append(stor.join(tenant_root, fmt % self.test_container.name))
        all_containers.append(self.test_container)

        # Put one marker object into each container, segment variants included.
        test_files = []
        for each_container in all_containers:
            test_files.append(stor.join(each_container, 'test_file_tbdeleted.txt'))
        for marker in test_files:
            with stor.open(marker, 'w') as out:
                out.write('testtxt\n')
        assert all(marker.exists() for marker in test_files)

        stor.rmtree(self.test_container)
        for marker in test_files:
            assert not marker.exists(), 'Did not delete %s' % marker
Beispiel #6
0
    def test_static_large_obj_copy_and_segment_container(self):
        """Upload a segmented object, inspect its segment container, copy it back.

        The object is ``4 * segment_size + 100`` bytes, so with the configured
        ``segment_size`` the test expects five segments.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            segment_size = 1048576
            obj_size = segment_size * 4 + 100
            self.create_dataset(tmp_d, 1, obj_size)
            obj_path = stor.join(tmp_d,
                                 self.get_dataset_obj_names(1)[0])
            options = {'swift:upload': {'segment_size': segment_size}}
            with settings.use(options):
                obj_path.copy(self.test_container / 'large_object.txt')

            # Verify there is a segment container and that it can be ignored when listing a dir
            tenant = Path(self.test_container.parent)
            segment_container = tenant / ('.segments_%s' % self.test_container.name)
            containers = tenant.listdir(ignore_segment_containers=False)
            self.assertIn(segment_container, containers)
            self.assertIn(self.test_container, containers)
            containers = tenant.listdir(ignore_segment_containers=True)
            self.assertNotIn(segment_container, containers)
            self.assertIn(self.test_container, containers)

            # Verify there are five segments
            objs = set(segment_container.list(condition=lambda results: len(results) == 5))
            self.assertEqual(len(objs), 5)

            # Copy back the large object and verify its contents
            obj_path = Path(tmp_d) / 'large_object.txt'
            Path(self.test_container / 'large_object.txt').copy(obj_path)
            self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size)
Beispiel #7
0
 def test_w_swift_component(self):
     """Joining a posix path with a SwiftPath's ``name`` yields a PosixPath."""
     p = posix.PosixPath('my/path') / swift.SwiftPath(
         'swift://t/c/name').name
     self.assertEqual(p, posix.PosixPath('my/path/name'))
     # stor.join must agree with the ``/`` operator.
     self.assertEqual(
         stor.join('my/path',
                   swift.SwiftPath('swift://t/c/name').name), p)
Beispiel #8
0
def s3_to_swift(s3_path):
    """Convert an S3 Cloud-Sync style path to a SwiftStack path.

    The first ``/``-separated component of the S3 resource is dropped and the
    remaining components are joined under ``swift://``.

    Args:
        s3_path (str|Path): path to convert
    Returns:
        SwiftPath: the converted path
    """
    resource_parts = stor.Path(s3_path).resource.split('/')
    swift_parts = resource_parts[1:]
    return stor.join('swift://', *swift_parts)
Beispiel #9
0
    def test_metadata_pulling(self):
        """Size and ``Content-Type`` are reported for a freshly written ``.svg`` object."""
        svg_obj = stor.join(self.test_container, 'somefile.svg')
        with stor.open(svg_obj, 'w') as out:
            out.write('12345\n')

        # Five digits plus the newline.
        reported_size = stor.getsize(svg_obj)
        self.assertEqual(reported_size, 6)

        metadata = stor.Path(svg_obj).stat()
        self.assertIn('Content-Type', metadata)
        self.assertEqual(metadata['Content-Type'], 'image/svg+xml')
Beispiel #10
0
 def test_gzip_on_remote(self):
     """A gzip file copied to the test dir decompresses to the same data as the local one."""
     fixture_dir = os.path.dirname(__file__)
     local_gzip = os.path.join(fixture_dir, 'file_data/s_3_2126.bcl.gz')
     remote_gzip = stor.join(self.test_dir, stor.basename(local_gzip))
     stor.copy(local_gzip, remote_gzip)
     # Open order matters: the GzipFile wraps the already-open remote handle.
     with stor.open(remote_gzip) as remote_fp, \
             gzip.GzipFile(fileobj=remote_fp) as remote_data, \
             gzip.open(local_gzip) as local_data:
         assert_same_data(remote_data, local_data)
Beispiel #11
0
    def test_copytree_w_headers(self):
        """Headers passed to ``copytree`` are applied to the uploaded objects.

        Uploads with ``X-Delete-After`` and expects ``x-delete-at`` among the
        headers returned by ``stat()``.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Create an empty file to upload.
            open(tmp_d / 'test_obj', 'w').close()
            stor.copytree('.',
                          self.test_container,
                          headers=['X-Delete-After:1000'])

        obj = stor.join(self.test_container, 'test_obj')
        stat_results = obj.stat()
        # assertIn reports the actual headers on failure, unlike assertTrue(x in y).
        self.assertIn('x-delete-at', stat_results['headers'])
Beispiel #12
0
 def test_empty_buffer_no_writes(self, mock_read_object, mock_write_object):
     """Flushing and closing a handle whose buffer stayed empty uploads nothing.

     NOTE: this enshrines the current description of the behaviour, namely
     that only non-empty buffers are uploaded.
     """
     target = stor.join(self.drive, 'B/C/obj')
     writer = stor.open(target, 'w')
     writer.flush()
     self.assertFalse(writer._buffer)
     # Writing an empty string must still leave nothing to upload.
     writer.write('')
     writer.flush()
     writer.close()
     for mocked in (mock_read_object, mock_write_object):
         self.assertFalse(mocked.called)
Beispiel #13
0
    def test_readable_writable_seekable(self):
        """Read handles are readable/seekable only; write handles writable/seekable only."""
        reader = self.normal_path.open(mode='r')
        self.assertTrue(reader.readable())
        self.assertFalse(reader.writable())
        self.assertTrue(reader.seekable())

        write_target = stor.join(self.drive, 'B/C/obj')
        writer = stor.open(write_target, 'w')
        self.assertFalse(writer.readable())
        self.assertTrue(writer.writable())
        self.assertTrue(writer.seekable())
Beispiel #14
0
 def test_copy_to_from_dir(self):
     """Round-trip every dataset object through the test dir and check its contents."""
     object_count = 5
     object_size = 100
     with NamedTemporaryDirectory(change_dir=True) as work_dir:
         self.create_dataset(work_dir, object_count, object_size)
         for name in self.get_dataset_obj_names(object_count):
             remote_copy = stor.join(self.test_dir, '%s.txt' % name)
             # Copy to the test dir, then back into the cwd.
             stor.copy(name, remote_copy)
             stor.copy(remote_copy, 'copied_file')
             self.assertCorrectObjectContents('copied_file', name, object_size)
Beispiel #15
0
    def test_works_with_gzip(self, mock_read_object):
        """A handle opened in ``'rb'`` mode can be wrapped by ``gzip.GzipFile``.

        The mocked object read returns the bytes of a local gzip fixture; the
        decompressed stream must match ``gzip.open`` on that fixture, both from
        the start and after seeking.
        """
        gzip_path = stor.join(stor.dirname(__file__), 'file_data',
                              's_3_2126.bcl.gz')
        # Close the fixture handle deterministically instead of leaking it.
        with stor.open(gzip_path, 'rb') as fixture_fp:
            text = fixture_fp.read()
        mock_read_object.return_value = text
        fileobj = stor.open(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'rb')

        with fileobj:
            with gzip.GzipFile(fileobj=fileobj) as fp:
                with gzip.open(gzip_path) as gzip_fp:
                    assert_same_data(fp, gzip_fp)

        fileobj = stor.open(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'rb')

        with fileobj:
            with gzip.GzipFile(fileobj=fileobj) as fp:
                with gzip.open(gzip_path) as gzip_fp:
                    # after seeking should still be same
                    fp.seek(3)
                    gzip_fp.seek(3)
                    assert_same_data(fp, gzip_fp)
Beispiel #16
0
def swift_to_s3(swift_path, bucket):
    """SwiftStack Swift Path to S3 Cloud-Shunt Path

    The S3 key is prefixed with the lowercase hex form (no ``0x``, no padding)
    of ``md5("<tenant>/<container>") mod 16**6``, followed by the tenant, the
    container and, if present, the resource.

    Args:
        swift_path (str|Path): path to convert
        bucket (str): name of S3 bucket
    Returns:
        S3Path: the converted path
    Raises:
        TypeError: if ``bucket`` is empty or ``None``.

    See https://www.swiftstack.com/docs/admin/cluster_management/cloud_sync.html#swift-object-representation-in-s3 for details
    """  # noqa
    if not bucket:
        raise TypeError('bucket is required')
    swift_path = stor.Path(swift_path)
    h = hashlib.md5((u'%s/%s' % (swift_path.tenant, swift_path.container)
                     ).encode("utf8")).hexdigest()
    # Format with %x instead of hex(...).lstrip('0x').rstrip('L'): lstrip takes a
    # *set* of characters, so a value of 0 ('0x0') used to collapse to an empty
    # prefix. %x also never emits the Python 2 'L' suffix.
    prefix = '%x' % (int(h, 16) % 16**6)
    pth = stor.join('s3://%s' % bucket, prefix, swift_path.tenant, swift_path.container)
    if swift_path.resource:
        pth = stor.join(pth, swift_path.resource)
    return pth
Beispiel #17
0
    def test_invalid_open(self):
        """An unknown mode raises ``ValueError`` from every way of opening a path."""
        bad_mode = 'invalid'
        target = stor.join(self.drive, 'B/C/D')

        # Plain call, result bound to a name.
        with self.assertRaisesRegexp(ValueError, 'mode'):
            # keep reference here
            handle = stor.open(target, bad_mode)  # noqa
            assert False, 'should error before this'  # pragma: no cover

        # Module-level open used as a context manager.
        with self.assertRaisesRegexp(ValueError, 'mode'):
            with stor.open(target, bad_mode):
                assert False, 'should error before this'  # pragma: no cover

        # Path.open used as a context manager.
        with self.assertRaisesRegexp(ValueError, 'mode'):
            with stor.Path(target).open(bad_mode):
                assert False, 'should error before this'  # pragma: no cover
Beispiel #18
0
    def test_gzip_on_remote(self):
        """A gzip file copied to the test dir decompresses to the same data as the local one."""
        self._skip_if_filesystem_python3(self.test_dir)
        fixture_dir = os.path.dirname(__file__)
        local_gzip = os.path.join(fixture_dir, 'file_data/s_3_2126.bcl.gz')
        remote_gzip = stor.join(self.test_dir, stor.basename(local_gzip))
        stor.copy(local_gzip, remote_gzip)
        remote_handle = dxpy.DXFile(dxid=remote_gzip.canonical_resource,
                                    project=remote_gzip.canonical_project)
        # wait for file to go to closed state
        remote_handle.wait_on_close(20)

        # Open order matters: the GzipFile wraps the already-open remote handle.
        with stor.open(remote_gzip, mode='rb') as remote_fp, \
                gzip.GzipFile(fileobj=remote_fp) as remote_data, \
                gzip.open(local_gzip) as local_data:
            assert_same_data(remote_data, local_data)
Beispiel #19
0
 def test_copy_to_from_dir(self):
     """Round-trip every dataset object through the test dir and check its contents."""
     object_count = 5
     object_size = 100
     with NamedTemporaryDirectory(change_dir=True) as work_dir:
         self.create_dataset(work_dir, object_count, object_size)
         for name in self.get_dataset_obj_names(object_count):
             remote_copy = stor.join(self.test_dir, '%s.txt' % name)
             stor.copy(name, remote_copy)
             remote_handle = dxpy.DXFile(dxid=remote_copy.canonical_resource,
                                         project=remote_copy.canonical_project)
             # wait for file to go to closed state
             remote_handle.wait_on_close(20)
             stor.copy(remote_copy, 'copied_file')
             self.assertCorrectObjectContents('copied_file', name, object_size)
Beispiel #20
0
        def test_walkfiles(self):
            """``walkfiles`` returns every file, or only those matching a pattern.

            Covers no pattern, ``*.sh``, ``a*b*c`` and ``a*``. The dataset also
            contains an empty directory, which must not appear in the results
            but must still exist after the copy.
            """
            with NamedTemporaryDirectory(change_dir=True):
                # Make a dataset with files that will match a particular pattern (*.sh)
                # and also empty directories that should be ignored when calling walkfiles
                open('aabc.sh', 'w').close()
                open('aabc', 'w').close()
                os.mkdir('b')
                open('b/c.sh', 'w').close()
                os.mkdir('empty')
                open('b/d', 'w').close()
                open('b/abbbc', 'w').close()
                Path('.').copytree(self.test_dir)

            unfiltered_files = list(self.test_dir.walkfiles())
            self.assertEqual(
                set(unfiltered_files),
                {
                    stor.join(self.test_dir, 'aabc.sh'),
                    stor.join(self.test_dir, 'aabc'),
                    stor.join(self.test_dir, 'b/c.sh'),
                    stor.join(self.test_dir, 'b/d'),
                    stor.join(self.test_dir, 'b/abbbc'),
                })
            prefix_files = list(self.test_dir.walkfiles('*.sh'))
            self.assertEqual(
                set(prefix_files),
                {
                    stor.join(self.test_dir, 'aabc.sh'),
                    stor.join(self.test_dir, 'b/c.sh'),
                })
            double_infix_files = list(self.test_dir.walkfiles('a*b*c'))
            self.assertEqual(
                set(double_infix_files),
                {
                    stor.join(self.test_dir, 'aabc'),
                    stor.join(self.test_dir, 'b/abbbc'),
                })
            suffix_files = list(self.test_dir.walkfiles('a*'))
            self.assertEqual(
                set(suffix_files),
                {
                    stor.join(self.test_dir, 'aabc.sh'),
                    stor.join(self.test_dir, 'aabc'),
                    stor.join(self.test_dir, 'b/abbbc'),
                })
            # should still *make* an empty directory
            assert stor.exists(stor.join(self.test_dir, 'empty'))
Beispiel #21
0
 def setUp(self):
     """Point ``self.test_dir`` at a not-yet-created path inside a fresh temp dir."""
     super(FilesystemIntegrationTest, self).setUp()
     tmp_ctx = stor.NamedTemporaryDirectory()
     tmp_root = tmp_ctx.__enter__()
     # 'parent' is not created here, so the test dir starts out empty and
     # non-existent.
     self.test_dir = stor.join(tmp_root, 'parent')
     # Leave the temp-dir context manager when the test finishes.
     self.addCleanup(tmp_ctx.__exit__, None, None, None)
Beispiel #22
0
 def test_w_s3_component(self):
     """Joining a posix path with an S3Path's ``name`` yields a PosixPath."""
     p = posix.PosixPath('my/path') / s3.S3Path('s3://b/name').name
     self.assertEqual(p, posix.PosixPath('my/path/name'))
     # stor.join must agree with the ``/`` operator.
     self.assertEqual(stor.join('my/path',
                                s3.S3Path('s3://b/name').name), p)
Beispiel #23
0
 def test_success(self):
     """The ``/`` operator and ``stor.join`` build the same posix path."""
     p = posix.PosixPath('my/path') / 'other/path'
     self.assertEqual(p, posix.PosixPath('my/path/other/path'))
     self.assertEqual(p, stor.join('my/path', 'other/path'))
Beispiel #24
0
def is_writeable(path, swift_retry_options=None):
    """
    Determine whether we have permission to write to ``path``.

    The check differs per storage type when the directory does not exist:

    1. For local file systems the result is ``os.access(path, os.W_OK)``, so the
       target directory must already exist and be writable.
    2. For AWS S3, this function returns True only if the target bucket is
       already present and we have write access to the bucket.
    3. For Swift, this function returns True only if the target tenant is
       already present and we have write access to the tenant and container.
       The container does not have to be present.

    This function is useful because ``stor.stat()`` succeeds with read-only
    permissions on ``path`` even though a later upload would fail. Conversely,
    ``path`` might not exist yet: ``stor.stat()`` then fails although a later
    upload that creates it would succeed.

    For non-filesystem paths the check copies a temporary file into ``path``
    and removes it again. A Swift container that did not exist before the call
    is removed afterwards.

    Args:
        path (stor.Path|str): The path to check.
        swift_retry_options (dict): Optional retry arguments to use for swift
            upload or download. View the
            `swift module-level documentation <swiftretry>` for more
            information on retry arguments. If the goal is to not use
            exponential backoff, pass ``{'num_retries': 0}`` here.

    Returns:
        bool: Whether ``path`` is writeable or not.
    """
    from stor import basename, join, Path, remove
    from stor.swift import ConflictError, SwiftPath, UnauthorizedError, UnavailableError

    path = with_trailing_slash(Path(path))

    # Local paths need no probe upload; ask the OS directly.
    if is_filesystem_path(path):
        return os.access(path, os.W_OK)

    # For Swift, remember whether the container was there before probing, so a
    # container created only by the probe can be removed again afterwards.
    container_path = None
    container_existed = None
    if is_swift_path(path):
        container_path = Path(
            '{}{}/{}/'.format(SwiftPath.drive, path.tenant, path.container))
        container_existed = container_path.exists()

    with tempfile.NamedTemporaryFile() as tmpfile:
        try:
            # Probe: upload the temp file into ``path`` and delete it again.
            copy(tmpfile.name, path, swift_retry_options=swift_retry_options)
            remove(join(path, basename(tmpfile.name)))
        except (UnauthorizedError, UnavailableError, IOError, OSError, exceptions.FailedUploadError):  # nopep8
            answer = False
        else:
            answer = True

    # ``container_existed`` is None for non-Swift paths, so this only fires for
    # a Swift container that was absent before the probe and is present now.
    if container_existed is False and container_path.exists():
        try:
            container_path.remove_container()
        except ConflictError:
            # Ignore if some other thread/user created the container in the meantime.
            pass

    return answer