Ejemplo n.º 1
0
    def test_copytree_to_from_dir_w_manifest(self):
        """Round-trip a directory through ``copytree`` with manifest checks.

        Uploads a generated dataset (plus a nested empty file and an empty
        directory) to ``self.test_dir`` with ``use_manifest=True``, verifies
        the manifest contents, downloads the tree again, and finally checks
        that removing a remote entry makes a manifest-validated download
        raise ``ConditionNotMetError``.
        """
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree('.', self.test_dir, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(
                ['my_dir/empty_file', 'my_dir/empty_dir/'])
            # Manifest entries are expected under the 'test' prefix
            # (presumably the resource name of self.test_dir -- confirm).
            expected_contents = [Path('test') / c for c in expected_contents]
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True):
            # Download the results successfully
            Path(self.test_dir).copytree('test', use_manifest=True)

            # Now delete one of the objects from s3. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir/').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree('test',
                                             use_manifest=True,
                                             num_retries=0)
Ejemplo n.º 2
0
    def test_upload_multiple_dirs(self):
        """Upload a mix of directories and files and validate the manifest.

        Builds a local tree containing a populated directory, a nested
        directory with both included and excluded entries, and a top-level
        file. Only the paths named in ``to_upload`` are uploaded with
        ``use_manifest=True``; the manifest is then expected to list exactly
        those entries, so the ``excluded_*`` paths must not appear.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            num_test_objs = 10
            tmp_d = Path(tmp_d)

            # Create files filled with random data.
            path1 = tmp_d / 'dir1'
            os.mkdir(path1)
            self.create_dataset(path1, num_test_objs, 10)

            # Create empty dir and file.
            path2 = tmp_d / 'dir2'
            os.mkdir(path2)
            os.mkdir(path2 / 'my_dir')
            open(path2 / 'my_dir' / 'included_file', 'w').close()
            open(path2 / 'my_dir' / 'excluded_file', 'w').close()
            os.mkdir(path2 / 'my_dir' / 'included_dir')
            os.mkdir(path2 / 'my_dir' / 'excluded_dir')

            # Create file in the top level directory.
            open(tmp_d / 'top_level_file', 'w').close()

            to_upload = [
                'dir1',
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ]
            # NOTE(review): the path object is used as a context manager here,
            # presumably to make tmp_d the working directory so the relative
            # names in to_upload resolve -- confirm against Path.__enter__.
            with tmp_d:
                swift_path = self.test_dir / 'subdir'
                swift_path.upload(to_upload, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(swift_path)
            expected_contents = [
                Path('dir1') / name
                for name in self.get_dataset_obj_names(num_test_objs)
            ]
            expected_contents.extend([
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ])

            # Manifest entries are expected under the upload destination prefix.
            expected_contents = [
                Path('test/subdir') / c for c in expected_contents
            ]
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(set(manifest_contents), set(expected_contents))
Ejemplo n.º 3
0
Archivo: s3.py Proyecto: ying-w/stor
    def download(self, dest, condition=None, use_manifest=False, **kwargs):
        """Download every object listed under this path into ``dest``.

        Args:
            dest (str): The destination path to download to. If downloading
                to a directory, there must be a trailing slash. The directory
                will be created if it doesn't exist.
            condition (function(results) -> bool): Optional check applied to
                the sources of the successfully downloaded objects once all
                downloads have finished.
            use_manifest (bool): When true, the data manifest stored at this
                path is read and the downloaded sources are validated against
                it, in addition to any ``condition`` supplied.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            dict: ``{'completed': [...], 'failed': [...]}`` containing the
            per-object results produced by the download worker. Because a
            non-empty ``'failed'`` list raises, that list is empty whenever
            this method returns.

        Raises:
            FailedDownloadError: At least one object reported an unsuccessful
                result; the ``downloaded`` dict is passed to the exception.

        Notes:
        - The destination directory will be created automatically if it
          doesn't exist.
        - This method downloads to paths relative to the current directory.
        - The final condition check is delegated to ``utils.check_condition``.
        """
        utils.validate_condition(condition)

        if use_manifest:
            manifest_names = utils.get_data_manifest_contents(self)
            manifest_check = partial(utils.validate_manifest_list,
                                     manifest_names)
            if condition:
                condition = utils.join_conditions(condition, manifest_check)
            else:
                condition = manifest_check

        # One job per listed object; every job shares the same destination.
        remote_dir = utils.with_trailing_slash(self)
        jobs = [{'source': remote_obj, 'dest': dest}
                for remote_obj in remote_dir.list()]

        s3_options = settings.get()['s3:download']
        chunk_bytes = utils.str_to_bytes(s3_options.get('segment_size'))
        worker = partial(
            self._download_object_worker,
            config={
                'multipart_threshold': chunk_bytes,
                'max_concurrency': s3_options.get('segment_threads'),
                'multipart_chunksize': chunk_bytes,
            })

        completed = []
        failed = []
        downloaded = {'completed': completed, 'failed': failed}
        with S3DownloadLogger(len(jobs)) as progress:
            pool = ThreadPool(s3_options['object_threads'])
            try:
                pending = pool.imap_unordered(worker, jobs)
                while True:
                    try:
                        # NOTE(review): a finite timeout is passed instead of
                        # blocking forever, presumably so the wait remains
                        # interruptible -- confirm.
                        outcome = pending.next(0xFFFF)
                        if not outcome['success']:
                            failed.append(outcome)
                        else:
                            progress.add_result(outcome)
                            completed.append(outcome)
                    except StopIteration:
                        # The iterator is exhausted: all jobs have reported.
                        break
                pool.close()
            except BaseException:
                # Stop outstanding workers on any error or interrupt.
                pool.terminate()
                raise
            finally:
                pool.join()

        if failed:
            raise exceptions.FailedDownloadError(
                'an error occurred while downloading', downloaded)

        utils.check_condition(condition,
                              [item['source'] for item in completed])
        return downloaded
Ejemplo n.º 4
0
Archivo: s3.py Proyecto: ying-w/stor
    def list(
            self,
            starts_with=None,
            limit=None,
            condition=None,
            use_manifest=False,
            # hidden args
            list_as_dir=False,
            ignore_dir_markers=False,
            **kwargs):
        """
        List objects in this path's bucket, using its resource as a prefix.

        Args:
            starts_with (str): An additional search path joined onto the
                current resource. When the path has no resource, this value
                is used as the prefix on its own.
            limit (int): Maximum number of results to request; passed to the
                paginator as ``MaxItems``. A falsy value sets no limit.
            condition (function(results) -> bool): Optional check applied to
                the complete listing before it is returned.
            use_manifest (bool): Read the data manifest file at this path and
                validate the listing against it, in addition to any
                ``condition`` supplied.
            list_as_dir (bool): Hidden arg. Request the listing with a ``/``
                delimiter and a trailing slash on a non-empty prefix, and
                include the returned common prefixes in the results.
            ignore_dir_markers (bool): Hidden arg. Skip keys that end with a
                trailing slash.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            List[S3Path]: Every path in the listing

        Raises:
            RemoteError: An s3 client error occurred.
            ConditionNotMetError: Results were returned, but they did not meet the condition.
        """
        bucket = self.bucket
        prefix = self.resource
        utils.validate_condition(condition)

        if use_manifest:
            manifest_names = utils.get_data_manifest_contents(self)
            manifest_check = partial(utils.validate_manifest_list,
                                     manifest_names)
            if condition:
                condition = utils.join_conditions(condition, manifest_check)
            else:
                condition = manifest_check

        # Resolve the effective key prefix for the request.
        if not starts_with:
            prefix = prefix or ''
        elif prefix:
            prefix = prefix / starts_with
        else:
            prefix = starts_with

        pagination = {}
        if limit:
            pagination['MaxItems'] = limit

        list_kwargs = {
            'Bucket': bucket,
            'Prefix': prefix,
            'PaginationConfig': pagination,
        }

        if list_as_dir:
            # A non-empty prefix needs a trailing slash for a delimited listing.
            if prefix:
                list_kwargs['Prefix'] = utils.with_trailing_slash(prefix)
            else:
                list_kwargs['Prefix'] = ''
            list_kwargs['Delimiter'] = '/'

        path_prefix = S3Path('%s%s' % (self.drive, bucket))

        pages = self._get_s3_iterator('list_objects_v2', **list_kwargs)
        found = []
        try:
            # Iterating the pages stays inside the try so that client errors
            # raised while paging are translated below.
            for page in pages:
                if 'Contents' in page:
                    for entry in page['Contents']:
                        key = entry['Key']
                        if ignore_dir_markers and utils.has_trailing_slash(key):
                            continue
                        found.append(path_prefix / key)
                if list_as_dir and 'CommonPrefixes' in page:
                    for common in page['CommonPrefixes']:
                        found.append(path_prefix / common['Prefix'])
        except botocore_exceptions.ClientError as e:
            raise _parse_s3_error(e) from e

        utils.check_condition(condition, found)
        return found