Exemple #1
0
 def delete_objects(self, path):
     """
     Delete every object listed under the received S3 path.

     The path is split by ``self.parse_path`` into a bucket and a key
     prefix, and the prefix is listed in pages of at most 1000 keys.
     Each page that is followed by another page is handed to a child
     process running ``self.delete_objects_batch``; the final page is
     deleted in the calling process.

     :param path: S3 path handed to ``self.parse_path``
     :return: None
     """
     bucket, path = self.parse_path(path=path)
     s3_client = self._session.boto3_session.client(
         service_name="s3", config=self._session.botocore_config)
     list_kwargs = {"Bucket": bucket, "MaxKeys": 1000, "Prefix": path}
     logger.debug(f"Arguments: \n{list_kwargs}")
     workers = []
     token = ""
     while token is not None:
         page = s3_client.list_objects_v2(**list_kwargs)
         contents = page.get("Contents")
         if not contents:
             # Nothing (left) under the prefix.
             break
         batch = [{"Key": entry.get("Key")} for entry in contents]
         logger.debug(f"Number of listed keys: {len(batch)}")
         token = page.get("NextContinuationToken")
         if not token:
             # No further page: delete this batch synchronously.
             logger.debug("Starting last delete call...")
             self.delete_objects_batch(self._session.primitives, bucket,
                                       batch)
             continue
         # More pages to come: delete this batch in the background and
         # keep listing from where this page stopped.
         list_kwargs["ContinuationToken"] = token
         worker = mp.Process(
             target=self.delete_objects_batch,
             args=(self._session.primitives, bucket, batch),
             daemon=False,
         )
         worker.start()
         workers.append(worker)
         if len(workers) == self._session.procs_io_bound:
             # Process budget reached; presumably blocks until one of the
             # tracked processes has finished -- confirm in the helper.
             wait_process_release(workers)
     logger.debug("Waiting final processes...")
     for worker in workers:
         worker.join()
Exemple #2
0
    def delete_not_listed_objects(
            self,
            objects_paths: List[str],
            procs_io_bound: Optional[int] = None) -> None:
        """
        Delete all NOT listed objects.

        The received paths are grouped by their parent prefix (everything
        up to and including the last "/"), and one child process running
        ``self._delete_not_listed_batch`` is started per group.

        :param objects_paths: List of objects paths to be held.
        :param procs_io_bound: Number of processes to be used for I/O bound
            operations. Falls back to the session value, then to 1.
        :return: None
        """
        if procs_io_bound is None:
            procs_io_bound = self._session.procs_io_bound
        if procs_io_bound is None:
            procs_io_bound = 1
        logger.debug(f"procs_io_bound: {procs_io_bound}")

        # Group the paths to keep by the prefix they live under.
        partitions: Dict[str, List[str]] = {}
        for object_path in objects_paths:
            partition_path = f"{object_path.rsplit('/', 1)[0]}/"
            partitions.setdefault(partition_path, []).append(object_path)

        procs: List[mp.Process] = []
        for partition_path, batch in partitions.items():
            proc = mp.Process(
                target=self._delete_not_listed_batch,
                args=(self._session.primitives, partition_path, batch, 1),
            )
            proc.daemon = False
            proc.start()
            procs.append(proc)
            # Throttle on the resolved value so that the caller's override
            # (and the fallback to 1) is honoured, not just the session
            # default.
            if len(procs) == procs_io_bound:
                wait_process_release(procs)
        logger.debug("Waiting final processes...")
        for proc in procs:
            proc.join()
Exemple #3
0
    def delete_not_listed_objects(self, objects_paths, procs_io_bound=None):
        """
        Delete all NOT listed objects.

        The received paths are grouped by their parent prefix (everything
        up to and including the last "/"), and one child process running
        ``self.delete_not_listed_batch`` is started per group.

        :param objects_paths: List of objects paths to be held.
        :param procs_io_bound: Number of processes to be used for I/O bound
            operations. A falsy value falls back to the session value.
        :return: None
        """
        if not procs_io_bound:
            procs_io_bound = self._session.procs_io_bound
        logger.debug(f"procs_io_bound: {procs_io_bound}")

        # Group the paths to keep by the prefix they live under.
        partitions = {}
        for object_path in objects_paths:
            partition_path = f"{object_path.rsplit('/', 1)[0]}/"
            partitions.setdefault(partition_path, []).append(object_path)

        procs = []
        for partition_path, batch in partitions.items():
            proc = mp.Process(
                target=self.delete_not_listed_batch,
                args=(self._session.primitives, partition_path, batch, 1),
            )
            proc.daemon = False
            proc.start()
            procs.append(proc)
            # Throttle on the resolved value so that the caller's override
            # is honoured, not just the session default.
            if len(procs) == procs_io_bound:
                wait_process_release(procs)
        logger.debug("Waiting final processes...")
        for proc in procs:
            proc.join()
Exemple #4
0
    def delete_objects(self,
                       path: str,
                       procs_io_bound: Optional[int] = None) -> None:
        """
        Remove every object found under the received S3 path.

        The prefix is listed in pages of at most 1000 keys. Each page that
        is followed by another page is handed to a child process running
        ``self._delete_objects_batch``; the final page is deleted in the
        calling process.

        :param path: S3 path (e.g. "s3://bucket/path")
        :param procs_io_bound: Number of processes to be used for I/O bound
            operations. Falls back to the session value, then to 1.
        :return: None
        """
        if procs_io_bound is None:
            procs_io_bound = self._session.procs_io_bound
            if procs_io_bound is None:
                procs_io_bound = 1
        bucket, path = self.parse_path(path=path)
        workers: List[mp.Process] = []
        list_kwargs: Dict[str, Any] = {
            "Bucket": bucket,
            "MaxKeys": 1000,
            "Prefix": path
        }
        logger.debug(f"Arguments: \n{list_kwargs}")
        token: Optional[str] = ""
        while token is not None:
            page: Dict = self._client_s3.list_objects_v2(**list_kwargs)
            contents = page.get("Contents")
            if contents is None:
                # The listing carries no "Contents" entry: nothing to do.
                break
            batch: List[Dict[str, str]] = [
                {"Key": entry.get("Key")}  # type: ignore
                for entry in contents
                if "Key" in entry
            ]
            logger.debug(f"Number of listed keys: {len(batch)}")
            token = page.get("NextContinuationToken")
            if not token:
                # No further page: delete this batch synchronously.
                logger.debug("Starting last delete call...")
                self._delete_objects_batch(self._session.primitives, bucket,
                                           batch)
                continue
            # More pages to come: delete this batch in the background and
            # keep listing from where this page stopped.
            list_kwargs["ContinuationToken"] = token
            worker: mp.Process = mp.Process(
                target=self._delete_objects_batch,
                args=(self._session.primitives, bucket, batch),
                daemon=False,
            )
            worker.start()
            workers.append(worker)
            if len(workers) == procs_io_bound:
                # Process budget reached; presumably blocks until one of
                # the tracked processes has finished -- confirm in helper.
                wait_process_release(workers)
        logger.debug("Waiting final processes...")
        for worker in workers:
            worker.join()