def _push_local_dir(self):
    """Archive the current working directory and unpack it on the remote side.

    The working directory is packed into a gzipped tarball, uploaded to a
    temporary S3 key, then downloaded and extracted by a command run through
    ``self.job_manager``. Local temporary files and the temporary S3 object
    are cleaned up whether or not the transfer succeeds.

    Raises:
        FileUploadError: If the remote download/extract command returns a
            non-zero exit code.
    """
    remote_upload_to = self._generate_tmp_s3_path()

    # pack local dir
    # mkstemp() hands back an *open* descriptor together with the path. Only
    # the unique path is needed (as the archive base name), so close the
    # descriptor right away instead of leaking it.
    fd, local_path = tempfile.mkstemp()
    os.close(fd)
    # make_archive() appends the format suffix to the base name, so the
    # archive is a second file next to the mkstemp placeholder.
    archive_path = local_path + ".tar.gz"
    try:
        shutil.make_archive(local_path, "gztar", os.getcwd())

        # local source -> s3
        self._run_with_retry(
            lambda: self.s3_client.upload_file(
                Filename=archive_path,
                Bucket=self.bucket,
                Key=remote_upload_to,
            )
        )
    finally:
        # remove local archive and the placeholder, even if packing or the
        # upload failed; either file may legitimately be missing by now.
        for leftover in (local_path, archive_path):
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass

    bucket_address = f"s3://{self.bucket}/{remote_upload_to}"

    try:
        # s3 -> remote target
        retcode, _ = self.job_manager.run_and_wait(
            f"pip install -q awscli && "
            f"aws s3 cp {bucket_address} archive.tar.gz && "
            f"tar xf archive.tar.gz ",
            {},
        )
        if retcode != 0:
            raise FileUploadError(
                f"Error uploading local dir to session "
                f"{self.cluster_manager.cluster_name}."
            )
    finally:
        # Best-effort removal of the temporary S3 object. Running it in
        # ``finally`` keeps a failed remote command from leaving the object
        # behind; a failed delete is only logged so it never masks the
        # outcome of the transfer itself.
        try:
            self._run_with_retry(
                lambda: self.s3_client.delete_object(
                    Bucket=self.bucket, Key=remote_upload_to
                ),
                initial_retry_delay_s=2,
            )
        except Exception as e:
            logger.warning(f"Could not remove temporary S3 object: {e}")
def upload(self, source: Optional[str] = None, target: Optional[str] = None):
    """Upload a local file to the remote side, staging it through S3.

    When both ``source`` and ``target`` are omitted, the current working
    directory is pushed instead (see ``_push_local_dir``). Otherwise the
    file at ``source`` is uploaded to a temporary S3 key and then copied to
    ``target`` by a command run through ``self.job_manager``. The temporary
    S3 object is removed afterwards whether or not the remote copy
    succeeded.

    Args:
        source: Path of the local file to upload.
        target: Destination path passed to ``aws s3 cp`` on the remote side.
            It is inserted into the shell command as-is.

    Raises:
        AssertionError: If only one of ``source``/``target`` is a string.
        FileUploadError: If the remote copy command returns a non-zero exit
            code.
    """
    if source is None and target is None:
        self._push_local_dir()
        return

    assert isinstance(source, str)
    assert isinstance(target, str)

    remote_upload_to = self._generate_tmp_s3_path()

    # local source -> s3
    self._run_with_retry(
        lambda: self.s3_client.upload_file(
            Filename=source,
            Bucket=self.bucket,
            Key=remote_upload_to,
        )
    )

    bucket_address = f"s3://{self.bucket}/{remote_upload_to}"

    try:
        # s3 -> remote target
        retcode, _ = self.job_manager.run_and_wait(
            "pip install -q awscli && "
            f"aws s3 cp {bucket_address} {target}",
            {},
        )
        if retcode != 0:
            raise FileUploadError(f"Error uploading file {source} to {target}")
    finally:
        # Best-effort removal of the temporary S3 object. Running it in
        # ``finally`` keeps a failed remote copy from leaving the object
        # behind; a failed delete is only logged so it never masks the
        # outcome of the upload itself.
        try:
            self._run_with_retry(
                lambda: self.s3_client.delete_object(
                    Bucket=self.bucket, Key=remote_upload_to
                ),
                initial_retry_delay_s=2,
            )
        except Exception as e:
            logger.warning(f"Could not remove temporary S3 object: {e}")