def exists(path: str) -> bool:
    """Return whether an object exists at the given object-storage path.

    The storage backend is chosen from the path prefix: ``s3://`` paths are
    checked with the client from ``S3Config`` and ``gs://`` paths with the
    client from ``GCSConfig``.

    Args:
        path: Object-storage path beginning with ``s3://`` or ``gs://``.

    Returns:
        The result of the selected client's ``exists(path)`` call.

    Raises:
        RuntimeError: If ``path`` starts with neither supported prefix.
    """
    if path.startswith('s3://'):
        return S3Config().get_s3_client().exists(path)
    if path.startswith('gs://'):
        return GCSConfig().get_gcs_client().exists(path)
    # A bare ``raise`` outside an ``except`` block only produces
    # "RuntimeError: No active exception to reraise". Keep the exception type
    # callers would have seen, but report what actually went wrong.
    raise RuntimeError(f'Unsupported object storage path: {path}')
def get_object_storage_target(path: str, format: Format) -> luigi.Target:
    """Build the luigi target matching the object-storage prefix of ``path``.

    Args:
        path: Object-storage path beginning with ``s3://`` or ``gs://``.
        format: Format object forwarded unchanged to the target constructor.

    Returns:
        A ``luigi.contrib.s3.S3Target`` for ``s3://`` paths, or a
        ``luigi.contrib.gcs.GCSTarget`` for ``gs://`` paths, each created with
        the client supplied by the corresponding config class.

    Raises:
        RuntimeError: If ``path`` starts with neither supported prefix.
    """
    if path.startswith('s3://'):
        return luigi.contrib.s3.S3Target(path, client=S3Config().get_s3_client(), format=format)
    if path.startswith('gs://'):
        return luigi.contrib.gcs.GCSTarget(path, client=GCSConfig().get_gcs_client(), format=format)
    # A bare ``raise`` outside an ``except`` block only produces
    # "RuntimeError: No active exception to reraise". Keep the exception type
    # callers would have seen, but report what actually went wrong.
    raise RuntimeError(f'Unsupported object storage path: {path}')
def _make_file_system_target(
    file_path: str, processor: Optional[FileProcessor] = None
) -> luigi.target.FileSystemTarget:
    """Create the luigi file-system target for ``file_path``.

    Args:
        file_path: Destination path; an ``s3://`` prefix selects an S3 target,
            anything else is treated as a local path.
        processor: File processor whose ``format()`` is passed to the target.
            When falsy, one is obtained from ``make_file_processor(file_path)``.

    Returns:
        A ``luigi.contrib.s3.S3Target`` for ``s3://`` paths, otherwise a
        ``luigi.LocalTarget``.
    """
    file_processor = processor or make_file_processor(file_path)

    # Anything that is not an S3 URL is handled as a local file.
    if not file_path.startswith('s3://'):
        return luigi.LocalTarget(file_path, format=file_processor.format())

    return luigi.contrib.s3.S3Target(
        file_path,
        client=S3Config().get_s3_client(),
        format=file_processor.format(),
    )
def get_timestamp(path: str) -> datetime:
    """Return the last-update timestamp of an object in S3 or GCS.

    Args:
        path: Object-storage path beginning with ``s3://`` or ``gs://``.

    Returns:
        For ``s3://`` paths, the ``last_modified`` attribute of the key
        returned by the S3 client. For ``gs://`` paths, the ``'updated'``
        field of the object metadata returned by the GCS API.

    Raises:
        RuntimeError: If ``path`` starts with neither supported prefix.
    """
    if path.startswith('s3://'):
        return S3Config().get_s3_client().get_key(path).last_modified
    if path.startswith('gs://'):
        # This goes through the luigi GCS client's private path helper and its
        # raw API handle; the original author noted that this lookup should be
        # contributed to luigi itself.
        gcs_client = GCSConfig().get_gcs_client()
        bucket, obj = gcs_client._path_to_bucket_and_key(path)
        result = gcs_client.client.objects().get(bucket=bucket, object=obj).execute()
        # NOTE(review): the value is returned exactly as found in the API
        # response; confirm it matches the annotated ``datetime`` return type.
        return result['updated']
    # A bare ``raise`` outside an ``except`` block only produces
    # "RuntimeError: No active exception to reraise". Keep the exception type
    # callers would have seen, but report what actually went wrong.
    raise RuntimeError(f'Unsupported object storage path: {path}')
def __init__(self, file_path: str, temporary_directory: str) -> None:
    """Remember the target paths and obtain an S3 client.

    Args:
        file_path: Stored as ``self._file_path``.
        temporary_directory: Stored as ``self._temporary_directory``.
    """
    self._file_path = file_path
    self._temporary_directory = temporary_directory

    # The client is taken from the project's S3 configuration object.
    s3_config = S3Config()
    self._client = s3_config.get_s3_client()
def test_get_same_s3_client(self):
    # Clients obtained from two separately constructed S3Config objects
    # must compare equal.
    first_client = S3Config().get_s3_client()
    second_client = S3Config().get_s3_client()

    self.assertEqual(first_client, second_client)
def _get_last_modification_time(path: str) -> datetime:
    """Return when the file at ``path`` was last modified.

    Args:
        path: Either an ``s3://`` URL or a local file-system path.

    Returns:
        For ``s3://`` paths, the ``last_modified`` attribute of the S3 key.
        For any other path, ``os.path.getmtime(path)`` converted with
        ``datetime.fromtimestamp``.

    Raises:
        FileNotFoundError: If an ``s3://`` path does not exist according to
            the S3 client.
    """
    # Local files: read the modification time straight from the file system.
    if not path.startswith('s3://'):
        return datetime.fromtimestamp(os.path.getmtime(path))

    # S3 objects: check existence first so a missing key is reported explicitly.
    if not S3Config().get_s3_client().exists(path):
        raise FileNotFoundError(f'No such file or directory: {path}')
    return S3Config().get_s3_client().get_key(path).last_modified