Beispiel #1
0
 def exists(path: str) -> bool:
     """Return whether an object exists at the given object-storage path.

     The storage backend is chosen from the URL scheme: ``s3://`` paths are
     checked with the client from ``S3Config`` and ``gs://`` paths with the
     client from ``GCSConfig``.

     Args:
         path: Object URL starting with ``s3://`` or ``gs://``.

     Returns:
         Whatever the selected client's ``exists(path)`` call returns.

     Raises:
         NotImplementedError: If ``path`` uses neither supported scheme.
     """
     if path.startswith('s3://'):
         return S3Config().get_s3_client().exists(path)
     if path.startswith('gs://'):
         return GCSConfig().get_gcs_client().exists(path)
     # A bare ``raise`` outside an ``except`` block only produces the opaque
     # "No active exception to reraise" RuntimeError (or re-raises an unrelated
     # exception the caller is currently handling). NotImplementedError is a
     # RuntimeError subclass, so handlers written for the old behaviour still
     # match, but the message now names the offending path.
     raise NotImplementedError(f'Unsupported object storage path: {path!r}')
Beispiel #2
0
 def get_object_storage_target(path: str, format: Format) -> luigi.Target:
     """Build the luigi target that matches an object-storage path.

     Args:
         path: Object URL starting with ``s3://`` or ``gs://``.
         format: luigi format object forwarded unchanged to the target.

     Returns:
         An ``S3Target`` for ``s3://`` paths or a ``GCSTarget`` for ``gs://``
         paths, each constructed with the client from the matching config.

     Raises:
         NotImplementedError: If ``path`` uses neither supported scheme.
     """
     if path.startswith('s3://'):
         return luigi.contrib.s3.S3Target(path, client=S3Config().get_s3_client(), format=format)
     if path.startswith('gs://'):
         return luigi.contrib.gcs.GCSTarget(path, client=GCSConfig().get_gcs_client(), format=format)
     # A bare ``raise`` outside an ``except`` block only produces the opaque
     # "No active exception to reraise" RuntimeError. NotImplementedError is a
     # RuntimeError subclass, so existing handlers still match, and the message
     # now identifies the rejected path.
     raise NotImplementedError(f'Unsupported object storage path: {path!r}')
Beispiel #3
0
def _make_file_system_target(
    file_path: str,
    processor: Optional[FileProcessor] = None
) -> luigi.target.FileSystemTarget:
    """Create the luigi target used to read or write ``file_path``.

    Args:
        file_path: Destination path; a leading ``s3://`` selects an S3 target,
            anything else is treated as a local path.
        processor: File processor supplying the luigi format. When falsy, one
            is obtained from ``make_file_processor(file_path)``.

    Returns:
        An ``S3Target`` bound to the shared S3 client, or a ``LocalTarget``.
    """
    file_processor = processor if processor else make_file_processor(file_path)

    # Local paths are the common fall-through case, so handle them first.
    if not file_path.startswith('s3://'):
        return luigi.LocalTarget(file_path, format=file_processor.format())

    s3_client = S3Config().get_s3_client()
    return luigi.contrib.s3.S3Target(file_path, client=s3_client, format=file_processor.format())
Beispiel #4
0
 def get_timestamp(path: str) -> datetime:
     """Return the last-modified timestamp of an object in S3 or GCS.

     Args:
         path: Object URL starting with ``s3://`` or ``gs://``.

     Returns:
         For ``s3://`` paths, the ``last_modified`` attribute of the key
         returned by the S3 client. For ``gs://`` paths, the ``updated`` field
         of the object metadata fetched from the GCS API.

     Raises:
         NotImplementedError: If ``path`` uses neither supported scheme.
     """
     if path.startswith('s3://'):
         return S3Config().get_s3_client().get_key(path).last_modified
     if path.startswith('gs://'):
         # The object metadata is queried through the underlying API client
         # and a private luigi helper; the original author noted this should
         # be contributed upstream to luigi.
         gcs_client = GCSConfig().get_gcs_client()
         bucket, obj = gcs_client._path_to_bucket_and_key(path)
         result = gcs_client.client.objects().get(bucket=bucket, object=obj).execute()
         # NOTE(review): 'updated' is returned as-is. It is presumably an
         # RFC 3339 string rather than a datetime -- confirm against callers
         # before relying on the annotated return type.
         return result['updated']
     # A bare ``raise`` outside an ``except`` block only produces the opaque
     # "No active exception to reraise" RuntimeError. NotImplementedError is a
     # RuntimeError subclass, so existing handlers still match, and the message
     # now identifies the rejected path.
     raise NotImplementedError(f'Unsupported object storage path: {path!r}')
Beispiel #5
0
 def __init__(self, file_path: str, temporary_directory: str) -> None:
     """Record the file path and temporary directory, and obtain an S3 client.

     Args:
         file_path: Path stored on the instance as ``_file_path``.
         temporary_directory: Directory path stored as ``_temporary_directory``.
     """
     self._file_path, self._temporary_directory = file_path, temporary_directory
     # The client comes from the project's S3 configuration object.
     self._client = S3Config().get_s3_client()
Beispiel #6
0
    def test_get_same_s3_client(self):
        """Clients obtained from two separate ``S3Config`` instances compare equal."""
        clients = [S3Config().get_s3_client() for _ in range(2)]

        self.assertEqual(clients[0], clients[1])
Beispiel #7
0
def _get_last_modification_time(path: str) -> datetime:
    """Return when the file at ``path`` was last modified.

    Args:
        path: Either an ``s3://`` object URL or a local filesystem path.

    Returns:
        For S3, the ``last_modified`` attribute of the key; for local files,
        the mtime converted with ``datetime.fromtimestamp``.

    Raises:
        FileNotFoundError: If the S3 client reports that the object does not
            exist. Local lookups surface whatever ``os.path.getmtime`` raises.
    """
    # Local files: read the mtime straight from the filesystem.
    if not path.startswith('s3://'):
        return datetime.fromtimestamp(os.path.getmtime(path))

    # S3 objects: check existence before asking for the key.
    if not S3Config().get_s3_client().exists(path):
        raise FileNotFoundError(f'No such file or directory: {path}')
    return S3Config().get_s3_client().get_key(path).last_modified