Ejemplo n.º 1
0
    def parse_gcs_url(gcs_url):
        """
        Splits a google cloud storage url into its bucket and blob parts.

        Args:
            gcs_url: `str`. url of the form `gs://<bucket>/<blob>`.

        Returns:
            tuple(bucket_name, blob). The blob has its leading slashes removed.

        Raises:
            PolyaxonStoresException: if the url has no bucket part or
                its scheme is not `gs`.
        """
        url_parts = urllib.parse.urlparse(gcs_url)
        # A missing bucket and a wrong scheme are reported with the same error.
        if not url_parts.netloc or url_parts.scheme != 'gs':
            raise PolyaxonStoresException('Received an invalid url `{}`'.format(gcs_url))
        return url_parts.netloc, url_parts.path.lstrip('/')
Ejemplo n.º 2
0
    def download_file(self,
                      blob,
                      local_path,
                      container_name=None,
                      use_basename=True):
        """
        Downloads a blob from an Azure storage container to a local path.

        Args:
            blob: `str`. blob to download; when no container name is given
                this must be a wasbs url that is parsed into container and blob.
            local_path: `str`. the path to download to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the blob.

        Raises:
            PolyaxonStoresException: if the download fails with an `AzureHttpError`.
        """
        if not container_name:
            # The storage account part of the url is not needed here.
            container_name, _, blob = self.parse_wasbs_url(blob)

        destination = os.path.abspath(local_path)
        if use_basename:
            destination = append_basename(destination, blob)

        # Validate the target location before starting the transfer.
        check_dirname_exists(destination)

        try:
            self.connection.get_blob_to_path(container_name, blob, destination)
        except AzureHttpError as error:
            raise PolyaxonStoresException(error)
Ejemplo n.º 3
0
    def download_file(self,
                      blob,
                      local_path,
                      bucket_name=None,
                      use_basename=True):
        """
        Downloads a file from Google Cloud Storage to a local path.

        Args:
            blob: `str`. blob to download; when no bucket name is given
                this must be a gcs url that is parsed into bucket and blob.
            local_path: `str`. the path to download to.
            bucket_name: `str`. the name of the bucket.
            use_basename: `bool`. whether or not to use the basename of the blob.

        Raises:
            PolyaxonStoresException: if the download fails with
                `NotFound` or `GoogleAPIError`.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        destination = os.path.abspath(local_path)
        if use_basename:
            destination = append_basename(destination, blob)

        # Validate the target location before starting the transfer.
        check_dirname_exists(destination)

        try:
            remote_blob = self.get_blob(blob=blob, bucket_name=bucket_name)
            remote_blob.download_to_filename(destination)
        except (NotFound, GoogleAPIError) as error:
            raise PolyaxonStoresException(error)
Ejemplo n.º 4
0
    def download_file(self,
                      key,
                      local_path,
                      bucket_name=None,
                      use_basename=True):
        """
        Downloads a file from S3 to a local path.

        Args:
            key: `str`. S3 key that points to the file; when no bucket name
                is given this must be an S3 url that is parsed into bucket and key.
            local_path: `str`. the path to download to.
            bucket_name: `str`. Name of the bucket that holds the file.
            use_basename: `bool`. whether or not to use the basename of the key.

        Raises:
            PolyaxonStoresException: if the download fails with a `ClientError`.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        destination = os.path.abspath(local_path)
        if use_basename:
            destination = append_basename(destination, key)

        # Validate the target location before starting the transfer.
        check_dirname_exists(destination)

        try:
            self.client.download_file(bucket_name, key, destination)
        except ClientError as error:
            raise PolyaxonStoresException(error)
Ejemplo n.º 5
0
 def delete_file(self, key, bucket_name=None):
     """
     Deletes a blob from a bucket.

     Args:
         key: `str`. blob to delete; when no bucket name is given this
             must be a gcs url that is parsed into bucket and blob.
         bucket_name: `str`. the name of the bucket.

     Returns:
         Whatever the bucket's `delete_blob` call returns.

     Raises:
         PolyaxonStoresException: if the deletion fails with
             `NotFound` or `GoogleAPIError`.
     """
     if not bucket_name:
         bucket_name, key = self.parse_gcs_url(key)
     # The bucket lookup is intentionally outside the guarded section.
     target_bucket = self.get_bucket(bucket_name)
     try:
         outcome = target_bucket.delete_blob(key)
     except (NotFound, GoogleAPIError) as error:
         raise PolyaxonStoresException(error)
     return outcome
Ejemplo n.º 6
0
 def delete_file(self, key, bucket_name=None):
     """
     Deletes an object from a bucket.

     Args:
         key: `str`. key of the object to delete; when no bucket name is
             given this must be an S3 url that is parsed into bucket and key.
         bucket_name: `str`. the name of the bucket.

     Raises:
         PolyaxonStoresException: if the deletion fails with a `ClientError`.
     """
     if not bucket_name:
         bucket_name, key = self.parse_s3_url(key)
     try:
         self.resource.Object(bucket_name, key).delete()
     except ClientError as error:
         raise PolyaxonStoresException(error)
Ejemplo n.º 7
0
 def __init__(self, store=None, path=None):
     """
     Binds this object to a store and an optional path.

     Args:
         store: `BaseStore`. the store to use. When missing, the store is
             resolved from `path` if one is given, and otherwise from
             `BaseStore.get_store()`.
         path: the path kept on the instance and used to resolve the store.

     Raises:
         PolyaxonStoresException: if the resolved store is not a `BaseStore`.
     """
     self._path = path
     if not store and path:
         store = BaseStore.get_store_for_path(path=path)
     # Fall back to the default store when nothing was resolved so far.
     store = store or BaseStore.get_store()
     if not isinstance(store, BaseStore):
         raise PolyaxonStoresException('Received an unrecognised store `{}`.'.format(store))
     self._store = store
Ejemplo n.º 8
0
def get_gc_credentials(key_path=None, keyfile_dict=None, scopes=None):
    """
    Returns the Credentials object for Google API.

    Args:
        key_path: `str`. path to a `.json` service account key file;
            falls back to `get_key_path()` when not given.
        keyfile_dict: `Mapping` or `str`. the service account info, or its
            JSON serialization; falls back to `get_keyfile_dict()` when not
            given. Only used when no key path is available. A mapping passed
            by the caller is never modified.
        scopes: `str` or iterable of `str`. comma separated scopes, or an
            iterable of scopes; falls back to `get_scopes()` and then to
            `DEFAULT_SCOPES` when nothing (or only blank entries) is given.

    Returns:
        The credentials built from the default environment, from the key
        file, or from the key data, in that order of precedence.

    Raises:
        PolyaxonStoresException: if the key file does not end with `.json`,
            or if the key data is not a valid JSON object holding a
            `private_key`.
    """
    key_path = key_path or get_key_path()
    keyfile_dict = keyfile_dict or get_keyfile_dict()
    scopes = scopes or get_scopes()

    if scopes is None:
        scopes = DEFAULT_SCOPES
    else:
        # Strings are comma separated; any other iterable is used as is.
        if hasattr(scopes, 'split'):
            scopes = scopes.split(',')
        # Blank entries are not usable scopes, drop them before falling back.
        scopes = [s.strip() for s in scopes if s.strip()] or DEFAULT_SCOPES

    if not key_path and not keyfile_dict:
        logger.info('Getting connection using `google.auth.default()` '
                    'since no key file is defined for hook.')
        credentials, _ = google.auth.default(scopes=scopes)
    elif key_path:
        # Get credentials from a JSON file.
        if not key_path.endswith('.json'):
            raise PolyaxonStoresException(
                'Unrecognised extension for key file.')
        logger.info('Getting connection using a JSON key file.')
        credentials = Credentials.from_service_account_file(
            os.path.abspath(key_path), scopes=scopes)
    else:
        # Get credentials from JSON data.
        try:
            if not isinstance(keyfile_dict, Mapping):
                keyfile_dict = json.loads(keyfile_dict)
            if not isinstance(keyfile_dict, Mapping):
                raise ValueError('The key JSON must describe an object.')

            # Work on a copy so the caller's mapping is left untouched.
            key_info = dict(keyfile_dict)
            if 'private_key' not in key_info:
                raise ValueError('The key JSON has no `private_key` field.')

            # Convert escaped newlines to actual newlines if any.
            key_info['private_key'] = key_info['private_key'].replace(
                '\\n', '\n')

            credentials = Credentials.from_service_account_info(key_info,
                                                                scopes=scopes)
        except ValueError:  # json.decoder.JSONDecodeError does not exist on py2
            raise PolyaxonStoresException('Invalid key JSON.')

    return credentials
Ejemplo n.º 9
0
    def parse_wasbs_url(wasbs_url):
        """
        Splits a wasbs url into container, storage account and path.

        Args:
            wasbs_url: `str`. url of the form
                `wasbs://<container>@<account>.blob.core.windows.net/<path>`.

        Returns:
            tuple(container, storage_account, path).

        Raises:
            PolyaxonStoresException: if the scheme is not `wasbs` or the
                host part does not have the expected form.
        """
        url_parts = urllib.parse.urlparse(wasbs_url)
        if url_parts.scheme != "wasbs":
            raise PolyaxonStoresException('Received an invalid url `{}`'.format(wasbs_url))

        host_match = re.match("([^@]+)@([^.]+)\\.blob\\.core\\.windows\\.net", url_parts.netloc)
        if host_match is None:
            raise PolyaxonStoresException(
                'wasbs_url must be of the form <container>@<account>.blob.core.windows.net')
        container, storage_account = host_match.groups()

        # Only a single leading slash is removed; any further slashes are kept.
        raw_path = url_parts.path
        path = raw_path[1:] if raw_path.startswith('/') else raw_path
        return container, storage_account, path
Ejemplo n.º 10
0
    def get_store_for_path(cls, path):
        """
        Returns the store to use for a path.

        The path is looked up in `settings.RUN_STORES_ACCESS_KEYS`; when no
        access definition is found the result of `cls.get_store()` is returned.

        Args:
            path: the path used as lookup key for the store access definition.

        Returns:
            The store built by `cls.get_store_for_type` from the definition's
            store type and the access data read from its secret key.

        Raises:
            PolyaxonStoresException: if the access definition lacks the
                `store` or `secret_key` entries, or if reading the access
                data raises a `RheaError`.
        """
        store_access = settings.RUN_STORES_ACCESS_KEYS.get(path)
        if not store_access:
            return cls.get_store()

        if 'store' not in store_access or 'secret_key' not in store_access:
            raise PolyaxonStoresException(
                'Received an invalid store access definition.')

        store_type = store_access['store']
        try:
            store_access = settings.config.get_dict(store_access['secret_key'])
        except RheaError:
            # The two literals are concatenated by the parser, so the space
            # after the comma has to be part of the first one.
            raise PolyaxonStoresException(
                'Could not create store for path `{}`, '
                'received a store type `{}` without valid access key.'.format(
                    path, store_type))

        return cls.get_store_for_type(store_type=store_type,
                                      store_access=store_access)
Ejemplo n.º 11
0
    def parse_wasbs_url(wasbs_url):
        """
        Splits a wasbs url into its parts using `rhea_parser.parse_wasbs_path`.

        Args:
            wasbs_url: the wasbs url to parse.

        Returns:
            tuple(container, storage_account, path).

        Raises:
            PolyaxonStoresException: if parsing raises a `RheaError`.
        """
        try:
            parsed = rhea_parser.parse_wasbs_path(wasbs_url)
            parts = (parsed.container, parsed.storage_account, parsed.path)
        except RheaError as error:
            raise PolyaxonStoresException(error)
        return parts
Ejemplo n.º 12
0
    def parse_gcs_url(gcs_url):
        """
        Splits a google cloud storage url using `rhea_parser.parse_gcs_path`.

        Args:
            gcs_url: the google cloud storage url to parse.

        Returns:
            tuple(bucket_name, blob).

        Raises:
            PolyaxonStoresException: if parsing raises a `RheaError`.
        """
        try:
            parsed = rhea_parser.parse_gcs_path(gcs_url)
            parts = (parsed.bucket, parsed.blob)
        except RheaError as error:
            raise PolyaxonStoresException(error)
        return parts
Ejemplo n.º 13
0
    def parse_s3_url(s3_url):
        """
        Splits an S3 url using `rhea_parser.parse_s3_path`.

        Args:
            s3_url: the S3 url to parse.

        Returns:
             tuple(bucket_name, key).

        Raises:
            PolyaxonStoresException: if parsing raises a `RheaError`.
        """
        try:
            parsed = rhea_parser.parse_s3_path(s3_url)
            parts = (parsed.bucket, parsed.key)
        except RheaError as error:
            raise PolyaxonStoresException(error)
        return parts
Ejemplo n.º 14
0
    def get_store(cls, store_type=None, **kwargs):
        """
        Builds the store matching a store type.

        Args:
            store_type: the type of store to build. When missing, the value
                read by `get_from_env(['POLYAXON_STORE'])` is used, and when
                that is `None` as well the local store type is used.
            **kwargs: passed to the store constructor; not passed to the
                local store, which is built without arguments.

        Returns:
            A `LocalStore`, `AzureStore`, `S3Store` or `GCSStore` instance.

        Raises:
            PolyaxonStoresException: if the store type is not recognised.
        """
        unrecognised = 'Received an unrecognised store type `{}`.'

        store_type = store_type or get_from_env(['POLYAXON_STORE'])
        # We assume that `None` refers to local store as well
        if store_type is None:
            store_type = cls._LOCAL_STORE

        if store_type not in cls._STORE_TYPES:
            raise PolyaxonStoresException(unrecognised.format(store_type))

        # Store modules are imported lazily, only for the requested type.
        if store_type == cls._LOCAL_STORE:
            from polystores.stores.local_store import LocalStore
            return LocalStore()
        elif store_type == cls._AZURE_STORE:
            from polystores.stores.azure_store import AzureStore
            return AzureStore(**kwargs)
        elif store_type == cls._S3_STORE:
            from polystores.stores.s3_store import S3Store
            return S3Store(**kwargs)
        elif store_type == cls._GCS_STORE:
            from polystores.stores.gcs_store import GCSStore
            return GCSStore(**kwargs)
        else:
            raise PolyaxonStoresException(unrecognised.format(store_type))
Ejemplo n.º 15
0
    def parse_s3_url(s3_url):
        """
        Splits an S3 url into its bucket and key parts.

        Args:
            s3_url: `str`. url whose host part is the bucket and whose path
                is the key. The scheme is not checked.

        Returns:
             tuple(bucket_name, key). The key has its leading and trailing
             slashes removed.

        Raises:
            PolyaxonStoresException: if the url has no bucket part.
        """
        url_parts = urllib.parse.urlparse(s3_url)
        bucket_name = url_parts.netloc
        if not bucket_name:
            raise PolyaxonStoresException('Received an invalid url `{}`'.format(s3_url))
        return bucket_name, url_parts.path.strip('/')
Ejemplo n.º 16
0
    def get_key(self, key, bucket_name=None):
        """
        Returns a boto3.s3.Object

        Args:
            key: `str`. the path to the key; when no bucket name is given
                this must be an S3 url that is parsed into bucket and key.
            bucket_name: `str`. the name of the bucket.

        Raises:
            PolyaxonStoresException: if creating or loading the object
                raises any exception.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        try:
            s3_object = self.resource.Object(bucket_name, key)
            # The object is loaded before being handed back to the caller.
            s3_object.load()
        except Exception as error:
            raise PolyaxonStoresException(error)
        return s3_object
Ejemplo n.º 17
0
    def get_blob(self, blob, bucket_name=None):
        """
        Get a file in Google Cloud Storage.

        Args:
            blob: `str`. the path to the object in the Google cloud storage
                bucket; when no bucket name is given this must be a gcs url
                that is parsed into bucket and blob.
            bucket_name: `str`. Name of the bucket in which the file is stored

        Returns:
            The object returned by the bucket's `get_blob` call.

        Raises:
            PolyaxonStoresException: if the bucket's `get_blob` call returns `None`.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        found = self.get_bucket(bucket_name).get_blob(blob)
        # A `None` lookup result is turned into an error here.
        if found is not None:
            return found
        raise PolyaxonStoresException('File does not exist: {}'.format(blob))
Ejemplo n.º 18
0
    def upload_file(self,
                    filename,
                    key,
                    bucket_name=None,
                    overwrite=False,
                    encrypt=False,
                    acl=None,
                    use_basename=True):
        """
        Uploads a local file to S3.

        Args:
            filename: `str`. name of the file to upload.
            key: `str`. S3 key that will point to the file; when no bucket
                name is given this must be an S3 url that is parsed into
                bucket and key.
            bucket_name: `str`. Name of the bucket in which to store the file.
            overwrite: `bool`. A flag to decide whether or not to overwrite the key
                if it already exists. If overwrite is False and the key exists, an
                error will be raised.
            encrypt: `bool`. If True, the upload is requested with the
                `ServerSideEncryption` extra argument set to `self.ENCRYPTION`.
            acl: `str`. ACL to use for uploading, e.g. "public-read".
            use_basename: `bool`. whether or not to use the basename of the filename.

        Raises:
            PolyaxonStoresException: if the key already exists and
                `overwrite` is False.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        if use_basename:
            key = append_basename(key, filename)

        # The existence check is skipped entirely when overwriting is allowed.
        key_taken = not overwrite and self.check_key(key, bucket_name)
        if key_taken:
            raise PolyaxonStoresException(
                "The key {} already exists.".format(key))

        upload_options = {}
        if encrypt:
            upload_options['ServerSideEncryption'] = self.ENCRYPTION
        if acl:
            upload_options['ACL'] = acl

        self.client.upload_file(filename, bucket_name, key, ExtraArgs=upload_options)
Ejemplo n.º 19
0
def check_dirname_exists(path, is_dir=False):
    """
    Checks that the directory for a path exists.

    Args:
        path: `str`. the path to check.
        is_dir: `bool`. if True the path itself must be a directory,
            otherwise the directory name of its absolute form must be one.

    Raises:
        PolyaxonStoresException: if the checked location is not a directory.
    """
    directory = path if is_dir else os.path.dirname(os.path.abspath(path))
    if os.path.isdir(directory):
        return
    raise PolyaxonStoresException(
        'The parent path is not a directory {}'.format(directory))