Beispiel #1
0
 def get_registry(self):
     """Return the registry stored at ``self._filepath``.

     When no file exists at that path, an empty ``RegistryProto`` stamped
     with ``REGISTRY_SCHEMA_VERSION`` is returned instead; nothing is
     written to disk by this method.
     """
     proto = RegistryProto()
     if not self._filepath.exists():
         # Nothing persisted yet: hand back a blank registry at the current schema.
         proto.registry_schema_version = REGISTRY_SCHEMA_VERSION
         return proto
     proto.ParseFromString(self._filepath.read_bytes())
     return proto
Beispiel #2
0
 def _write_registry(self, registry_proto: RegistryProto):
     """Stamp ``registry_proto`` with a new version and write it to ``self._filepath``.

     The proto is mutated in place: ``version_id`` becomes a fresh UUID4
     string and ``last_updated`` is set from ``datetime.utcnow()``. Missing
     parent directories of the target file are created, including
     intermediate ones.

     Args:
         registry_proto: The registry to serialize and persist.
     """
     registry_proto.version_id = str(uuid.uuid4())
     registry_proto.last_updated.FromDatetime(datetime.utcnow())
     # parents=True so that a registry path nested inside several directories
     # that do not exist yet does not fail with FileNotFoundError.
     self._filepath.parent.mkdir(parents=True, exist_ok=True)
     self._filepath.write_bytes(registry_proto.SerializeToString())
Beispiel #3
0
 def _write_registry(self, registry_proto: RegistryProto):
     """Stamp ``registry_proto`` with a new version and write it to ``self._filepath``.

     The proto is mutated in place: ``version_id`` becomes a fresh UUID4
     string and ``last_updated`` is set from ``datetime.utcnow()``. Missing
     parent directories of the target file are created, including
     intermediate ones.

     Args:
         registry_proto: The registry to serialize and persist.
     """
     registry_proto.version_id = str(uuid.uuid4())
     registry_proto.last_updated.FromDatetime(datetime.utcnow())
     # parents=True so that a registry path nested inside several directories
     # that do not exist yet does not fail with FileNotFoundError.
     self._filepath.parent.mkdir(parents=True, exist_ok=True)
     # buffering=0 opens the file unbuffered, as the original code did.
     with open(self._filepath, mode="wb", buffering=0) as f:
         f.write(registry_proto.SerializeToString())
Beispiel #4
0
    def get_registry_proto(self):
        """Download and parse the registry object stored in S3.

        Returns:
            The ``RegistryProto`` parsed from the object at ``self._key`` in
            the bucket ``self._bucket``.

        Raises:
            FeastExtrasDependencyImportError: If ``botocore`` cannot be imported.
            S3RegistryBucketNotExist: If the bucket check reports a 404.
            S3RegistryBucketForbiddenAccess: If the bucket check fails with any
                other client error.
            FileNotFoundError: If downloading the registry object raises a
                client error.
        """
        try:
            from botocore.exceptions import ClientError
        except ImportError as e:
            from feast.errors import FeastExtrasDependencyImportError

            raise FeastExtrasDependencyImportError("aws", str(e))
        try:
            bucket = self.s3_client.Bucket(self._bucket)
            self.s3_client.meta.client.head_bucket(Bucket=bucket.name)
        except ClientError as e:
            # A 404 means the bucket does not exist; every other client error is
            # reported as forbidden access. The code is compared as text so a
            # non-numeric error code cannot raise ValueError from int().
            if str(e.response["Error"]["Code"]) == "404":
                raise S3RegistryBucketNotExist(self._bucket) from e
            raise S3RegistryBucketForbiddenAccess(self._bucket) from e

        registry_proto = RegistryProto()
        try:
            # The context manager guarantees the temporary file is closed (and
            # thereby removed) even when the download or parse fails.
            with TemporaryFile() as file_obj:
                bucket.Object(self._key).download_fileobj(file_obj)
                file_obj.seek(0)
                registry_proto.ParseFromString(file_obj.read())
        except ClientError as e:
            raise FileNotFoundError(
                f"Error while trying to locate Registry at path {self._uri.geturl()}"
            ) from e
        return registry_proto
Beispiel #5
0
 def get_registry_proto(self):
     """Read and parse the registry file at ``self._filepath``.

     Returns:
         The ``RegistryProto`` parsed from the file's bytes.

     Raises:
         FileNotFoundError: If nothing exists at ``self._filepath``.
     """
     if not self._filepath.exists():
         raise FileNotFoundError(
             f'Registry not found at path "{self._filepath}". Have you run "feast apply"?'
         )
     parsed = RegistryProto()
     parsed.ParseFromString(self._filepath.read_bytes())
     return parsed
Beispiel #6
0
 def _initialize_registry(self):
     """Create an empty registry in the store when none can be loaded.

     An existing registry is left untouched; only a ``FileNotFoundError``
     from ``_get_registry_proto`` triggers the write.
     """
     try:
         self._get_registry_proto()
     except FileNotFoundError:
         # No registry yet: persist a blank one at the current schema version.
         empty_registry = RegistryProto()
         empty_registry.registry_schema_version = REGISTRY_SCHEMA_VERSION
         self._registry_store.update_registry_proto(empty_registry)
Beispiel #7
0
 def _write_registry(self, registry_proto: RegistryProto):
     """Stamp ``registry_proto`` with a new version and upload it to S3.

     The proto is mutated in place: ``version_id`` becomes a fresh UUID4
     string and ``last_updated`` is set from ``datetime.utcnow()``. The
     serialized bytes are uploaded to ``self._key`` in ``self._bucket``.

     Args:
         registry_proto: The registry to serialize and upload.
     """
     registry_proto.version_id = str(uuid.uuid4())
     registry_proto.last_updated.FromDatetime(datetime.utcnow())
     # we have already checked the bucket exists so no need to do it again
     # The context manager closes the temporary file once the upload call
     # returns, or if it raises, instead of leaking the handle.
     with TemporaryFile() as file_obj:
         file_obj.write(registry_proto.SerializeToString())
         file_obj.seek(0)
         self.s3_client.Bucket(self._bucket).put_object(Body=file_obj, Key=self._key)
Beispiel #8
0
 def update_registry_proto(self, updater: Callable[[RegistryProto], RegistryProto]):
     """Apply ``updater`` to the stored registry and persist the result.

     Args:
         updater: Callable that receives the current registry and returns the
             registry that should be written. When no registry can be found,
             it receives an empty one stamped with ``REGISTRY_SCHEMA_VERSION``.
     """
     try:
         current = self.get_registry_proto()
     except FileNotFoundError:
         # No registry yet: start from a blank one at the current schema version.
         current = RegistryProto()
         current.registry_schema_version = REGISTRY_SCHEMA_VERSION
     self._write_registry(updater(current))
Beispiel #9
0
 def _write_registry(self, registry_proto: RegistryProto):
     """Stamp ``registry_proto`` with a new version and upload it to GCS.

     The proto is mutated in place: ``version_id`` becomes a fresh UUID4
     string and ``last_updated`` is set from ``datetime.utcnow()``. The
     serialized bytes are uploaded to the blob ``self._blob`` in the bucket
     ``self._bucket``.

     Args:
         registry_proto: The registry to serialize and upload.
     """
     registry_proto.version_id = str(uuid.uuid4())
     registry_proto.last_updated.FromDatetime(datetime.utcnow())
     # we have already checked the bucket exists so no need to do it again
     gs_bucket = self.gcs_client.get_bucket(self._bucket)
     blob = gs_bucket.blob(self._blob)
     # The context manager closes the temporary file once the upload call
     # returns, or if it raises, instead of leaking the handle.
     with TemporaryFile() as file_obj:
         file_obj.write(registry_proto.SerializeToString())
         file_obj.seek(0)
         blob.upload_from_file(file_obj)
Beispiel #10
0
 def _prepare_registry_for_changes(self):
     """Return the cached registry proto, seeding the cache if none is found.

     ``_get_registry_proto(allow_cache=True)`` is called first; presumably it
     refreshes ``self.cached_registry_proto`` when needed (verify against its
     definition). If it raises ``FileNotFoundError``, the cache is set to an
     empty registry at ``REGISTRY_SCHEMA_VERSION`` and the cache timestamp is
     set from ``datetime.now()``.
     """
     try:
         self._get_registry_proto(allow_cache=True)
     except FileNotFoundError:
         # No stored registry: cache a blank one so changes can be applied to it.
         blank = RegistryProto()
         blank.registry_schema_version = REGISTRY_SCHEMA_VERSION
         self.cached_registry_proto = blank
         self.cached_registry_proto_created = datetime.now()
     return self.cached_registry_proto
Beispiel #11
0
    def get_registry(self):
        """Return the registry stored in GCS, or an empty one if the blob is absent.

        Returns:
            The ``RegistryProto`` parsed from the blob ``self._blob`` when it
            exists; otherwise an empty proto stamped with
            ``REGISTRY_SCHEMA_VERSION``.

        Raises:
            Exception: If the bucket ``self._bucket`` does not exist.
        """
        from google.cloud import storage
        from google.cloud.exceptions import NotFound

        registry_proto = RegistryProto()
        try:
            bucket = self.gcs_client.get_bucket(self._bucket)
        except NotFound:
            raise Exception(
                f"No bucket named {self._bucket} exists; please create it first."
            )
        if storage.Blob(bucket=bucket, name=self._blob).exists(self.gcs_client):
            # Allocate the temporary file only when there is something to
            # download, and let the context manager close it afterwards.
            with TemporaryFile() as file_obj:
                self.gcs_client.download_blob_to_file(self._uri.geturl(), file_obj)
                file_obj.seek(0)
                registry_proto.ParseFromString(file_obj.read())
        else:
            registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION
        return registry_proto
Beispiel #12
0
    def get_registry_proto(self):
        """Download and parse the registry blob stored in GCS.

        Returns:
            The ``RegistryProto`` parsed from the blob ``self._blob``.

        Raises:
            Exception: If the bucket ``self._bucket`` does not exist.
            FileNotFoundError: If the registry blob does not exist in the bucket.
        """
        from google.cloud import storage
        from google.cloud.exceptions import NotFound

        try:
            bucket = self.gcs_client.get_bucket(self._bucket)
        except NotFound:
            raise Exception(
                f"No bucket named {self._bucket} exists; please create it first."
            )
        if not storage.Blob(bucket=bucket, name=self._blob).exists(self.gcs_client):
            raise FileNotFoundError(
                f'Registry not found at path "{self._uri.geturl()}". Have you run "feast apply"?'
            )
        registry_proto = RegistryProto()
        # Allocate the temporary file only once the blob is known to exist, and
        # let the context manager close it even if the download or parse fails.
        with TemporaryFile() as file_obj:
            self.gcs_client.download_blob_to_file(
                self._uri.geturl(), file_obj, timeout=30
            )
            file_obj.seek(0)
            registry_proto.ParseFromString(file_obj.read())
        return registry_proto
Beispiel #13
0
 def get_registry_proto(self) -> RegistryProto:
     """Load the registry row with the highest ``version`` from the table.

     Returns:
         The ``RegistryProto`` parsed from that row, or an empty
         ``RegistryProto`` when the table has no rows or does not exist yet.
     """
     try:
         with _get_conn(self.db_config) as conn, conn.cursor() as cursor:
             table = sql.Identifier(self.table_name)
             cursor.execute(
                 sql.SQL(
                     """
                     SELECT registry
                     FROM {}
                     WHERE version = (SELECT max(version) FROM {})
                     """
                 ).format(table, table)
             )
             latest = cursor.fetchone()
             if latest:
                 return RegistryProto.FromString(latest[0])
     except psycopg2.errors.UndefinedTable:
         # A missing table is treated the same as an empty one.
         pass
     return RegistryProto()
Beispiel #14
0
    def update_registry_proto(self, registry_proto: RegistryProto):
        """
        Store the given registry proto as a new row of the registry table.

        The schema (``db_config.db_schema``, falling back to ``db_config.user``)
        and the table are created first when they are missing. Each call
        inserts a new row; earlier rows are left in place.

        Args:
            registry_proto: the new RegistryProto
        """
        target_schema = self.db_config.db_schema or self.db_config.user
        with _get_conn(self.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT schema_name
                FROM information_schema.schemata
                WHERE schema_name = %s
                """,
                (target_schema,),
            )
            if not cursor.fetchone():
                create_schema = sql.SQL(
                    "CREATE SCHEMA IF NOT EXISTS {} AUTHORIZATION {}"
                )
                cursor.execute(
                    create_schema.format(
                        sql.Identifier(target_schema),
                        sql.Identifier(self.db_config.user),
                    ),
                )

            table = sql.Identifier(self.table_name)
            cursor.execute(
                sql.SQL(
                    """
                    CREATE TABLE IF NOT EXISTS {} (
                        version BIGSERIAL PRIMARY KEY,
                        registry BYTEA NOT NULL
                    );
                    """
                ).format(table),
            )
            # Do we want to keep track of the history or just keep the latest?
            serialized = registry_proto.SerializeToString()
            cursor.execute(
                sql.SQL(
                    """
                    INSERT INTO {} (registry)
                    VALUES (%s);
                    """
                ).format(table),
                [serialized],
            )
Beispiel #15
0
 def _initialize_registry(self):
     """Write an empty registry, stamped with the current schema version, to the store.

     No check for an existing registry is made here; the empty proto is
     always passed to ``self._registry_store.update_registry_proto``.
     """
     empty_registry = RegistryProto()
     empty_registry.registry_schema_version = REGISTRY_SCHEMA_VERSION
     self._registry_store.update_registry_proto(empty_registry)