Example #1
def _synapse_launcher(config: Config) -> JobLauncher:
    from feast_spark.pyspark.launchers import synapse

    return synapse.SynapseJobLauncher(
        synapse_dev_url=config.get(opt.AZURE_SYNAPSE_DEV_URL),
        pool_name=config.get(opt.AZURE_SYNAPSE_POOL_NAME),
        datalake_dir=config.get(opt.AZURE_SYNAPSE_DATALAKE_DIR),
        executor_size=config.get(opt.AZURE_SYNAPSE_EXECUTOR_SIZE),
        executors=int(config.get(opt.AZURE_SYNAPSE_EXECUTORS))
    )
Example #2
def _k8s_launcher(config: Config) -> JobLauncher:
    from feast_spark.pyspark.launchers import k8s

    staging_location = config.get(opt.SPARK_STAGING_LOCATION)
    staging_uri = urlparse(staging_location)

    return k8s.KubernetesJobLauncher(
        namespace=config.get(opt.SPARK_K8S_NAMESPACE),
        generic_resource_template_path=config.get(opt.SPARK_K8S_JOB_TEMPLATE_PATH),
        batch_ingestion_resource_template_path=config.get(
            opt.SPARK_K8S_BATCH_INGESTION_TEMPLATE_PATH, None
        ),
        stream_ingestion_resource_template_path=config.get(
            opt.SPARK_K8S_STREAM_INGESTION_TEMPLATE_PATH, None
        ),
        historical_retrieval_resource_template_path=config.get(
            opt.SPARK_K8S_HISTORICAL_RETRIEVAL_TEMPLATE_PATH, None
        ),
        staging_location=staging_location,
        incluster=config.getboolean(opt.SPARK_K8S_USE_INCLUSTER_CONFIG),
        staging_client=get_staging_client(staging_uri.scheme, config),
        # azure-related arguments are None if not using Azure blob storage
        azure_account_name=config.get(opt.AZURE_BLOB_ACCOUNT_NAME, None),
        azure_account_key=config.get(opt.AZURE_BLOB_ACCOUNT_ACCESS_KEY, None),
    )
Example #3
    def test_init_options_precedence(self):
        """
        Init options > env var > file options > default options
        """
        fd, path = mkstemp()
        os.environ["FEAST_CORE_URL"] = "env"
        options = {"core_url": "init", "serving_url": "init"}
        configuration_string = "[general]\nCORE_URL = file\n"
        with open(fd, "w") as f:
            f.write(configuration_string)
        config = Config(options, path)
        assert config.get("core_url") == "init"
        del os.environ["FEAST_CORE_URL"]
Example #4
    def test_env_var_precedence(self):
        """
        Env vars > file options > default options
        """
        fd, path = mkstemp()
        os.environ["FEAST_CORE_URL"] = "env"
        configuration_string = "[general]\nCORE_URL = file\n"
        with open(fd, "w") as f:
            f.write(configuration_string)
        config = Config(path=path)
        assert config.get("CORE_URL") == "env"

        del os.environ["FEAST_CORE_URL"]
Example #5
    def test_type_casting(self):
        """
        Test type casting of strings to other types
        """
        fd, path = mkstemp()
        os.environ["FEAST_INT_VAR"] = "1"
        os.environ["FEAST_FLOAT_VAR"] = "1.0"
        os.environ["FEAST_BOOLEAN_VAR"] = "True"
        config = Config(path=path)

        assert config.getint("INT_VAR") == 1
        assert config.getfloat("FLOAT_VAR") == 1.0
        assert config.getboolean("BOOLEAN_VAR") is True
Example #6
def _emr_launcher(config: Config) -> JobLauncher:
    from feast.pyspark.launchers import aws

    def _get_optional(option):
        if config.exists(option):
            return config.get(option)

    return aws.EmrClusterLauncher(
        region=config.get(opt.EMR_REGION),
        existing_cluster_id=_get_optional(opt.EMR_CLUSTER_ID),
        new_cluster_template_path=_get_optional(opt.EMR_CLUSTER_TEMPLATE_PATH),
        staging_location=config.get(opt.SPARK_STAGING_LOCATION),
        emr_log_location=config.get(opt.EMR_LOG_LOCATION),
    )
Example #7
File: test_auth.py Project: xieydd/feast
def config_google():
    config_dict = {
        "core_url": "localhost:50051",
        "enable_auth": True,
        "auth_provider": "google",
    }
    return Config(config_dict)
Example #8
def stage_dataframe(df, event_timestamp_column: str, config: Config) -> FileSource:
    """
    Helper function to upload a pandas dataframe in parquet format to a temporary location (under
    SPARK_STAGING_LOCATION) and return it wrapped in a FileSource.

    Args:
        df: the pandas dataframe to upload.
        event_timestamp_column(str): the name of the timestamp column in the dataframe.
        config(Config): feast config.
    """
    staging_location = config.get(opt.SPARK_STAGING_LOCATION)
    staging_uri = urlparse(staging_location)

    with tempfile.NamedTemporaryFile() as f:
        df.to_parquet(f)

        file_url = urlunparse(
            get_staging_client(staging_uri.scheme, config).upload_fileobj(
                f,
                f.name,
                remote_path_prefix=os.path.join(staging_location, "dataframes"),
                remote_path_suffix=".parquet",
            )
        )

    return FileSource(
        event_timestamp_column=event_timestamp_column,
        file_format=ParquetFormat(),
        file_url=file_url,
    )
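A minimal usage sketch for the helper above. The staging bucket, the option key spark_staging_location, and the column names are illustrative assumptions, not values taken from a real deployment.

import pandas as pd
from feast.config import Config  # assumed import path for the Config used in these examples

# Hypothetical staging location; stage_dataframe uploads under "<staging>/dataframes".
config = Config({"spark_staging_location": "file:///tmp/feast-staging/"})

df = pd.DataFrame(
    {
        "driver_id": [1001, 1002],
        "event_timestamp": pd.to_datetime(["2021-01-01", "2021-01-02"]),
    }
)

file_source = stage_dataframe(df, event_timestamp_column="event_timestamp", config=config)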
Example #9
    def __init__(self, config: Config):
        """
        Initializes a GoogleOpenIDAuthMetadataPlugin, used to sign gRPC requests
        Args:
            config: Feast Configuration object
        """
        super(GoogleOpenIDAuthMetadataPlugin, self).__init__()

        self._static_token = None
        self._token = None

        # If provided, set a static token
        if config.exists(opt.AUTH_TOKEN):
            self._static_token = config.get(opt.AUTH_TOKEN)

        self._request = RequestWithTimeout(timeout=5)
        self._refresh_token()
Example #10
    def __init__(self, config: Config):
        """
        Initializes an OAuthMetadataPlugin, used to sign gRPC requests
        Args:
            config: Feast Configuration object
        """
        super(OAuthMetadataPlugin, self).__init__()

        self._static_token = None
        self._token = None

        # If provided, set a static token
        if config.exists(CONFIG_CORE_ENABLE_AUTH_TOKEN_KEY):
            self._static_token = config.get(CONFIG_CORE_ENABLE_AUTH_TOKEN_KEY)
            self._refresh_token(config)
        elif (config.exists(CONFIG_OAUTH_GRANT_TYPE_KEY)
              and config.exists(CONFIG_OAUTH_CLIENT_ID_KEY)
              and config.exists(CONFIG_OAUTH_CLIENT_SECRET_KEY)
              and config.exists(CONFIG_OAUTH_AUDIENCE_KEY)
              and config.exists(CONFIG_OAUTH_TOKEN_REQUEST_URL_KEY)):
            self._refresh_token(config)
        else:
            raise RuntimeError(
                " Please ensure that the "
                "necessary parameters are passed to the client - "
                "oauth_grant_type, oauth_client_id, oauth_client_secret, "
                "oauth_audience, oauth_token_request_url.")
Example #11
def _source_to_argument(source: DataSource, config: Config):
    common_properties = {
        "field_mapping": dict(source.field_mapping),
        "event_timestamp_column": source.event_timestamp_column,
        "created_timestamp_column": source.created_timestamp_column,
        "date_partition_column": source.date_partition_column,
    }

    properties = {**common_properties}

    if isinstance(source, FileSource):
        properties["path"] = source.file_options.file_url
        properties["format"] = dict(
            json_class=source.file_options.file_format.__class__.__name__)
        return {"file": properties}

    if isinstance(source, BigQuerySource):
        project, dataset_and_table = source.bigquery_options.table_ref.split(
            ":")
        dataset, table = dataset_and_table.split(".")
        properties["project"] = project
        properties["dataset"] = dataset
        properties["table"] = table
        if config.exists(
                opt.SPARK_BQ_MATERIALIZATION_PROJECT) and config.exists(
                    opt.SPARK_BQ_MATERIALIZATION_DATASET):
            properties["materialization"] = dict(
                project=config.get(opt.SPARK_BQ_MATERIALIZATION_PROJECT),
                dataset=config.get(opt.SPARK_BQ_MATERIALIZATION_DATASET),
            )

        return {"bq": properties}

    if isinstance(source, KafkaSource):
        properties[
            "bootstrap_servers"] = source.kafka_options.bootstrap_servers
        properties["topic"] = source.kafka_options.topic
        properties["format"] = {
            **source.kafka_options.message_format.__dict__,
            "json_class":
            source.kafka_options.message_format.__class__.__name__,
        }
        return {"kafka": properties}

    raise NotImplementedError(f"Unsupported Datasource: {type(source)}")
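As a rough illustration of the dictionary this function produces, consider a FileSource like the one built in Example #8. The exact defaults of the common properties are sketched, not guaranteed.

source = FileSource(
    event_timestamp_column="event_timestamp",
    file_format=ParquetFormat(),
    file_url="file:///tmp/dataframes/data.parquet",
)

argument = _source_to_argument(source, Config())
# Expected shape (sketch):
# {
#     "file": {
#         "field_mapping": {...},
#         "event_timestamp_column": "event_timestamp",
#         "created_timestamp_column": ...,
#         "date_partition_column": ...,
#         "path": "file:///tmp/dataframes/data.parquet",
#         "format": {"json_class": "ParquetFormat"},
#     }
# }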
Example #12
    def __init__(self, config: Config):
        """
        Initializes a GoogleOpenIDAuthMetadataPlugin, used to sign gRPC requests
        Args:
            config: Feast Configuration object
        """
        super(GoogleOpenIDAuthMetadataPlugin, self).__init__()
        from google.auth.transport import requests

        self._static_token = None
        self._token = None

        # If provided, set a static token
        if config.exists(CONFIG_CORE_ENABLE_AUTH_TOKEN_KEY):
            self._static_token = config.get(CONFIG_CORE_ENABLE_AUTH_TOKEN_KEY)

        self._request = requests.Request()
        self._refresh_token()
Example #13
File: cli.py Project: lucaspressi1/feast
def config_list():
    """
    List Feast properties for the currently active configuration
    """
    try:
        print(Config())
    except Exception as e:
        _logger.error("Error occurred when reading Feast configuration file")
        _logger.exception(e)
        sys.exit(1)
Example #14
def config_with_missing_variable():
    config_dict = {
        "core_url": "localhost:50051",
        "core_enable_auth": True,
        "core_auth_provider": "oauth",
        "oauth_grant_type": "client_credentials",
        "oauth_client_id": "fakeID",
        "oauth_client_secret": "fakeSecret",
        "oauth_token_request_url": AUTH_URL,
    }
    return Config(config_dict)
Example #15
File: client.py Project: vjrkr/feast
    def __init__(self, options=None, **kwargs):
        """
        JobControllerClient should be initialized with
            jobcontroller_url: Feast JobController address

        :param options: Configuration options to initialize client with
        :param kwargs: options in kwargs style
        """
        if options is None:
            options = dict()
        self._config = Config(options={**options, **kwargs})

        self._jobcontroller_service_stub: Optional[
            JobControllerServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)
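A brief usage sketch for the client above; the address is a placeholder rather than a Feast default, and either the options dict or keyword style can be used, since both are merged into a single Config.

jc_client = JobControllerClient(jobcontroller_url="localhost:6570")
# equivalently:
jc_client = JobControllerClient(options={"jobcontroller_url": "localhost:6570"})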
Example #16
File: launcher.py Project: mike0sv/feast
def _parse_additional_spark_options(config: Config) -> Dict[str, str]:
    options_string = config.get(opt.SPARK_ADDITIONAL_OPTS, None)
    if options_string is None:
        return {}
    try:
        return dict(
            _quoted_split(opt_val, "=")
            for opt_val in _quoted_split(options_string, ";")
        )
    except ValueError:
        raise ValueError(f"Cannot parse {opt.SPARK_ADDITIONAL_OPTS}: {options_string}")
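The accepted format is semicolon-separated key=value pairs. A small sketch, assuming the option key is spelled spark_additional_opts in the Config dictionary:

config = Config(
    {"spark_additional_opts": "spark.executor.memory=2g;spark.sql.shuffle.partitions=10"}
)
extra_opts = _parse_additional_spark_options(config)
# Expected result (sketch):
# {"spark.executor.memory": "2g", "spark.sql.shuffle.partitions": "10"}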
Example #17
File: auth.py Project: yutiansut/feast
def get_auth_metadata_plugin(config: Config) -> grpc.AuthMetadataPlugin:
    """
    Get an Authentication Metadata Plugin. This plugin is used in gRPC to
    sign requests. Please see the following URL for more details
    https://grpc.github.io/grpc/python/_modules/grpc.html#AuthMetadataPlugin

    New plugins can be added to this function. For the time being we only
    support Google Open ID authentication.

    Returns: Returns an implementation of grpc.AuthMetadataPlugin

    Args:
        config: Feast Configuration object
    """
    if AuthProvider(config.get(opt.AUTH_PROVIDER)) == AuthProvider.GOOGLE:
        return GoogleOpenIDAuthMetadataPlugin(config)
    elif AuthProvider(config.get(opt.AUTH_PROVIDER)) == AuthProvider.OAUTH:
        return OAuthMetadataPlugin(config)
    else:
        raise RuntimeError("Could not determine OAuth provider. "
                           'Must be set to either "google" or "oauth"')
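A hedged sketch of wiring the returned plugin into a gRPC channel. The Config keys mirror the fixture in Example #7, the endpoint is a placeholder, and the channel wiring is the standard gRPC call-credentials pattern.

import grpc

config = Config({"enable_auth": True, "auth_provider": "google"})
plugin = get_auth_metadata_plugin(config)

# Attach the plugin as per-call credentials on a secure channel.
call_creds = grpc.metadata_call_credentials(plugin)
channel_creds = grpc.composite_channel_credentials(grpc.ssl_channel_credentials(), call_creds)
channel = grpc.secure_channel("localhost:6565", channel_creds)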
Example #18
def config_google():
    config_dict = {
        "core_url": "localhost:50051",
        "core_enable_auth": True,
        "core_auth_provider": "google",
        "oauth_grant_type": "client_credentials",
        "oauth_client_id": "fakeID",
        "oauth_client_secret": "fakeSecret",
        "oauth_audience": AUDIENCE,
        "oauth_token_request_url": AUTH_URL,
    }
    return Config(config_dict)
Example #19
def _dataproc_launcher(config: Config) -> JobLauncher:
    from feast.pyspark.launchers import gcloud

    return gcloud.DataprocClusterLauncher(
        cluster_name=config.get(opt.DATAPROC_CLUSTER_NAME),
        staging_location=config.get(opt.SPARK_STAGING_LOCATION),
        region=config.get(opt.DATAPROC_REGION),
        project_id=config.get(opt.DATAPROC_PROJECT),
        executor_instances=config.get(opt.DATAPROC_EXECUTOR_INSTANCES),
        executor_cores=config.get(opt.DATAPROC_EXECUTOR_CORES),
        executor_memory=config.get(opt.DATAPROC_EXECUTOR_MEMORY),
    )
Example #20
    def get_hash(self) -> str:
        source = _source_to_argument(self._feature_table.stream_source,
                                     Config())
        feature_table = _feature_table_to_argument(
            None, "default", self._feature_table)  # type: ignore

        job_json = json.dumps(
            {
                "source": source,
                "feature_table": feature_table
            },
            sort_keys=True,
        )
        return hashlib.md5(job_json.encode()).hexdigest()
Example #21
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        The Feast Client should be initialized with at least one service url

        Args:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """

        if options is None:
            options = dict()
        self._config = Config(options={**options, **kwargs})

        self.__core_channel: grpc.Channel = None
        self.__serving_channel: grpc.Channel = None
        self._core_service_stub: CoreServiceStub = None
        self._serving_service_stub: ServingServiceStub = None
Example #22
File: cli.py Project: lucaspressi1/feast
def stop_stream_to_online(feature_table: str):
    """
    Stop stream to online sync job
    """

    spark_launcher = Config().get(CONFIG_SPARK_LAUNCHER)

    if spark_launcher == "emr":
        import feast.pyspark.aws.jobs

        feast.pyspark.aws.jobs.stop_stream_to_online(feature_table)
    else:
        raise NotImplementedError(
            f"Feast currently does not provide support for the specified spark launcher: {spark_launcher}"
        )
Example #23
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        The Feast Client should be initialized with at least one service url
        Please see constants.py for configuration options. Commonly used options
        or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """

        if options is None:
            options = dict()
        self._config = Config(options={**options, **kwargs})

        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None
        self._registry_impl: Optional[Registry] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(opt.ENABLE_AUTH):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

        self._configure_telemetry()
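A short usage sketch for the constructor above. The URLs are placeholders, and the OAuth fields mirror the fixtures earlier on this page rather than real credentials.

# Basic client pointing at Core and Serving.
client = Client(core_url="localhost:6565", serving_url="localhost:6566")

# Client with authentication enabled, using the OAuth provider.
auth_client = Client(
    options={
        "core_url": "localhost:6565",
        "enable_auth": True,
        "auth_provider": "oauth",
        "oauth_grant_type": "client_credentials",
        "oauth_client_id": "fakeID",
        "oauth_client_secret": "fakeSecret",
        "oauth_audience": "https://example.com/api",
        "oauth_token_request_url": "https://example.com/oauth/token",
    }
)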
Example #24
    def test_defaults_are_not_written(self):
        """
        default values are not written to config file
        """
        fd, path = mkstemp()
        config = Config(path=path)
        config.set("option", "value")
        config.save()
        with open(path) as f:
            assert f.read() == "[general]\noption = value\n\n"
Example #25
    def test_exists(self):
        """
        Test checking whether a config option exists
        """
        fd, path = mkstemp()
        config = Config(path=path)
        config.set("my_val_exist", 1)

        assert config.exists("my_val_exist") is True
        assert config.exists("my_val_not_exist") is False
Example #26
File: cli.py Project: lucaspressi1/feast
def config_set(prop, value):
    """
    Set a Feast property for the currently active configuration
    """
    try:
        conf = Config()
        conf.set(option=prop.strip(), value=value.strip())
        conf.save()
    except Exception as e:
        _logger.error("Error in reading config file")
        _logger.exception(e)
        sys.exit(1)
Example #27
File: cli.py Project: lucaspressi1/feast
def list_jobs():
    """
    List jobs
    """
    from tabulate import tabulate

    spark_launcher = Config().get(CONFIG_SPARK_LAUNCHER)

    if spark_launcher == "emr":
        import feast.pyspark.aws.jobs

        jobs = feast.pyspark.aws.jobs.list_jobs(None, None)
        print(
            tabulate(jobs,
                     headers=feast.pyspark.aws.jobs.JobInfo._fields,
                     tablefmt="plain"))
    else:
        raise NotImplementedError(
            f"Feast currently does not provide support for the specified spark launcher: {spark_launcher}"
        )
Example #28
def _k8s_launcher(config: Config) -> JobLauncher:
    from feast.pyspark.launchers import k8s

    staging_location = config.get(opt.SPARK_STAGING_LOCATION)
    staging_uri = urlparse(staging_location)

    return k8s.KubernetesJobLauncher(
        namespace=config.get(opt.SPARK_K8S_NAMESPACE),
        resource_template_path=config.get(opt.SPARK_K8S_JOB_TEMPLATE_PATH,
                                          None),
        staging_location=staging_location,
        incluster=config.getboolean(opt.SPARK_K8S_USE_INCLUSTER_CONFIG),
        staging_client=get_staging_client(staging_uri.scheme, config),
        # azure-related arguments are None if not using Azure blob storage
        azure_account_name=config.get(opt.AZURE_BLOB_ACCOUNT_NAME),
        azure_account_key=config.get(opt.AZURE_BLOB_ACCOUNT_ACCESS_KEY),
    )
Example #29
def resolve_launcher(config: Config) -> JobLauncher:
    return _launchers[config.get(opt.SPARK_LAUNCHER)](config)
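resolve_launcher dispatches on the spark_launcher option through a module-level _launchers registry. A sketch of what that registry plausibly looks like, with key names assumed from the factories shown on this page (only "emr" is confirmed by the CLI examples above):

# Hypothetical registry mapping; the exact keys are assumptions.
_launchers = {
    "dataproc": _dataproc_launcher,
    "emr": _emr_launcher,
    "k8s": _k8s_launcher,
    "standalone": _standalone_launcher,
}

launcher = resolve_launcher(Config({"spark_launcher": "standalone"}))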
Example #30
def _standalone_launcher(config: Config) -> JobLauncher:
    from feast.pyspark.launchers import standalone

    return standalone.StandaloneClusterLauncher(
        config.get(opt.SPARK_STANDALONE_MASTER), config.get(opt.SPARK_HOME),
    )