Example 1
    def apply(
        self,
        objects: Union[
            Entity,
            FeatureView,
            FeatureService,
            List[Union[FeatureView, Entity, FeatureService]],
        ],
    ) -> None:
        fs = self._build_feast_feature_store()
        fs.apply(objects)

        # Applying also initializes the sqlite tables in the online store, so upload the
        # local online store file back to S3.
        FlyteContext.current_context().file_access.upload(
            self.config.online_store_path,
            f"s3://{self.config.s3_bucket}/{self.config.online_store_path}",
        )
Example 2
 def all(self, **kwargs) -> pyspark.sql.DataFrame:
     if self._fmt == SchemaFormat.PARQUET:
         ctx = FlyteContext.current_context().user_space_params
         return ctx.spark_session.read.parquet(self.from_path)
     raise AssertionError(
         "Only Parquet files are currently supported for Spark DataFrames"
     )
Example 3
    def pre_execute(self,
                    user_params: ExecutionParameters) -> ExecutionParameters:
        import pyspark as _pyspark

        ctx = FlyteContext.current_context()
        sess_builder = _pyspark.sql.SparkSession.builder.appName(
            f"FlyteSpark: {user_params.execution_id}")
        if not (ctx.execution_state and ctx.execution_state.mode
                == ExecutionState.Mode.TASK_EXECUTION):
            # No execution state, or a mode other than TASK_EXECUTION, means this is a local run;
            # add the system spark-conf for local/notebook based execution.
            spark_conf = _pyspark.SparkConf()
            for k, v in self.task_config.spark_conf.items():
                spark_conf.set(k, v)
            # In local execution, propagate PYTHONPATH to executors too. This makes the spark
            # execution hermetic to the execution environment. For example, it allows running
            # Spark applications using Bazel, without major changes.
            if "PYTHONPATH" in os.environ:
                spark_conf.setExecutorEnv("PYTHONPATH",
                                          os.environ["PYTHONPATH"])
            sess_builder = sess_builder.config(conf=spark_conf)

        self.sess = sess_builder.getOrCreate()
        return user_params.builder().add_attr("SPARK_SESSION",
                                              self.sess).build()
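
The session attached here via add_attr("SPARK_SESSION", ...) is what a task body later reads back from the execution parameters. A minimal sketch of that consumer side, assuming the flytekitplugins-spark plugin; the task name, conf values, and parquet URI are made up:

import flytekit
from flytekit import task
from flytekitplugins.spark import Spark

@task(task_config=Spark(spark_conf={"spark.driver.memory": "1g"}))
def count_rows(uri: str) -> int:
    # The session injected by pre_execute is exposed on the current context
    sess = flytekit.current_context().spark_session
    return sess.read.parquet(uri).count()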
Example 4
    def get_online_features(
        self,
        features: Union[List[str], FeatureService],
        entity_rows: List[Dict[str, Any]],
        feature_refs: Optional[List[str]] = None,
        full_feature_names: bool = False,
    ) -> Dict[str, Any]:
        # Pull the online store from S3 so the lookup runs against freshly materialized features
        FlyteContext.current_context().file_access.download(
            f"s3://{self.config.s3_bucket}/{self.config.online_store_path}",
            self.config.online_store_path,
        )
        fs = self._build_feast_feature_store()

        online_response = fs.get_online_features(features, entity_rows,
                                                 feature_refs,
                                                 full_feature_names)
        return online_response.to_dict()
Example 5
 def materialize(
     self,
     start_date: datetime,
     end_date: datetime,
     feature_views: Optional[List[str]] = None,
 ) -> None:
     # Pull the current online store from S3 so materialization updates it in place
     FlyteContext.current_context().file_access.download(
         f"s3://{self.config.s3_bucket}/{self.config.online_store_path}",
         self.config.online_store_path,
     )
     fs = self._build_feast_feature_store()
     # Load features from the offline store into the online store for the given time window
     fs.materialize(
         start_date=start_date,
         end_date=end_date,
     )
     # Push the refreshed online store back to S3
     FlyteContext.current_context().file_access.upload(
         self.config.online_store_path,
         f"s3://{self.config.s3_bucket}/{self.config.online_store_path}",
     )
Example 6
 def __init__(self, base_dir: Optional[str] = None):
     if base_dir is None:
         self._base_dir = (
             FlyteContext.current_context().user_space_params.working_directory
         )
         self._files = []
     else:
         self._base_dir = base_dir
         files = os.listdir(base_dir)
         self._files = [os.path.join(base_dir, f) for f in files]
Example 7
def test_parameter_ranges_transformer():
    t = ParameterRangesTransformer()
    assert t.get_literal_type(ParameterRangeOneOf) == Generic.to_flyte_literal_type()
    o = ParameterRangeOneOf(param=IntegerParameterRange(10, 0, 1))
    ctx = FlyteContext.current_context()
    lit = t.to_literal(ctx, python_val=o, python_type=ParameterRangeOneOf, expected=None)
    assert lit is not None
    assert lit.scalar.generic is not None
    ro = t.to_python_value(ctx, lit, ParameterRangeOneOf)
    assert ro is not None
    assert ro == o
Example 8
    def execute(self, **kwargs) -> typing.Any:
        with tempfile.TemporaryDirectory() as temp_dir:
            ctx = FlyteContext.current_context()
            file_ext = os.path.basename(self.task_config.uri)
            local_path = os.path.join(temp_dir, file_ext)
            ctx.file_access.download(self.task_config.uri, local_path)
            if self.task_config.compressed:
                local_path = unarchive_file(local_path, temp_dir)

            print(f"Connecting to db {local_path}")
            with contextlib.closing(sqlite3.connect(local_path)) as con:
                df = pd.read_sql_query(self.get_query(**kwargs), con)
                return df
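
The execute above is driven entirely by the task's config (uri, compressed) and its query template. A minimal sketch of how such a task might be declared, assuming flytekit's SQLite3Task and SQLite3Config helpers; the task name, table, and archive URL are hypothetical:

from flytekit import kwtypes
from flytekit.extras.sqlite3.task import SQLite3Config, SQLite3Task
from flytekit.types.schema import FlyteSchema

# Hypothetical archive containing a sqlite database file
EXAMPLE_DB = "https://example.com/data/example.sqlite.zip"

sql_task = SQLite3Task(
    name="my_sqlite_query",
    query_template="select * from tracks limit {{ .inputs.limit }}",
    inputs=kwtypes(limit=int),
    output_schema_type=FlyteSchema,
    task_config=SQLite3Config(uri=EXAMPLE_DB, compressed=True),
)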
Example 9
 def decode(
     self,
     ctx: FlyteContext,
     flyte_value: literals.StructuredDataset,
     current_task_metadata: StructuredDatasetMetadata,
 ) -> DataFrame:
     user_ctx = FlyteContext.current_context().user_space_params
     if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
         columns = [
             c.name
             for c in current_task_metadata.structured_dataset_type.columns
         ]
         return user_ctx.spark_session.read.parquet(
             flyte_value.uri).select(*columns)
     return user_ctx.spark_session.read.parquet(flyte_value.uri)
Example 10
    def execute_from_model(self, tt: task_models.TaskTemplate, **kwargs) -> typing.Any:
        with tempfile.TemporaryDirectory() as temp_dir:
            ctx = FlyteContext.current_context()
            file_ext = os.path.basename(tt.custom["uri"])
            local_path = os.path.join(temp_dir, file_ext)
            ctx.file_access.download(tt.custom["uri"], local_path)
            if tt.custom["compressed"]:
                local_path = unarchive_file(local_path, temp_dir)

            print(f"Connecting to db {local_path}")
            interpolated_query = SQLite3Task.interpolate_query(tt.custom["query_template"], **kwargs)
            print(f"Interpolated query {interpolated_query}")
            with contextlib.closing(sqlite3.connect(local_path)) as con:
                df = pd.read_sql_query(interpolated_query, con)
                return df
Example 11
    def pre_execute(self, user_params: ExecutionParameters) -> ExecutionParameters:
        import pyspark as _pyspark

        ctx = FlyteContext.current_context()
        sess_builder = _pyspark.sql.SparkSession.builder.appName(f"FlyteSpark: {user_params.execution_id}")
        if not (ctx.execution_state and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION):
            # No execution state, or a mode other than TASK_EXECUTION, means this is a local run;
            # add the system spark-conf for local/notebook based execution.
            spark_conf = _pyspark.SparkConf()
            spark_conf.setAll(list(self.task_config.spark_conf.items()))
            spark_conf.set("spark.master", "local")
            # Hand the conf to the builder; a detached SparkConf is never picked up by getOrCreate
            sess_builder = sess_builder.config(conf=spark_conf)

        sess = sess_builder.getOrCreate()
        return user_params.builder().add_attr("SPARK_SESSION", sess).build()
Example 12
def test_hpoconfig_transformer():
    t = HPOTuningJobConfigTransformer()
    assert t.get_literal_type(HyperparameterTuningJobConfig) == Generic.to_flyte_literal_type()
    o = HyperparameterTuningJobConfig(
        tuning_strategy=1,
        tuning_objective=HyperparameterTuningObjective(
            objective_type=HyperparameterTuningObjectiveType.MINIMIZE,
            metric_name="x",
        ),
        training_job_early_stopping_type=TrainingJobEarlyStoppingType.OFF,
    )
    ctx = FlyteContext.current_context()
    lit = t.to_literal(ctx, python_val=o, python_type=HyperparameterTuningJobConfig, expected=None)
    assert lit is not None
    assert lit.scalar.generic is not None
    ro = t.to_python_value(ctx, lit, HyperparameterTuningJobConfig)
    assert ro is not None
    assert ro == o
Example 13
    def pre_execute(self, user_params: ExecutionParameters) -> ExecutionParameters:
        """
        Pre-execute for Sagemaker will automatically add the distributed context to the execution params, only
        if the number of execution instances is > 1. Otherwise this is considered to be a single node execution
        """
        if self._is_distributed():
            logging.info("Distributed context detected!")
            exec_state = FlyteContext.current_context().execution_state
            if exec_state and exec_state.mode == ExecutionState.Mode.TASK_EXECUTION:
                """
                This mode indicates we are actually in a remote execute environment (within sagemaker in this case)
                """
                dist_ctx = DistributedTrainingContext.from_env()
            else:
                dist_ctx = DistributedTrainingContext.local_execute()
            return user_params.builder().add_attr("DISTRIBUTED_TRAINING_CONTEXT", dist_ctx).build()

        return user_params
Example 14
def record_outputs(**kwargs) -> str:
    """
    Use this method to record outputs from a notebook.
    It will convert all outputs to a Flyte understandable format. For Files, Directories, please use FlyteFile or
    FlyteDirectory, or wrap up your paths in these decorators.
    """
    if kwargs is None:
        return ""

    m = {}
    ctx = FlyteContext.current_context()
    for k, v in kwargs.items():
        expected = TypeEngine.to_literal_type(type(v))
        lit = TypeEngine.to_literal(ctx,
                                    python_type=type(v),
                                    python_val=v,
                                    expected=expected)
        m[k] = lit
    return LiteralMap(literals=m).to_flyte_idl()
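
record_outputs is meant to be called from inside the notebook itself, typically in the final cell that the papermill plugin scrapes for outputs. A minimal sketch of that notebook-side call, assuming flytekitplugins-papermill; the variable names and values are made up:

# Final cell of the notebook (commonly tagged "outputs" so the NotebookTask picks it up)
from flytekitplugins.papermill import record_outputs

square = 4  # hypothetical value computed earlier in the notebook
record_outputs(square=square)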
Example 15
    def execute(self, **kwargs) -> Any:
        """
        TODO: Figure out how to share FlyteContext ExecutionParameters with the notebook kernel (as notebook kernel
             is executed in a separate python process)
        For Spark, the notebooks today need to use the new_session or just getOrCreate session and get a handle to the
        singleton
        """
        logger.info(
            f"Hijacking the call for task-type {self.task_type}, to call notebook."
        )
        # Execute Notebook via Papermill.
        pm.execute_notebook(self._notebook_path,
                            self.output_notebook_path,
                            parameters=kwargs)  # type: ignore

        outputs = self.extract_outputs(self.output_notebook_path)
        self.render_nb_html(self.output_notebook_path,
                            self.rendered_output_path)

        m = {}
        if outputs:
            m = outputs.literals
        output_list = []
        for k, type_v in self.python_interface.outputs.items():
            if k == self._IMPLICIT_OP_NOTEBOOK:
                output_list.append(self.output_notebook_path)
            elif k == self._IMPLICIT_RENDERED_NOTEBOOK:
                output_list.append(self.rendered_output_path)
            elif k in m:
                v = TypeEngine.to_python_value(
                    ctx=FlyteContext.current_context(),
                    lv=m[k],
                    expected_python_type=type_v)
                output_list.append(v)
            else:
                raise RuntimeError(
                    f"Expected output {k} of type {v} not found in the notebook outputs"
                )

        return tuple(output_list)
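
The execute above returns each declared output plus the two implicit notebook outputs (the executed and the rendered notebook). A minimal sketch of declaring such a task with flytekitplugins-papermill's NotebookTask; the name, path, and signature are hypothetical:

from flytekit import kwtypes
from flytekitplugins.papermill import NotebookTask

nb_task = NotebookTask(
    name="my_notebook_task",
    notebook_path="./analysis.ipynb",  # hypothetical path
    inputs=kwtypes(x=int),
    outputs=kwtypes(square=int),
)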
Example 16
    def dispatch_execute(
        self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
    ) -> Union[_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
        """
        This function is mostly copied from the base PythonTask, but differs in that we have to infer the Python
        interface before executing. Also, we refer to ``self.task_template`` rather than just ``self`` like in task
        classes that derive from the base ``PythonTask``.
        """
        # Invoked before the task is executed
        new_user_params = self.pre_execute(ctx.user_space_params)

        # Create another execution context with the new user params, but let's keep the same working dir
        with ctx.new_execution_context(
                mode=ctx.execution_state.mode,
                execution_params=new_user_params,
                working_dir=ctx.execution_state.working_dir,
        ) as exec_ctx:
            # Added: Have to reverse the Python interface from the task template Flyte interface
            # See docstring for more details.
            guessed_python_input_types = TypeEngine.guess_python_types(
                self.task_template.interface.inputs)
            native_inputs = TypeEngine.literal_map_to_kwargs(
                exec_ctx, input_literal_map, guessed_python_input_types)

            logger.info(
                f"Invoking FlyteTask executor {self.task_template.id.name} with inputs: {native_inputs}"
            )
            try:
                native_outputs = self.execute(**native_inputs)
            except Exception as e:
                logger.exception(f"Exception when executing {e}")
                raise e

            logger.info(
                f"Task executed successfully in user level, outputs: {native_outputs}"
            )
            # Let's run the post_execute method. This may raise an IgnoreOutputs exception, which is
            # bubbled up to be handled by the caller.
            native_outputs = self.post_execute(new_user_params, native_outputs)

            # Short-circuit the translation to a literal map because what's returned may be a dynamic job spec
            # (or an already-constructed LiteralMap if the dynamic task was a no-op), not Python native values.
            if isinstance(native_outputs,
                          _literal_models.LiteralMap) or isinstance(
                              native_outputs, _dynamic_job.DynamicJobSpec):
                return native_outputs

            expected_output_names = list(
                self.task_template.interface.outputs.keys())
            if len(expected_output_names) == 1:
                # Here we have to handle the fact that the task could've been declared with a typing.NamedTuple of
                # length one. That convention is used for naming outputs, and single-length NamedTuples are
                # particularly troublesome, but elegant handling of them is not a high priority.
                # Again, we're using the output_tuple_name as a proxy.
                # (Some of the base PythonTask handling is intentionally omitted here.)
                native_outputs_as_map = {
                    expected_output_names[0]: native_outputs
                }
            elif len(expected_output_names) == 0:
                native_outputs_as_map = {}
            else:
                native_outputs_as_map = {
                    expected_output_names[i]: native_outputs[i]
                    for i, _ in enumerate(native_outputs)
                }

            # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
            # built into the IDL that all the values of a literal map are of the same type.
            literals = {}
            for k, v in native_outputs_as_map.items():
                literal_type = self.task_template.interface.outputs[k].type
                py_type = type(v)

                if isinstance(v, tuple):
                    raise AssertionError(
                        f"Output({k}) in task{self.task_template.id.name} received a tuple {v}, instead of {py_type}"
                    )
                try:
                    literals[k] = TypeEngine.to_literal(
                        exec_ctx, v, py_type, literal_type)
                except Exception as e:
                    raise AssertionError(
                        f"failed to convert return value for var {k}") from e

            outputs_literal_map = _literal_models.LiteralMap(literals=literals)
            # After the execute has been successfully completed
            return outputs_literal_map