def test_serializer_not_configurable(self):
    # By default creates its own JSONSerializer
    result = PrefectResult()
    assert isinstance(result.serializer, JSONSerializer)

    # Can specify one manually as well
    serializer = JSONSerializer()
    result = PrefectResult(serializer=serializer)
    assert result.serializer is serializer

    # Can set the attribute if it's a JSONSerializer
    serializer2 = JSONSerializer()
    result.serializer = serializer2
    assert result.serializer is serializer2

    # Type errors for any other serializer type
    with pytest.raises(TypeError):
        result.serializer = PickleSerializer()

    with pytest.raises(TypeError):
        result = PrefectResult(serializer=PickleSerializer())
def test(e: Optional[Executor]):
    with TemporaryDirectory() as tmpdir:
        flow_result = LocalResult(
            tmpdir, serializer=JSONSerializer(), location="{task_name}.json"
        )
        with Flow("write_result", result=flow_result) as f:
            _terminal = task(lambda: 42, checkpoint=True, name="magic")()

        with set_temporary_config({"flows.checkpointing": True}), raise_on_exception():
            f.run(executor=e)

        files = os.listdir(tmpdir)
        assert files == ["magic.json"], files
        with open(os.path.join(tmpdir, files[0]), "rb") as file:
            val = json.load(file)
        assert val == 42
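# The helper above takes the executor as a parameter.  A typical invocation,
# assuming Prefect 0.x's built-in executors (a hedged sketch; in the original
# test suite the executor is usually supplied by a pytest fixture instead):
from prefect.engine.executors import LocalExecutor

test(LocalExecutor())
test(None)  # None lets f.run() fall back to the flow's default executor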
def test_compressed_serializer_equality() -> None:
    assert CompressedSerializer(PickleSerializer(), format="bz2") == CompressedSerializer(
        PickleSerializer(), format="bz2"
    )
    assert CompressedSerializer(PickleSerializer(), format="bz2") != CompressedSerializer(
        JSONSerializer(), format="bz2"
    )
    assert CompressedSerializer(PickleSerializer(), format="bz2") != CompressedSerializer(
        PickleSerializer(), compress=gzip.compress, decompress=gzip.decompress
    )
    assert CompressedSerializer(
        PickleSerializer(), compress=gzip.compress, decompress=gzip.decompress
    ) != CompressedSerializer(
        PickleSerializer(),
        compress=gzip.compress,
        decompress=gzip.decompress,
        compress_kwargs={"compresslevel": 8},
    )
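# A minimal round-trip sketch for CompressedSerializer, assuming a Prefect
# version (>= 0.14) where it ships in prefect.engine.serializers.  This is
# illustrative and not part of the original tests:
from prefect.engine.serializers import CompressedSerializer, PickleSerializer

serializer = CompressedSerializer(PickleSerializer(), format="bz2")
blob = serializer.serialize({"points": [1, 2, 3]})  # pickled, then bz2-compressed
assert serializer.deserialize(blob) == {"points": [1, 2, 3]}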
def __init__(self, **kwargs: Any) -> None:
    if "serializer" not in kwargs:
        kwargs["serializer"] = JSONSerializer()
    super().__init__(**kwargs)
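# The tests in this section imply a validating ``serializer`` property on
# PrefectResult alongside the __init__ above.  A hedged sketch of what such a
# setter could look like (Prefect's actual implementation may differ):
@property
def serializer(self) -> Serializer:
    return self._serializer

@serializer.setter
def serializer(self, value: Serializer) -> None:
    # Reject anything that is not a JSONSerializer, matching the TypeError
    # asserted in test_serializer_not_configurable
    if not isinstance(value, JSONSerializer):
        raise TypeError("PrefectResult only supports JSONSerializer")
    self._serializer = value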
def test_serialize_returns_json(self):
    value = ["abc", 123]
    serialized = JSONSerializer().serialize(value)
    assert serialized == json.dumps(value).encode()
def test_deserialize_returns_objects(self):
    value = ["abc", 123]
    serialized = JSONSerializer().serialize(value)
    deserialized = JSONSerializer().deserialize(serialized)
    assert deserialized == value
def test_serialize_returns_bytes(self):
    value = ["abc", 123]
    serialized = JSONSerializer().serialize(value)
    assert isinstance(serialized, bytes)
def test_equality():
    assert PickleSerializer() == PickleSerializer()
    assert JSONSerializer() == JSONSerializer()
    assert PickleSerializer() != JSONSerializer()
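# The equality checks above suggest that plain serializers compare by type.
# A minimal sketch of that behavior (an assumption about the base class, not
# a verbatim copy of Prefect's code; CompressedSerializer evidently extends
# this to compare its configuration as well):
class Serializer:
    def __eq__(self, other: object) -> bool:
        return type(self) == type(other)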
from prefect import task, Flow
from prefect.engine.results import LocalResult
from prefect.engine.serializers import JSONSerializer


@task(target="test.json", result=LocalResult(serializer=JSONSerializer()))
def get_data():
    return {"asdf": "here"}


@task
def print_data(d):
    print(d)


with Flow("target_serializer") as f:
    d = get_data()
    print_data(d)

f.run()
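# Hypothetical follow-up check: after the first run, the "test.json" target
# should hold the JSON payload under LocalResult's directory (which defaults
# to ~/.prefect/results when no ``dir`` is given), and subsequent runs read it
# back through the serializer instead of re-running get_data():
import json
import os

target = os.path.expanduser("~/.prefect/results/test.json")
with open(target) as fh:
    assert json.load(fh) == {"asdf": "here"}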
def gen_xgboost_pipeline() -> Flow:
    """Generate an ``xgboost`` fit pipeline.

    Parameters
    ----------
    None

    Returns
    -------
    Flow
        The generated pipeline.
    """
    # Create a time range for AUROC calculation -- start to the end of the fourth quarter
    times = np.arange(2890, step=10)
    # Initialize tasks
    calib_data = CollapseData(name="Create calibration data")
    train_data = CollapseData(name="Create training data")
    tune_data = CollapseData(name="Create tuning data")
    stop_data = CollapseData(name="Create stopping data")
    tuning = XGBoostTuning(
        name="Run XGBoost hyperparameter tuning",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/xgboost/tuning.pkl",
        ),
    )
    retrieve_best = GetItem(
        name="Get best parameters",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/xgboost/params.json",
            serializer=JSONSerializer(),
        ),
    )
    tuneplots = PlotTuning(
        name="Plot XGBoost hyperparameter tuning",
        checkpoint=True,
        result=LocalResult(
            serializer=Plot(),
            dir=".",
            location="{output_dir}/models/{today}/xgboost/hyperparameter-tuning.png",
        ),
    )
    trained = FitXGBoost(
        name="Train XGBoost model",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/xgboost/model.pkl",
        ),
    )
    calcshap = XGBoostShap(name="Calculate SHAP Values")
    plotshap = PlotShapSummary(
        name="Plot SHAP values",
        checkpoint=True,
        result=LocalResult(
            serializer=Plot(),
            dir=".",
            location="{output_dir}/models/{today}/xgboost/shap-summary.png",
        ),
    )
    calc_sprob = WinProbability(name="Calculate survival probability")
    cal = CalibrateClassifier(
        name="Calibrate XGBoost model",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/xgboost/calibrator.pkl",
        ),
    )
    pcal = PlotCalibration(
        name="Plot calibration curve",
        checkpoint=True,
        result=LocalResult(
            serializer=Plot(),
            dir=".",
            location="{output_dir}/models/{today}/xgboost/calibration-curve.png",
        ),
    )
    # Generate the flow
    with Flow(name="Train XGBoost model") as flow:
        # Define some parameters
        _ = Parameter("output_dir", "nba-data")
        data_dir = Parameter("data_dir", "nba-data")
        max_evals = Parameter("max_evals", 100)
        seed = Parameter("seed", 42)
        # Load the data
        rawtrain = load_df(data_dir=data_dir, dataset="train.csv")
        rawtune = load_df(data_dir=data_dir, dataset="tune.csv")
        # Collapse data to the final row
        train = train_data(rawtrain)
        tune = tune_data.map(data=unmapped(rawtune), timestep=times)
        stop = stop_data(rawtune)
        calib_input = calib_data.map(data=unmapped(rawtrain), timestep=times)
        # Run hyperparameter tuning
        params = tuning(
            train_data=train,
            tune_data=tune,
            stopping_data=stop,
            early_stopping_rounds=25,
            num_boost_round=10000,
            max_evals=max_evals,
            seed=seed,
        )
        _ = retrieve_best(task_result=params, key="best")
        tuneplots(params["trials"])
        # Fit the model
        model = trained(
            params=params["best"],
            train_data=train,
            stopping_data=stop,
            early_stopping_rounds=25,
            num_boost_round=10000,
            verbose_eval=False,
        )
        # SHAP
        shap_values = calcshap(model=model, train_data=train)
        _ = plotshap(shap_values=shap_values)
        # Calibrate
        sprob = calc_sprob.map(model=unmapped(model), data=calib_input)
        iso = cal(train_data=sprob)
        _ = pcal(data=sprob, calibrator=iso)

    return flow
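# The ``location`` templates above ({output_dir}, {today}, ...) are filled in
# from prefect.context and runtime kwargs when the result is written.  A small
# illustration using Result.format (the values here are made up):
from prefect.engine.results import LocalResult

result = LocalResult(dir=".", location="{output_dir}/models/{today}/demo.pkl")
formatted = result.format(output_dir="nba-data", today="2021-01-01")
print(formatted.location)  # nba-data/models/2021-01-01/demo.pkl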
def gen_lifelines_pipeline() -> Flow:
    """Generate a ``lifelines`` model fit pipeline.

    Parameters
    ----------
    None

    Returns
    -------
    Flow
        The generated pipeline.
    """
    # Create a time range for AUROC calculation -- start to the end of the fourth quarter
    times = np.arange(2890, step=10)
    # Initialize tasks
    calib_data = CollapseData(name="Create calibration data")
    tune_data = CollapseData(name="Create tuning data")
    tuning = LifelinesTuning(
        name="Run lifelines hyperparameter tuning",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/lifelines/tuning.pkl",
        ),
    )
    retrieve_best = GetItem(
        name="Get best parameters",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/lifelines/params.json",
            serializer=JSONSerializer(),
        ),
    )
    tuneplots = PlotTuning(
        name="Plot lifelines hyperparameter tuning",
        checkpoint=True,
        result=LocalResult(
            serializer=Plot(),
            dir=".",
            location="{output_dir}/models/{today}/lifelines/hyperparameter-tuning.png",
        ),
    )
    model = InitializeLifelines(name="Initialize lifelines model")
    trained = FitLifelinesModel(
        name="Train lifelines model",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/lifelines/model.pkl",
        ),
    )
    calc_sprob = WinProbability(name="Calculate survival probability")
    cal = CalibrateClassifier(
        name="Calibrate Lifelines model",
        checkpoint=True,
        result=LocalResult(
            dir=".",
            location="{output_dir}/models/{today}/lifelines/calibrator.pkl",
        ),
    )
    pcal = PlotCalibration(
        name="Plot calibration curve",
        checkpoint=True,
        result=LocalResult(
            serializer=Plot(),
            dir=".",
            location="{output_dir}/models/{today}/lifelines/calibration-curve.png",
        ),
    )
    # Generate the flow
    with Flow(name="Train Cox model") as flow:
        # Define some parameters
        data_dir = Parameter("data_dir", "nba-data")
        max_evals = Parameter("max_evals", 100)
        seed = Parameter("seed", 42)
        # Load the data
        train = load_df(data_dir=data_dir, dataset="train.csv")
        rawtune = load_df(data_dir=data_dir, dataset="tune.csv")
        # Collapse the data to the final row for Concordance calculations
        tune = tune_data.map(data=unmapped(rawtune), timestep=times)
        calib_input = calib_data.map(data=unmapped(train), timestep=times)
        # Run hyperparameter tuning
        params = tuning(
            train_data=train, tune_data=tune, max_evals=max_evals, seed=seed
        )
        _ = retrieve_best(task_result=params, key="best")
        tuneplots(params["trials"])
        model_obj = model(params["best"])
        trained_model = trained(model=model_obj, data=train)
        sprob = calc_sprob.map(model=unmapped(trained_model), data=calib_input)
        iso = cal(train_data=sprob)
        _ = pcal(data=sprob, calibrator=iso)

    return flow
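# ``Plot()`` in both pipelines above is a project-specific serializer.  A
# minimal sketch of such a custom serializer for matplotlib figures (the names
# and behavior here are illustrative; the project's actual Plot class may
# differ):
import io

from prefect.engine.serializers import Serializer


class Plot(Serializer):
    """Serialize a matplotlib Figure to PNG bytes."""

    def serialize(self, value) -> bytes:
        # Assumes ``value`` is a matplotlib Figure
        buf = io.BytesIO()
        value.savefig(buf, format="png")
        return buf.getvalue()

    def deserialize(self, value: bytes):
        # PNG bytes don't round-trip to a Figure; return the raw bytes
        return value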