def test_from_yaml_serializes_correctly_with_feature_union(
        self, feature_union_classifier: DFFeatureUnion,
        tmp_path: pathlib.Path):
    """Round-trip a feature-union estimator through a saved log.

    The estimator is serialized with ``Model.to_dict``, written via
    ``Log.save_log`` into ``tmp_path`` and reloaded with
    ``Model.from_yaml``. The reloaded first step must match the original
    in name, type, number of union branches, and the name and params of
    the first step inside each branch.
    """
    original = Model(feature_union_classifier)
    serialized = original.to_dict()
    saved_log = Log(
        name="test",
        metrics=Metrics.from_list(["accuracy"]),
        estimator=serialized,
    )
    saved_log.save_log(tmp_path)

    restored = Model.from_yaml(saved_log.output_path)
    restored_steps = restored.estimator.steps
    assert len(restored_steps[0][1].transformer_list) == 2

    original_steps = original.estimator.steps
    restored_first = restored_steps[0]
    original_first = original_steps[0]
    assert restored_first[0] == original_first[0]
    assert isinstance(restored_first[1], type(original_first[1]))

    restored_branches = restored_first[1].transformer_list
    original_branches = original_first[1].transformer_list
    assert len(restored_branches) == len(original_branches)

    # Only the first step of each branch is compared.
    for restored_branch, original_branch in zip(restored_branches,
                                                original_branches):
        restored_head = restored_branch[1].steps[0]
        original_head = original_branch[1].steps[0]
        assert restored_head[0] == original_head[0]
        assert (restored_head[1].get_params() ==
                original_head[1].get_params())
def test_can_load_serialized_model_from_estimator(
        self, classifier: Model, tmp_path: pathlib.Path):
    """Reload a serialized estimator from a saved log and compare params.

    The ``classifier`` fixture is serialized with ``to_dict``, saved via
    ``Log.save_log`` into ``tmp_path`` and reloaded with
    ``Model.from_yaml``; the reloaded estimator's ``get_params()`` must
    equal the original's.
    """
    serialized = classifier.to_dict()
    metrics = Metrics([Metric("accuracy", score=1.0)])
    saved_log = Log(name="test", estimator=serialized, metrics=metrics)
    saved_log.save_log(tmp_path)

    reloaded = Model.from_yaml(saved_log.output_path)

    reloaded_params = reloaded.estimator.get_params()
    original_params = classifier.estimator.get_params()
    assert reloaded_params == original_params
def test_dump_serializes_correctly_without_pipeline(self, regression: Model):
    """Check ``to_dict`` output for the ``regression`` fixture.

    The serialized form must be a single-entry list describing the
    estimator's module, class name and constructor params.
    """
    # NOTE(review): these params are pinned to one estimator signature;
    # `normalize` presumably depends on the installed scikit-learn
    # version - confirm against the version the project targets.
    linear_regression_params = {
        "copy_X": True,
        "fit_intercept": True,
        "n_jobs": None,
        "normalize": False,
    }
    expected = [
        {
            "module": "sklearn.linear_model._base",
            "classname": "LinearRegression",
            "params": linear_regression_params,
        },
    ]

    assert regression.to_dict() == expected
def test_can_load_serialized_model_from_pipeline(
        self, pipeline_linear: Pipeline, tmp_path: pathlib.Path):
    """Reload a serialized pipeline from a saved log and compare its steps.

    The pipeline is wrapped in a ``Model``, serialized with ``to_dict``,
    saved via ``Log.save_log`` into ``tmp_path`` and reloaded with
    ``Model.from_yaml``. The reloaded pipeline must have the same number
    of steps as the original, and each step must match in name and
    ``get_params()``.
    """
    model = Model(pipeline_linear)
    log = Log(
        name="test",
        estimator=model.to_dict(),
        metrics=Metrics([Metric("accuracy", score=1.0)]),
    )
    log.save_log(tmp_path)

    loaded_model = Model.from_yaml(log.output_path)

    original_steps = model.estimator.steps
    loaded_steps = loaded_model.estimator.steps

    # zip() stops at the shorter input, so without this check a reloaded
    # pipeline that lost steps (or has none) would pass the loop below.
    assert len(loaded_steps) == len(original_steps)

    # Distinct loop names: the loaded-model variable is not rebound to a
    # step tuple while iterating.
    for original_step, loaded_step in zip(original_steps, loaded_steps):
        assert original_step[0] == loaded_step[0]
        assert original_step[1].get_params() == loaded_step[1].get_params()
def test_to_dict_serializes_correctly_with_feature_union(
        self, feature_union_classifier: DFFeatureUnion):
    """Check the ``to_dict`` layout for a feature-union estimator.

    The serialized result must have two top-level entries; the first is
    the union named ``"features"`` holding two branches (``pipe1`` and
    ``pipe2``). Each branch must start with a ``Select`` step on the
    expected columns followed by a ``DFStandardScaler`` step with the
    expected params.
    """
    serialized = Model(feature_union_classifier).to_dict()
    assert len(serialized) == 2

    features = serialized[0]
    assert features["name"] == "features"
    assert len(features["params"]) == 2

    # (branch name, columns its Select step is expected to pick)
    expected_branches = [
        ("pipe1", ["sepal length (cm)", "sepal width (cm)"]),
        ("pipe2", ["petal length (cm)", "petal width (cm)"]),
    ]

    for position, (branch_name, columns) in enumerate(expected_branches):
        branch = features["params"][position]
        assert branch["name"] == branch_name

        selector = branch["params"][0]
        scaler = branch["params"][1]

        assert selector["name"] == "select"
        assert selector["classname"] == "Select"
        assert selector["params"] == {"columns": columns}

        assert scaler["name"] == "scale"
        assert scaler["classname"] == "DFStandardScaler"
        assert scaler["params"] == {
            "copy": True,
            "with_mean": True,
            "with_std": True,
        }