def test_fit_params(teardown):
    x_data = iris.data
    y_t_data = iris.target
    random_state = 123
    n_components = 2

    sample_weight = y_t_data + 1  # Just weigh the classes differently
    fit_params = {"logreg__sample_weight": sample_weight}

    # baikal way
    x = Input()
    y_t = Input()
    x_pca = PCA(n_components=n_components, random_state=random_state, name="pca")(x)
    y = LogisticRegression(
        multi_class="multinomial",
        solver="lbfgs",
        random_state=random_state,
        name="logreg",
    )(x_pca, y_t)

    model = Model(x, y, y_t)
    model.fit(x_data, y_t_data, **fit_params)

    # traditional way
    pca = PCA(n_components=n_components, random_state=random_state)
    logreg = LogisticRegression(
        multi_class="multinomial", solver="lbfgs", random_state=random_state
    )
    pipe = Pipeline([("pca", pca), ("logreg", logreg)])
    pipe.fit(x_data, y_t_data, **fit_params)

    # Use assert_allclose instead of all equal due to small numerical differences
    # between fit_transform(...) and fit(...).transform(...)
    assert_allclose(model.get_step("logreg").coef_, pipe.named_steps["logreg"].coef_)
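
    # Illustrative extra check (an assumption, not part of the original test): both
    # models fit PCA on the same data with the same random_state, so the fitted
    # principal components should coincide as well. "pca" is the step name defined
    # above; ``components_`` is the standard scikit-learn PCA attribute.
    assert_allclose(
        model.get_step("pca").components_, pipe.named_steps["pca"].components_
    )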


def test_transformed_target(teardown):
    x = Input()
    y_t = Input()
    y_t_mod = Lambda(lambda y: np.log(y))(y_t)
    y_p_mod = LinearRegression()(x, y_t_mod)
    y_p = Lambda(lambda y: np.exp(y))(y_p_mod)
    model = Model(x, y_p, y_t)

    x_data = np.arange(4).reshape(-1, 1)
    y_t_data = np.exp(2 * x_data).ravel()
    model.fit(x_data, y_t_data)

    # y_t = exp(2 * x), so the regression fitted on log(y_t) should recover a slope of 2
    assert_array_equal(model.get_step("LinearRegression_0").coef_, np.array([2.0]))
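
    # Illustrative cross-check (a sketch, not part of the original test): scikit-learn's
    # TransformedTargetRegressor expresses the same log/exp target transformation, so
    # fitting it on the same data should recover the same coefficient. Only standard
    # scikit-learn API is assumed here.
    from sklearn.compose import TransformedTargetRegressor
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    ttr = TransformedTargetRegressor(
        regressor=SKLinearRegression(), func=np.log, inverse_func=np.exp
    )
    ttr.fit(x_data, y_t_data)
    assert_allclose(ttr.regressor_.coef_, np.array([2.0]))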


def test_steps_cache(teardown):
    """Repeated fit/predict calls with an equivalent inputs/outputs/targets signature
    and trainable flags should hit the model's internal nodes cache; calls with a new
    signature should miss it.
    """
    x1_data = iris.data[:, :2]
    x2_data = iris.data[:, 2:]
    y1_t_data = iris.target

    x1 = Input(name="x1")
    x2 = Input(name="x2")
    y1_t = Input(name="y1_t")
    y1 = LogisticRegression(name="LogReg")(x1, y1_t)
    y2 = PCA(name="PCA")(x2)

    hits, misses = 0, 0

    # 1) instantiation always misses
    misses += 1
    model = Model([x1, x2], [y1, y2], y1_t)
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 2) calling fit for the first time, hence a miss
    misses += 1
    model.fit([x1_data, x2_data], y1_t_data)
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 3) same as above, just different format, hence a hit
    hits += 1
    model.fit({x1: x1_data, x2: x2_data}, {y1_t: y1_t_data})
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 4) trainable flags are considered in cache keys, hence a miss
    misses += 1
    model.get_step("LogReg").trainable = False
    model.fit(
        [x1_data, x2_data], y1_t_data
    )  # NOTE: target is superfluous, but it affects caching
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 5) same as above, just different format, hence a hit
    hits += 1
    model.fit({x1: x1_data, x2: x2_data}, y1_t_data)
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 6) we drop the (superfluous) target, hence a miss
    misses += 1
    model.fit({x1: x1_data, x2: x2_data})
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 7) same as above, hence a hit
    hits += 1
    model.fit({x1: x1_data, x2: x2_data})
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 8) we restore the flag, becoming the same as 2) and 3), hence a hit
    hits += 1
    model.get_step("LogReg").trainable = True
    model.fit({x1: x1_data, x2: x2_data}, y1_t_data)
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 9) new inputs/targets/outputs signature, hence a miss
    misses += 1
    model.predict([x1_data, x2_data])
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 10) same inputs/outputs signature as 9), hence a hit
    hits += 1
    model.predict({"x1": x1_data, "x2": x2_data}, ["PCA:0/0", "LogReg:0/0"])
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 11) new inputs/outputs signature, hence a miss
    misses += 1
    model.predict({x1: x1_data}, "LogReg:0/0")
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses

    # 12) same as above, hence a hit
    hits += 1
    model.predict({x1: x1_data}, "LogReg:0/0")
    assert model._nodes_cache.hits == hits and model._nodes_cache.misses == misses