def test_plot_nested_submodels(teardown, tmp_path, levels, expand_nested):
    """Smoke-test plotting a model whose single step is nested `levels` deep.

    Starts from a plain LogisticRegression step and wraps the most recently
    built step with build_submodel once per level. The test only checks that
    plot_model runs without raising; nothing about the image is asserted.
    """
    # chain[0] is the bare step; chain[k] wraps chain[k - 1] at depth k.
    chain = [LogisticRegression()]
    for depth in range(1, levels + 1):
        chain.append(build_submodel(chain[-1], depth))
    outermost = chain[-1]

    x = Input(name="x")
    y_t = Input(name="y_t")
    y_p = outermost(x, y_t)
    model = Model(inputs=x, outputs=y_p, targets=y_t)

    png_path = tmp_path / "test_plot_model.png"
    plot_model(
        model,
        filename=str(png_path),
        show=False,
        expand_nested=expand_nested,
    )
def test_plot_shared_submodel(teardown, tmp_path, expand_nested):
    """Smoke-test plotting a model that calls one submodel on two input pairs.

    The same submodel instance is applied to (x0, y_t0) and (x1, y_t1), and
    the resulting model is handed to plot_model. Nothing is asserted beyond
    the call completing.
    """
    shared = build_submodel(LogisticRegression(), 0)

    inputs, targets, outputs = [], [], []
    for idx in range(2):
        feature_in = Input(name="x{}".format(idx))
        target_in = Input(name="y_t{}".format(idx))
        inputs.append(feature_in)
        targets.append(target_in)
        outputs.append(shared(feature_in, target_in))

    model = Model(inputs=inputs, outputs=outputs, targets=targets)

    # NOTE(review): PNG_FILENAME is expected to be a module-level constant; it
    # is not defined in the visible part of the file -- confirm it exists.
    png_path = tmp_path / PNG_FILENAME
    plot_model(
        model,
        filename=str(png_path),
        show=False,
        expand_nested=expand_nested,
    )
def test_plot_independent_submodels(teardown, tmp_path, expand_nested):
    """Smoke-test plotting a model made of three unconnected branches.

    Branch 0 is a bare LogisticRegression step; branches 1 and 2 are
    submodels produced by build_submodel. Each branch gets its own input and
    target. The test only checks that plot_model runs without raising.
    """
    xs, y_ts, y_ps = [], [], []
    for i in range(3):
        # The first branch is a plain step, the rest are wrapped in a submodel.
        if i == 0:
            step = LogisticRegression()
        else:
            step = build_submodel(LogisticRegression(), i)

        x = Input(name="x{}".format(i))
        y_t = Input(name="y_t{}".format(i))
        xs.append(x)
        y_ts.append(y_t)
        y_ps.append(step(x, y_t))

    model = Model(xs, y_ps, y_ts)
    filename = str(tmp_path / "test_plot_model.png")
    plot_model(model, filename, show=False, expand_nested=expand_nested)
def test_plot_model(teardown, tmp_path):
    """Smoke-test plot_model on a model that embeds a two-in/two-out submodel.

    The main model has two inputs (x, u) and two outputs (w, v); the submodel
    sits between a DummySIMO step and a DummyMISO step. The plot is always
    rendered with expand_nested=True. Nothing is asserted beyond the call
    completing.
    """
    # ------- Submodel
    sub_in_a = Input(name="x1")
    sub_in_b = Input(name="x2")
    sub_out_a, sub_out_b = DummyMIMO()([sub_in_a, sub_in_b])
    submodel = Model(
        inputs=[sub_in_a, sub_in_b],
        outputs=[sub_out_a, sub_out_b],
        name="submodel",
    )

    # ------- Main model, branch through the submodel
    main_in = Input(name="x")
    fan_a, fan_b = DummySIMO()(main_in)
    nested_a, nested_b = submodel([fan_a, fan_b])

    # ------- Main model, independent single-input branch
    side_in = Input(name="u")
    side_out = DummySISO()(side_in)

    merged = DummyMISO()([nested_a, nested_b])
    model = Model(
        inputs=[main_in, side_in],
        outputs=[merged, side_out],
        name="main_model",
    )

    png_path = tmp_path / "test_plot_model.png"
    plot_model(model, filename=str(png_path), show=False, expand_nested=True)
def test_plot_big_model(teardown, tmp_path, expand_nested):
    """Smoke-test plot_model on a deliberately contrived, larger dummy model.

    Three submodels are built and then wired into a main model: submodel2
    transforms the target, submodel3 and submodel1 are chained on the feature
    path. Nothing is asserted beyond plot_model completing.
    """
    # ------- Sub-model 1: two inputs, two outputs, takes a target
    sub1_in_a = Input(name="x1_sub1")
    sub1_in_b = Input(name="x2_sub1")
    sub1_target = Input(name="y_t_sub1")
    sub1_out_a, sub1_out_b = DummyMIMO()([sub1_in_a, sub1_in_b], sub1_target)
    submodel1 = Model(
        inputs=[sub1_in_a, sub1_in_b],
        outputs=[sub1_out_a, sub1_out_b],
        targets=sub1_target,
        name="submodel1",
    )

    # ------- Sub-model 2: single input, single output
    sub2_in = Input(name="y_t_sub2")
    sub2_out = DummySISO()(sub2_in)
    submodel2 = Model(inputs=sub2_in, outputs=sub2_out, name="submodel2")

    # ------- Sub-model 3: two inputs, two outputs, no target
    sub3_in_a = Input(name="x1_sub3")
    sub3_in_b = Input(name="x2_sub3")
    sub3_out_a, sub3_out_b = DummyMIMO()([sub3_in_a, sub3_in_b])
    submodel3 = Model(
        inputs=[sub3_in_a, sub3_in_b],
        outputs=[sub3_out_a, sub3_out_b],
        name="submodel3",
    )

    # ------- Main model
    x = Input(name="x")
    y_t = Input(name="y_t")
    transformed_target = submodel2(y_t)
    fan_a, fan_b = DummySIMO()(x)
    mid_a, mid_b = submodel3([fan_a, fan_b])
    late_a, late_b = submodel1([mid_a, mid_b], transformed_target)
    merged = DummyMISO()([late_a, late_b])
    model = Model(inputs=x, outputs=merged, targets=y_t, name="main_model")

    png_path = tmp_path / "test_plot_model.png"
    plot_model(
        model,
        filename=str(png_path),
        show=False,
        expand_nested=expand_nested,
    )
squeeze = Lambda(np.squeeze, axis=1) ys_t = Split(n_targets, axis=1)(y_t) ys_p = [] for j, k in enumerate(order): x_stacked = ColumnStack()(inputs=[x, *ys_p[:j]]) ys_t[k] = squeeze(ys_t[k]) ys_p.append(LogisticRegression(solver="lbfgs")(x_stacked, ys_t[k])) ys_p = [ys_p[order.index(j)] for j in range(n_targets)] y_p = ColumnStack()(ys_p) model = Model(inputs=x, outputs=y_p, targets=y_t) # This might take a few seconds plot_model(model, filename="classifier_chain.png", dpi=96) # ------- Train model model.fit(X_train, Y_train) # ------- Evaluate model Y_train_pred = model.predict(X_train) Y_test_pred = model.predict(X_test) print( "Jaccard score on train data:", jaccard_score(Y_train, Y_train_pred, average="samples"), ) print( "Jaccard score on test data:", jaccard_score(Y_test, Y_test_pred, average="samples"),
# 2. Build the model
# Two feature inputs feed three classifiers whose outputs are stacked and
# passed to a final SVC; every trainable classifier shares the target y_t.
x1 = Input(name="x1")
x2 = Input(name="x2")
y_t = Input(name="y_t")

# Branches fed directly by the raw inputs.
y1 = ExtraTreesClassifier()(x1, y_t)
y2 = RandomForestClassifier()(x2, y_t)

# Branch that preprocesses x2 before classifying it.
z = PowerTransformer()(x2)
z = PCA()(z)
y3 = LogisticRegression()(z, y_t)

stacked_features = Stack()([y1, y2, y3])
y_p = SVC()(stacked_features, y_t)

model = Model(inputs=[x1, x2], outputs=y_p, targets=y_t)
plot_model(
    model,
    filename="multiple_input_nonlinear_pipeline_example_plot.png",
)

# 3. Train the model
dataset = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    random_state=0,
)

# Let's suppose the dataset is originally split in two:
# the first 15 columns go to x1, the remaining columns go to x2.
X1_train = X_train[:, :15]
X2_train = X_train[:, 15:]
X1_test = X_test[:, :15]
X2_test = X_test[:, 15:]

model.fit([X1_train, X2_train], y_train)

# 4. Use the model
y_test_pred = model.predict([X1_test, X2_test])
import sklearn.svm
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from baikal import Input, Model, make_step
from baikal.plot import plot_model

# 1. Define a step
# make_step wraps the scikit-learn estimator class so it can be called on
# baikal inputs below.
SVC = make_step(sklearn.svm.SVC)

# 2. Build the model
# x carries the features, y_t the targets, y_p the predictions.
x = Input()
y_t = Input()
y_p = SVC(C=1.0, kernel="rbf", gamma=0.5)(x, y_t)

model = Model(inputs=x, outputs=y_p, targets=y_t)
plot_model(model, filename="readme_quick_example.png")

# 3. Train the model
dataset = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    random_state=0,
)
model.fit(X_train, y_train)

# 4. Use the model
y_test_pred = model.predict(X_test)
ExtraTreesClassifier = make_step(sklearn.ensemble.ExtraTreesClassifier)

# ------- Load dataset
data = sklearn.datasets.load_breast_cancer()
# NOTE: y_p holds the dataset labels here; the same name is rebound to the
# model's output further down, after the split has consumed the labels.
X, y_p = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_p,
    test_size=0.2,
    random_state=0,
)

# ------- Build model
x = Input()
y_t = Input()

# First level: two classifiers configured with function="predict_proba".
y_p1 = LogisticRegression(function="predict_proba")(x, y_t)
y_p2 = RandomForestClassifier(function="predict_proba")(x, y_t)

# Second level: a final classifier fed with the concatenated first-level outputs.
ensemble_features = Concatenate()([y_p1, y_p2])
y_p = ExtraTreesClassifier()(ensemble_features, y_t)

model = Model(inputs=x, outputs=y_p, targets=y_t)
plot_model(model, filename="stacked_classifiers.png", dpi=96)

# ------- Train model
model.fit(X_train, y_train)

# ------- Evaluate model
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

train_f1 = f1_score(y_train, y_train_pred)
print("F1 score on train data:", train_f1)
test_f1 = f1_score(y_test, y_test_pred)
print("F1 score on test data:", test_f1)
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this example needs an older scikit-learn (or a different dataset).
dataset = load_boston()

# The "DIS" column is used as the regression target; every other column is a
# feature.
target = np.array(dataset.feature_names) == "DIS"
X = dataset.data[:, np.logical_not(target)]
y = dataset.data[:, target].squeeze()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

# ------- Build model
transformer = QuantileTransformer(
    n_quantiles=300,
    output_distribution="normal",
)

x = Input()
y_t = Input()

# QuantileTransformer requires an explicit feature dimension, hence the Lambda
# step that reshapes the target into a single column.
y_t_trans = Lambda(np.reshape, newshape=(-1, 1))(y_t)
y_t_trans = transformer(y_t_trans)

# The regressor is trained against the transformed target ...
y_p_trans = RidgeCV()(x, y_t_trans)

# ... and the same transformer instance is called again with
# compute_func="inverse_transform" and trainable=False on the predictions.
y_p = transformer(
    y_p_trans,
    compute_func="inverse_transform",
    trainable=False,
)

model = Model(inputs=x, outputs=y_p, targets=y_t)
plot_model(model, filename="transformed_target.png", dpi=96)

# ------- Train model
model.fit(X_train, y_train)

# ------- Evaluate model
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mae = median_absolute_error(y_test, y_pred)
print("R^2={}\nMAE={}".format(r2, mae))
# ------- Build model x = Input() y_t = Input() ys_t = Split(n_targets, axis=1)(y_t) ys_p = [] for j, k in enumerate(order): x_stacked = ColumnStack()(inputs=[x, *ys_p[:j]]) ys_t[k] = Lambda(np.squeeze, axis=1)(ys_t[k]) ys_p.append(LogisticRegression(solver="lbfgs")(x_stacked, ys_t[k])) ys_p = [ys_p[order.index(j)] for j in range(n_targets)] y_p = ColumnStack()(ys_p) model = Model(inputs=x, outputs=y_p, targets=y_t) plot_model(model, filename="classifier_chain.png", dpi=96) # This might take a few seconds # ------- Train model model.fit(X_train, Y_train) # ------- Evaluate model Y_train_pred = model.predict(X_train) Y_test_pred = model.predict(X_test) print( "Jaccard score on train data:", jaccard_score(Y_train, Y_train_pred, average="samples"), ) print( "Jaccard score on test data:", jaccard_score(Y_test, Y_test_pred, average="samples"),