Ejemplo n.º 1
0
def balance_yellowbrick(
    X,
    y,
    features,
):
    """Draw a yellowbrick ``ClassBalance`` chart for a stratified sample of ``y``.

    The matplotlib backend is switched to ``'agg'`` and the current figure is
    cleared before drawing. Only a 1% stratified hold-out of the target
    (``test_size=0.01``) is passed to the visualizer.

    Args:
        X: Feature container indexed as ``X[features]``
            (presumably a pandas DataFrame -- confirm against callers).
        y: Target values; also used as the stratification key for the split.
        features: Selector applied to ``X`` before splitting.

    Returns:
        The ``plt`` module object, with the class-balance chart drawn on
        its current figure.
    """
    plt.switch_backend('agg')
    plt.clf()
    # Only the held-out target slice is plotted. X[features] is still passed
    # to the split so sampling (and the column lookup) behaves as before; the
    # remaining outputs are intentionally discarded.
    _, _, _, y_test = train_test_split(X[features],
                                       y,
                                       stratify=y,
                                       test_size=0.01)
    visualizer = ClassBalance()
    visualizer.fit(pd.Series(y_test))
    visualizer.finalize()

    return plt
Ejemplo n.º 2
0
# Show the dimensions of the feature data and of the target.
display(X.shape)
display(y.shape)

#%%
import matplotlib.pyplot as plt
from yellowbrick.target import ClassBalance

# Count the samples per distinct target value. np.unique reports the values
# in sorted order, so class_labels must be listed in that same order.
# NOTE(review): confirm the smallest target value really means "survived".
_, y_counts = np.unique(y, return_counts=True)
class_labels = ["survived", "deceased"]

# Two side-by-side axes: a pie chart on the left and the yellowbrick
# ClassBalance visualizer on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9,4.5))
# explode has two entries, so this assumes y holds exactly two classes;
# the second wedge is offset slightly from the centre.
ax1.pie(y_counts, explode=(0, 0.05), labels = class_labels)

visualizer = ClassBalance(labels = class_labels, ax = ax2)
visualizer.fit(y)
# finalize() is called here and the figure is shown once via plt.show()
# below -- presumably so both axes render together.
visualizer.finalize()

plt.show()

#%%
# Total number of missing entries: per-column counts from isna().sum(),
# then summed again (assumes X is a pandas DataFrame -- TODO confirm).
print("Number of missing values:", X.isna().sum().sum())

#%%
# Summary statistics for the "timerecurrence" column; as the last expression
# in the cell, the result is presumably echoed by the notebook.
X["timerecurrence"].describe()

#%%
# for column in X.columns[2:16]:
#     plt.scatter(X[column], y)
#     plt.xlabel(column)
#     plt.show()