Beispiel #1
0
# Fit an isolation forest on the single feature `column` and keep only the
# rows it predicts as 1 (scikit-learn's label for inliers).
outlier_detector = IsolationForest(contamination=0.15)
inlier_labels = outlier_detector.fit_predict(X[column].to_frame(), y)
mask = inlier_labels == 1
new_X, new_y = X[mask], y[mask]

# Standardise the retained rows before plotting.
X_scaled = StandardScaler().fit_transform(new_X)

target_names = ['f', 's']

# Parallel-coordinates plot over every column of X; sample=0.5 together with
# shuffle=True are passed straight through to yellowbrick.
parallel_plot_options = {
    'classes': target_names,
    'features': list(X.columns),
    'sample': 0.5,
    'shuffle': True,
}
visualizer = ParallelCoordinates(**parallel_plot_options)
visualizer.fit_transform(X_scaled, new_y)
visualizer.show()

# %%
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Build the feature matrix and target from `tf`; cats_to_one_hot is called
# with an empty column list.
encoded_frame = tf.cats_to_one_hot(columns=[])
X, y = encoded_frame.create_X_y()

# Scale first, then replace X with its underlying `.values`.
feature_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(X)
X = X.values

# Two-component PCA projection of the standardised features.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Two-component t-SNE estimator; it is only constructed here, not fitted.
tsne = TSNE(n_components=2)
        df[feature].max(skipna=True) - df[feature].min(skipna=True))

# visualizer class used below
from yellowbrick.features import ParallelCoordinates

# feature matrix from the selected columns of data_norm, target from
# df's 'Survived' column, both as numpy arrays
X = data_norm[num_features].to_numpy()
y = df["Survived"].to_numpy()

# one axis per entry in num_features, labelled with `classes`
visualizer = ParallelCoordinates(features=num_features, classes=classes)

# fit on the data, then run the transform step
visualizer.fit(X, y)
visualizer.transform(X)

# first call writes the PNG file, second call displays the figure
figure_path = "titanic_fig4.png"
visualizer.show(outpath=figure_path)
visualizer.show()

# set figure size, make a 2x2 grid of subplots
plt.rcParams['figure.figsize'] = (20, 10)
fig, axes = plt.subplots(nrows=2, ncols=2)

# Count the 'Sex' values separately for rows with Survived == 1 and rows with
# Survived == 0.  The masks are built from the numeric 0/1 column and only the
# 'Sex' column is counted, so relabelling 'Survived' to text first would not
# change either result; selecting directly with .loc avoids copying the whole
# frame for each count.
Sex_survived = df.loc[df['Survived'] == 1, 'Sex'].value_counts()
Sex_not_survived = df.loc[df['Survived'] == 0, 'Sex'].value_counts()