Exemple #1
0
def test_parallel_coords(pandas=False, outpath=None):
    """
    Runs the parallel coordinates visualizer on the occupancy dataset.

    Parameters
    ----------
    pandas : bool
        If True, draw with the pandas ``parallel_coordinates`` function
        instead of the ``ParallelCoordinates`` visualizer.
    outpath : path or None
        If given, the figure is written to this path; if None (or
        otherwise falsy) the figure is shown instead.
    """
    data = load_data('occupancy')  # Load the data
    features = ['temp', 'humid', 'light', 'co2', 'hratio']

    if pandas:
        # The pandas plotting helper works on the frame directly, so no
        # array extraction is needed on this path.
        parallel_coordinates(data[features + ['occupied']], 'occupied')
        if outpath:
            plt.savefig(outpath)
        else:
            plt.show()
        return

    classes = ['unoccupied', 'occupied']

    # DataFrame.as_matrix() was deprecated and then removed in pandas 1.0;
    # ``.values`` yields the same ndarray on both old and new releases.
    X = data[features].values
    y = data.occupied.values

    visualizer = ParallelCoordinates(  # Instantiate the visualizer
        classes=classes, features=features)
    visualizer.fit(X, y)  # Fit the data to the visualizer
    visualizer.transform(X)  # Transform the data
    visualizer.poof(outpath=outpath)  # Draw/show/poof the data
Exemple #2
0
def pcoords(X, y, outpath, **kwargs):
    """
    Draw a parallel coordinates plot of X, y on a freshly created axes.

    Extra keyword arguments are forwarded to ``ParallelCoordinates``;
    ``outpath`` is handed to the visualizer's ``poof`` call.
    """
    # Start from a brand-new figure; only its axes handle is needed here
    axes = plt.subplots()[1]

    # Build the visualizer on that axes, then fit and transform the data
    oz = ParallelCoordinates(ax=axes, **kwargs)
    oz.fit(X, y)
    oz.transform(X)

    # Finish the drawing, passing the output path through
    oz.poof(outpath=outpath)
Exemple #3
0
def pcoords(X, y, outpath, **kwargs):
    """
    Draw a parallel coordinates plot of X, y on a new figure.

    Parameters
    ----------
    X, y : array-like
        Data and target passed to the visualizer's ``fit``; X is also
        passed to ``transform``.
    outpath : path or None
        Forwarded to the visualizer's ``poof`` call.
    kwargs : dict
        Extra keyword arguments forwarded to ``ParallelCoordinates``. An
        explicit ``ax`` supplied here takes precedence over the axes
        created by this function.
    """
    # Create a new figure and axes
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Hand the new axes to the visualizer explicitly instead of relying on
    # it being picked up implicitly; setdefault keeps a caller-provided
    # ``ax`` working exactly as before.
    kwargs.setdefault('ax', ax)

    # Create the visualizer
    visualizer = ParallelCoordinates(**kwargs)
    visualizer.fit(X, y)
    visualizer.transform(X)

    # Save to disk
    visualizer.poof(outpath=outpath)
Exemple #4
0
def pcoords(ax):
    """
    Build a ParallelCoordinates visualizer for the occupancy data on ``ax``.

    The visualizer is titled, fitted and transformed, then returned to the
    caller without being shown or saved.
    """
    from yellowbrick.features import ParallelCoordinates

    # Target column, its class labels, and the feature columns to plot
    target = "occupancy"
    classes = ['unoccupied', 'occupied']
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]

    # Fetch the feature matrix and target vector
    X, y = load_data('occupancy', cols=features, target=target)

    # Assemble the visualizer options, then create and title the visualizer
    options = {'ax': ax, 'classes': classes, 'features': features}
    visualizer = ParallelCoordinates(**options)
    visualizer.title = "Parallel Coordinates of Features to Predict Room Occupancy"

    # Fit and transform before handing the visualizer back
    visualizer.fit(X, y)
    visualizer.transform(X)
    return visualizer
data_norm = data.copy()
# Min-max normalize each numeric feature to the 0-1 range
for feature in num_features:
    # Subtract the column minimum (not the mean) so the smallest value maps
    # to 0 and the largest to 1, as the 0-1 range requires.
    data_norm[feature] = (data[feature] - data[feature].min(skipna=True)) / (
        data[feature].max(skipna=True) - data[feature].min(skipna=True))

# Extract the numpy arrays from the data frame
X = data_norm[num_features].to_numpy()
y = data.Survived.to_numpy()

# Instantiate the visualizer
visualizer = ParallelCoordinates(classes=classes, features=num_features)

visualizer.fit(X, y)  # Fit the data to the visualizer
visualizer.transform(X)  # Transform the data
visualizer.poof(outpath="d://pcoords2.png")  # Draw/show/poof the data
plt.show()

# Step 10 - stacked bar charts to compare survived/not survived
# Enlarge the default figure size used for the charts below
# %matplotlib inline
plt.rcParams.update({'figure.figsize': (20, 10)})

# Lay out a 2x2 grid of subplots
fig, axes = plt.subplots(2, 2)
# make the data ready to feed into the visualizer
Sex_survived = data.replace({'Survived': {
    1: 'Survived',
    0: 'Not-survived'
Exemple #6
0
# --- Load the iris dataset through scikit-learn ---
from sklearn.datasets import load_iris
data = load_iris()
# Bare expression: has no effect when run as a script
# (presumably a notebook cell that displayed the object).
data
import pandas as pd
# Read the CSV named by data.filename; the resulting frame is not assigned.
# NOTE(review): assumes data.filename is a path readable from the current
# working directory -- confirm against the installed scikit-learn version.
pd.read_csv(data.filename)

# --- Parallel coordinates plot of the four iris features ---
from yellowbrick.features import ParallelCoordinates
# Feature and class labels come straight from the dataset object.
viz = ParallelCoordinates(features=data.feature_names,
                          classes=data.target_names,
                          normalize='standard')
viz.fit(data.data, data.target)
viz.transform(data.data)
viz.poof()

# --- Manual workflow: split, scale, then fit a decision tree ---
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
X, y = data.data, data.target
# No random_state is passed, so the split is not pinned by this code.
X_train, X_test, y_train, y_test = train_test_split(X, y)
# The scaler is fitted on the training split only and then reused to
# transform both the training and the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X1 = scaler.transform(X_train)

clf = DecisionTreeClassifier()
clf.fit(X1, y_train)
X2 = scaler.transform(X_test)
# The predictions are computed but not stored.
clf.predict(X2)

# --- Same two steps (scaling, decision tree) expressed as a Pipeline ---
from sklearn.pipeline import Pipeline
decision_pipeline = Pipeline([
    ('normalize', StandardScaler()),
    ('decision', DecisionTreeClassifier())
])
# The pipeline is fitted on the unscaled training split; scaling is its
# first step.
decision_pipeline.fit(X_train, y_train)
# Rescale each numeric feature: subtract the column minimum and divide by
# the column's (max - min) span
for feature in num_features:
    feature_values = df[feature]
    feature_min = feature_values.min(skipna=True)
    feature_span = feature_values.max(skipna=True) - feature_min
    data_norm[feature] = (feature_values - feature_min) / feature_span

# Pull plain numpy arrays out of the data frames
y = df['Survived'].to_numpy()
X = data_norm[num_features].to_numpy()

# Build the parallel coordinates visualizer
from yellowbrick.features import ParallelCoordinates

visualizer = ParallelCoordinates(features=num_features, classes=classes)

# Fit and transform the data
visualizer.fit(X, y)
visualizer.transform(X)
# First call writes the PNG file, second call displays the figure
visualizer.show(outpath="titanic_fig4.png")
visualizer.show()

# Use a larger default figure and lay out a 2x2 grid of subplots
plt.rcParams.update({'figure.figsize': (20, 10)})
fig, axes = plt.subplots(2, 2)

# Relabel the binary Survived column, keep the rows whose original
# Survived value is 1, and count those rows by Sex
survived_labels = {1: 'Survived', 0: 'Not-survived'}
relabelled = df.replace({'Survived': survived_labels})
Sex_survived = relabelled[df['Survived'] == 1]['Sex'].value_counts()
Sex_not_survived = df.replace({'Survived': {
    1: 'Survived',