Esempio n. 1
0
def showParallelCoordinates():
    """Draw two parallel-coordinates plots of the 'occupancy' data set.

    The first plot is built with only ``classes`` and ``features``; the
    second additionally passes ``normalize='standard'`` and ``sample=0.1``.
    Each visualizer is fitted, used to transform ``X`` and then shown, in
    that order. Nothing is returned.
    """
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame.  ``.values`` is used
    # instead of ``.as_matrix()``, which was deprecated in pandas 0.23 and
    # removed in pandas 1.0.
    X = data[features].values
    y = data.occupancy.values

    # One plot per option set: first without extra options, then with the
    # normalize/sample options.
    option_sets = (
        {},
        {'normalize': 'standard', 'sample': 0.1},
    )
    for options in option_sets:
        # Instantiate the visualizer
        visualizer = ParallelCoordinates(
            classes=classes,
            features=features,
            **options
        )

        visualizer.fit(X, y)  # Fit the data to the visualizer
        visualizer.transform(X)  # Transform the data
        visualizer.poof()  # Draw/show/poof the data
Esempio n. 2
0
def showDirectDataVisualization():
    """Show a scatter plot of 'occupancy' and two joint plots of 'concrete'.

    The scatter plot puts 'light' against 'C02'. The joint plots relate the
    'cement' column to the 'strength' column, first with the visualizer's
    default settings and then with ``joint_plot='hex'``. Nothing is returned.
    """
    # --- Scatter plot on the occupancy data -------------------------------
    occupancy = load_data('occupancy')
    feature_columns = [
        "temperature",
        "relative humidity",
        "light",
        "C02",
        "humidity",
    ]
    class_labels = ['unoccupied', 'occupied']

    # Column selections are passed on as-is (no array conversion here)
    X = occupancy[feature_columns]
    y = occupancy.occupancy

    scatter = ScatterVisualizer(x='light', y='C02', classes=class_labels)
    scatter.fit(X, y)
    scatter.transform(X)
    scatter.poof()

    # --- Joint plots on the concrete data ---------------------------------
    concrete = load_data('concrete')
    feature = 'cement'
    target = 'strength'

    X = concrete[feature]
    y = concrete[target]

    # First pass uses no extra options, second pass requests the hex plot
    for extra_options in ({}, {'joint_plot': 'hex'}):
        joint = JointPlotVisualizer(
            feature=feature,
            target=target,
            **extra_options
        )
        joint.fit(X, y)
        joint.poof()
Esempio n. 3
0
def showRank2D():
    """Draw a Rank2D plot of the 'credit' data set using Pearson ranking.

    The selected feature columns and the ``default`` column are converted
    to numpy arrays, the visualizer is fitted on them, ``X`` is transformed
    and the result is shown. Nothing is returned.
    """
    # Load the dataset
    data = load_data('credit')

    # Specify the features of interest
    features = [
        'limit',
        'sex',
        'edu',
        'married',
        'age',
        'apr_delay',
        'may_delay',
        'jun_delay',
        'jul_delay',
        'aug_delay',
        'sep_delay',
        'apr_bill',
        'may_bill',
        'jun_bill',
        'jul_bill',
        'aug_bill',
        'sep_bill',
        'apr_pay',
        'may_pay',
        'jun_pay',
        'jul_pay',
        'aug_pay',
        'sep_pay',
    ]

    # Extract the numpy arrays from the data frame.  ``.values`` is used
    # instead of ``.as_matrix()``, which was deprecated in pandas 0.23 and
    # removed in pandas 1.0.
    X = data[features].values
    y = data.default.values

    # Instantiate the visualizer with the Pearson ranking algorithm
    visualizer = Rank2D(features=features, algorithm='pearson')

    visualizer.fit(X, y)  # Fit the data to the visualizer
    visualizer.transform(X)  # Transform the data
    visualizer.poof()  # Draw/show/poof the data
Esempio n. 4
0
def showPCAProjection():
    """Draw two PCA decomposition plots of the 'credit' data set.

    Both plots are built with ``scale=True``, ``center=False`` and
    ``col=y``; the second additionally passes ``proj_dim=3``. Each
    visualizer is run through ``fit_transform`` and then shown. Nothing is
    returned.
    """
    # Load the classification data set
    data = load_data('credit')

    # Specify the features of interest
    features = [
        'limit',
        'sex',
        'edu',
        'married',
        'age',
        'apr_delay',
        'may_delay',
        'jun_delay',
        'jul_delay',
        'aug_delay',
        'sep_delay',
        'apr_bill',
        'may_bill',
        'jun_bill',
        'jul_bill',
        'aug_bill',
        'sep_bill',
        'apr_pay',
        'may_pay',
        'jun_pay',
        'jul_pay',
        'aug_pay',
        'sep_pay',
    ]

    # Extract the numpy arrays from the data frame.  ``.values`` is used
    # instead of ``.as_matrix()``, which was deprecated in pandas 0.23 and
    # removed in pandas 1.0.
    X = data[features].values
    y = data.default.values

    # First plot: projection dimension left at the visualizer's default
    visualizer = PCADecomposition(scale=True, center=False, col=y)
    visualizer.fit_transform(X, y)
    visualizer.poof()

    # Second plot: same settings with an explicit proj_dim of 3
    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(X, y)
    visualizer.poof()
Esempio n. 5
0
def showError():
    """Draw a prediction-error plot for a Lasso model on 'concrete'.

    The data is split with ``test_size=0.2``; the visualizer wrapping the
    Lasso model is fitted on the training part, scored on the test part and
    then shown. Nothing is returned.
    """
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame.  ``.values`` is used instead
    # of ``.as_matrix()``, which was deprecated in pandas 0.23 and removed
    # in pandas 1.0.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data (no random_state is passed, so the
    # split is not pinned to a seed here)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data
Esempio n. 6
0
def showROC():
    """Draw a ROC/AUC plot for a logistic regression on 'occupancy'.

    The data is split with ``test_size=0.2``; the visualizer wrapping the
    logistic regression model is fitted on the training part, scored on the
    test part and then shown. Nothing is returned.
    """
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    # NOTE(review): `classes` is defined but never passed to the visualizer
    # below; kept as in the original — confirm whether ROCAUC should get it.
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame.  ``.values`` is used
    # instead of ``.as_matrix()``, which was deprecated in pandas 0.23 and
    # removed in pandas 1.0.
    X = data[features].values
    y = data.occupancy.values

    # Create the train and test data (no random_state is passed, so the
    # split is not pinned to a seed here)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data