def showParallelCoordinates():
    """Draw two parallel-coordinates plots of the 'occupancy' data set.

    The first plot uses the visualizer's default settings; the second one
    additionally passes ``normalize='standard'`` and ``sample=0.1``.
    """
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame. ``.values`` is used
    # rather than ``.as_matrix()`` because the latter was removed in
    # pandas 1.0; ``.values`` is available in every pandas release.
    X = data[features].values
    y = data.occupancy.values

    # Instantiate the visualizer
    visualizer = ParallelCoordinates(classes=classes, features=features)

    visualizer.fit(X, y)      # Fit the data to the visualizer
    visualizer.transform(X)   # Transform the data
    visualizer.poof()         # Draw/show/poof the data

    # Instantiate a second visualizer with normalization and sampling
    visualizer = ParallelCoordinates(
        classes=classes,
        features=features,
        normalize='standard',
        sample=0.1,
    )

    visualizer.fit(X, y)      # Fit the data to the visualizer
    visualizer.transform(X)   # Transform the data
    visualizer.poof()         # Draw/show/poof the data
def showDirectDataVisualization():
    """Show a scatter plot of the 'occupancy' data and joint plots of 'concrete'.

    The scatter plot puts 'light' against 'C02'. The 'concrete' data set is
    then drawn twice as a joint plot of 'cement' versus 'strength': once with
    the visualizer's defaults and once with ``joint_plot='hex'``.
    """
    # --- Scatter plot on the occupancy classification data ---
    occupancy = load_data('occupancy')

    columns = ["temperature", "relative humidity", "light", "C02", "humidity"]
    labels = ['unoccupied', 'occupied']

    # Keep the pandas objects as-is (no conversion to numpy arrays here)
    inputs = occupancy[columns]
    outputs = occupancy.occupancy

    scatter = ScatterVisualizer(x='light', y='C02', classes=labels)
    scatter.fit(inputs, outputs)
    scatter.transform(inputs)
    scatter.poof()

    # --- Joint plots on the concrete regression data ---
    concrete = load_data('concrete')

    feature = 'cement'
    target = 'strength'

    inputs = concrete[feature]
    outputs = concrete[target]

    # First pass uses the default joint plot, second pass the 'hex' variant
    for extra_options in ({}, {'joint_plot': 'hex'}):
        joint = JointPlotVisualizer(
            feature=feature, target=target, **extra_options
        )
        joint.fit(inputs, outputs)
        joint.poof()
def showRank2D():
    """Draw a Rank2D plot of the 'credit' data set using ``algorithm='pearson'``."""
    # Load the dataset
    data = load_data('credit')

    # Specify the features of interest
    features = [
        'limit', 'sex', 'edu', 'married', 'age', 'apr_delay', 'may_delay',
        'jun_delay', 'jul_delay', 'aug_delay', 'sep_delay', 'apr_bill',
        'may_bill', 'jun_bill', 'jul_bill', 'aug_bill', 'sep_bill',
        'apr_pay', 'may_pay', 'jun_pay', 'jul_pay', 'aug_pay', 'sep_pay',
    ]

    # Extract the numpy arrays from the data frame. ``.values`` is used
    # rather than ``.as_matrix()`` because the latter was removed in
    # pandas 1.0; ``.values`` is available in every pandas release.
    X = data[features].values
    y = data.default.values

    # Instantiate the visualizer with the Pearson ranking algorithm
    visualizer = Rank2D(features=features, algorithm='pearson')

    visualizer.fit(X, y)      # Fit the data to the visualizer
    visualizer.transform(X)   # Transform the data
    visualizer.poof()         # Draw/show/poof the data
def showPCAProjection():
    """Draw two PCA decomposition plots of the 'credit' data set.

    The first plot uses the visualizer's default projection dimension; the
    second one passes ``proj_dim=3``. Both pass the target ``y`` as ``col``.
    """
    # Load the classification data set
    data = load_data('credit')

    # Specify the features of interest
    features = [
        'limit', 'sex', 'edu', 'married', 'age', 'apr_delay', 'may_delay',
        'jun_delay', 'jul_delay', 'aug_delay', 'sep_delay', 'apr_bill',
        'may_bill', 'jun_bill', 'jul_bill', 'aug_bill', 'sep_bill',
        'apr_pay', 'may_pay', 'jun_pay', 'jul_pay', 'aug_pay', 'sep_pay',
    ]

    # Extract the numpy arrays from the data frame. ``.values`` is used
    # rather than ``.as_matrix()`` because the latter was removed in
    # pandas 1.0; ``.values`` is available in every pandas release.
    X = data[features].values
    y = data.default.values

    # Decomposition with the default projection dimension
    visualizer = PCADecomposition(scale=True, center=False, col=y)
    visualizer.fit_transform(X, y)
    visualizer.poof()

    # Same decomposition, requesting proj_dim=3
    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(X, y)
    visualizer.poof()
def showError():
    """Draw a prediction-error plot for a Lasso model on the 'concrete' data.

    The data is split 80/20 into train and test sets; the visualizer is fit
    on the training portion and scored on the test portion before drawing.
    """
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame. ``.values`` is used rather
    # than ``.as_matrix()`` because the latter was removed in pandas 1.0;
    # ``.values`` is available in every pandas release.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()                 # Draw/show/poof the data
def showROC():
    """Draw a ROC/AUC plot for logistic regression on the 'occupancy' data.

    The data is split 80/20 into train and test sets; the visualizer is fit
    on the training portion and scored on the test portion before drawing.
    """
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]

    # Extract the numpy arrays from the data frame. ``.values`` is used
    # rather than ``.as_matrix()`` because the latter was removed in
    # pandas 1.0; ``.values`` is available in every pandas release.
    X = data[features].values
    y = data.occupancy.values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()                 # Draw/show/poof the data