# Draw the 2D data together with the model's binary concept, then save the plot.
# NOTE(review): `visualize`, `x`, `y` and `model` are bound before this point in the file.
visualize.Plot2DDataAndBinaryConcept(x, y, model)
visualize.Save()

# Switch for the Adult-dataset section below; it is skipped while this is False.
doModel = False
if doModel:
    # NOTE(review): every statement in this branch uses a name bound earlier in
    # the branch (AdultDataset -> xRaw/yRaw -> the split -> the featurizer), so
    # the whole section is kept under the guard -- confirm against the intended layout.
    import MachineLearningCourse.MLProjectSupport.Adult.AdultDataset as AdultDataset

    ### UPDATE this path for your environment
    kDataPath = "MachineLearningCourse/MLProjectSupport/Adult/dataset/adult.data"

    (xRaw, yRaw) = AdultDataset.LoadRawData(kDataPath)

    import MachineLearningCourse.MLUtilities.Data.Sample as Sample
    (xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw)

    # Report the size of each split and the share of positive (>50K) labels in it.
    print("Train is %d samples, %.4f percent >50K." % (len(yTrain), 100.0 * sum(yTrain) / len(yTrain)))
    print("Validate is %d samples, %.4f percent >50K." % (len(yValidate), 100.0 * sum(yValidate) / len(yValidate)))
    print("Test is %d samples %.4f percent >50K." % (len(yTest), 100.0 * sum(yTest) / len(yTest)))

    import MachineLearningCourse.Assignments.Module02.SupportCode.AdultFeaturize as AdultFeaturize

    # Build the feature set from the training split only, using the categorical
    # features and leaving the numeric ones out.
    featurizer = AdultFeaturize.AdultFeaturize()
    featurizer.CreateFeatureSet(xTrainRaw, yTrain, useCategoricalFeatures=True, useNumericFeatures=False)
# Directory prefix for generated graphs (note the trailing backslash in the value).
kOutputDirectory = "MachineLearningCourse/Assignments/Module01/Graphs/visualize\\"

# Load the raw SMS spam data from the path below.
import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset
kDataPath = "MachineLearningCourse/MLProjectSupport/SMSSpam/dataset/SMSSpamCollection"
(xRaw, yRaw) = SMSSpamDataset.LoadRawData(kDataPath)

# Split into train / validate / test, requesting 10% for validation and 10% for test.
import MachineLearningCourse.MLUtilities.Data.Sample as Sample
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw, percentValidate=.1, percentTest=.1)

import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.Assignments.Module01.SupportCode.SMSSpamFeaturize as SMSSpamFeaturize
import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidation
import time
import numpy as np


# A helper function for calculating FN rate and FP rate across a range of thresholds
def TabulateModelPerformanceForROC(model, xValidate, yValidate):
    # Number of equal steps between 0.0 and 1.0 (yields pointsToEvaluate + 1 thresholds).
    pointsToEvaluate = 100
    thresholds = [ x / float(pointsToEvaluate) for x in range(pointsToEvaluate + 1) ]
    # NOTE(review): the visible body stops here -- no threshold sweep or return
    # statement follows in this part of the file. Confirm against the full source.
# NOTE(review): several modules below are imported more than once (Sample,
# Charting); the repeats are harmless and are kept as they appear.
import MachineLearningCourse.MLUtilities.Data.Sample as Sample
import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidationUtil
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.MLUtilities.Learners.BoostedTree as BoostedTree
import MachineLearningCourse.MLUtilities.Learners.NeuralNetworkFullyConnected as NeuralNetworkFullyConnected
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Visualizations.Visualize2D as Visualize2D

# Load the raw data and split it into train / validate / test.
# NOTE(review): `BlinkDataset` is not imported in the visible text -- presumably
# it is imported earlier in the file; verify.
(xRaw, yRaw) = BlinkDataset.LoadRawData()

import MachineLearningCourse.MLUtilities.Data.Sample as Sample
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw)

# Additional names bound to the same train / validate label objects.
eyTrain = yTrain
eyValidate = yValidate

# Report the size of each split and the share of positive ("opened") labels in it.
print("Train is %d samples, %.4f percent opened." % (len(yTrain), 100.0 * sum(yTrain)/len(yTrain)))
print("Validate is %d samples, %.4f percent opened." % (len(yValidate), 100.0 * sum(yValidate)/len(yValidate)))
print("Test is %d samples %.4f percent opened" % (len(yTest), 100.0 * sum(yTest)/len(yTest)))

from PIL import Image
import torchvision.transforms as transforms
import torch

# Output location for the generated visualizations.
kOutputDirectory = "C:\\temp\\visualize\\torch"

import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
# Output location for the generated visualizations.
kOutputDirectory = "C:\\temp\\visualize"

# Load the raw SMS spam data.
import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset
(xRaw, yRaw) = SMSSpamDataset.LoadRawData()

# Split into train / validate / test, requesting 10% for validation and 10% for test.
import MachineLearningCourse.MLUtilities.Data.Sample as Sample
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw, percentValidate=.1, percentTest=.1)

import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.Assignments.Module01.SupportCode.SMSSpamFeaturize as SMSSpamFeaturize
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting


# A helper function for calculating FN rate and FP rate across a range of thresholds
def TabulateModelPerformanceForROC(model, xValidate, yValidate):
    """Tabulate false positive and false negative rates over a threshold sweep.

    The model's probabilities for ``xValidate`` are computed once via
    ``model.predictProbabilities``. For each of 101 evenly spaced thresholds
    from 0.0 to 1.0, a sample is classified as 1 when its probability is
    strictly greater than the threshold and as 0 otherwise; the resulting
    labels are scored against ``yValidate``.

    Args:
        model: Object exposing ``predictProbabilities(xValidate)``.
        xValidate: Validation samples passed to the model.
        yValidate: True labels for the validation samples.

    Returns:
        A tuple ``(FPRs, FNRs, thresholds)`` of three parallel lists.

    Raises:
        UserWarning: If a ``NotImplementedError`` is raised while computing
            the rates inside the sweep.
    """
    pointsToEvaluate = 100
    thresholds = [x / float(pointsToEvaluate) for x in range(pointsToEvaluate + 1)]

    FPRs = []
    FNRs = []

    # Score the validation set once and reuse the probabilities for every threshold.
    # This call sits outside the try block, so a NotImplementedError raised here
    # propagates unchanged.
    yPredicted = model.predictProbabilities(xValidate)

    try:
        for threshold in thresholds:
            # Strict comparison: a probability equal to the threshold is labelled 0.
            yHats = [1 if pred > threshold else 0 for pred in yPredicted]
            FPRs.append(EvaluateBinaryClassification.FalsePositiveRate(yValidate, yHats))
            FNRs.append(EvaluateBinaryClassification.FalseNegativeRate(yValidate, yHats))
    except NotImplementedError as error:
        # NOTE(review): the message talks about 'predict' / 'classificationThreshold',
        # but this function calls 'predictProbabilities' -- confirm the wording is
        # still what callers should see. Chained so the original error stays visible.
        raise UserWarning("The 'model' parameter must have a 'predict' method that supports using a 'classificationThreshold' parameter with range [ 0 - 1.0 ] to create classifications.") from error

    return (FPRs, FNRs, thresholds)