예제 #1
0
    visualize.Plot2DDataAndBinaryConcept(x, y, model)
    visualize.Save()

# Switch for the Adult-dataset experiment below; while it is False the whole
# block (including its imports) is skipped.
doModel = False
if doModel:
    import MachineLearningCourse.MLProjectSupport.Adult.AdultDataset as AdultDataset

    ### UPDATE this path for your environment
    kDataPath = "MachineLearningCourse/MLProjectSupport/Adult/dataset/adult.data"

    # Load the raw samples (xRaw) and their labels (yRaw) from kDataPath.
    (xRaw, yRaw) = AdultDataset.LoadRawData(kDataPath)

    import MachineLearningCourse.MLUtilities.Data.Sample as Sample

    # Partition the raw data into train / validate / test sets; no split
    # sizes are passed, so the helper's defaults apply.
    (xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw,
     yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw)

    # Report the size of each partition and sum(labels) / len(labels) as a
    # percentage. Assumes labels are 0/1 with 1 meaning ">50K" -- TODO confirm.
    # These lines divide by the partition length, so an empty partition
    # would raise ZeroDivisionError.
    print("Train is %d samples, %.4f percent >50K." %
          (len(yTrain), 100.0 * sum(yTrain) / len(yTrain)))
    print("Validate is %d samples, %.4f percent >50K." %
          (len(yValidate), 100.0 * sum(yValidate) / len(yValidate)))
    print("Test is %d samples %.4f percent >50K." %
          (len(yTest), 100.0 * sum(yTest) / len(yTest)))

    import MachineLearningCourse.Assignments.Module02.SupportCode.AdultFeaturize as AdultFeaturize

    # Create the feature set from the training partition only, with the
    # categorical features switched on and the numeric features switched off.
    featurizer = AdultFeaturize.AdultFeaturize()
    featurizer.CreateFeatureSet(xTrainRaw,
                                yTrain,
                                useCategoricalFeatures=True,
                                useNumericFeatures=False)
# Presumably the directory that generated charts are written to -- confirm
# against the code that consumes it.
# NOTE(review): the value uses forward slashes but ends in a backslash.
kOutputDirectory = "MachineLearningCourse/Assignments/Module01/Graphs/visualize\\"

import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset

# Relative path handed to the SMSSpam loader below.
kDataPath = "MachineLearningCourse/MLProjectSupport/SMSSpam/dataset/SMSSpamCollection"

# Load the raw samples (xRaw) and their labels (yRaw) from kDataPath.
(xRaw, yRaw) = SMSSpamDataset.LoadRawData(kDataPath)

import MachineLearningCourse.MLUtilities.Data.Sample as Sample
# Partition into train / validate / test sets. percentValidate and percentTest
# are both passed as .1 (presumably fractions, i.e. 10% each -- confirm in
# Sample.TrainValidateTestSplit).
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw,
 yTest) = Sample.TrainValidateTestSplit(xRaw,
                                        yRaw,
                                        percentValidate=.1,
                                        percentTest=.1)

import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.Assignments.Module01.SupportCode.SMSSpamFeaturize as SMSSpamFeaturize
import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidation

import time
import numpy as np


# A helper function for calculating FN rate and FP rate across a range of thresholds
def TabulateModelPerformanceForROC(model, xValidate, yValidate):
    pointsToEvaluate = 100
    thresholds = [
        x / float(pointsToEvaluate) for x in range(pointsToEvaluate + 1)
    ]
예제 #3
0
import MachineLearningCourse.MLUtilities.Data.Sample as Sample
import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidationUtil
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.MLUtilities.Learners.BoostedTree as BoostedTree
import MachineLearningCourse.MLUtilities.Learners.NeuralNetworkFullyConnected as NeuralNetworkFullyConnected

import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Visualizations.Visualize2D as Visualize2D

# Load the raw samples and labels with the loader's default arguments.
# NOTE(review): BlinkDataset is not imported anywhere in the visible part of
# this excerpt -- its import is presumably above the shown lines; confirm.
(xRaw, yRaw) = BlinkDataset.LoadRawData()

import MachineLearningCourse.MLUtilities.Data.Sample as Sample

# Partition into train / validate / test sets; no split sizes are passed, so
# the helper's defaults apply.
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw)

# Second names bound to the same label objects (no copy is made).
eyTrain = yTrain
eyValidate = yValidate

# Report the size of each partition and sum(labels) / len(labels) as a
# percentage. Assumes labels are 0/1 with 1 meaning "opened" -- TODO confirm.
# These lines divide by the partition length, so an empty partition would
# raise ZeroDivisionError.
print("Train is %d samples, %.4f percent opened." % (len(yTrain), 100.0 * sum(yTrain)/len(yTrain)))
print("Validate is %d samples, %.4f percent opened." % (len(yValidate), 100.0 * sum(yValidate)/len(yValidate)))
print("Test is %d samples %.4f percent opened" % (len(yTest), 100.0 * sum(yTest)/len(yTest)))

from PIL import Image
import torchvision.transforms as transforms
import torch 

# Hard-coded Windows path; presumably the directory that generated output is
# written to -- confirm against the code that consumes it.
kOutputDirectory = "C:\\temp\\visualize\\torch"

import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
예제 #4
0
# Hard-coded Windows path; presumably the directory that generated charts are
# written to -- confirm against the code that consumes it.
kOutputDirectory = "C:\\temp\\visualize"

import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset

# Load the raw samples (xRaw) and labels (yRaw); no path is passed, so the
# loader's default location is used.
(xRaw, yRaw) = SMSSpamDataset.LoadRawData()

import MachineLearningCourse.MLUtilities.Data.Sample as Sample
# Partition into train / validate / test sets. percentValidate and percentTest
# are both passed as .1 (presumably fractions, i.e. 10% each -- confirm in
# Sample.TrainValidateTestSplit).
(xTrainRaw, yTrain, xValidateRaw, yValidate, xTestRaw, yTest) = Sample.TrainValidateTestSplit(xRaw, yRaw, percentValidate=.1, percentTest=.1)

import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.Assignments.Module01.SupportCode.SMSSpamFeaturize as SMSSpamFeaturize
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting

# A helper function for calculating FN rate and FP rate across a range of thresholds
def TabulateModelPerformanceForROC(model, xValidate, yValidate):
    """Tabulate false-positive and false-negative rates over a threshold sweep.

    The model is asked once for its predicted probabilities on ``xValidate``.
    Those probabilities are then turned into 0/1 labels at each of 101 evenly
    spaced thresholds (0.00, 0.01, ..., 1.00); a sample is labelled 1 only
    when its probability is strictly greater than the threshold.

    Args:
        model: object exposing ``predictProbabilities(xValidate)``.
        xValidate: validation samples, passed through to the model unchanged.
        yValidate: true labels, passed to the evaluation helpers unchanged.

    Returns:
        A tuple ``(FPRs, FNRs, thresholds)`` of three lists with one entry
        per threshold, in ascending threshold order.

    Raises:
        UserWarning: if ``NotImplementedError`` is raised while the labels
            are being built or evaluated inside the threshold loop.
    """
    stepCount = 100
    divisor = float(stepCount)
    thresholds = []
    for step in range(stepCount + 1):
        thresholds.append(step / divisor)

    # NOTE(review): this call sits outside the try block, so a
    # NotImplementedError raised by predictProbabilities itself propagates
    # unchanged. The message below still talks about a 'predict' method with
    # a 'classificationThreshold' parameter, which this body never calls --
    # confirm which contract is intended.
    probabilities = model.predictProbabilities(xValidate)

    falsePositiveRates = []
    falseNegativeRates = []
    try:
        for cutoff in thresholds:
            labels = []
            for probability in probabilities:
                labels.append(1 if probability > cutoff else 0)

            falsePositiveRates.append(
                EvaluateBinaryClassification.FalsePositiveRate(yValidate, labels))
            falseNegativeRates.append(
                EvaluateBinaryClassification.FalseNegativeRate(yValidate, labels))
    except NotImplementedError:
        raise UserWarning("The 'model' parameter must have a 'predict' method that supports using a 'classificationThreshold' parameter with range [ 0 - 1.0 ] to create classifications.")

    return (falsePositiveRates, falseNegativeRates, thresholds)