# Example 1
    # --- Example: sample a synthetic 2D dataset from a known concept and fit a decision tree ---
    import MachineLearningCourse.MLUtilities.Data.Generators.SampleUniform2D as SampleUniform2D
    import MachineLearningCourse.MLUtilities.Data.Generators.ConceptCircle2D as ConceptCircle2D
    import MachineLearningCourse.MLUtilities.Data.Generators.ConceptSquare2D as ConceptSquare2D
    import MachineLearningCourse.MLUtilities.Data.Generators.ConceptLinear2D as ConceptLinear2D

    # Fixed seed so the sampled points are reproducible across runs.
    generator = SampleUniform2D.SampleUniform2D(seed=100)
    #concept = ConceptSquare2D.ConceptSquare2D(width=.2)
    # Linear ground-truth concept with bias 0.05 and weights [0.2, -0.2]
    # (alternative square/circle concepts left commented out for experimentation).
    concept = ConceptLinear2D.ConceptLinear2D(bias=0.05, weights=[0.2, -0.2])
    #concept = ConceptCircle2D.ConceptCircle2D(radius=.3)

    # Draw 100 points and label them with the true concept.
    x = generator.generate(100)
    y = concept.predict(x)

    import MachineLearningCourse.MLUtilities.Visualizations.Visualize2D as Visualize2D

    # NOTE(review): kOutputDirectory is not defined in this fragment -- presumably
    # set earlier in the original script; verify it is in scope before running.
    visualize = Visualize2D.Visualize2D(kOutputDirectory, "Generated Concept")

    # Plot the labeled sample against the generating concept and save to disk.
    visualize.Plot2DDataAndBinaryConcept(x, y, concept)
    visualize.Save()

    print("Decision Tree on Generated Concept")
    # NOTE(review): DecisionTree is not imported in this fragment -- confirm the
    # import exists elsewhere in the original script.
    model = DecisionTree.DecisionTree()
    model.fit(x, y, maxDepth=2)
    model.visualize()

    visualize = Visualize2D.Visualize2D(kOutputDirectory,
                                        "DecisionTree on Generated Concept")

    # Plot the same data against the learned tree's decision surface.
    visualize.Plot2DDataAndBinaryConcept(x, y, model)
    visualize.Save()
conceptCircle = ConceptCircle2D.ConceptCircle2D(radius=.3)

# NOTE(review): conceptLinear and ConceptCompound2D are not defined/imported in
# this fragment -- presumably created earlier in the original script; verify
# before running. The compound concept combines the linear and circle concepts.
concept = ConceptCompound2D.ConceptCompound2D(concepts = [ conceptLinear, conceptCircle ])

# 1000 uniformly sampled test points labeled by the true compound concept.
xTest = generator.generate(1000)
yTest = concept.predict(xTest)

# Independent 1000-point training sample from the same distribution.
xTrain = generator.generate(1000)
yTrain = concept.predict(xTrain)


import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Visualizations.Visualize2D as Visualize2D

## this code outputs the true concept.
visualize = Visualize2D.Visualize2D(kOutputDirectory, "4-Generated Concept")
visualize.Plot2DDataAndBinaryConcept(xTest,yTest,concept)
visualize.Save()

# Sweep the number of boosting rounds (k) at a fixed tree depth, measuring
# test-set accuracy with confidence bounds for each setting.
bestModel = None
bestAccuracy = None
kValues = [1, 10, 25, 50, 100]
maxDepth = 1
accuracies = []
errorBarsAccuracy = []
for kv in kValues:
    model = BoostedTree.BoostedTree()
    model.fit(xTrain, yTrain, maxDepth=maxDepth, k=kv)
    accuracy = EvaluateBinaryClassification.Accuracy(yTest, model.predict(xTest))
    lower, upper = ErrorBounds.GetAccuracyBounds(accuracy, len(yTest), .5)
    print(kv, ": ", accuracy)
    accuracies.append(accuracy)
    # Fix: record the confidence-interval half-width; previously the bounds
    # were computed but discarded and errorBarsAccuracy stayed empty.
    errorBarsAccuracy.append(accuracy - lower)
    # Fix: actually track the best model seen, which bestModel was declared for.
    if bestAccuracy is None or accuracy > bestAccuracy:
        bestAccuracy = accuracy
        bestModel = model
        # NOTE(review): this fragment is indented as the body of an enclosing
        # loop whose header is not visible here -- verify context before running.
        # Take up to 10 more gradient-descent steps on the training data.
        model.incrementalFit(xTrain,
                             yTrain,
                             maxSteps=10,
                             stepSize=1.0,
                             convergence=0.005)

        # then look at the model's weights
        model.visualize()

        # then look at how training set loss is converging
        print(" fit for %d iterations, train set loss is %.4f" %
              (model.totalGradientDescentSteps, model.loss(xTrain, yTrain)))

        # and visualize the model's decision boundary; the file name is the
        # zero-padded step count, e.g. "0010.test".
        # Fix: use totalGradientDescentSteps for consistency -- the print above
        # and the parallel fragment later in this file both use that attribute;
        # totalIterations appears nowhere else in this file.
        visualization = Visualize2D.Visualize2D(
            kOutputDirectory,
            "{0:04}.test".format(model.totalGradientDescentSteps))
        visualization.Plot2DDataAndBinaryConcept(xTrain, yTrain, model)
        visualization.Save()

# Once your LogisticRegression learner seems to be working, set this flag to True and try it on the spam data
runSMSSpam = False
if runSMSSpam:
    import os

    import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset

    ############################
    # Set up the data

    # Fix: build the dataset path with os.path.join instead of hard-coded
    # Windows '\\' separators so the script also runs on macOS/Linux.
    kDataPath = os.path.join("MachineLearningCourse", "MLProjectSupport",
                             "SMSSpam", "dataset", "SMSSpamCollection")

    # Load the raw (message, label) examples from disk.
    (xRaw, yRaw) = SMSSpamDataset.LoadRawData(kDataPath)
conceptCircle = ConceptCircle2D.ConceptCircle2D(radius=.3)

# NOTE(review): conceptLinear and ConceptCompound2D are not defined/imported in
# this fragment -- presumably set up earlier in the original script; verify.
concept = ConceptCompound2D.ConceptCompound2D(
    concepts=[conceptLinear, conceptCircle])

# 1000 test points sampled uniformly and labeled by the true compound concept.
xTest = generator.generate(1000)
yTest = concept.predict(xTest)

# Independent 1000-point training sample from the same distribution.
xTrain = generator.generate(1000)
yTrain = concept.predict(xTrain)

import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import MachineLearningCourse.MLUtilities.Visualizations.Visualize2D as Visualize2D

## this code outputs the true concept.
visualize = Visualize2D.Visualize2D(kOutputDirectory, "Generated Concept")
visualize.Plot2DDataAndBinaryConcept(xTest, yTest, concept)
visualize.Save()

## you can use this to visualize what your model is learning.
# visualize = Visualize2D.Visualize2D(kOutputDirectory, "Your Boosted Tree...", size=150)
# visualize.PlotBinaryConcept(model)

# Or you can use it to visualize individual models that you learned, e.g.:
# visualize.PlotBinaryConcept(model->modelLearnedInRound[2])

## you might like to see the training or test data too, so you might prefer this to simply calling 'PlotBinaryConcept'
#visualize.Plot2DDataAndBinaryConcept(xTrain,yTrain,model)

# And remember to save
# visualize.Save()
        # NOTE(review): this fragment is indented as the body of an enclosing
        # loop whose header is not visible here -- verify context before running.
        # Take up to 10 more gradient-descent steps on the training data.
        model.incrementalFit(xTrain,
                             yTrain,
                             maxSteps=10,
                             stepSize=1.0,
                             convergence=0.005)

        # then look at the model's weights
        model.visualize()

        # then look at how training set loss is converging
        print(" fit for %d iterations, train set loss is %.4f" %
              (model.totalGradientDescentSteps, model.loss(xTrain, yTrain)))

        # and visualize the model's decision boundary; the file name is the
        # zero-padded step count, e.g. "0010.test"
        visualization = Visualize2D.Visualize2D(
            kOutputDirectory,
            "{0:04}.test".format(model.totalGradientDescentSteps))
        visualization.Plot2DDataAndBinaryConcept(xTrain, yTrain, model)
        visualization.Save()

# Once your LogisticRegression learner seems to be working, set this flag to True and try it on the spam data
runSMSSpam = False
if runSMSSpam:
    import MachineLearningCourse.MLProjectSupport.SMSSpam.SMSSpamDataset as SMSSpamDataset

    ############################
    # Set up the data

    # Load the raw (message, label) examples; LoadRawData is called with no
    # path here, so presumably it has a built-in default location -- TODO confirm
    # against the SMSSpamDataset module.
    (xRaw, yRaw) = SMSSpamDataset.LoadRawData()

    import MachineLearningCourse.MLUtilities.Data.Sample as Sample