supplementalVocabularyWords=['call', 'to', 'your'])

# Apply the featurizer to the raw data sets to produce feature vectors. In this case, each message will be converted to an array
#  with one bit per feature that is 1 if the message has the feature, and 0 if the message does not have the feature.
xTrain = featurizer.Featurize(xTrainRaw)
xValidate = featurizer.Featurize(xValidateRaw)
xTest = featurizer.Featurize(xTestRaw)

print("\n - Inspect the features -")
# Each sample holds one entry per feature, so the length of the first sample is used as the feature count.
for i in range(len(xTrain[0])):
    print(featurizer.GetFeatureInfo(i))

print("\n - Inspect feature values for a few training samples -")
# Show at most five samples; capping at len(xTrain) avoids an IndexError when the training set has fewer than five.
for i in range(min(5, len(xTrain))):
    # Label, feature vector, and the raw input the vector was produced from.
    print(yTrain[i], "-", xTrain[i], xTrainRaw[i])

# Step up from the most-common-class baseline: use a linear model whose
#  parameters are picked by hand (a heuristic) instead of being learned.
import MachineLearningCourse.MLUtilities.Learners.LinearHeuristicModel as LinearHeuristicModel
model = LinearHeuristicModel.LinearHeuristicModel()

# NOTE(review): the last two arguments look like a bias term followed by one hand-tuned
#  weight per feature -- confirm against LinearHeuristicModel.fit.
model.fit(
    xTrain,
    yTrain,
    -1.0,
    [0.75, 0.75, 0.75, 0.25, 0.25],
)

print("\n - Inspect the weights on the heuristically-tuned model -")
model.visualize()

# Predict on the validation set, then pass the true labels and the predictions to the evaluation helper.
yValidatePredicted = model.predict(xValidate)

import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
EvaluateBinaryClassification.ExecuteAll(yValidate, yValidatePredicted)
    # Fit a logistic regression model to the featurized training data

    print("Learning the logistic regression model:")
    import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
    logisticRegressionModel = LogisticRegression.LogisticRegression()

    # NOTE(review): stepSize and convergence are presumably the gradient-descent step size and
    #  stopping tolerance -- confirm against LogisticRegression.fit.
    logisticRegressionModel.fit(xTrain, yTrain, stepSize=1.0, convergence=0.005)

    #############################
    # Show the fitted model, then evaluate its validation-set predictions

    import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification

    print("\nLogistic regression model:")
    logisticRegressionModel.visualize()
    # Predictions are made with an explicit classification threshold of 0.5.
    EvaluateBinaryClassification.ExecuteAll(
        yValidate,
        logisticRegressionModel.predict(xValidate, classificationThreshold=0.5),
    )

    #################
    # You may find the following module helpful for making charts. You'll have to install matplotlib (see the lecture notes).
    #
    # import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
    #
    # # trainLosses, validationLosses, and lossXLabels are parallel arrays with the losses you want to plot at the specified x coordinates
    #
    # Charting.PlotSeries([trainLosses, validationLosses], ['Train', 'Validate'], lossXLabels, chartTitle="Logistic Regression", xAxisTitle="Gradient Descent Steps", yAxisTitle="Avg. Loss", outputDirectory=kOutputDirectory, fileName="3-Logistic Regression Train vs Validate loss")