Example #1
    # Requires: pandas as pd, pyspark.ml.recommendation.ALS,
    # pyspark.ml.evaluation.RegressionEvaluator
    def evalModel(self, X, numTrain):
        """Evaluate the tuned ALS model over numTrain random 80/20 train/test splits."""

        evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
        rows = []
        for i in range(numTrain):
            (train, test) = X.randomSplit([0.8, 0.2])
            model = ALS(implicitPrefs=True,
                        rank=self.bestRank,
                        maxIter=self.bestNumIter,
                        regParam=self.bestLambda,
                        alpha=self.bestAlpha,
                        userCol="steamid",
                        itemCol="appid", ratingCol="rating",
                        coldStartStrategy="drop").fit(train)  # drop NaN predictions for unseen users/items
            predictions = model.transform(test)
            ones = predictions.where("rating=1")
            zeroes = predictions.where("rating=0")
            predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}

            # Score each subset (all predictions, only zeroes, only ones) with RMSE, MSE and MAE.
            for s, p in predictors.items():
                rows.append([i, s,
                             evaluator.setParams(metricName="rmse").evaluate(p),
                             evaluator.setParams(metricName="mse").evaluate(p),
                             evaluator.setParams(metricName="mae").evaluate(p)])
            print(round(((i + 1) / numTrain) * 100, 0), '%')  # progress indicator

        pdf = pd.DataFrame(rows, columns=['iteration', 'type', 'rmse', 'mse', 'mae'])
        print(pdf)
        print(pdf.groupby('type')[['rmse', 'mse', 'mae']].mean())
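A hypothetical call site for evalModel, for orientation only: the class name SteamALSTuner, the Spark session setup, the parquet path and the tuned values below are assumptions and not part of the snippet; only the evalModel signature comes from the code above.

# Hypothetical usage sketch; SteamALSTuner, "ratings.parquet" and the tuned values are assumed.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("als-eval").getOrCreate()
ratings = spark.read.parquet("ratings.parquet")  # assumed layout: steamid, appid, rating columns

tuner = SteamALSTuner()                # hypothetical class that defines evalModel and the best* attributes
tuner.bestRank, tuner.bestNumIter = 20, 10
tuner.bestLambda, tuner.bestAlpha = 0.1, 40.0
tuner.evalModel(ratings, numTrain=10)  # averages RMSE/MSE/MAE over 10 random 80/20 splits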
Example #2
        # Tail of the hyper-parameter search loop: remember the best settings found so far.
        bestNumIter = numIter
        bestAlpha = alf

    count += 1
    # print('\r{0}%'.format(round((count / len(indexes)) * 100, 0)), end="", flush=True)

print("The best model was trained on evalData with rank = %d, lambda = %.2f, alpha = %d, "
      "numIter = %d and RMSE %f." % (bestRank, bestLambda, bestAlpha, bestNumIter, bestValidationRmse))


# brier score
# AUC

# Score the tuned model on the target set.
targetPrediction = bestModel.transform(target)
print('target prediction', targetPrediction.collect())  # collect() pulls every row to the driver
print('target RMSE:', evaluator.setParams(metricName="rmse").evaluate(targetPrediction))
print('target MSE:', evaluator.setParams(metricName="mse").evaluate(targetPrediction))
print('target MAE:', evaluator.setParams(metricName="mae").evaluate(targetPrediction))

predictions = bestModel.transform(test)

setvalues = ['all', 'zeroes', 'ones']

em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
em.index.names = ["set values"]

ones = predictions.where("rating=1")
zeroes = predictions.where("rating=0")
predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}

#fpr, tpr, thresholds = roc_curve(predictions, pred, pos_label=2)
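The "# AUC" placeholder and the commented-out roc_curve call (which expects local arrays rather than a Spark DataFrame) could instead be served by Spark's BinaryClassificationEvaluator. Below is a minimal sketch under that assumption, treating the raw ALS scores as ranking scores against the 0/1 ratings; if the predictions were rescaled to probabilities, the Brier score hinted at above would simply be the MSE already reported.

# Sketch (assumption, not in the original): AUC via Spark's BinaryClassificationEvaluator,
# using the raw ALS prediction as the score and the 0/1 rating as the label.
from pyspark.ml.evaluation import BinaryClassificationEvaluator

aucEvaluator = BinaryClassificationEvaluator(rawPredictionCol="prediction",
                                             labelCol="rating",
                                             metricName="areaUnderROC")
print('target AUC:', aucEvaluator.evaluate(targetPrediction.na.drop(subset=["prediction"])))
print('test AUC:', aucEvaluator.evaluate(predictions.na.drop(subset=["prediction"])))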
Example #3
                regParam=bestLambda,
                maxIter=bestNumIter,
                alpha=bestAlpha,
                userCol="steamid",
                itemCol="appid",
                ratingCol="rating").fit(train)
    predictions = model.transform(test)
    ones = predictions.where("rating=1")
    zeroes = predictions.where("rating=0")
    predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}

    # Score each subset with RMSE, MSE and MAE, appending one row per subset.
    for s, p in predictors.items():
        pdf = pd.concat(
            [pdf,
             pd.DataFrame([[
                 i, s,
                 evaluator.setParams(metricName="rmse").evaluate(p),
                 evaluator.setParams(metricName="mse").evaluate(p),
                 evaluator.setParams(metricName="mae").evaluate(p)
             ]])],
            ignore_index=True)
    count += 1
    print(round(((i + 1) / 10) * 100, 0), '%')  # progress indicator (assumes 10 iterations)
pdf.columns = ['iteration', 'type', 'rmse', 'mse', 'mae']
print(pdf)
print(pdf.groupby('type')[['rmse', 'mse', 'mae']].mean())
# brier score
# AUC

# setvalues = ['all', 'zeroes', 'ones']
#
# em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
# em.index.names = ["set values"]
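The commented-out setvalues/em lines above appear to aim at a per-subset metric table; below is a minimal sketch of that table, reusing the predictors dict and the evaluator from the code above (the intended layout is an assumption).

# Sketch (assumption): the per-subset metric table the commented-out lines hint at,
# reusing `predictors` and `evaluator` from the code above.
em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
em.index.name = "set values"
for s, p in predictors.items():
    em.loc[s] = [evaluator.setParams(metricName=m).evaluate(p)
                 for m in ('rmse', 'mse', 'mae')]
print(em)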