import pandas as pd
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS


def evalModel(self, X, numTrain):
    """Evaluate the tuned model over numTrain random train/test splits."""
    rows = []
    evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                    predictionCol="prediction")
    for i in range(numTrain):
        (train, test) = X.randomSplit([0.8, 0.2])
        # Refit ALS with the hyperparameters found during model selection.
        # coldStartStrategy="drop" discards NaN predictions for users/items
        # that fall only into the test split, so the metrics stay finite.
        model = ALS(implicitPrefs=True, rank=self.bestRank, maxIter=self.bestNumIter,
                    regParam=self.bestLambda, alpha=self.bestAlpha,
                    coldStartStrategy="drop",
                    userCol="steamid", itemCol="appid", ratingCol="rating").fit(train)
        predictions = model.transform(test)
        # Score the full test set and the positive/negative subsets separately.
        ones = predictions.where("rating = 1")
        zeroes = predictions.where("rating = 0")
        predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}
        for s, p in predictors.items():
            rows.append([i, s,
                         evaluator.setParams(metricName="rmse").evaluate(p),
                         evaluator.setParams(metricName="mse").evaluate(p),
                         evaluator.setParams(metricName="mae").evaluate(p)])
        print(round(((i + 1) / numTrain) * 100, 0), '%')
    pdf = pd.DataFrame(rows, columns=['iteration', 'type', 'rmse', 'mse', 'mae'])
    print(pdf)
    # Average each metric per subset across all iterations.
    print(pdf.groupby('type').mean())
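# A minimal usage sketch (not part of the original file). It assumes evalModel
# is a method of a recommender class whose bestRank/bestNumIter/bestLambda/
# bestAlpha fields were already set by model selection, and that `spark` is an
# active SparkSession. The class name `Recommender` and the data path below are
# hypothetical.
#
# from pyspark.sql import SparkSession
#
# spark = SparkSession.builder.appName("als-eval").getOrCreate()
# ratings = spark.read.parquet("ratings.parquet")  # steamid, appid, rating
# rec = Recommender()                  # hypothetical class holding tuned params
# rec.evalModel(ratings, numTrain=10)  # average metrics over 10 random splits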
                        # Record the new best configuration from the grid search.
                        bestNumIter = numIter
                        bestAlpha = alf
                    count += 1
                    # print('\r{0}%'.format(round((count / len(indexes)) * 100, 0)), end="", flush=True)

    print("The best model was trained on evalData with rank = %d, lambda = %.2f, "
          "alpha = %d, numIter = %d and RMSE %f."
          % (bestRank, bestLambda, bestAlpha, bestNumIter, bestValidationRmse))

    # brier score
    # AUC
    targetPrediction = bestModel.transform(target)
    print('target prediction', targetPrediction.collect())
    print('target RMSE:', evaluator.setParams(metricName="rmse").evaluate(targetPrediction))
    print('target MSE:', evaluator.setParams(metricName="mse").evaluate(targetPrediction))
    print('target MAE:', evaluator.setParams(metricName="mae").evaluate(targetPrediction))

    # Evaluate the best model on the held-out test set, overall and per subset.
    predictions = bestModel.transform(test)
    setvalues = ['all', 'zeroes', 'ones']
    em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
    em.index.names = ["set values"]
    ones = predictions.where("rating = 1")
    zeroes = predictions.where("rating = 0")
    predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}
    # fpr, tpr, thresholds = roc_curve(predictions, pred, pos_label=2)
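# A minimal sketch of the AUC computation that the "# AUC" and roc_curve
# comments above point toward; it is not part of the original code. It assumes
# the raw ALS score in the "prediction" column can be ranked against the binary
# "rating" label, and drops cold-start rows whose prediction is NaN.
from pyspark.ml.evaluation import BinaryClassificationEvaluator

scored = predictions.na.drop(subset=["prediction"])  # remove cold-start NaNs
auc_evaluator = BinaryClassificationEvaluator(rawPredictionCol="prediction",
                                              labelCol="rating",
                                              metricName="areaUnderROC")
print('test AUC:', auc_evaluator.evaluate(scored))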
    # Script-level variant of the evaluation loop in evalModel, with the bare
    # best-parameter names from the grid search instead of attributes on self.
                    regParam=bestLambda, maxIter=bestNumIter, alpha=bestAlpha,
                    userCol="steamid", itemCol="appid", ratingCol="rating").fit(train)
        predictions = model.transform(test)
        ones = predictions.where("rating = 1")
        zeroes = predictions.where("rating = 0")
        predictors = {'all': predictions, 'zeroes': zeroes, 'ones': ones}
        for s, p in predictors.items():
            pdf = pd.concat([pdf, pd.DataFrame([[
                i, s,
                evaluator.setParams(metricName="rmse").evaluate(p),
                evaluator.setParams(metricName="mse").evaluate(p),
                evaluator.setParams(metricName="mae").evaluate(p)]])])
        count += 1
        print(round((i / 10) * 100, 0), '%')  # progress; assumes a 10-iteration loop

    pdf.columns = ['iteration', 'type', 'rmse', 'mse', 'mae']
    print(pdf)
    print(pdf.groupby(by=['type'], axis=0).mean())

    # brier score
    # AUC
    # setvalues = ['all', 'zeroes', 'ones']
    #
    # em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
    # em.index.names = ["set values"]
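# A sketch completing the commented-out `em` block above: one row of
# RMSE/MSE/MAE per prediction subset, indexed by subset name. The loop body is
# an assumption; only the empty frame and its index name appear in the original.
em = pd.DataFrame(columns=['rmse', 'mse', 'mae'])
em.index.names = ["set values"]
for s, p in predictors.items():
    em.loc[s] = [evaluator.setParams(metricName="rmse").evaluate(p),
                 evaluator.setParams(metricName="mse").evaluate(p),
                 evaluator.setParams(metricName="mae").evaluate(p)]
print(em)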