lambda p: Row(userId=int(p[0]), movieId=int(p[1]), rating=float(p[2])))
# NOTE(review): the lambda above is the tail of a .map(...) call that starts
# before this chunk — presumably parsing delimited "user::movie::rating"
# fields into Row objects; confirm against the preceding lines.
ratings = spark.createDataFrame(ratingsRDD)
# 80/20 train/test split. No seed is supplied, so the split (and hence the
# reported RMSE) is non-deterministic across runs.
(training, test) = ratings.randomSplit([0.8, 0.2])
# Build the recommendation model using ALS on the training data
# Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
als = ALS(rank=10, maxIter=5, regParam=0.01, implicitPrefs=True, alpha=40.0,
          userCol="userId", itemCol="movieId", ratingCol="rating",
          coldStartStrategy="drop")
# Echo the hyper-parameters actually set on the estimator for this run.
print(
    "\nALS training with implicitPrefs={}, rank={}, maxIter={}, regParam={}, alpha={}, seed={}\n"
    .format(als.getImplicitPrefs(), als.getRank(), als.getMaxIter(),
            als.getRegParam(), als.getAlpha(), als.getSeed()))
model = als.fit(training)
# Evaluate the model by computing the RMSE on the test data
# NOTE(review): with implicitPrefs=True the model predicts preference/confidence
# scores, not rating values, so RMSE against the raw "rating" column is not a
# meaningful accuracy metric — confirm whether implicit feedback was intended
# (alpha=40.0 suggests it was) or whether implicitPrefs should be False here.
predictions = model.transform(test)
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))
spark.stop()
# Evaluation/reporting tail: score the held-out predictions with RMSE and
# echo the ALS hyper-parameters used for this run, then force materialization
# of the three DataFrames via count() actions.
print("done with predictions cache")
# Optional CSV dump of the predictions, disabled for now:
# predictions.write.format("csv").save(str(time()))
# print("done with predictions csv save")
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")
print("done with evaluator define")
rmse = evaluator.evaluate(predictions)
print(f"Root-mean-square error = {rmse}")
print(f"Regularization parameter ={als.getRegParam()}")
print(f"Rank = {als.getRank()}")
print("done with rmse")
predictions.show()
# count() is a Spark action: the results are discarded, but calling it forces
# each DataFrame's lineage to execute.
allDataDF.count()
trainDataDF.count()
valDataDF.count()
print("done the end")