def test_logistic_regression(self):
    """Round-trip a LogisticRegression estimator through save/load.

    Persists an (unfit) estimator to a temp directory, reloads it, and
    checks that the reloaded instance owns its Params (each Param's
    parent equals the instance uid) and that default param values
    survived the round trip.
    """
    estimator = LogisticRegression(maxIter=1)
    tmp_dir = tempfile.mkdtemp()
    save_path = tmp_dir + "/logreg"
    estimator.save(save_path)
    loaded = LogisticRegression.load(save_path)
    # The reloaded instance must report its own uid as each Param's parent.
    self.assertEqual(
        loaded.uid,
        loaded.maxIter.parent,
        "Loaded LogisticRegression instance uid (%s) "
        "did not match Param's uid (%s)"
        % (loaded.uid, loaded.maxIter.parent),
    )
    # Defaults are keyed per-instance Param objects; compare the values.
    self.assertEqual(
        estimator._defaultParamMap[estimator.maxIter],
        loaded._defaultParamMap[loaded.maxIter],
        "Loaded LogisticRegression instance default params did not match "
        + "original defaults",
    )
    # Best-effort cleanup of the temp directory.
    try:
        rmtree(tmp_dir)
    except OSError:
        pass
def test_logistic_regression(self):
    """Save a LogisticRegression estimator, reload it, and verify uid
    ownership and default params survive the round trip.

    NOTE(review): this is a byte-identical duplicate of an earlier
    test_logistic_regression in this file; at class-creation time the
    later definition silently shadows the earlier one, so only one copy
    ever runs. One of the two should be removed.
    """
    lr = LogisticRegression(maxIter=1)
    # Persist the (unfit) estimator to a fresh temp directory.
    path = tempfile.mkdtemp()
    lr_path = path + "/logreg"
    lr.save(lr_path)
    lr2 = LogisticRegression.load(lr_path)
    # Each Param of the reloaded instance must have the instance's uid
    # as its parent.
    self.assertEqual(lr2.uid, lr2.maxIter.parent,
                     "Loaded LogisticRegression instance uid (%s) "
                     "did not match Param's uid (%s)"
                     % (lr2.uid, lr2.maxIter.parent))
    # Default param values must survive the save/load round trip.
    self.assertEqual(lr._defaultParamMap[lr.maxIter],
                     lr2._defaultParamMap[lr2.maxIter],
                     "Loaded LogisticRegression instance default params did not match " +
                     "original defaults")
    # Best-effort cleanup; ignore removal races/permission errors.
    try:
        rmtree(path)
    except OSError:
        pass
# Poll until an external process (presumably a Spark job writing to
# ./output) has produced both the saved estimator ("lr") and the fitted
# model ("lr_model") directories, then load them.
modelFiles = set()
while ("lr" not in modelFiles) or ("lr_model" not in modelFiles):
    if os.path.exists("output"):
        # Collect the names of all subdirectories of ./output.
        tmp = os.getcwd()
        os.chdir("output")
        for i in os.listdir():
            if os.path.isdir(i):
                modelFiles.add(i)
        os.chdir(tmp)
    if ("lr" not in modelFiles) or ("lr_model" not in modelFiles):
        # NOTE(review): typo in user-facing message — should be "Couldn't".
        print("Could'nt find Model, system is waiting...")
        sleep(5)
lr_path = os.getcwd() + "/output/lr"
lrModel_path = os.getcwd() + "/output/lr_model"
lr = LogisticRegression.load(lr_path)
lrModel = LogisticRegressionModel.load(lrModel_path)
#shutil.copy2(os.getcwd()+'/randomsampled.csv', os.getcwd()+'/output')
# Re-save copies of the estimator and model under output/_spark_metadata
# (overwriting any previous copies) ...
lr_path = os.getcwd() + "/output/_spark_metadata/lr"
lrModel_path = os.getcwd() + "/output/_spark_metadata/lr_model"
lr.write().overwrite().save(lr_path)
lrModel.write().overwrite().save(lrModel_path)
# ... then reload the originals from output/ for prediction.
lr_path = os.getcwd() + "/output/lr"
lrModel_path = os.getcwd() + "/output/lr_model"
lr = LogisticRegression.load(lr_path)
lrModel = LogisticRegressionModel.load(lrModel_path)
startTime = time.time()
with open('test.csv', 'r', encoding="utf-8") as predictCSV:
    for line in predictCSV:
        # NOTE(review): iterating the file AND calling readline() inside the
        # loop consumes two lines per pass — `line` is discarded and only
        # every other record is processed. Likely intended:
        # record = line.replace("\n", "").split(",")  — TODO confirm against
        # the (unseen) continuation of this loop body.
        record = predictCSV.readline().replace("\n", "").split(",")
# NOTE(review): this chunk begins mid-expression — the opening of this
# list/DataFrame construction (presumably `bdf = sc.parallelize([`) lies
# outside the visible source; the Rows below are weighted training data.
Row(label=0.0, weight=2.0, features=Vectors.dense(1.0, 2.0)),
Row(label=1.0, weight=3.0, features=Vectors.dense(2.0, 1.0)),
Row(label=0.0, weight=4.0, features=Vectors.dense(3.0, 3.0))
]).toDF()
# Fit a binomial logistic regression using per-row example weights.
blor = LogisticRegression(regParam=0.01, weightCol="weight")
blorModel = blor.fit(bdf)
blorModel.coefficients
blorModel.intercept
# Sanity-check a prediction on a single sparse-vector row.
test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
blorModel.transform(test1).head().prediction
# NOTE(review): hard-coded absolute Windows path — not portable; consider
# tempfile/pathlib or a configurable output directory.
save_path = "C:\\PySpark\\spark_ml\\saved_models\\logistic_regression_example_1\\"
estimator_path = save_path + "lr"
# Save the estimator
blor.save(estimator_path)
lr2 = LogisticRegression.load(estimator_path)
lr2.getRegParam()
#save the model
model_path = save_path + "lr_model"
blorModel.save(model_path)
# NOTE(review): mid-script import; conventionally belongs at the top of
# the file with the other pyspark imports.
from pyspark.ml.classification import LogisticRegressionModel
model2 = LogisticRegressionModel.load(model_path)
# Verify the reloaded model matches the in-memory one.
print(blorModel.coefficients[0] == model2.coefficients[0])
print(blorModel.intercept == model2.intercept)
print(model2, blorModel)
spark.stop()