Exemple #1
0
 def test_logistic_regression(self):
     """Check that a saved ``LogisticRegression`` estimator reloads consistently.

     Saves an estimator built with ``maxIter=1`` under a fresh temporary
     directory, loads it back, and asserts that the loaded instance's
     ``maxIter`` param is parented to the loaded instance's uid and that the
     default value of ``maxIter`` equals the original estimator's default.
     """
     lr = LogisticRegression(maxIter=1)
     path = tempfile.mkdtemp()
     # try/finally so the temporary directory is removed even when the
     # save/load round trip or one of the assertions fails.
     try:
         lr_path = path + "/logreg"
         lr.save(lr_path)
         lr2 = LogisticRegression.load(lr_path)
         self.assertEqual(lr2.uid, lr2.maxIter.parent,
                          "Loaded LogisticRegression instance uid (%s) "
                          "did not match Param's uid (%s)"
                          % (lr2.uid, lr2.maxIter.parent))
         self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                          "Loaded LogisticRegression instance default params did not match " +
                          "original defaults")
     finally:
         try:
             rmtree(path)
         except OSError:
             # Best-effort cleanup: a leftover temp dir must not fail the test.
             pass
Exemple #2
0
 def test_logistic_regression(self):
     """Check that a saved ``LogisticRegression`` estimator reloads consistently.

     Saves an estimator built with ``maxIter=1`` under a fresh temporary
     directory, loads it back, and asserts that the loaded instance's
     ``maxIter`` param is parented to the loaded instance's uid and that the
     default value of ``maxIter`` equals the original estimator's default.
     """
     lr = LogisticRegression(maxIter=1)
     path = tempfile.mkdtemp()
     # try/finally so the temporary directory is removed even when the
     # save/load round trip or one of the assertions fails.
     try:
         lr_path = path + "/logreg"
         lr.save(lr_path)
         lr2 = LogisticRegression.load(lr_path)
         self.assertEqual(lr2.uid, lr2.maxIter.parent,
                          "Loaded LogisticRegression instance uid (%s) "
                          "did not match Param's uid (%s)"
                          % (lr2.uid, lr2.maxIter.parent))
         self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
                          "Loaded LogisticRegression instance default params did not match " +
                          "original defaults")
     finally:
         try:
             rmtree(path)
         except OSError:
             # Best-effort cleanup: a leftover temp dir must not fail the test.
             pass
# Poll ./output until both saved-model directories ("lr" and "lr_model")
# show up; the directory names seen so far accumulate in modelFiles.
modelFiles = set()
while ("lr" not in modelFiles) or ("lr_model" not in modelFiles):
    # List the directory by path instead of chdir-ing into it: an error while
    # listing can then no longer leave the process stranded in ./output, which
    # would break the os.getcwd()-based paths built after this loop.
    if os.path.exists("output"):
        for entry in os.listdir("output"):
            if os.path.isdir(os.path.join("output", entry)):
                modelFiles.add(entry)
    if ("lr" not in modelFiles) or ("lr_model" not in modelFiles):
        print("Could'nt find Model, system is waiting...")
    # NOTE(review): this pause also runs on the pass that finds both
    # directories — confirm whether that is a deliberate grace period.
    sleep(5)

# Directory prefixes shared by every path below.
output_root = os.getcwd() + "/output"
metadata_root = output_root + "/_spark_metadata"

# Load the estimator and the fitted model from ./output.
lr_path, lrModel_path = output_root + "/lr", output_root + "/lr_model"
lr = LogisticRegression.load(lr_path)
lrModel = LogisticRegressionModel.load(lrModel_path)
#shutil.copy2(os.getcwd()+'/randomsampled.csv', os.getcwd()+'/output')

# Write a copy of both under ./output/_spark_metadata, replacing any
# existing copy there.
lr.write().overwrite().save(metadata_root + "/lr")
lrModel.write().overwrite().save(metadata_root + "/lr_model")

# Load both again from their original ./output locations; lr_path and
# lrModel_path still point there.
lr = LogisticRegression.load(lr_path)
lrModel = LogisticRegressionModel.load(lrModel_path)

# Timestamp taken just before test.csv is read.
startTime = time.time()
with open('test.csv', 'r', encoding="utf-8") as predictCSV:
    for line in predictCSV:
        # Parse the line the loop already read. Calling predictCSV.readline()
        # here as well consumed a second line per iteration, so every other
        # row of the file was silently dropped.
        # NOTE(review): if test.csv starts with a header row, skip it
        # explicitly before the loop (e.g. next(predictCSV, None)).
        record = line.replace("\n", "").split(",")
Exemple #4
0
    Row(label=0.0, weight=2.0, features=Vectors.dense(1.0, 2.0)),
    Row(label=1.0, weight=3.0, features=Vectors.dense(2.0, 1.0)),
    Row(label=0.0, weight=4.0, features=Vectors.dense(3.0, 3.0))
]).toDF()

# Fit a logistic regression on bdf with regParam=0.01, taking per-row weights
# from the "weight" column.
blor = LogisticRegression(regParam=0.01, weightCol="weight")
blorModel = blor.fit(bdf)
# The bare expressions in this script are REPL-style inspections: as script
# statements their values are evaluated and then discarded.
blorModel.coefficients
blorModel.intercept

# Run the fitted model on a single sparse feature vector.
test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
blorModel.transform(test1).head().prediction

save_path = "C:\\PySpark\\spark_ml\\saved_models\\logistic_regression_example_1\\"
estimator_path = save_path + "lr"
# Save the estimator. write().overwrite() replaces a copy left by an earlier
# run; a plain save() fails when the target path already exists, so the
# script could only be run once per save location.
blor.write().overwrite().save(estimator_path)
lr2 = LogisticRegression.load(estimator_path)
lr2.getRegParam()

# Save the fitted model, again replacing any earlier copy.
model_path = save_path + "lr_model"
blorModel.write().overwrite().save(model_path)

from pyspark.ml.classification import LogisticRegressionModel
model2 = LogisticRegressionModel.load(model_path)
# Compare the reloaded model with the in-memory one.
print(blorModel.coefficients[0] == model2.coefficients[0])
print(blorModel.intercept == model2.intercept)
print(model2, blorModel)

spark.stop()