def test_default_read_write(self):
    """Round-trip a LogisticRegression through DefaultParamsWriter and a reader.

    The estimator is saved, loaded back, and compared by ``uid`` and by
    ``extractParamMap()``. A parameter is then changed and the same path is
    saved again via ``overwrite()`` to check that the reloaded instance
    reflects the updated parameters.
    """
    # Function-scope import so this block does not rely on a file-level
    # ``import shutil`` being present.
    import shutil

    temp_path = tempfile.mkdtemp()
    try:
        lr = LogisticRegression()
        lr.setMaxIter(50)
        lr.setThreshold(0.75)
        writer = DefaultParamsWriter(lr)

        savePath = temp_path + "/lr"
        writer.save(savePath)

        reader = DefaultParamsReadable.read()
        lr2 = reader.load(savePath)

        self.assertEqual(lr.uid, lr2.uid)
        self.assertEqual(lr.extractParamMap(), lr2.extractParamMap())

        # test overwrite
        lr.setThreshold(0.8)
        writer.overwrite().save(savePath)

        reader = DefaultParamsReadable.read()
        lr3 = reader.load(savePath)

        self.assertEqual(lr.uid, lr3.uid)
        self.assertEqual(lr.extractParamMap(), lr3.extractParamMap())
    finally:
        # mkdtemp() never deletes the directory it creates; remove it here so
        # the test leaves nothing behind whether it passes or fails.
        # Best-effort: a cleanup error must not mask the test outcome.
        shutil.rmtree(temp_path, ignore_errors=True)
def test_default_read_write(self):
    """Check that default params persistence survives a save/load round trip.

    Steps:
      1. Save a configured LogisticRegression with DefaultParamsWriter and
         load it back; ``uid`` and ``extractParamMap()`` must match.
      2. Change the threshold, save to the same path with ``overwrite()``,
         load again; ``uid`` and ``extractParamMap()`` must match the
         updated estimator.
    """
    # Imported locally so the block is self-contained even if the module
    # does not import shutil at file level.
    import shutil

    temp_path = tempfile.mkdtemp()
    try:
        lr = LogisticRegression()
        lr.setMaxIter(50)
        lr.setThreshold(0.75)
        writer = DefaultParamsWriter(lr)

        savePath = temp_path + "/lr"
        writer.save(savePath)

        reader = DefaultParamsReadable.read()
        lr2 = reader.load(savePath)

        self.assertEqual(lr.uid, lr2.uid)
        self.assertEqual(lr.extractParamMap(), lr2.extractParamMap())

        # test overwrite
        lr.setThreshold(0.8)
        writer.overwrite().save(savePath)

        reader = DefaultParamsReadable.read()
        lr3 = reader.load(savePath)

        self.assertEqual(lr.uid, lr3.uid)
        self.assertEqual(lr.extractParamMap(), lr3.extractParamMap())
    finally:
        # The directory from mkdtemp() is not removed automatically; delete
        # it on every exit path. Errors are ignored so that a failed cleanup
        # cannot hide an assertion failure raised above.
        shutil.rmtree(temp_path, ignore_errors=True)
["label", "features"]) # Create a LogisticRegression instance. This instance is an Estimator. lr = LogisticRegression(maxIter=10, regParam=0.01) # Print out the parameters, documentation, and any default values. print("LogisticRegression parameters:\n" + lr.explainParams() + "\n") # Learn a LogisticRegression model. This uses the parameters stored in lr. model1 = lr.fit(training) # Since model1 is a Model (i.e., a transformer produced by an Estimator), # we can view the parameters it used during fit(). # This prints the parameter (name: value) pairs, where names are unique IDs for this # LogisticRegression instance. print("Model 1 was fit using parameters: ") print(lr.extractParamMap()) # We may alternatively specify parameters using a Python dictionary as a paramMap paramMap = {lr.maxIter: 20} paramMap[ lr.maxIter] = 30 # Specify 1 Param, overwriting the original maxIter. paramMap.update({ lr.regParam: 0.1, lr.threshold: 0.55 }) # Specify multiple Params. # You can combine paramMaps, which are python dictionaries. paramMap2 = { lr.probabilityCol: "myProbability" } # Change output column name paramMapCombined = paramMap.copy()
lr.predictionCol: 'predic' }] len(paramGrid) == len(expected) bce = BinaryClassificationEvaluator() # the crossvalidator takes the pipeline, the grid, and the evaluator # run on 2+ folds cv = CrossValidator().setEstimator(pipeline).setEstimatorParamMaps( paramGrid).setEvaluator(bce).setNumFolds(2) cvModel = cv.fit(training) print("Parameters lr") print(lr.extractParamMap()) print("Parameters cvmodel") print(cv.getEstimatorParamMaps()) # create the toy test documents test = sqlContext.createDataFrame([(4, "tom cruise"), (5, "played baseball")], ["id", "text"]) prediction = cvModel.transform(test) selected = prediction.select("id", "text", "probability", "predic") for row in selected.collect(): print(row)