def test_default_read_write(self):
        """Round-trip a LogisticRegression through the default params writer and reader.

        Saves an estimator with explicitly set ``maxIter`` and ``threshold``
        via ``DefaultParamsWriter``, loads it back through
        ``DefaultParamsReadable.read()``, and checks that the uid and the
        extracted param map match.  The threshold is then changed and the same
        path is written again with ``overwrite()`` to check a second round trip.

        The scratch directory is removed when the test finishes, whether it
        passes or fails.
        """
        # Local import: only the cleanup in the ``finally`` clause needs it.
        import shutil

        temp_path = tempfile.mkdtemp()
        try:
            lr = LogisticRegression()
            lr.setMaxIter(50)
            lr.setThreshold(.75)
            writer = DefaultParamsWriter(lr)

            savePath = temp_path + "/lr"
            writer.save(savePath)

            reader = DefaultParamsReadable.read()
            lr2 = reader.load(savePath)

            self.assertEqual(lr.uid, lr2.uid)
            self.assertEqual(lr.extractParamMap(), lr2.extractParamMap())

            # test overwrite: savePath already exists from the first save
            lr.setThreshold(.8)
            writer.overwrite().save(savePath)

            reader = DefaultParamsReadable.read()
            lr3 = reader.load(savePath)

            self.assertEqual(lr.uid, lr3.uid)
            self.assertEqual(lr.extractParamMap(), lr3.extractParamMap())
        finally:
            # mkdtemp() leaves deletion to the caller.  Cleanup is best-effort
            # so a failure here never masks the real test outcome.
            shutil.rmtree(temp_path, ignore_errors=True)
    def test_default_read_write(self):
        """Round-trip a LogisticRegression through the default params writer and reader.

        Saves an estimator with explicitly set ``maxIter`` and ``threshold``
        via ``DefaultParamsWriter``, loads it back through
        ``DefaultParamsReadable.read()``, and checks that the uid and the
        extracted param map match.  The threshold is then changed and the same
        path is written again with ``overwrite()`` to check a second round trip.

        The scratch directory is removed when the test finishes, whether it
        passes or fails.
        """
        # Local import: only the cleanup in the ``finally`` clause needs it.
        import shutil

        temp_path = tempfile.mkdtemp()
        try:
            lr = LogisticRegression()
            lr.setMaxIter(50)
            lr.setThreshold(0.75)
            writer = DefaultParamsWriter(lr)

            savePath = temp_path + "/lr"
            writer.save(savePath)

            reader = DefaultParamsReadable.read()
            lr2 = reader.load(savePath)

            self.assertEqual(lr.uid, lr2.uid)
            self.assertEqual(lr.extractParamMap(), lr2.extractParamMap())

            # test overwrite: savePath already exists from the first save
            lr.setThreshold(0.8)
            writer.overwrite().save(savePath)

            reader = DefaultParamsReadable.read()
            lr3 = reader.load(savePath)

            self.assertEqual(lr.uid, lr3.uid)
            self.assertEqual(lr.extractParamMap(), lr3.extractParamMap())
        finally:
            # mkdtemp() leaves deletion to the caller.  Cleanup is best-effort
            # so a failure here never masks the real test outcome.
            shutil.rmtree(temp_path, ignore_errors=True)
Example #3
0
                                     ["label", "features"])

    # Create a LogisticRegression instance. This instance is an Estimator.
    lr = LogisticRegression(maxIter=10, regParam=0.01)
    # Print out the parameters, documentation, and any default values.
    print("LogisticRegression parameters:\n" + lr.explainParams() + "\n")

    # Learn a LogisticRegression model. This uses the parameters stored in lr.
    model1 = lr.fit(training)

    # Since model1 is a Model (i.e., a transformer produced by an Estimator),
    # we can view the parameters it used during fit().
    # This prints the parameter (name: value) pairs, where names are unique IDs for this
    # LogisticRegression instance.
    print("Model 1 was fit using parameters: ")
    print(lr.extractParamMap())

    # We may alternatively specify parameters using a Python dictionary as a paramMap
    paramMap = {lr.maxIter: 20}
    paramMap[
        lr.maxIter] = 30  # Specify 1 Param, overwriting the original maxIter.
    paramMap.update({
        lr.regParam: 0.1,
        lr.threshold: 0.55
    })  # Specify multiple Params.

    # You can combine paramMaps, which are python dictionaries.
    paramMap2 = {
        lr.probabilityCol: "myProbability"
    }  # Change output column name
    paramMapCombined = paramMap.copy()
    lr.predictionCol: 'predic'
}]

# Sanity check: paramGrid must hold as many param maps as `expected`.
# A bare comparison is evaluated and then discarded, so assert on it instead.
assert len(paramGrid) == len(expected), (
    "paramGrid has %d param maps, expected %d" % (len(paramGrid), len(expected)))

# evaluator handed to the cross-validator below
bce = BinaryClassificationEvaluator()

# the crossvalidator takes the pipeline, the grid, and the evaluator
# run on 2+ folds (2 are configured here)

cv = CrossValidator().setEstimator(pipeline).setEstimatorParamMaps(
    paramGrid).setEvaluator(bce).setNumFolds(2)

# fit the cross-validator on the training data
cvModel = cv.fit(training)

print("Parameters lr")
print(lr.extractParamMap())
# NOTE(review): the label says "cvmodel", but the maps printed are the ones
# set on `cv` above (i.e. paramGrid) -- confirm which was intended.
print("Parameters cvmodel")
print(cv.getEstimatorParamMaps())

# create the toy test documents

test = sqlContext.createDataFrame([(4, "tom cruise"), (5, "played baseball")],
                                  ["id", "text"])

# apply the fitted cross-validated model to the test documents
prediction = cvModel.transform(test)

# "predic" is presumably the prediction column name configured on lr
# (see lr.predictionCol in `expected`) -- TODO confirm.
selected = prediction.select("id", "text", "probability", "predic")
for row in selected.collect():
    print(row)