Example #1
0
    def test_copy(self):
        """Copies of a TrainValidationSplit and of its fitted model match the originals.

        Fits a TrainValidationSplit over a three-value ``inducedError`` grid,
        copies both the unfitted splitter and the fitted model, then compares
        the wrapped estimator uid, the best model uid and the validation
        metrics between each original and its copy.
        """
        rows = [(10, 10.0), (50, 50.0), (100, 100.0), (500, 500.0)] * 10
        dataset = self.spark.createDataFrame(rows, ["feature", "label"])

        iee = InducedErrorEstimator()
        evaluator = RegressionEvaluator(metricName="r2")
        grid = ParamGridBuilder().addGrid(iee.inducedError, [100.0, 0.0, 10000.0]).build()

        splitter = TrainValidationSplit(
            estimator=iee, estimatorParamMaps=grid, evaluator=evaluator
        )
        fitted = splitter.fit(dataset)
        splitter_copy = splitter.copy()
        fitted_copy = fitted.copy()

        # The copied splitter must wrap an estimator with the same uid.
        self.assertEqual(
            splitter.getEstimator().uid,
            splitter_copy.getEstimator().uid,
            "Copied TrainValidationSplit has the same uid of Estimator",
        )

        self.assertEqual(fitted.bestModel.uid, fitted_copy.bestModel.uid)

        original_metrics = fitted.validationMetrics
        copied_metrics = fitted_copy.validationMetrics
        self.assertEqual(
            len(original_metrics),
            len(copied_metrics),
            "Copied validationMetrics has the same size of the original",
        )
        # Lengths were asserted equal above, so zip() visits every metric pair.
        for expected, actual in zip(original_metrics, copied_metrics):
            self.assertEqual(expected, actual)
Example #2
0
    def test_copy(self):
        """Copies of a TrainValidationSplit and its model match and are independent.

        Fits a TrainValidationSplit (with ``collectSubModels=True``) over a
        three-value ``inducedError`` grid, copies both the unfitted splitter
        and the fitted model, and then checks that:

        * selected getters return the same values on original and copy,
        * the wrapped estimator uid and the best model uid are preserved,
        * the validation metrics are equal element by element,
        * changing the original model's metrics or sub-models afterwards
          is not visible through the copy.
        """
        rows = [(10, 10.0), (50, 50.0), (100, 100.0), (500, 500.0)] * 10
        dataset = self.spark.createDataFrame(rows, ["feature", "label"])

        iee = InducedErrorEstimator()
        evaluator = RegressionEvaluator(metricName="r2")

        grid = ParamGridBuilder() \
            .addGrid(iee.inducedError, [100.0, 0.0, 10000.0]) \
            .build()
        splitter = TrainValidationSplit(
            estimator=iee,
            estimatorParamMaps=grid,
            evaluator=evaluator,
            collectSubModels=True,
        )
        fitted = splitter.fit(dataset)
        splitter_copy = splitter.copy()
        fitted_copy = fitted.copy()

        # Getters compared between the unfitted splitter and its copy.
        splitter_getters = (
            "getCollectSubModels",
            "getParallelism",
            "getSeed",
            "getTrainRatio",
        )
        for getter_name in splitter_getters:
            self.assertEqual(
                getattr(splitter, getter_name)(),
                getattr(splitter_copy, getter_name)(),
            )

        # Getters compared between the fitted model and its copy.
        for getter_name in ("getSeed", "getTrainRatio"):
            self.assertEqual(
                getattr(fitted, getter_name)(),
                getattr(fitted_copy, getter_name)(),
            )

        self.assertEqual(splitter.getEstimator().uid,
                         splitter_copy.getEstimator().uid,
                         "Copied TrainValidationSplit has the same uid of Estimator")

        self.assertEqual(fitted.bestModel.uid, fitted_copy.bestModel.uid)
        self.assertEqual(len(fitted.validationMetrics),
                         len(fitted_copy.validationMetrics),
                         "Copied validationMetrics has the same size of the original")
        # Lengths were asserted equal above, so zip() visits every metric pair.
        for expected, actual in zip(fitted.validationMetrics,
                                    fitted_copy.validationMetrics):
            self.assertEqual(expected, actual)

        # Overwrite the first metric on the original; the copy must not see it.
        fitted.validationMetrics[0] = "foo"
        self.assertNotEqual(fitted_copy.validationMetrics[0],
                            "foo",
                            "Changing the original validationMetrics should not affect the copied model")

        # Patch a getter on the original's first sub-model; the copy's
        # sub-model must still answer with its own value.
        first_sub_model = fitted.subModels[0]
        first_sub_model.getInducedError = lambda: "foo"
        self.assertNotEqual(fitted_copy.subModels[0].getInducedError(),
                            "foo",
                            "Changing the original subModels should not affect the copied model")
Example #3
0
    def test_copy(self):
        """Verify that copy() preserves a TrainValidationSplit and its fitted model.

        A TrainValidationSplit is fitted over a three-value ``inducedError``
        grid. Both the splitter and the resulting model are copied, and the
        estimator uid, best model uid and validation metrics of each copy are
        compared with the original.
        """
        sample_rows = [(10, 10.0), (50, 50.0), (100, 100.0), (500, 500.0)] * 10
        dataset = self.spark.createDataFrame(sample_rows, ["feature", "label"])

        iee = InducedErrorEstimator()
        evaluator = RegressionEvaluator(metricName="r2")

        grid = (
            ParamGridBuilder()
            .addGrid(iee.inducedError, [100.0, 0.0, 10000.0])
            .build()
        )
        tvs = TrainValidationSplit(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
        model = tvs.fit(dataset)
        tvs_clone = tvs.copy()
        model_clone = model.copy()

        # Estimator uid must survive copying the unfitted splitter.
        source_uid = tvs.getEstimator().uid
        clone_uid = tvs_clone.getEstimator().uid
        self.assertEqual(source_uid, clone_uid,
                         "Copied TrainValidationSplit has the same uid of Estimator")

        self.assertEqual(model.bestModel.uid, model_clone.bestModel.uid)

        metric_count = len(model.validationMetrics)
        clone_metric_count = len(model_clone.validationMetrics)
        self.assertEqual(metric_count, clone_metric_count,
                         "Copied validationMetrics has the same size of the original")
        # Compare metrics position by position against the cloned model.
        for position, metric in enumerate(model.validationMetrics):
            self.assertEqual(metric, model_clone.validationMetrics[position])