예제 #1
0
    def test_execute_should_save_stage(self):
        """A ``save`` step should persist the stage at the configured path.

        Wraps a ``Tokenizer`` stage in a ``Step`` with the ``save`` action,
        executes it, and asserts that the output path exists afterwards.
        The output is removed again when the test finishes.
        """
        import shutil  # local import: only this test's cleanup needs it

        output = "resources/stages/Tokenizer"
        # Remove the written artifact after the test (pass or fail) so that
        # repeated runs do not leave files behind in the resources tree.
        # NOTE(review): assumes the saved stage is a directory tree and that
        # no other test relies on this path as a fixture -- confirm.
        self.addCleanup(shutil.rmtree, output, ignore_errors=True)

        params = {"path": output, "overwrite": "true"}

        stage = feature.Tokenizer(inputCol="text", outputCol="words")
        step = Step('save', params, stage)

        step.execute()
        self.assertTrue(os.path.exists(output))
예제 #2
0
    def test_execute_should_load_stage(self):
        """A ``load`` step should return a ``LogisticRegression`` instance.

        Builds a ``Step`` with the ``load`` action pointing at the stored
        stage under ``resources/stages/LogisticRegression`` and checks the
        type of the object returned by ``execute()``.
        """
        params = {"path": "resources/stages/LogisticRegression"}

        stage = classification.LogisticRegression()
        step = Step('load', params, stage)

        loaded = step.execute()

        cls = classification.LogisticRegression
        # Name the expected class in the failure message; ``cls.__class__``
        # would print the metaclass of ``cls`` instead of the class itself.
        self.assertIsInstance(
            loaded,
            cls,
            msg=f"Loaded stage {loaded} is not instance of {cls.__name__}")
예제 #3
0
    def test_execute_should_fit_stage(self):
        """A ``fit`` step should return a ``Model`` instance.

        Creates a four-row DataFrame with ``id``, ``features`` and ``label``
        columns, passes it as the ``dataset`` parameter of a ``fit`` step
        wrapping ``LogisticRegression``, and checks the type of the result
        returned by ``execute()``.
        """
        cls = Model

        dataset = self.spark.createDataFrame([(0, Vectors.dense([1, 2]), 1),
                                              (1, Vectors.dense([1, 3]), 1),
                                              (2, Vectors.dense([2, 3]), 0),
                                              (3, Vectors.dense([4, 5]), 1)],
                                             ["id", "features", "label"])

        params = {"dataset": dataset}

        stage = classification.LogisticRegression()
        step = Step('fit', params, stage)

        model = step.execute()

        # Name the expected class in the failure message; ``cls.__class__``
        # would print the metaclass of ``cls`` instead of the class itself.
        self.assertIsInstance(
            model,
            cls,
            msg=f"Result {model} is not instance of {cls.__name__}")