def test_output_columns(self):
        """Check that the H3 hash feature reports one column per resolution."""
        # arrange: column names the feature is expected to report
        expected = [
            "lat_lng__h3_hash__6",
            "lat_lng__h3_hash__7",
            "lat_lng__h3_hash__8",
            "lat_lng__h3_hash__9",
            "lat_lng__h3_hash__10",
            "lat_lng__h3_hash__11",
            "lat_lng__h3_hash__12",
        ]
        hash_transform = H3HashTransform(
            h3_resolutions=[6, 7, 8, 9, 10, 11, 12],
            lat_column="lat",
            lng_column="lng",
        )
        feature = Feature(
            name="new_feature",
            description="unit test",
            dtype=DataType.STRING,
            transformation=hash_transform,
        )

        # act
        actual = feature.get_output_columns()

        # assert: ordering is not part of the check, so compare sorted copies
        assert sorted(actual) == sorted(expected)
Ejemplo n.º 2
0
    def test_feature_get_output_columns_without_transformations(self):
        """Check that a feature built without a transformation reports its name."""
        plain_feature = Feature(
            name="feature",
            from_column="origin",
            description="unit test",
            dtype=DataType.BIGINT,
        )

        reported_columns = plain_feature.get_output_columns()

        # The only expected output column is the feature's own name.
        assert reported_columns == [plain_feature.name]
    def test__get_features_columns(self):
        """Check that the columns of several features are gathered in order."""
        # arrange: each feature name is paired with the columns its stubbed
        # get_output_columns() will return
        stubbed_columns = {
            "feature1": ["col_a", "col_b"],
            "feature2": ["col_c"],
            "feature3": ["col_d"],
        }
        features = []
        for feature_name, columns in stubbed_columns.items():
            feature = Feature(feature_name, "description", DataType.FLOAT)
            feature.get_output_columns = Mock(return_value=columns)
            features.append(feature)

        expected_columns = ["col_a", "col_b", "col_c", "col_d"]

        # act
        actual_columns = FeatureSet._get_features_columns(*features)

        # assert
        assert expected_columns == actual_columns
Ejemplo n.º 4
0
    def test_output_columns(self):
        """Check that a SQL-expression feature reports exactly its own name."""
        test_feature = Feature(
            name="feature1_over_feature2",
            description="unit test",
            dtype=DataType.FLOAT,
            transformation=SQLExpressionTransform(expression="feature1/feature2"),
        )

        df_columns = test_feature.get_output_columns()

        # Compare the whole list. A zip()-based pairwise check stops at the
        # shorter input, so it would pass for an empty or truncated result.
        assert list(df_columns) == ["feature1_over_feature2"]
    def test_output_columns(self, feature_set_dataframe):
        """Check that a custom-transform feature reports a list with its name."""
        division = CustomTransform(
            transformer=divide, column1="feature1", column2="feature2",
        )
        feature = Feature(
            name="feature",
            description="unit test",
            dtype=DataType.BIGINT,
            transformation=division,
        )

        reported_columns = feature.get_output_columns()

        # Both the container type and the contents are checked.
        assert isinstance(reported_columns, list)
        assert reported_columns == ["feature"]
Ejemplo n.º 6
0
    def test_feature_get_output_columns_with_transformations(
            self, feature_set_dataframe):
        """Check that a feature reports its transformation's output columns."""
        # Stub transformation whose output columns mirror the fixture dataframe.
        transformation_stub = Mock()
        transformation_stub.output_columns = feature_set_dataframe.columns

        feature = Feature(
            name="feature",
            from_column="origin",
            description="unit test",
            transformation=transformation_stub,
            dtype=DataType.BIGINT,
        )

        reported_columns = feature.get_output_columns()

        assert reported_columns == feature_set_dataframe.columns
    def test_output_columns(self):
        """Check that an aggregated feature reports one column per function."""
        test_feature = Feature(
            name="feature1",
            description="unit test",
            transformation=AggregatedTransform(functions=[
                Function(functions.avg, DataType.DOUBLE),
                Function(functions.stddev_pop, DataType.DOUBLE),
            ]),
        )

        df_columns = test_feature.get_output_columns()

        # Compare the whole list. A zip()-based pairwise check stops at the
        # shorter input, so missing or extra columns would go unnoticed.
        assert list(df_columns) == ["feature1__avg", "feature1__stddev_pop"]
Ejemplo n.º 8
0
    def test_output_columns(self):
        """Check that a windowed Spark-function feature reports one column per window."""
        windowed_transform = SparkFunctionTransform(
            functions=[Function(functions.avg, DataType.DOUBLE)],
        ).with_window(
            partition_by="id",
            mode="fixed_windows",
            window_definition=["7 days", "2 weeks"],
        )
        test_feature = Feature(
            name="feature1",
            description="unit test",
            transformation=windowed_transform,
        )

        df_columns = test_feature.get_output_columns()

        # Compare the whole list. A zip()-based pairwise check stops at the
        # shorter input, so missing or extra columns would go unnoticed.
        assert list(df_columns) == [
            "feature1__avg_over_7_days_fixed_windows",
            "feature1__avg_over_2_weeks_fixed_windows",
        ]