예제 #1
0
def test_geodetic_parse():
    """Check that ``geodetic_parse`` splits a geodetic string into columns.

    Two scenarios are run against ``lonlat_df`` (defined outside this
    block): parsing with an ``altitude`` target column, and parsing into
    latitude/longitude only. In both cases the parsed columns must equal
    the reference columns already present in ``lonlat_df``.
    """
    # parse geodetic string with altitude into lon/lat/altitude
    post_df = geodetic_parse(
        df=lonlat_df[["city", "geodetic"]],
        geodetic="geodetic",
        latitude="latitude",
        longitude="longitude",
        altitude="altitude",
    )
    assert sorted(post_df.columns.tolist()) == sorted(
        ["city", "geodetic", "latitude", "longitude", "altitude"])
    assert series_to_list(post_df["longitude"]) == series_to_list(
        lonlat_df["longitude"])
    assert series_to_list(post_df["latitude"]) == series_to_list(
        lonlat_df["latitude"])
    assert series_to_list(post_df["altitude"]) == series_to_list(
        lonlat_df["altitude"])

    # parse geodetic string into lon/lat
    post_df = geodetic_parse(
        df=lonlat_df[["city", "geodetic"]],
        geodetic="geodetic",
        latitude="latitude",
        longitude="longitude",
    )
    assert sorted(post_df.columns.tolist()) == sorted(
        ["city", "geodetic", "latitude", "longitude"])
    assert series_to_list(post_df["longitude"]) == series_to_list(
        lonlat_df["longitude"])
    # Compare with `==`: the previous `assert a, b` form used the expected
    # list as the failure message, so the check passed for any non-empty
    # result and never verified the latitude values.
    assert series_to_list(post_df["latitude"]) == series_to_list(
        lonlat_df["latitude"])
예제 #2
0
def test_diff():
    """Exercise ``diff`` in overwrite, alias, look-ahead and column-wise modes.

    Also checks that referencing an unknown column raises
    ``QueryObjectValidationError``.
    """
    # source column replaced by its own difference
    overwritten = diff(df=timeseries_df, columns={"y": "y"})
    overwritten_columns = overwritten.columns.tolist()
    assert overwritten_columns == ["label", "y"]
    assert series_to_list(overwritten["y"]) == [None, 1.0, 1.0, 1.0]

    # difference written to a new column, source left untouched
    aliased = diff(df=timeseries_df, columns={"y": "y1"})
    aliased_columns = aliased.columns.tolist()
    assert aliased_columns == ["label", "y", "y1"]
    assert series_to_list(aliased["y"]) == [1.0, 2.0, 3.0, 4.0]
    assert series_to_list(aliased["y1"]) == [None, 1.0, 1.0, 1.0]

    # negative period: the trailing row has nothing to compare against
    look_ahead = diff(df=timeseries_df, columns={"y": "y1"}, periods=-1)
    assert series_to_list(look_ahead["y1"]) == [-1.0, -1.0, -1.0, None]

    # unknown column name must be rejected
    with pytest.raises(QueryObjectValidationError):
        diff(df=timeseries_df, columns={"abc": "abc"})

    # difference taken across columns (axis=1)
    across = diff(df=timeseries_df2, columns={"y": "y", "z": "z"}, axis=1)
    across_columns = across.columns.tolist()
    assert across_columns == ["label", "y", "z"]
    assert series_to_list(across["z"]) == [0.0, 2.0, 8.0, 6.0]
예제 #3
0
def test_aggregate():
    """Check ``aggregate`` with one sum and two percentile aggregates.

    The frame is grouped by ``constant`` and only the first row of each
    aggregated column is inspected.
    """
    aggregates = {
        "asc sum": {"column": "asc_idx", "operator": "sum"},
        "asc q2": {
            "column": "asc_idx",
            "operator": "percentile",
            "options": {"q": 75},
        },
        "desc q1": {
            "column": "desc_idx",
            "operator": "percentile",
            "options": {"q": 25},
        },
    }
    result = aggregate(
        df=categories_df,
        groupby=["constant"],
        aggregates=aggregates,
    )

    # groupby column comes first, then the aggregates in declaration order
    assert result.columns.tolist() == ["constant", "asc sum", "asc q2", "desc q1"]

    expected_first_values = (
        ("asc sum", 5050),
        ("asc q2", 75),
        ("desc q1", 25),
    )
    for column, expected in expected_first_values:
        assert series_to_list(result[column])[0] == expected
예제 #4
0
def test_geohash_encode():
    """Check that ``geohash_encode`` reproduces the reference geohash column."""
    # feed only the coordinate columns; the geohash column must be added
    coordinates = lonlat_df[["city", "latitude", "longitude"]]
    encoded = geohash_encode(
        df=coordinates,
        latitude="latitude",
        longitude="longitude",
        geohash="geohash",
    )

    expected_columns = ["city", "geohash", "latitude", "longitude"]
    assert sorted(encoded.columns.tolist()) == sorted(expected_columns)

    actual_hashes = series_to_list(encoded["geohash"])
    reference_hashes = series_to_list(lonlat_df["geohash"])
    assert actual_hashes == reference_hashes
예제 #5
0
def test_geohash_decode():
    """Check that ``geohash_decode`` recovers the reference coordinates.

    Decoded values are compared with the reference columns after rounding
    both sides to 6 places via ``round_floats``.
    """
    # feed only the geohash column; lat/lon columns must be added
    hashed = lonlat_df[["city", "geohash"]]
    decoded = geohash_decode(
        df=hashed,
        geohash="geohash",
        latitude="latitude",
        longitude="longitude",
    )

    expected_columns = ["city", "geohash", "latitude", "longitude"]
    assert sorted(decoded.columns.tolist()) == sorted(expected_columns)

    for axis in ("longitude", "latitude"):
        actual = round_floats(series_to_list(decoded[axis]), 6)
        reference = round_floats(series_to_list(lonlat_df[axis]), 6)
        assert actual == reference
예제 #6
0
def test_compare():
    """Check ``compare`` for the difference, percentage and ratio types.

    Every call compares column ``y`` against column ``z`` of
    ``timeseries_df2``; one call additionally drops the original columns.
    """

    def run(compare_type, **extra):
        # Fresh list literals on every call, exactly as separate calls would pass.
        return compare(
            df=timeseries_df2,
            source_columns=["y"],
            compare_columns=["z"],
            compare_type=compare_type,
            **extra,
        )

    # `difference` comparison
    difference_df = run("difference")
    assert difference_df.columns.tolist() == [
        "label", "y", "z", "difference__y__z"]
    difference_values = series_to_list(difference_df["difference__y__z"])
    assert difference_values == [0.0, -2.0, -8.0, -6.0]

    # drop original columns
    dropped_df = run("difference", drop_original_columns=True)
    assert dropped_df.columns.tolist() == ["label", "difference__y__z"]

    # `percentage` comparison
    percentage_df = run("percentage")
    assert percentage_df.columns.tolist() == [
        "label", "y", "z", "percentage__y__z"]
    percentage_values = series_to_list(percentage_df["percentage__y__z"])
    assert percentage_values == [0.0, -0.5, -0.8, -0.75]

    # `ratio` comparison
    ratio_df = run("ratio")
    assert ratio_df.columns.tolist() == ["label", "y", "z", "ratio__y__z"]
    ratio_values = series_to_list(ratio_df["ratio__y__z"])
    assert ratio_values == [1.0, 0.5, 0.2, 0.25]
예제 #7
0
def test_cum():
    """Check ``pp.cum`` for the sum, prod and min operators.

    Also checks that an unknown operator raises
    ``InvalidPostProcessingError``.
    """
    # cumulative sum written to a new column
    summed = pp.cum(df=timeseries_df, columns={"y": "y2"}, operator="sum")
    assert summed.columns.tolist() == ["label", "y", "y2"]
    expected_by_column = (
        ("label", ["x", "y", "z", "q"]),
        ("y", [1.0, 2.0, 3.0, 4.0]),
        ("y2", [1.0, 3.0, 6.0, 10.0]),
    )
    for column, expected in expected_by_column:
        assert series_to_list(summed[column]) == expected

    # cumulative product, then cumulative minimum, each overwriting `y`
    overwrite_cases = (
        ("prod", [1.0, 2.0, 6.0, 24.0]),
        ("min", [1.0, 1.0, 1.0, 1.0]),
    )
    for operator, expected in overwrite_cases:
        overwritten = pp.cum(
            df=timeseries_df,
            columns={"y": "y"},
            operator=operator,
        )
        assert overwritten.columns.tolist() == ["label", "y"]
        assert series_to_list(overwritten["y"]) == expected

    # unsupported operator name must be rejected
    with pytest.raises(InvalidPostProcessingError):
        pp.cum(df=timeseries_df, columns={"y": "y"}, operator="abc")
예제 #8
0
def test_rolling():
    """Check ``pp.rolling`` for sum, mean, count and quantile windows.

    Also checks that an unknown rolling type and invalid rolling type
    options each raise ``InvalidPostProcessingError``.
    """
    # rolling sum over a window of 2, overwriting `y`
    summed = pp.rolling(
        df=timeseries_df,
        columns={"y": "y"},
        rolling_type="sum",
        window=2,
        min_periods=0,
    )
    assert summed.columns.tolist() == ["label", "y"]
    assert series_to_list(summed["y"]) == [1.0, 3.0, 5.0, 7.0]

    # rolling mean written to an aliased column
    averaged = pp.rolling(
        df=timeseries_df,
        rolling_type="mean",
        columns={"y": "y_mean"},
        window=10,
        min_periods=0,
    )
    assert averaged.columns.tolist() == ["label", "y", "y_mean"]
    assert series_to_list(averaged["y_mean"]) == [1.0, 1.5, 2.0, 2.5]

    # rolling count, overwriting `y`
    counted = pp.rolling(
        df=timeseries_df,
        rolling_type="count",
        columns={"y": "y"},
        window=10,
        min_periods=0,
    )
    assert counted.columns.tolist() == ["label", "y"]
    assert series_to_list(counted["y"]) == [1.0, 2.0, 3.0, 4.0]

    # rolling 0.25 quantile written to `q1`
    quartile = pp.rolling(
        df=timeseries_df,
        columns={"y": "q1"},
        rolling_type="quantile",
        rolling_type_options={"quantile": 0.25},
        window=10,
        min_periods=0,
    )
    assert quartile.columns.tolist() == ["label", "y", "q1"]
    assert series_to_list(quartile["q1"]) == [1.0, 1.25, 1.5, 1.75]

    # unknown rolling type must be rejected
    with pytest.raises(InvalidPostProcessingError):
        pp.rolling(
            df=timeseries_df,
            columns={"y": "y"},
            rolling_type="abc",
            window=2,
        )

    # options that do not fit the rolling type must be rejected
    with pytest.raises(InvalidPostProcessingError):
        pp.rolling(
            df=timeseries_df,
            columns={"y": "y"},
            rolling_type="quantile",
            rolling_type_options={"abc": 123},
            window=2,
        )
예제 #9
0
def test_sort():
    """Check ``sort`` on two columns and its rejection of an unknown column."""
    # NOTE(review): the booleans presumably mean ascending=True/False — confirm
    ordering = {"category": True, "asc_idx": False}
    ordered = sort(df=categories_df, columns=ordering)
    second_asc_idx = series_to_list(ordered["asc_idx"])[1]
    assert second_asc_idx == 96

    # sorting by a column that does not exist must raise
    with pytest.raises(QueryObjectValidationError):
        sort(df=ordered, columns={"abc": True})
예제 #10
0
def test_sort():
    """Check ``sort`` on two columns and its rejection of an unknown column."""
    # NOTE(review): the booleans presumably mean ascending=True/False — confirm
    ordering = {"category": True, "asc_idx": False}
    ordered = sort(df=categories_df, columns=ordering)
    second_asc_idx = series_to_list(ordered["asc_idx"])[1]
    assert second_asc_idx == 96

    # sorting by a column that does not exist must raise
    with pytest.raises(InvalidPostProcessingError):
        sort(df=ordered, columns={"abc": True})