Exemple #1
0
def test_multiclasslabel():
    """A weighted multiclass label is rendered as ``<label> <weight>``."""
    frame = pd.DataFrame({"a": [1], "b": [0.5], "c": [-3]})
    label = MulticlassLabel(label="a", weight="b")
    converter = DFtoVW(df=frame, label=label, features=Feature("c"))
    lines = converter.convert_df()
    assert lines[0] == "1 0.5 | c:-3"
Exemple #2
0
def test_multilabel_with_listlabel_builder():
    """A list of single-column ``MultiLabel`` objects is comma-joined."""
    frame = pd.DataFrame({"y1": [1], "y2": [2], "x": [3]})
    labels = [MultiLabel("y1"), MultiLabel("y2")]
    converter = DFtoVW(df=frame, label=labels, features=Feature("x"))
    lines = converter.convert_df()
    assert lines[0] == "1,2 | x:3"
Exemple #3
0
def test_contextualbanditlabel_one_label():
    """A single contextual bandit label is rendered as three ``:``-joined values."""
    frame = pd.DataFrame({"a": [1], "c": [-0.5], "p": [0.1], "x": [1]})
    cb_label = ContextualbanditLabel("a", "c", "p")
    converter = DFtoVW(df=frame, label=cb_label, features=Feature("x"))
    lines = converter.convert_df()
    assert lines[0] == "1:-0.5:0.1 | x:1"
Exemple #4
0
def test_mixed_type_features():
    """String columns are emitted as ``name=value``, numeric ones as ``name:value``."""
    frame = pd.DataFrame({"y": [1], "x1": ["a"], "x2": [2]})
    label = SimpleLabel("y")
    feature_list = [Feature(value="x1"), Feature(value="x2")]
    converter = DFtoVW(label=label, features=feature_list, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 | x1=a x2:2"
Exemple #5
0
def test_feature_column_renaming_and_tag():
    """The tag column precedes ``|`` and ``rename_feature`` replaces the column name."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    label = SimpleLabel("y")
    renamed = Feature(value="x", rename_feature="col_x")
    converter = DFtoVW(label=label, tag="idx", features=renamed, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 id_1| col_x:2"
Exemple #6
0
def test_as_type_in_features():
    """``as_type`` overrides how a column is rendered, whatever its dtype."""
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3], "c": ["4"]})
    forced_categorical = Feature("a", as_type="categorical")
    untouched = Feature("b")
    forced_numerical = Feature("c", as_type="numerical")
    converter = DFtoVW(
        label=SimpleLabel("y"),
        features=[forced_categorical, untouched, forced_numerical],
        df=frame,
    )
    lines = converter.convert_df()
    assert lines[0] == "1 | a=2 b:3 c:4"
Exemple #7
0
def test_multiple_lines_with_weight():
    """Each dataframe row becomes one output line carrying its own weight."""
    data = {
        "y": [1, 2, -1],
        "w": [2.5, 1.2, 3.75],
        "x": ["a", "b", "c"],
    }
    frame = pd.DataFrame(data)
    weighted_label = SimpleLabel(label="y", weight="w")
    converter = DFtoVW(df=frame, label=weighted_label, features=Feature("x"))
    expected = ["1 2.5 | x=a", "2 1.2 | x=b", "-1 3.75 | x=c"]
    assert converter.convert_df() == expected
Exemple #8
0
def test_feature_with_nan():
    """Missing feature values are left out of the line.

    The expected strings keep the separating whitespace around the omitted
    feature, hence the trailing / doubled spaces.
    """
    data = {
        "y": [-1, 1, 1],
        "x1": [1, 2, None],
        "x2": [3, None, 2],
    }
    frame = pd.DataFrame(data)
    feature_list = [Feature("x1"), Feature("x2")]
    converter = DFtoVW(df=frame, features=feature_list, label=SimpleLabel("y"))
    expected = ["-1 | x1:1.0 x2:3.0", "1 | x1:2.0 ", "1 |  x2:2.0"]
    assert converter.convert_df() == expected
Exemple #9
0
def test_multiple_named_namespaces():
    """Each namespace gets its own ``|name`` section; a namespace value is appended as ``:value``."""
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3]})
    first_ns = Namespace(name="FirstNameSpace", features=Feature("a"))
    scaled_ns = Namespace(name="DoubleIt", value=2, features=Feature("b"))
    converter = DFtoVW(
        df=frame,
        label=SimpleLabel("y"),
        namespaces=[first_ns, scaled_ns],
    )
    lines = converter.convert_df()
    assert lines[0] == "1 |FirstNameSpace a:2 |DoubleIt:2 b:3"
Exemple #10
0
def test_dirty_colname_feature():
    """Column names with spaces and colons are sanitised in the output.

    The expected line shows the feature names with surrounding whitespace and
    ``:`` gone and inner spaces replaced by underscores.
    """
    data = {
        "target :": [1],
        " my first feature:": ["x"],
        "white space at the end ": [2],
    }
    frame = pd.DataFrame(data)
    feature_list = [
        Feature(" my first feature:"),
        Feature("white space at the end "),
    ]
    label = SimpleLabel("target :")
    converter = DFtoVW(label=label, features=feature_list, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 | my_first_feature=x white_space_at_the_end:2"
Exemple #11
0
def test_wrong_weight_type_error():
    """A string weight column makes the converter constructor raise TypeError."""
    frame = pd.DataFrame({"y": [1], "x": [2], "w": ["a"]})
    expected = "In argument 'weight' of 'SimpleLabel', column 'w' should be either of the following type(s): 'int', 'float'."
    with pytest.raises(TypeError) as excinfo:
        DFtoVW(
            df=frame,
            label=SimpleLabel(label="y", weight="w"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #12
0
def test_multiple_named_namespaces_multiple_features_multiple_lines():
    """Several namespaces, several features and several rows combine correctly."""
    data = {
        "y": [1, -1],
        "a": [2, 3],
        "b": ["x1", "x2"],
        "c": [36.4, 47.8],
    }
    frame = pd.DataFrame(data)
    first_ns = Namespace(name="FirstNameSpace", features=Feature("a"))
    scaled_features = [Feature("b"), Feature("c")]
    scaled_ns = Namespace(name="DoubleIt", value=2, features=scaled_features)
    converter = DFtoVW(
        df=frame,
        label=SimpleLabel("y"),
        namespaces=[first_ns, scaled_ns],
    )
    expected = [
        "1 |FirstNameSpace a:2 |DoubleIt:2 b=x1 c:36.4",
        "-1 |FirstNameSpace a:3 |DoubleIt:2 b=x2 c:47.8",
    ]
    assert converter.convert_df() == expected
Exemple #13
0
def test_contextualbanditlabel_non_float_proba_error():
    """An integer probability column makes the converter constructor raise TypeError."""
    frame = pd.DataFrame({"a": [1], "c": [-0.5], "p": [1], "x": [1]})
    expected = "In argument 'probability' of 'ContextualbanditLabel', column 'p' should be either of the following type(s): 'float'."
    with pytest.raises(TypeError) as excinfo:
        DFtoVW(
            df=frame,
            label=ContextualbanditLabel("a", "c", "p"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #14
0
def test_multilabel_non_positive_label_error():
    """A ``MultiLabel`` column containing 0 is rejected with ValueError."""
    frame = pd.DataFrame({"y": [0], "b": [1]})
    expected = "In argument 'label' of 'MultiLabel', column 'y' must be >= 1."
    with pytest.raises(ValueError) as excinfo:
        DFtoVW(df=frame, label=MultiLabel(label="y"), features=Feature("b"))
    assert str(excinfo.value) == expected
Exemple #15
0
def test_multiclasslabel_non_positive_label_error():
    """A ``MulticlassLabel`` column containing 0 is rejected with ValueError."""
    frame = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
    expected = "In argument 'label' of 'MulticlassLabel', column 'a' must be >= 1."
    with pytest.raises(ValueError) as excinfo:
        DFtoVW(
            df=frame,
            label=MulticlassLabel(label="a", weight="b"),
            features=Feature("c"),
        )
    assert str(excinfo.value) == expected
Exemple #16
0
def test_absent_col_error():
    """Features naming columns absent from the dataframe raise ValueError.

    The message is expected to list every missing column ('c' and 'd').
    """
    # Build the fixture outside the ``raises`` context so that a ValueError
    # coming from the setup cannot satisfy the expectation by accident.
    df = pd.DataFrame({"a": [1]})
    with pytest.raises(ValueError) as value_error:
        DFtoVW(
            df=df,
            label=SimpleLabel("a"),
            features=[Feature(col) for col in ["a", "c", "d"]],
        )
    expected = "In 'Feature': column(s) 'c', 'd' not found in dataframe."
    assert expected == str(value_error.value)
Exemple #17
0
def test_listlabel_not_allowed_label_error():
    """Passing a list of ``SimpleLabel`` objects as ``label`` raises TypeError."""
    # Build the fixture outside the ``raises`` context so only the converter
    # construction is checked for the exception.
    df = pd.DataFrame({"a": [1], "b": [2], "x": [1]})
    with pytest.raises(TypeError) as type_error:
        # The result is deliberately not bound: construction is expected to raise.
        DFtoVW(
            df=df,
            label=[SimpleLabel("a"), SimpleLabel("b")],
            features=Feature("x"),
        )
    expected = "The only labels that can be used with list are 'ContextualbanditLabel', 'MultiLabel'."
    assert expected == str(type_error.value)
Exemple #18
0
def test_contextualbanditlabel_negative_proba_error():
    """A negative probability is rejected with ValueError."""
    frame = pd.DataFrame({"a": [1], "c": [-0.5], "p": [-0.1], "x": [1]})
    expected = "In argument 'probability' of 'ContextualbanditLabel', column 'p' must be >= 0 and <= 1."
    with pytest.raises(ValueError) as excinfo:
        DFtoVW(
            df=frame,
            label=ContextualbanditLabel("a", "c", "p"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #19
0
def test_multiclasslabel_negative_weight_error():
    """A negative ``MulticlassLabel`` weight is rejected with ValueError."""
    frame = pd.DataFrame({"y": [1], "w": [-0.5], "x": [2]})
    expected = "In argument 'weight' of 'MulticlassLabel', column 'w' must be >= 0."
    with pytest.raises(ValueError) as excinfo:
        DFtoVW(
            df=frame,
            label=MulticlassLabel(label="y", weight="w"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #20
0
def test_contextualbanditlabel_multiple_label():
    """Several contextual bandit labels are space-separated on one line."""
    data = {
        "a1": [1],
        "c1": [-0.5],
        "p1": [0.1],
        "a2": [2],
        "c2": [-1.5],
        "p2": [0.6],
        "x": [1],
    }
    frame = pd.DataFrame(data)
    first_label = ContextualbanditLabel("a1", "c1", "p1")
    second_label = ContextualbanditLabel("a2", "c2", "p2")
    converter = DFtoVW(
        df=frame,
        label=[first_label, second_label],
        features=Feature("x"),
    )
    lines = converter.convert_df()
    assert lines[0] == "1:-0.5:0.1 2:-1.5:0.6 | x:1"
Exemple #21
0
def test_contextualbanditlabel_non_positive_action():
    """An action column containing 0 is rejected with ValueError."""
    frame = pd.DataFrame({"a": [0], "c": [-0.5], "p": [0.5], "x": [1]})
    expected = "In argument 'action' of 'ContextualbanditLabel', column 'a' must be >= 1."
    with pytest.raises(ValueError) as excinfo:
        DFtoVW(
            df=frame,
            label=ContextualbanditLabel("a", "c", "p"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #22
0
def test_listlabel_mixed_label_error():
    """A ``label`` list mixing different label classes raises TypeError."""
    # Build the fixture outside the ``raises`` context so only the converter
    # construction is checked for the exception.
    df = pd.DataFrame({
        "a": [1],
        "c": [-0.5],
        "p": [0.1],
        "b": [2],
        "x": [1]
    })
    with pytest.raises(TypeError) as type_error:
        # The result is deliberately not bound: construction is expected to raise.
        DFtoVW(
            df=df,
            label=[ContextualbanditLabel("a", "c", "p"),
                   MultiLabel("b")],
            features=Feature("x"),
        )
    expected = "The list passed in 'label' has mixed label types."
    assert expected == str(type_error.value)
Exemple #23
0
def test_multilabel():
    """A ``MultiLabel`` built from a list of columns is comma-joined."""
    frame = pd.DataFrame({"y1": [1], "y2": [2], "x": [3]})
    label = MultiLabel(["y1", "y2"])
    converter = DFtoVW(df=frame, label=label, features=Feature("x"))
    lines = converter.convert_df()
    assert lines[0] == "1,2 | x:3"
Exemple #24
0
def test_multiple_lines():
    """Two dataframe rows produce two output lines, in row order."""
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    converter = DFtoVW(
        label=SimpleLabel("y"),
        features=Feature(value="x"),
        df=frame,
    )
    expected = ["1 | x:1", "-1 | x:2"]
    assert converter.convert_df() == expected
Exemple #25
0
def test_non_numerical_error():
    """A string ``SimpleLabel`` column makes the constructor raise TypeError."""
    frame = pd.DataFrame({"y": ["a"], "x": ["featX"]})
    expected = "In argument 'label' of 'SimpleLabel', column 'y' should be either of the following type(s): 'int', 'float'."
    with pytest.raises(TypeError) as excinfo:
        DFtoVW(
            df=frame,
            label=SimpleLabel(label="y"),
            features=Feature("x"),
        )
    assert str(excinfo.value) == expected
Exemple #26
0
def test_from_colnames_constructor():
    """``DFtoVW.from_colnames`` builds a converter directly from column names."""
    frame = pd.DataFrame({"y": [1], "x": [2]})
    converter = DFtoVW.from_colnames(y="y", x=["x"], df=frame)
    converted = converter.convert_df()
    assert converted[0] == "1 | x:2"
Exemple #27
0
def test_no_label_multiple_features():
    """Without a label the line starts directly at the ``|`` separator."""
    frame = pd.DataFrame({"a": [2], "b": [3]})
    feature_list = [Feature("a"), Feature("b")]
    converter = DFtoVW(df=frame, features=feature_list)
    lines = converter.convert_df()
    assert lines[0] == "| a:2 b:3"
Exemple #28
0
def test_wrong_feature_type_error():
    """Passing a plain string as ``features`` raises TypeError."""
    frame = pd.DataFrame({"y": [1], "x": [2]})
    expected = "Argument 'features' should be a Feature or a list of Feature."
    with pytest.raises(TypeError) as excinfo:
        DFtoVW(df=frame, label=SimpleLabel("y"), features="x")
    assert str(excinfo.value) == expected
Exemple #29
0
def test_no_label_rename_feature():
    """An empty ``rename_feature`` yields a feature written with no name."""
    frame = pd.DataFrame({"a": [1]})
    unnamed = Feature("a", rename_feature="")
    converter = DFtoVW(df=frame, features=unnamed)
    lines = converter.convert_df()
    assert lines[0] == "| :1"
Exemple #30
0
def test_multilabel_list_of_len_1():
    """A one-element list and a bare column name give the same ``MultiLabel`` output."""
    frame = pd.DataFrame({"y": [1], "x": [2]})
    from_list = DFtoVW(df=frame, label=MultiLabel(["y"]), features=Feature("x"))
    from_name = DFtoVW(df=frame, label=MultiLabel("y"), features=Feature("x"))
    line_from_list = from_list.convert_df()[0]
    line_from_name = from_name.convert_df()[0]
    assert line_from_list == line_from_name