Example #1
0
def test_vw_config_manager():
    """Check that get_config() round-trips: the option strings extracted from
    one workspace can be used to construct an equivalent workspace."""
    expected_set = {
        "--no_stdin",
        "--quiet",
        "--loss_function=logistic",
        "--data=test/train-sets/rcv1_small.dat",
    }
    expected_reductions = {"gd", "scorer-identity", "count_label"}

    vw = vowpalwabbit.Workspace(
        arg_str=
        "--loss_function logistic -d test/train-sets/rcv1_small.dat --quiet")
    config = vw.get_config()
    enabled_reductions = vw.get_enabled_reductions()

    cmd_str_list = helper_options_to_list_strings(config)
    assert set(cmd_str_list) == expected_set
    assert set(enabled_reductions) == expected_reductions

    vw.finish()

    # do another iteration generating the cmd string from the output of previous
    new_args = " ".join(cmd_str_list)

    other_vw = vowpalwabbit.Workspace(new_args)
    # BUG FIX: query the freshly constructed workspace; the original called
    # vw.get_config() on the instance that was already finish()ed above.
    new_config = other_vw.get_config()
    new_cmd_str_list = helper_options_to_list_strings(new_config)

    assert set(new_cmd_str_list) == expected_set

    other_vw.finish()
Example #2
0
def test_ccb_single_slot_and_cb_equivalence_no_slot_features():
    """With no slot features, a one-slot CCB model should contain exactly as
    many weights as the equivalent CB model."""
    # Train the CCB side and dump the model in readable (text) form.
    ccb_model_path = "model_file_ccb_equiv.txt"
    ccb_vw = vowpalwabbit.Workspace(
        quiet=True,
        predict_only_model=True,
        ccb_explore_adf=True,
        readable_model=ccb_model_path,
    )

    ccb_example = """
    ccb shared |User b
    ccb action |Action d
    ccb action |Action e
    ccb action |Action f
    ccb action |Action ff
    ccb action |Action fff
    ccb slot 4:1:0.2 |
    """
    ccb_vw.learn(ccb_example)
    ccb_vw.finish()

    ccb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        ccb_model_path)

    # Train the CB side on the same shared and action features.
    cb_model_path = "model_file_cb_equiv.txt"
    cb_vw = vowpalwabbit.Workspace(
        quiet=True,
        predict_only_model=True,
        cb_explore_adf=True,
        readable_model=cb_model_path,
    )

    cb_example = """
    shared |User b
    |Action d
    |Action e
    |Action f
    |Action ff
    4:1:0.2 |Action fff
    """

    cb_vw.learn(cb_example)
    cb_vw.finish()
    cb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        cb_model_path)

    assert ccb_weight_count == cb_weight_count
Example #3
0
def test_cats_pdf():
    """Train a --cats_pdf model and validate the returned density segments."""
    lo = 10
    hi = 20

    vw = vowpalwabbit.Workspace(
        f"--cats_pdf 4 --min_value {lo} --max_value {hi} --bandwidth 1")
    example = vw.parse("ca 15:0.657567:6.20426e-05 | f1 f2 f3 f4",
                       vowpalwabbit.LabelType.CONTINUOUS)
    vw.learn(example)
    vw.finish_example(example)

    assert vw.get_prediction_type() == vowpalwabbit.PredictionType.PDF, \
        "prediction_type should be pdf"

    total_mass = 0
    for seg in vw.predict("| f1 f2 f3 f4"):
        # each segment is a (left, right, pdf_value) triple
        assert len(seg) == 3
        left, right, density = seg

        # segment endpoints must lie within the configured action range
        assert lo <= left <= hi
        assert lo <= right <= hi

        # a pdf value can never be negative
        assert density >= 0

        total_mass += (right - left) * density

    # the density should integrate to 1 (within float tolerance)
    assert 0.9999 <= total_mass <= 1.0001

    vw.finish()
Example #4
0
def test_getting_started_example_with():
    """Drive the getting-started CB example through the context-manager API."""
    train_df, test_df = helper_get_data()

    # The with-statement calls vw.finish() automatically.  Normally one would
    # write 'with vowpalwabbit.Workspace("--cb 4") as vw:', but the instance
    # is created first so the test can assert on vw.finished afterwards.
    vw = vowpalwabbit.Workspace("--cb 4")
    with vw as vw:
        for idx in train_df.index:
            row = train_df.loc[idx]
            # VW text format: "action:cost:probability | features"
            learn_example = (
                f"{row['action']}:{row['cost']}:{row['probability']} "
                f"| {row['feature1']} {row['feature2']} {row['feature3']}")
            vw.learn(learn_example)

        assert (vw.get_prediction_type() == vw.pMULTICLASS
                ), "prediction_type should be multiclass"

        for idx in test_df.index:
            row = test_df.loc[idx]
            choice = vw.predict(
                f"| {row['feature1']} {row['feature2']} {row['feature3']}")
            assert isinstance(choice, int), "choice should be int"
            assert choice == 3, "predicted action should be 3"

    assert vw.finished == True, "with syntax should finish() vw instance"
Example #5
0
def test_ccb_single_slot_and_cb_non_equivalence_with_slot_features():
    """When the slot carries a feature, CCB equivalence mode cannot apply, so
    the CCB model must hold strictly more weights than the CB model."""
    # Train the CCB side, this time with a feature on the slot line.
    ccb_model_path = "model_file_ccb_no_equiv.txt"
    ccb_vw = vowpalwabbit.Workspace(
        quiet=True, ccb_explore_adf=True, readable_model=ccb_model_path
    )

    ccb_example = """
    ccb shared |User b
    ccb action |Action d
    ccb action |Action e
    ccb action |Action f
    ccb action |Action ff
    ccb action |Action fff
    ccb slot 4:1:0.2 | slot_feature_1
    """
    ccb_vw.learn(ccb_example)
    ccb_vw.finish()

    ccb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        ccb_model_path)

    # Train the CB side on the same shared and action features.
    cb_model_path = "model_file_cb_no_equiv.txt"
    cb_vw = vowpalwabbit.Workspace(
        quiet=True, cb_explore_adf=True, readable_model=cb_model_path
    )

    cb_example = """
    shared |User b
    |Action d
    |Action e
    |Action f
    |Action ff
    4:1:0.2 |Action fff
    """

    cb_vw.learn(cb_example)
    cb_vw.finish()
    cb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        cb_model_path)

    # Since there was at least one slot feature supplied, the equivalent mode
    # does not apply and so we expect there to be more weights in the CCB model.
    assert ccb_weight_count > cb_weight_count
Example #6
0
def test_MulticlassLabel_example():
    """Extract a MulticlassLabel from a parsed example and check its fields."""
    num_classes = 4
    workspace = vowpalwabbit.Workspace(
        loss_function="logistic", oaa=num_classes, quiet=True)
    example = workspace.example("1 | a b c d", 2)
    label = vowpalwabbit.MulticlassLabel.from_example(example)
    # Label 1 with the default weight; nothing learned yet, so prediction is 0.
    assert label.label == 1
    assert label.weight == 1.0
    assert label.prediction == 0
    assert str(label) == "1"
Example #7
0
def test_ccb_non_slot_none_outcome():
    """A freshly created CCB example carries an UNSET label with no outcome."""
    workspace = vowpalwabbit.Workspace(quiet=True, ccb_explore_adf=True)
    ccb_example = vowpalwabbit.Example(
        vw=workspace,
        labelType=vowpalwabbit.LabelType.CONDITIONAL_CONTEXTUAL_BANDIT,
    )
    ccb_label = ccb_example.get_label(vowpalwabbit.CCBLabel)
    # Until a label is parsed, the CCB label type stays UNSET.
    assert ccb_label.type == vowpalwabbit.CCBLabelType.UNSET
    assert ccb_label.outcome is None
Example #8
0
def helper_getting_started_example(which_cb: str) -> None:
    """Run the getting-started CB workflow for the given reduction flag.

    Trains on the helper data, checks multiclass predictions, then compares
    the captured driver log against a reference stderr file with a numeric
    tolerance.  ``which_cb`` is the reduction option (e.g. "--cb_explore" or a
    legacy variant); " 4" (number of actions) and logging flags are appended.
    """
    train_df, test_df = helper_get_data()

    # enable_logging captures the driver output so it can be diffed below.
    vw = vowpalwabbit.Workspace(which_cb + " 4 --log_level off --cb_type mtr",
                                enable_logging=True)

    for i in train_df.index:
        action = train_df.loc[i, "action"]
        cost = train_df.loc[i, "cost"]
        probability = train_df.loc[i, "probability"]
        feature1 = train_df.loc[i, "feature1"]
        feature2 = train_df.loc[i, "feature2"]
        feature3 = train_df.loc[i, "feature3"]

        # VW text format: "action:cost:probability | features"
        learn_example = (str(action) + ":" + str(cost) + ":" +
                         str(probability) + " | " + str(feature1) + " " +
                         str(feature2) + " " + str(feature3))
        vw.learn(learn_example)

    assert (vw.get_prediction_type() == vw.pMULTICLASS
            ), "prediction_type should be multiclass"

    for j in test_df.index:
        feature1 = test_df.loc[j, "feature1"]
        feature2 = test_df.loc[j, "feature2"]
        feature3 = test_df.loc[j, "feature3"]
        choice = vw.predict("| " + str(feature1) + " " + str(feature2) + " " +
                            str(feature3))
        assert isinstance(choice, int), "choice should be int"
        assert choice == 3, "predicted action should be 3 instead of " + str(
            choice)

    # test that metrics is empty since "--extra_metrics filename" was not supplied
    assert len(vw.get_learner_metrics()) == 0

    vw.finish()

    output = vw.get_log()

    # Legacy reductions produce slightly different driver output, so a
    # dedicated reference file is used for them.
    if which_cb.find("legacy") != -1:
        test_file = "test-sets/ref/python_test_cb_legacy.stderr"
    else:
        test_file = "test-sets/ref/python_test_cb.stderr"

    print("Output received:")
    print("----------------")
    print("\n".join(output))
    print("----------------")

    with open(path.join(helper_get_test_dir(), test_file), "r") as file:
        expected = file.readlines()
        # NOTE(review): zip() stops at the shorter sequence, so extra or
        # missing trailing lines are not detected by this comparison.
        for expected_line, output_line in zip(expected, output):
            output_line = output_line.replace("...", "").strip()
            expected_line = expected_line.replace("...", "").strip()
            assert not is_line_different(output_line, expected_line, 0.001)
Example #9
0
def test_MulticlassProbabilitiesLabel():
    """Round-trip a multiclass-probabilities label through an example and via
    direct construction."""
    num_classes = 4
    workspace = vowpalwabbit.Workspace(
        loss_function="logistic",
        oaa=num_classes,
        probabilities=True,
        quiet=True)
    example = workspace.example("1 | a b c d", 2)
    workspace.learn(example)
    # After a single example all classes still share uniform probability.
    label = vowpalwabbit.MulticlassProbabilitiesLabel.from_example(example)
    assert str(label) == "1:0.25 2:0.25 3:0.25 4:0.25"
    # A label can also be built directly from a probability list.
    label = vowpalwabbit.MulticlassProbabilitiesLabel([0.4, 0.3, 0.3])
    assert str(label) == "1:0.4 2:0.3 3:0.3"
Example #10
0
def helper_getting_started_example(which_cb: str) -> None:
    """Run the getting-started CB workflow and diff the log line-by-line.

    Same flow as the tolerant getting-started helper, but requires the
    captured log to match the reference file exactly (no numeric tolerance).
    ``which_cb`` is the reduction option; " 4" (number of actions) and
    logging flags are appended to it.
    """
    train_df, test_df = helper_get_data()

    # enable_logging captures the driver output for the comparison below.
    vw = vowpalwabbit.Workspace(which_cb + " 4 --log_level off",
                                enable_logging=True)

    for i in train_df.index:
        action = train_df.loc[i, "action"]
        cost = train_df.loc[i, "cost"]
        probability = train_df.loc[i, "probability"]
        feature1 = train_df.loc[i, "feature1"]
        feature2 = train_df.loc[i, "feature2"]
        feature3 = train_df.loc[i, "feature3"]

        # VW text format: "action:cost:probability | features"
        learn_example = (str(action) + ":" + str(cost) + ":" +
                         str(probability) + " | " + str(feature1) + " " +
                         str(feature2) + " " + str(feature3))
        vw.learn(learn_example)

    assert (vw.get_prediction_type() == vw.pMULTICLASS
            ), "prediction_type should be multiclass"

    for j in test_df.index:
        feature1 = test_df.loc[j, "feature1"]
        feature2 = test_df.loc[j, "feature2"]
        feature3 = test_df.loc[j, "feature3"]
        choice = vw.predict("| " + str(feature1) + " " + str(feature2) + " " +
                            str(feature3))
        assert isinstance(choice, int), "choice should be int"
        assert choice == 3, "predicted action should be 3 instead of " + str(
            choice)

    # test that metrics is empty since "--extra_metrics filename" was not supplied
    assert len(vw.get_learner_metrics()) == 0

    vw.finish()

    output = vw.get_log()

    # Legacy reductions produce slightly different driver output, so a
    # dedicated reference file is used for them.
    if which_cb.find("legacy") != -1:
        test_file = "test-sets/ref/python_test_cb_legacy.stderr"
    else:
        test_file = "test-sets/ref/python_test_cb.stderr"

    with open(path.join(helper_get_test_dir(), test_file), "r") as file:
        actual = file.readlines()
        # NOTE(review): zip() truncates at the shorter list, so a length
        # mismatch between log and reference goes unnoticed here.
        for j, i in zip(actual, output):
            assert i == j, "line mismatch should be: " + j + " output: " + i
Example #11
0
def test_cats():
    """Train a --cats model and check the predicted action stays in range."""
    min_value = 10
    max_value = 20

    vw = vowpalwabbit.Workspace("--cats 4 --min_value " + str(min_value) +
                                " --max_value " + str(max_value) +
                                " --bandwidth 1")
    vw_example = vw.parse("ca 15:0.657567:6.20426e-05 | f1 f2 f3 f4",
                          vowpalwabbit.LabelType.CONTINUOUS)
    vw.learn(vw_example)
    vw.finish_example(vw_example)

    assert (vw.get_prediction_type() == vowpalwabbit.PredictionType.
            ACTION_PDF_VALUE), "prediction_type should be action_pdf_value"

    action, pdf_value = vw.predict("| f1 f2 f3 f4")
    # Consistency fix: compare against the configured limits instead of
    # repeating the magic literals 10 and 20.
    assert min_value <= action <= max_value
    vw.finish()
Example #12
0
def test_dsjson() -> None:
    """Learn from and predict on DSJSON-format contextual-bandit input.

    Verifies that action scores on a labeled example reflect the logged
    probabilities before learning, and that a prediction after one learn step
    shifts mass toward the first (labeled) action.
    """
    vw = vowpalwabbit.Workspace("--cb_explore_adf --epsilon 0.2 --dsjson")

    ex_l_str = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    ex_l = vw.parse(ex_l_str)
    vw.learn(ex_l)
    # Scores on the learned example match the logged 50/50 probabilities.
    pred = ex_l[0].get_action_scores()
    expected = [0.5, 0.5]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
    vw.finish_example(ex_l)

    ex_p = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    # With epsilon 0.2 over 2 actions, the greedy action gets 0.9 and the
    # other gets 0.1 after learning.
    pred = vw.predict(ex_p)
    expected = [0.9, 0.1]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
Example #13
0
def test_dsjson_with_metrics() -> None:
    """Learn/predict on DSJSON input and verify the learner metrics.

    Same flow as the plain DSJSON test, but runs with --extra_metrics so the
    per-learner metric dictionary is populated, then pins each expected
    counter and aggregate.
    """
    vw = vowpalwabbit.Workspace(
        "--extra_metrics metrics.json --cb_explore_adf --epsilon 0.2 --dsjson")

    ex_l_str = '{"_label_cost":-0.9,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    ex_l = vw.parse(ex_l_str)
    vw.learn(ex_l)
    # Scores on the learned example match the logged 50/50 probabilities.
    pred = ex_l[0].get_action_scores()
    expected = [0.5, 0.5]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
    vw.finish_example(ex_l)

    ex_p = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    # With epsilon 0.2 over 2 actions: greedy action 0.9, other 0.1.
    pred = vw.predict(ex_p)
    expected = [0.9, 0.1]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)

    # One learn call plus two predict calls (parse of a labeled DSJSON example
    # counts toward predictions as well) should yield exactly these metrics.
    learner_metric_dict = vw.get_learner_metrics()
    assert len(vw.get_learner_metrics()) == 17

    assert learner_metric_dict["total_predict_calls"] == 2
    assert learner_metric_dict["total_learn_calls"] == 1
    assert learner_metric_dict["cbea_labeled_ex"] == 1
    assert learner_metric_dict["cbea_predict_in_learn"] == 0
    assert learner_metric_dict["cbea_label_first_action"] == 1
    assert learner_metric_dict["cbea_label_not_first"] == 0
    # Float aggregates are compared with pytest.approx.
    assert pytest.approx(learner_metric_dict["cbea_sum_cost"]) == -0.9
    assert pytest.approx(learner_metric_dict["cbea_sum_cost_baseline"]) == -0.9
    assert learner_metric_dict["cbea_non_zero_cost"] == 1
    assert pytest.approx(learner_metric_dict["cbea_avg_feat_per_event"]) == 24
    assert pytest.approx(
        learner_metric_dict["cbea_avg_actions_per_event"]) == 2
    assert pytest.approx(learner_metric_dict["cbea_avg_ns_per_event"]) == 16
    assert pytest.approx(learner_metric_dict["cbea_avg_feat_per_action"]) == 12
    assert pytest.approx(learner_metric_dict["cbea_avg_ns_per_action"]) == 8
    assert learner_metric_dict["cbea_min_actions"] == 2
    assert learner_metric_dict["cbea_max_actions"] == 2
    assert learner_metric_dict["sfm_count_learn_example_with_shared"] == 1
Example #14
0
def main():
    """Create a Workspace from the whitespace-joined command-line options."""
    cli_options = " ".join(sys.argv[1:])
    vowpalwabbit.Workspace(cli_options)
Example #15
0
def test_constructor_exception_is_safe():
    """Constructing a Workspace with a bad option must fail cleanly.

    The constructor is expected to raise; the test passes as long as the
    failed construction does not crash the interpreter.
    """
    try:
        vw = vowpalwabbit.Workspace("--invalid_option")
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being silently swallowed.
        pass
Example #16
0
def test_not_runparser_cmd_string():
    """An empty argument string must not trigger the example parser."""
    workspace = vowpalwabbit.Workspace("")
    assert workspace.parser_ran == False, "vw should set parser_ran to false"
    workspace.finish()
Example #17
0
def test_runparser_cmd_string_short():
    """Supplying a data file via -d should run the example parser."""
    workspace = vowpalwabbit.Workspace("-d ./test/train-sets/rcv1_small.dat")
    assert workspace.parser_ran == True, "vw should set parser_ran to true if --data present"
    workspace.finish()
Example #18
0
def main():
    """Create a Workspace directly from argv as an argument list."""
    cli_args = sys.argv[1:]
    vowpalwabbit.Workspace(arg_list=cli_args)