예제 #1
0
def test_task5(test_data, dataset):
    '''
    Task 5 test: compare the training and testing R2 of the model
    returned by choose_best_model against the expected values.

    Inputs:
        test_data: mapping indexed by dataset; each entry provides
            "dataset_object" and the "expected" results
        dataset: key selecting the entry of test_data to test
    '''
    entry = test_data[dataset]
    dataset_obj = entry["dataset_object"]

    best_model = choose_best_model(dataset_obj)
    if best_model is None:
        pytest.fail("When testing Task #5, choose_best_model returned None instead of a Model object")

    testing_R2 = validate_model(dataset_obj, best_model)

    # Fetch both expected values before asserting anything, so a missing
    # key surfaces ahead of any comparison failure.
    expected = entry["expected"]["validate_model"]
    exp_training_R2, exp_testing_R2 = expected["training_R2"], expected["testing_R2"]

    assert best_model.R2 == pytest.approx(exp_training_R2), "Incorrect Training R2"
    assert testing_R2 == pytest.approx(exp_testing_R2), "Incorrect Testing R2"
예제 #2
0
def go(dataset, gen_html):
    '''
    Run every task on the dataset and report the results.

    The models from Tasks 1a, 1b, 2, 3 and 4 are handed to
    format_list_of_models; the Task 5 summary is assembled here and
    printed.

    Inputs:
        dataset: the dataset passed to each task function
        gen_html: when truthy, the Task 5 summary uses a ":"-delimited
            heading and " |br|" line endings instead of indented plain
            text; the flag is also forwarded to format_list_of_models
    '''
    single_var_models = compute_single_var_models(dataset)
    format_list_of_models("1a", single_var_models, gen_html=gen_html)

    all_vars_model = compute_all_vars_model(dataset)
    format_list_of_models("1b", [all_vars_model], gen_html=gen_html)

    best_pair_model = compute_best_pair(dataset)
    format_list_of_models("2", [best_pair_model], gen_html=gen_html)

    backward_models = backward_selection(dataset)
    format_list_of_models("3", backward_models, gen_html=gen_html)

    best_model = choose_best_model(dataset)
    format_list_of_models("4", [best_model],
                          include_adj_R2=True,
                          gen_html=gen_html)

    # Formatting tokens: heading marker, line indent, and line ending.
    if gen_html:
        marker, indent, eol = ":", "", " |br|\n"
    else:
        marker, indent, eol = "", "    ", "\n"

    report = [marker, "Task 5", marker, "\n"]
    if best_model is None:
        report.append(indent + "Can't test this until Task 4 is implemented" + eol)
    else:
        # Validate first, before the model is rendered or its R2 is read.
        testing_R2 = validate_model(dataset, best_model)

        report.append(indent + str(best_model) + eol)
        if hasattr(best_model, "R2"):
            training_line = "Training R2: {}".format(best_model.R2)
        else:
            training_line = "Training R2: choose_best_model returned an object without an R2 model"
        report.append(indent + training_line + eol)
        report.append(indent + "Testing R2: {}".format(testing_R2) + eol)

    print("".join(report))
def test_task4(test_data, dataset):
    '''
    Task 4 test: pass the model returned by choose_best_model to
    check_models under the "choose_best_model" key.

    Inputs:
        test_data: mapping indexed by dataset; each entry provides
            "dataset_object"
        dataset: key selecting the entry of test_data to test
    '''
    chosen = choose_best_model(test_data[dataset]["dataset_object"])

    # check_models takes a list of models, so wrap the single result.
    check_models(test_data, dataset, "choose_best_model", [chosen])