コード例 #1
0
def test_config_rejects_path_and_lv_config_not_matching():
    """Expect ``Config.filter`` to raise ValueError for a partial LV setup.

    Only the AGRI and IND latent variables are registered before filtering;
    judging by the test name, the mismatch with the path matrix is what is
    expected to trigger the error.
    """
    cfg = c.Config(config_test_path_matrix())
    for lv_name, mv_name in (("AGRI", "gini"), ("IND", "gnpr")):
        cfg.add_lv(lv_name, Mode.A, c.MV(mv_name))
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    with pytest.raises(ValueError):
        cfg.filter(data)
コード例 #2
0
def test_paths():
    """Compare a mobi PLS-PM fit against stored seminr reference CSVs.

    Checks the outer model (with communality/redundancy dropped) and the
    transposed path coefficients.
    """
    data = pd.read_csv("file:tests/data/mobi.csv", index_col=0)

    model = c.Structure()
    model.add_path(["Expectation", "Quality"], ["Loyalty"])
    model.add_path(["Image"], ["Expectation"])
    model.add_path(["Complaints"], ["Loyalty"])

    cfg = c.Config(model.path(), default_scale=Scale.NUM)
    # (latent variable, mode, column-name argument), in registration order.
    blocks = (
        ("Expectation", Mode.A, "CUEX"),
        ("Quality", Mode.B, "PERQ"),
        ("Loyalty", Mode.A, "CUSL"),
        ("Image", Mode.A, "IMAG"),
        ("Complaints", Mode.A, "CUSCO"),
    )
    for lv_name, mode, col_name in blocks:
        cfg.add_lv_with_columns_named(lv_name, mode, data, col_name)
    fitted = Plspm(data, cfg, Scheme.PATH, 100, 0.00000001)

    # Outer model
    want_outer = pd.read_csv(
        "file:tests/data/seminr-mobi-basic-outer-model.csv", index_col=0)
    got_outer = fitted.outer_model().drop(["communality", "redundancy"],
                                          axis=1)
    npt.assert_allclose(want_outer.sort_index(),
                        got_outer.sort_index(),
                        rtol=1e-5)

    # Path coefficients
    want_paths = pd.read_csv("file:tests/data/seminr-mobi-basic-paths.csv",
                             index_col=0)
    got_paths = fitted.path_coefficients().transpose()
    npt.assert_allclose(want_paths.sort_index().sort_index(axis=1),
                        got_paths.sort_index().sort_index(axis=1),
                        rtol=1e-6)
コード例 #3
0
def test_cannot_add_mvs_twice():
    """Expect ValueError when MVs "a" and "b" are given to a second LV."""
    paths = c.Structure()
    paths.add_path(source=["BONOBO"], target=["APE"])
    cfg = c.Config(paths.path())
    first_block = [c.MV("a"), c.MV("b")]
    cfg.add_lv("BONOBO", Mode.A, *first_block)
    with pytest.raises(ValueError):
        # Same MV names again, this time attached to "APE".
        cfg.add_lv("APE", Mode.A, *[c.MV(name) for name in ("a", "b")])
コード例 #4
0
def test_data_should_only_contain_numerical_values():
    """Expect ``Config.filter`` to raise ValueError for a string column.

    The 'gini' column is cast to ``str`` before the data is filtered.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    data['gini'] = data['gini'].astype(str)
    cfg = c.Config(config_test_path_matrix())
    agri_mvs = [c.MV(name) for name in ("gini", "farm", "rent")]
    cfg.add_lv("AGRI", Mode.A, *agri_mvs)
    with pytest.raises(ValueError):
        cfg.filter(data)
コード例 #5
0
def test_config_filters_mvs():
    """Check that ``Config.filter`` yields exactly the configured MV columns.

    POLINS and IND are registered without manifest variables, so only the
    three AGRI columns are expected, in registration order.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("POLINS", Mode.A)
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "rent")])
    cfg.add_lv("IND", Mode.A)
    kept_columns = list(cfg.filter(data))
    npt.assert_array_equal(kept_columns, ["gini", "farm", "rent"])
コード例 #6
0
def test_scaling_should_be_false_if_all_raw():
    """Check ``scaled()`` is falsy when every MV uses the RAW default scale."""
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(), default_scale=Scale.RAW)
    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "rent")])
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])
    filtered = cfg.filter(data)
    cfg.treat(filtered)
    assert not cfg.scaled()
コード例 #7
0
def test_all_mvs_should_have_a_scale_if_data_is_nonmetric():
    """Expect TypeError when only one MV carries an explicit scale.

    No default scale is passed to ``Config`` and only 'gini' is given
    ``Scale.NUM``; the filter/treat step is expected to reject this.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])
    cfg.add_lv("AGRI", Mode.A, c.MV("gini", Scale.NUM), c.MV("farm"),
               c.MV("rent"))
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])
    with pytest.raises(TypeError):
        filtered = cfg.filter(data)
        cfg.treat(filtered)
コード例 #8
0
def test_config_rejects_bad_path_matrix():
    """Check that ``Config`` rejects several kinds of invalid path matrix."""
    # A non-matrix argument is a type error.
    with pytest.raises(TypeError):
        c.Config("hello")

    # A 1x3 matrix is not square.
    not_square = pd.DataFrame([[0, 0, 0]])
    with pytest.raises(ValueError):
        c.Config(not_square)

    # A fully populated matrix is not lower triangular.
    not_lower_triangular = pd.DataFrame([[1, 1], [1, 1]])
    with pytest.raises(ValueError):
        c.Config(not_lower_triangular)

    # Entries other than 0 and 1 are not allowed.
    bad_entry = pd.DataFrame([[1, 0], [2, 1]])
    with pytest.raises(ValueError):
        c.Config(bad_entry)

    # Row labels and column labels must agree.
    mismatched_labels = pd.DataFrame([[1, 0], [1, 1]],
                                     index=["A", "B"],
                                     columns=["C", "D"])
    with pytest.raises(ValueError):
        c.Config(mismatched_labels)
コード例 #9
0
def test_scaling_should_be_true_and_all_scales_set_to_num_if_only_raw_and_num_supplied(
):
    """Check RAW scales become NUM once a single MV is declared NUM.

    The default scale is RAW and only 'ecks' is given ``Scale.NUM``.  After
    filter/treat the config must report ``scaled()`` and the three AGRI MVs
    must report ``Scale.NUM``.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(), default_scale=Scale.RAW)
    cfg.add_lv("POLINS", Mode.A, c.MV("ecks", Scale.NUM), c.MV("death"),
               c.MV("demo"), c.MV("inst"))
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "rent")])
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])
    filtered = cfg.filter(data)
    cfg.treat(filtered)
    assert cfg.scaled()
    for mv_name in ("gini", "farm", "rent"):
        assert cfg.scale(mv_name) == Scale.NUM
コード例 #10
0
def test_only_single_item_constructs():
    """Expect ``goodness_of_fit`` to raise ValueError for one-MV constructs.

    Every latent variable is registered with exactly one manifest variable.
    """
    data = pd.read_csv("file:tests/data/satisfaction.csv", index_col=0)
    cfg = c.Config(satisfaction_path_matrix())
    # (latent variable, its single manifest variable), in registration order.
    single_items = (
        ("QUAL", "qual1"),
        ("VAL", "val1"),
        ("SAT", "sat1"),
        ("LOY", "loy1"),
        ("IMAG", "imag1"),
        ("EXPE", "expe1"),
    )
    for lv_name, mv_name in single_items:
        cfg.add_lv(lv_name, Mode.A, c.MV(mv_name))

    fitted = Plspm(data, cfg, Scheme.CENTROID)
    with pytest.raises(ValueError):
        fitted.goodness_of_fit()
        
コード例 #11
0
def test_scales_should_remain_unchanged_if_values_other_than_num_and_raw_supplied(
):
    """Check an ORD scale survives filtering when NUM, ORD and RAW are mixed.

    'gini' is NUM, 'farm' is ORD and everything else falls back to the RAW
    default; the config is created with ``scaled=False``.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(),
                   default_scale=Scale.RAW,
                   scaled=False)
    cfg.add_lv("AGRI", Mode.A, c.MV("gini", Scale.NUM),
               c.MV("farm", Scale.ORD), c.MV("rent"))
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])
    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])
    cfg.filter(data)
    assert not cfg.scaled()
    assert cfg.scale("farm") == Scale.ORD
コード例 #12
0
def test_can_add_hoc_lv_paths_correctly():
    """Check the first-stage path matrix built for a higher-order construct.

    "APE" is declared higher-order over "CHEDDAR" and "GOUDA".  The expected
    matrix routes APE's incoming and outgoing paths through those two and
    drops APE's own row and column.
    """
    base = c.Structure()
    base.add_path(["MANDRILL", "BONOBO"], ["APE"])
    base.add_path(["APE"], ["GOAT"])
    initial_path = base.path()

    cfg = c.Config(initial_path)
    cfg.add_higher_order("APE", Mode.A, ["CHEDDAR", "GOUDA"])
    estimator = Estimator(cfg)

    # Build the expected matrix on top of the initial one.
    rewired = c.Structure(initial_path)
    rewired.add_path(["MANDRILL", "BONOBO"], ["CHEDDAR"])
    rewired.add_path(["MANDRILL", "BONOBO"], ["GOUDA"])
    rewired.add_path(["GOUDA", "CHEDDAR"], ["GOAT"])
    full_matrix = rewired.path()
    want = full_matrix.drop("APE").drop("APE", axis=1)

    got = estimator.hoc_path_first_stage(cfg)
    pt.assert_frame_equal(want, got)
コード例 #13
0
def test_plspm_russa_mode_b():
    """Fit the satisfaction model with every block in Mode B.

    The inner summary is compared with a stored reference: numeric columns
    with ``assert_allclose`` and the "type" column with
    ``assert_series_equal``.
    """
    # NOTE(review): the name says "russa" but this test loads the satisfaction
    # data; if another test with this exact name lives in the same module one
    # of them is shadowed and never collected -- confirm and rename.
    data = pd.read_csv("file:tests/data/satisfaction.csv", index_col=0)
    cfg = c.Config(satisfaction_path_matrix(), scaled=False)
    # (latent variable, column-name argument), in registration order.
    blocks = (
        ("QUAL", "qual"),
        ("VAL", "val"),
        ("SAT", "sat"),
        ("LOY", "loy"),
        ("IMAG", "imag"),
        ("EXPE", "expe"),
    )
    for lv_name, col_name in blocks:
        cfg.add_lv_with_columns_named(lv_name, Mode.B, data, col_name)

    fitted = Plspm(data, cfg, Scheme.CENTROID)
    want_summary = pd.read_csv(
        "file:tests/data/satisfaction.modeb.inner-summary.csv", index_col=0)

    # Numeric columns; r_squared_adj is dropped from the actual result only.
    want_numeric = util.sort_cols(
        want_summary.drop(["type"], axis=1)).sort_index()
    got_numeric = util.sort_cols(fitted.inner_summary().drop(
        ["type", "r_squared_adj"], axis=1)).sort_index()
    npt.assert_allclose(want_numeric, got_numeric)

    # The "type" column is compared exactly.
    want_types = want_summary.loc[:, "type"].sort_index()
    got_types = fitted.inner_summary().loc[:, "type"].sort_index()
    pt.assert_series_equal(want_types, got_types)
コード例 #14
0
def test_plspm_russa_mode_b():
    """Fit the russa model with every block in Mode B.

    The inner summary is compared with a stored reference: numeric columns
    with ``assert_allclose`` and the "type" column with
    ``assert_series_equal``.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(russa_path_matrix(), default_scale=Scale.NUM)
    cfg.add_lv("AGRI", Mode.B,
               *[c.MV(name) for name in ("gini", "farm", "rent")])
    cfg.add_lv("POLINS", Mode.B,
               *[c.MV(name) for name in ("ecks", "demo", "inst", "death")])
    cfg.add_lv("IND", Mode.B, *[c.MV(name) for name in ("gnpr", "labo")])

    fitted = Plspm(data, cfg, Scheme.CENTROID, 100, 0.0000001)
    want_summary = pd.read_csv(
        "file:tests/data/russa.mode_b_inner_summary.csv", index_col=0)

    # Numeric columns; r_squared_adj is dropped from the actual result only.
    want_numeric = util.sort_cols(
        want_summary.drop(["type"], axis=1)).sort_index()
    got_numeric = util.sort_cols(fitted.inner_summary().drop(
        ["type", "r_squared_adj"], axis=1)).sort_index()
    npt.assert_allclose(want_numeric, got_numeric)

    # The "type" column is compared exactly.
    want_types = want_summary.loc[:, "type"].sort_index()
    got_types = fitted.inner_summary().loc[:, "type"].sort_index()
    pt.assert_series_equal(want_types, got_types)
コード例 #15
0
def test_plspm_russa_missing_data():
    """Fit the russa model after blanking three cells and check the results.

    Cells (0, 0), (3, 3) and (5, 5) of the data are set to NaN before the
    fit.  The inner summary is compared with a stored reference, and every
    unidimensionality column other than "mode" and "mvs" is expected to be
    entirely null.
    """
    russa = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    # Use np.nan: the capitalised np.NaN alias was removed in NumPy 2.0 and
    # raises AttributeError there.
    for row, col in ((0, 0), (3, 3), (5, 5)):
        russa.iloc[row, col] = np.nan
    config = c.Config(russa_path_matrix(), default_scale=Scale.NUM)
    config.add_lv("AGRI", Mode.A, c.MV("gini"), c.MV("farm"), c.MV("rent"))
    config.add_lv("IND", Mode.A, c.MV("gnpr"), c.MV("labo"))
    config.add_lv("POLINS", Mode.A, c.MV("ecks"), c.MV("death"), c.MV("demo"),
                  c.MV("inst"))

    plspm_calc = Plspm(russa, config, Scheme.CENTROID, 100, 0.0000001)
    expected_inner_summary = pd.read_csv(
        "file:tests/data/russa.missing.inner_summary.csv", index_col=0)
    # Numeric columns; r_squared_adj is dropped from the actual result only.
    npt.assert_allclose(
        util.sort_cols(expected_inner_summary.drop(["type"],
                                                   axis=1)).sort_index(),
        util.sort_cols(plspm_calc.inner_summary().drop(
            ["type", "r_squared_adj"], axis=1)).sort_index())
    # The "type" column is compared exactly.
    pt.assert_series_equal(
        expected_inner_summary.loc[:, "type"].sort_index(),
        plspm_calc.inner_summary().loc[:, "type"].sort_index())
    # All remaining unidimensionality values must be null.
    assert plspm_calc.unidimensionality().drop(["mode", "mvs"],
                                               axis=1).isnull().values.all()
コード例 #16
0
def test_hoc_two_stage():
    """Compare a mobi fit with a higher-order construct to seminr references.

    "Satisfaction" is declared higher-order over "Image" and "Value".  Outer
    model rows are compared only for index labels present in both the
    reference and the actual result; path coefficients are compared in full.
    """
    data = pd.read_csv("file:tests/data/mobi.csv", index_col=0)

    model = c.Structure()
    model.add_path(["Expectation", "Quality"], ["Satisfaction"])
    model.add_path(["Satisfaction"], ["Complaints", "Loyalty"])

    cfg = c.Config(model.path(), default_scale=Scale.NUM)
    cfg.add_higher_order("Satisfaction", Mode.A, ["Image", "Value"])
    # (latent variable, mode, column-name argument), in registration order.
    blocks = (
        ("Expectation", Mode.A, "CUEX"),
        ("Quality", Mode.B, "PERQ"),
        ("Loyalty", Mode.A, "CUSL"),
        ("Image", Mode.A, "IMAG"),
        ("Complaints", Mode.A, "CUSCO"),
        ("Value", Mode.A, "PERV"),
    )
    for lv_name, mode, col_name in blocks:
        cfg.add_lv_with_columns_named(lv_name, mode, data, col_name)
    fitted = Plspm(data, cfg, Scheme.PATH, 100, 0.00000001)

    # Outer model, restricted to the labels both frames share.
    want_outer = pd.read_csv(
        "file:tests/data/seminr-mobi-hoc-ts-outer-model.csv", index_col=0)
    got_outer = fitted.outer_model().drop(["communality", "redundancy"],
                                          axis=1)
    want_labels = set(want_outer.index.values.tolist())
    got_labels = set(got_outer.index.values.tolist())
    shared = list(want_labels.intersection(got_labels))
    want_outer = want_outer.loc[shared].sort_index().sort_index(axis=1)
    got_outer = got_outer.loc[shared].sort_index().sort_index(axis=1)
    npt.assert_allclose(want_outer, got_outer, rtol=1e-4)

    # Path coefficients; here the reference is the side that is transposed.
    want_paths = pd.read_csv("file:tests/data/seminr-mobi-hoc-ts-paths.csv",
                             index_col=0).transpose()
    got_paths = fitted.path_coefficients()
    npt.assert_allclose(want_paths.sort_index().sort_index(axis=1),
                        got_paths.sort_index().sort_index(axis=1),
                        rtol=1e-6)
コード例 #17
0
def test_plspm_russa():
    """Validate a Mode A russa PLS-PM fit against stored reference CSVs.

    For the centroid scheme this covers scores, the POLINS rows of the inner
    model, the outer model, crossloadings, the inner summary and the
    goodness-of-fit value.  The outer model is then re-checked for the path
    and factorial schemes.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(russa_path_matrix(), default_scale=Scale.NUM)
    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "rent", "farm")])
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])

    centroid = Plspm(data, cfg, Scheme.CENTROID, 100, 0.0000001)

    # Scores
    want_scores = pd.read_csv("file:tests/data/russa.scores.csv",
                              index_col=0)
    npt.assert_allclose(util.sort_cols(want_scores),
                        util.sort_cols(centroid.scores()))

    # Inner model, restricted to rows whose "to" value is POLINS.
    want_inner = pd.read_csv("file:tests/data/russa.inner_model.csv",
                             index_col=0)
    got_inner = centroid.inner_model()
    into_polins = got_inner['to'].isin(["POLINS"])
    got_inner = got_inner[into_polins].drop(["to"], axis=1)
    npt.assert_allclose(
        util.sort_cols(want_inner).sort_index(),
        util.sort_cols(got_inner.set_index(["from"], drop=True)).sort_index())

    # Outer model
    want_outer = pd.read_csv("file:tests/data/russa.outer_model.csv",
                             index_col=0)
    want_outer = want_outer.filter(
        ["weight", "loading", "communality", "redundancy"])
    npt.assert_allclose(
        util.sort_cols(want_outer).sort_index(),
        util.sort_cols(centroid.outer_model()).sort_index())

    # Crossloadings
    want_cross = pd.read_csv("file:tests/data/russa.crossloadings.csv",
                             index_col=0)
    want_cross = want_cross.filter(["AGRI", "IND", "POLINS"])
    npt.assert_allclose(
        util.sort_cols(want_cross).sort_index(),
        util.sort_cols(centroid.crossloadings()).sort_index())

    # Inner summary: numeric columns first, then the "type" column exactly.
    want_summary = pd.read_csv("file:tests/data/russa.inner_summary.csv",
                               index_col=0)
    want_numeric = util.sort_cols(
        want_summary.drop(["type"], axis=1)).sort_index()
    got_numeric = util.sort_cols(centroid.inner_summary().drop(
        ["type", "r_squared_adj"], axis=1)).sort_index()
    npt.assert_allclose(want_numeric, got_numeric)
    want_types = want_summary.loc[:, "type"].sort_index()
    got_types = centroid.inner_summary().loc[:, "type"].sort_index()
    pt.assert_series_equal(want_types, got_types)

    # Goodness of fit
    assert math.isclose(0.643594505232204, centroid.goodness_of_fit())

    # Outer model under the path scheme.
    path_fit = Plspm(data, cfg, Scheme.PATH, 100, 0.0000001)
    want_outer_path = pd.read_csv(
        "file:tests/data/russa.outer_model_path.csv", index_col=0)
    want_outer_path = util.sort_cols(
        want_outer_path.filter(
            ["weight", "loading", "communality", "redundancy"])).sort_index()
    npt.assert_allclose(want_outer_path,
                        util.sort_cols(path_fit.outer_model()).sort_index())

    # Outer model under the factorial scheme.
    factorial_fit = Plspm(data, cfg, Scheme.FACTORIAL, 100, 0.0000001)
    want_outer_factorial = pd.read_csv(
        "file:tests/data/russa.outer_model_factorial.csv", index_col=0)
    want_outer_factorial = util.sort_cols(
        want_outer_factorial.filter(
            ["weight", "loading", "communality", "redundancy"])).sort_index()
    npt.assert_allclose(
        want_outer_factorial,
        util.sort_cols(factorial_fit.outer_model()).sort_index())
コード例 #18
0
def test_plspm_satisfaction():
    """Validate a Mode A satisfaction PLS-PM fit against reference CSVs.

    With the default scheme this covers scores, the SAT rows of the inner
    model, the outer model, crossloadings, the inner summary, effects,
    unidimensionality and the goodness-of-fit value.  The outer model is
    then re-checked for the path and factorial schemes.
    """
    data = pd.read_csv("file:tests/data/satisfaction.csv", index_col=0)
    cfg = c.Config(satisfaction_path_matrix(), scaled=False)
    # (latent variable, column-name argument), in registration order.
    blocks = (
        ("IMAG", "imag"),
        ("EXPE", "expe"),
        ("VAL", "val"),
        ("QUAL", "qual"),
        ("SAT", "sat"),
        ("LOY", "loy"),
    )
    for lv_name, col_name in blocks:
        cfg.add_lv_with_columns_named(lv_name, Mode.A, data, col_name)

    fitted = Plspm(data, cfg)

    # Scores (this reference file is read without an index column).
    want_scores = pd.read_csv("file:tests/data/satisfaction.scores.csv")
    npt.assert_allclose(util.sort_cols(want_scores),
                        util.sort_cols(fitted.scores()))

    # Inner model, restricted to rows whose "to" value is SAT.
    want_inner = pd.read_csv("file:tests/data/satisfaction.inner-model.csv",
                             index_col=0)
    got_inner = fitted.inner_model()
    into_sat = got_inner['to'].isin(["SAT"])
    got_inner = got_inner[into_sat].drop(["to"], axis=1)
    npt.assert_allclose(
        util.sort_cols(want_inner).sort_index(),
        util.sort_cols(got_inner.set_index(["from"], drop=True)).sort_index())

    # Outer model: column labels must match before values are compared.
    want_outer = pd.read_csv("file:tests/data/satisfaction.outer-model.csv",
                             index_col=0).drop(["block"], axis=1)
    pt.assert_index_equal(want_outer.columns, fitted.outer_model().columns)
    npt.assert_allclose(
        util.sort_cols(want_outer.sort_index()),
        util.sort_cols(fitted.outer_model()).sort_index())

    # Crossloadings
    want_cross = pd.read_csv(
        "file:tests/data/satisfaction.crossloadings.csv", index_col=0)
    npt.assert_allclose(
        util.sort_cols(want_cross.drop(["block"], axis=1)).sort_index(),
        util.sort_cols(fitted.crossloadings()).sort_index())

    # Inner summary: numeric columns first, then the "type" column exactly.
    want_summary = pd.read_csv(
        "file:tests/data/satisfaction.inner-summary.csv", index_col=0)
    want_numeric = util.sort_cols(
        want_summary.drop(["type"], axis=1)).sort_index()
    got_numeric = util.sort_cols(fitted.inner_summary().drop(
        ["type", "r_squared_adj"], axis=1)).sort_index()
    npt.assert_allclose(want_numeric, got_numeric)
    want_types = want_summary.loc[:, "type"].sort_index()
    got_types = fitted.inner_summary().loc[:, "type"].sort_index()
    pt.assert_series_equal(want_types, got_types)

    # Effects: the from/to labels exactly, the remaining columns numerically.
    want_effects = pd.read_csv("file:tests/data/satisfaction.effects.csv",
                               index_col=0)
    want_endpoints = want_effects.loc[:, ["from", "to"]].sort_index()
    got_endpoints = fitted.effects().loc[:, ["from", "to"]].sort_index()
    pt.assert_frame_equal(want_endpoints, got_endpoints)
    want_effect_values = want_effects.drop(["from", "to"],
                                           axis=1).sort_index()
    got_effect_values = fitted.effects().drop(["from", "to"],
                                              axis=1).sort_index()
    npt.assert_allclose(want_effect_values, got_effect_values)

    # Unidimensionality, ignoring the "mode" column.
    want_unidim = pd.read_csv("file:tests/data/satisfaction_unidim.csv",
                              index_col=0)
    npt.assert_allclose(
        util.sort_cols(want_unidim.drop(["mode"], axis=1)).sort_index(),
        util.sort_cols(fitted.unidimensionality().drop(
            ["mode"], axis=1)).sort_index())

    # Goodness of fit
    assert math.isclose(0.609741624338411, fitted.goodness_of_fit())

    # Outer model under the path scheme.
    path_fit = Plspm(data, cfg, Scheme.PATH)
    want_outer_path = pd.read_csv(
        "file:tests/data/satisfaction.outer-model-path.csv", index_col=0)
    want_outer_path = util.sort_cols(
        want_outer_path.drop(["block"], axis=1)).sort_index()
    npt.assert_allclose(want_outer_path,
                        util.sort_cols(path_fit.outer_model()).sort_index())

    # Outer model under the factorial scheme.
    factorial_fit = Plspm(data, cfg, Scheme.FACTORIAL)
    want_outer_factorial = pd.read_csv(
        "file:tests/data/satisfaction.outer-model-factorial.csv",
        index_col=0)
    want_outer_factorial = util.sort_cols(
        want_outer_factorial.drop(["block"], axis=1)).sort_index()
    npt.assert_allclose(
        want_outer_factorial,
        util.sort_cols(factorial_fit.outer_model()).sort_index())
コード例 #19
0
def test_config_rejects_adding_lv_not_present_in_path():
    """Expect ``add_lv`` to raise ValueError for the LV name "POO".

    Per the test name, "POO" is not part of the test path matrix.
    """
    cfg = c.Config(config_test_path_matrix())
    with pytest.raises(ValueError):
        unknown_lv = "POO"
        cfg.add_lv(unknown_lv, Mode.A, c.MV("test"))
コード例 #20
0
def test_config_returns_correct_mode_and_mvs():
    """Check ``mode`` and ``mvs`` echo back what ``add_lv`` registered."""
    mv_names = ["gini", "farm", "rent"]
    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("AGRI", Mode.A, *[c.MV(name) for name in mv_names])
    assert cfg.mode("AGRI") == Mode.A
    npt.assert_array_equal(cfg.mvs("AGRI"), ["gini", "farm", "rent"])
コード例 #21
0
def test_config_rejects_missing_mvs():
    """Expect ``Config.filter`` to raise ValueError for an unknown MV.

    The MV "poo" is presumably absent from the russa data -- verify against
    the CSV fixture.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix())
    agri_mvs = [c.MV(name) for name in ("gini", "farm", "poo")]
    cfg.add_lv("AGRI", Mode.A, *agri_mvs)
    with pytest.raises(ValueError):
        cfg.filter(data)