def test_config_rejects_path_and_lv_config_not_matching():
    """``Config.filter`` raises ``ValueError`` when LVs and path matrix disagree.

    Only AGRI and IND are registered before filtering. Presumably the
    matrix returned by ``config_test_path_matrix`` defines further latent
    variables -- verify against that helper.
    """
    cfg = c.Config(config_test_path_matrix())
    for lv_name, mv_name in (("AGRI", "gini"), ("IND", "gnpr")):
        cfg.add_lv(lv_name, Mode.A, c.MV(mv_name))

    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    with pytest.raises(ValueError):
        cfg.filter(data)
def test_paths():
    """Check a basic mobi model against the stored seminr reference CSVs.

    The outer model (with the ``communality`` and ``redundancy`` columns
    dropped) and the transposed path coefficients are compared to the
    fixtures after sorting rows (and, for paths, columns).
    """
    data = pd.read_csv("file:tests/data/mobi.csv", index_col=0)

    paths = c.Structure()
    paths.add_path(["Expectation", "Quality"], ["Loyalty"])
    paths.add_path(["Image"], ["Expectation"])
    paths.add_path(["Complaints"], ["Loyalty"])

    cfg = c.Config(paths.path(), default_scale=Scale.NUM)
    # (latent variable, mode, column-name argument), in registration order.
    lv_specs = (
        ("Expectation", Mode.A, "CUEX"),
        ("Quality", Mode.B, "PERQ"),
        ("Loyalty", Mode.A, "CUSL"),
        ("Image", Mode.A, "IMAG"),
        ("Complaints", Mode.A, "CUSCO"),
    )
    for lv_name, mode, col_name in lv_specs:
        cfg.add_lv_with_columns_named(lv_name, mode, data, col_name)

    model = Plspm(data, cfg, Scheme.PATH, 100, 0.00000001)

    outer_reference = pd.read_csv(
        "file:tests/data/seminr-mobi-basic-outer-model.csv", index_col=0)
    outer_actual = model.outer_model().drop(
        ["communality", "redundancy"], axis=1)
    npt.assert_allclose(outer_reference.sort_index(),
                        outer_actual.sort_index(),
                        rtol=1e-5)

    paths_reference = pd.read_csv(
        "file:tests/data/seminr-mobi-basic-paths.csv", index_col=0)
    paths_actual = model.path_coefficients().transpose()
    npt.assert_allclose(paths_reference.sort_index().sort_index(axis=1),
                        paths_actual.sort_index().sort_index(axis=1),
                        rtol=1e-6)
def test_cannot_add_mvs_twice():
    """Registering MVs "a" and "b" under a second LV raises ``ValueError``."""
    shared_mv_names = ("a", "b")

    paths = c.Structure()
    paths.add_path(source=["BONOBO"], target=["APE"])
    cfg = c.Config(paths.path())

    cfg.add_lv("BONOBO", Mode.A, *[c.MV(name) for name in shared_mv_names])
    with pytest.raises(ValueError):
        # Same MV names again, this time under a different latent variable.
        cfg.add_lv("APE", Mode.A, *[c.MV(name) for name in shared_mv_names])
def test_data_should_only_contain_numerical_values():
    """``Config.filter`` raises ``ValueError`` when an MV column holds strings."""
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    # Turn one of the AGRI indicator columns into strings.
    data["gini"] = data["gini"].astype(str)

    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "rent")])

    with pytest.raises(ValueError):
        cfg.filter(data)
def test_config_filters_mvs():
    """``Config.filter`` output iterates as exactly the configured MV names.

    POLINS and IND are registered without MVs, so only the three AGRI
    indicators are expected, in the order they were added.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    agri_mvs = ["gini", "farm", "rent"]

    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("POLINS", Mode.A)
    cfg.add_lv("AGRI", Mode.A, *[c.MV(name) for name in agri_mvs])
    cfg.add_lv("IND", Mode.A)

    filtered = cfg.filter(data)
    npt.assert_array_equal(list(filtered), ["gini", "farm", "rent"])
def test_scaling_should_be_false_if_all_raw():
    """``scaled()`` is falsy after ``treat`` when every MV uses ``Scale.RAW``.

    No MV carries an explicit scale; all of them fall back to the
    ``default_scale=Scale.RAW`` passed to the config.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(), default_scale=Scale.RAW)

    blocks = (
        ("POLINS", ("ecks", "death", "demo", "inst")),
        ("AGRI", ("gini", "farm", "rent")),
        ("IND", ("gnpr", "labo")),
    )
    for lv_name, mv_names in blocks:
        cfg.add_lv(lv_name, Mode.A, *[c.MV(mv) for mv in mv_names])

    filtered = cfg.filter(data)
    cfg.treat(filtered)
    assert not cfg.scaled()
def test_all_mvs_should_have_a_scale_if_data_is_nonmetric():
    """``treat(filter(...))`` raises ``TypeError`` when only some MVs are scaled.

    The config is created without a default scale and only ``gini`` is
    given an explicit one.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix())

    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])
    # gini is the only MV with an explicit scale.
    cfg.add_lv("AGRI", Mode.A,
               c.MV("gini", Scale.NUM), c.MV("farm"), c.MV("rent"))
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])

    with pytest.raises(TypeError):
        filtered = cfg.filter(data)
        cfg.treat(filtered)
def test_config_rejects_bad_path_matrix():
    """``c.Config`` rejects malformed path-matrix arguments.

    Each case pairs the expected exception type with a factory for the
    bad argument; the argument is built inside the ``pytest.raises``
    block, immediately before ``c.Config`` is called.
    """
    bad_inputs = (
        # Takes a matrix
        (TypeError, lambda: "hello"),
        # Matrix should be square
        (ValueError, lambda: pd.DataFrame([[0, 0, 0]])),
        # Matrix should be lower triangular
        (ValueError, lambda: pd.DataFrame([[1, 1], [1, 1]])),
        # Only 1 and 0 allowed in matrix
        (ValueError, lambda: pd.DataFrame([[1, 0], [2, 1]])),
        # Indices and columns should have the same names
        (ValueError, lambda: pd.DataFrame([[1, 0], [1, 1]],
                                          index=["A", "B"],
                                          columns=["C", "D"])),
    )
    for expected_error, make_argument in bad_inputs:
        with pytest.raises(expected_error):
            c.Config(make_argument())
def test_scaling_should_be_true_and_all_scales_set_to_num_if_only_raw_and_num_supplied():
    """A RAW/NUM mix yields ``scaled()`` truthy and ``Scale.NUM`` for AGRI MVs.

    The default scale is RAW and only ``ecks`` is explicitly NUM. After
    ``treat``, the AGRI indicators (registered without a scale) must
    report ``Scale.NUM``.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(), default_scale=Scale.RAW)

    cfg.add_lv("POLINS", Mode.A,
               c.MV("ecks", Scale.NUM),
               c.MV("death"), c.MV("demo"), c.MV("inst"))
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "rent")])
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])

    filtered = cfg.filter(data)
    cfg.treat(filtered)

    assert cfg.scaled()
    for mv_name in ("gini", "farm", "rent"):
        assert cfg.scale(mv_name) == Scale.NUM
def test_only_single_item_constructs():
    """``goodness_of_fit`` raises ``ValueError`` when every LV has one MV."""
    data = pd.read_csv("file:tests/data/satisfaction.csv", index_col=0)
    cfg = c.Config(satisfaction_path_matrix())

    # One indicator per latent variable, in registration order.
    single_items = (
        ("QUAL", "qual1"),
        ("VAL", "val1"),
        ("SAT", "sat1"),
        ("LOY", "loy1"),
        ("IMAG", "imag1"),
        ("EXPE", "expe1"),
    )
    for lv_name, mv_name in single_items:
        cfg.add_lv(lv_name, Mode.A, c.MV(mv_name))

    model = Plspm(data, cfg, Scheme.CENTROID)
    with pytest.raises(ValueError):
        model.goodness_of_fit()
def test_scales_should_remain_unchanged_if_values_other_than_num_and_raw_supplied():
    """An ORD scale survives ``filter`` and ``scaled()`` stays falsy.

    The config is built with ``default_scale=Scale.RAW`` and
    ``scaled=False``; ``gini`` is NUM and ``farm`` is ORD. Only
    ``filter`` is invoked here (``treat`` is not called).
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(config_test_path_matrix(),
                   default_scale=Scale.RAW,
                   scaled=False)

    cfg.add_lv("AGRI", Mode.A,
               c.MV("gini", Scale.NUM),
               c.MV("farm", Scale.ORD),
               c.MV("rent"))
    cfg.add_lv("IND", Mode.A, *[c.MV(name) for name in ("gnpr", "labo")])
    cfg.add_lv("POLINS", Mode.A,
               *[c.MV(name) for name in ("ecks", "death", "demo", "inst")])

    cfg.filter(data)

    assert not cfg.scaled()
    assert cfg.scale("farm") == Scale.ORD
def test_can_add_hoc_lv_paths_correctly():
    """``hoc_path_first_stage`` rewires paths through the lower-order LVs.

    APE is declared as a higher-order construct made of CHEDDAR and
    GOUDA. The expected matrix is built by hand: start from the initial
    path, add the paths that go through CHEDDAR and GOUDA, then remove
    the APE row and column.
    """
    base = c.Structure()
    base.add_path(["MANDRILL", "BONOBO"], ["APE"])
    base.add_path(["APE"], ["GOAT"])
    initial_path = base.path()

    cfg = c.Config(initial_path)
    cfg.add_higher_order("APE", Mode.A, ["CHEDDAR", "GOUDA"])
    estimator = Estimator(cfg)

    rewired = c.Structure(initial_path)
    rewired.add_path(["MANDRILL", "BONOBO"], ["CHEDDAR"])
    rewired.add_path(["MANDRILL", "BONOBO"], ["GOUDA"])
    rewired.add_path(["GOUDA", "CHEDDAR"], ["GOAT"])
    without_ape_row = rewired.path().drop("APE")
    expected = without_ape_row.drop("APE", axis=1)

    actual = estimator.hoc_path_first_stage(cfg)
    pt.assert_frame_equal(expected, actual)
def test_plspm_satisfaction_mode_b():
    """Mode B inner summary for the satisfaction data matches the fixture.

    Renamed from ``test_plspm_russa_mode_b``: the test uses the
    satisfaction data set, and a later test in this module defines the
    same name, which would rebind it and stop this test from being
    collected.

    The numeric inner-summary columns (without ``type``, and without
    ``r_squared_adj`` on the computed side) are compared with
    ``assert_allclose``; the ``type`` column is compared exactly.
    """
    satisfaction = pd.read_csv("file:tests/data/satisfaction.csv",
                               index_col=0)
    config = c.Config(satisfaction_path_matrix(), scaled=False)
    config.add_lv_with_columns_named("QUAL", Mode.B, satisfaction, "qual")
    config.add_lv_with_columns_named("VAL", Mode.B, satisfaction, "val")
    config.add_lv_with_columns_named("SAT", Mode.B, satisfaction, "sat")
    config.add_lv_with_columns_named("LOY", Mode.B, satisfaction, "loy")
    config.add_lv_with_columns_named("IMAG", Mode.B, satisfaction, "imag")
    config.add_lv_with_columns_named("EXPE", Mode.B, satisfaction, "expe")

    plspm_calc = Plspm(satisfaction, config, Scheme.CENTROID)

    expected_inner_summary = pd.read_csv(
        "file:tests/data/satisfaction.modeb.inner-summary.csv", index_col=0)
    npt.assert_allclose(
        util.sort_cols(
            expected_inner_summary.drop(["type"], axis=1)).sort_index(),
        util.sort_cols(plspm_calc.inner_summary().drop(
            ["type", "r_squared_adj"], axis=1)).sort_index())
    pt.assert_series_equal(
        expected_inner_summary.loc[:, "type"].sort_index(),
        plspm_calc.inner_summary().loc[:, "type"].sort_index())
def test_plspm_russa_mode_b():
    """Mode B inner summary for the russa data matches the stored fixture.

    Numeric columns are compared with ``assert_allclose`` after dropping
    ``type`` (and ``r_squared_adj`` on the computed side); the ``type``
    column is compared exactly.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(russa_path_matrix(), default_scale=Scale.NUM)

    blocks = (
        ("AGRI", ("gini", "farm", "rent")),
        ("POLINS", ("ecks", "demo", "inst", "death")),
        ("IND", ("gnpr", "labo")),
    )
    for lv_name, mv_names in blocks:
        cfg.add_lv(lv_name, Mode.B, *[c.MV(mv) for mv in mv_names])

    model = Plspm(data, cfg, Scheme.CENTROID, 100, 0.0000001)

    reference = pd.read_csv(
        "file:tests/data/russa.mode_b_inner_summary.csv", index_col=0)

    reference_numeric = util.sort_cols(
        reference.drop(["type"], axis=1)).sort_index()
    actual_numeric = util.sort_cols(
        model.inner_summary().drop(["type", "r_squared_adj"],
                                   axis=1)).sort_index()
    npt.assert_allclose(reference_numeric, actual_numeric)

    reference_types = reference.loc[:, "type"].sort_index()
    actual_types = model.inner_summary().loc[:, "type"].sort_index()
    pt.assert_series_equal(reference_types, actual_types)
def test_plspm_russa_missing_data():
    """The russa model with three missing cells matches the stored fixture.

    Three cells on the diagonal positions (0, 0), (3, 3) and (5, 5) are
    set to NaN before fitting. The inner summary is compared with the
    ``russa.missing`` fixture, and every unidimensionality column other
    than ``mode`` and ``mvs`` is expected to be null.
    """
    russa = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    # np.nan rather than np.NaN: the capitalised alias was removed in
    # NumPy 2.0, while the lowercase spelling works on every version.
    russa.iloc[0, 0] = np.nan
    russa.iloc[3, 3] = np.nan
    russa.iloc[5, 5] = np.nan

    config = c.Config(russa_path_matrix(), default_scale=Scale.NUM)
    config.add_lv("AGRI", Mode.A, c.MV("gini"), c.MV("farm"), c.MV("rent"))
    config.add_lv("IND", Mode.A, c.MV("gnpr"), c.MV("labo"))
    config.add_lv("POLINS", Mode.A, c.MV("ecks"), c.MV("death"),
                  c.MV("demo"), c.MV("inst"))

    plspm_calc = Plspm(russa, config, Scheme.CENTROID, 100, 0.0000001)

    expected_inner_summary = pd.read_csv(
        "file:tests/data/russa.missing.inner_summary.csv", index_col=0)
    npt.assert_allclose(
        util.sort_cols(
            expected_inner_summary.drop(["type"], axis=1)).sort_index(),
        util.sort_cols(plspm_calc.inner_summary().drop(
            ["type", "r_squared_adj"], axis=1)).sort_index())
    pt.assert_series_equal(
        expected_inner_summary.loc[:, "type"].sort_index(),
        plspm_calc.inner_summary().loc[:, "type"].sort_index())

    assert plspm_calc.unidimensionality().drop(
        ["mode", "mvs"], axis=1).isnull().values.all()
def test_hoc_two_stage():
    """Check a higher-order mobi model against the seminr reference CSVs.

    Satisfaction is declared as a higher-order construct of Image and
    Value. Outer-model rows are compared only for index labels present
    in both the fixture and the computed frame.
    """
    data = pd.read_csv("file:tests/data/mobi.csv", index_col=0)

    paths = c.Structure()
    paths.add_path(["Expectation", "Quality"], ["Satisfaction"])
    paths.add_path(["Satisfaction"], ["Complaints", "Loyalty"])

    cfg = c.Config(paths.path(), default_scale=Scale.NUM)
    cfg.add_higher_order("Satisfaction", Mode.A, ["Image", "Value"])
    # (latent variable, mode, column-name argument), in registration order.
    lv_specs = (
        ("Expectation", Mode.A, "CUEX"),
        ("Quality", Mode.B, "PERQ"),
        ("Loyalty", Mode.A, "CUSL"),
        ("Image", Mode.A, "IMAG"),
        ("Complaints", Mode.A, "CUSCO"),
        ("Value", Mode.A, "PERV"),
    )
    for lv_name, mode, col_name in lv_specs:
        cfg.add_lv_with_columns_named(lv_name, mode, data, col_name)

    model = Plspm(data, cfg, Scheme.PATH, 100, 0.00000001)

    outer_reference = pd.read_csv(
        "file:tests/data/seminr-mobi-hoc-ts-outer-model.csv", index_col=0)
    outer_actual = model.outer_model().drop(
        ["communality", "redundancy"], axis=1)

    # Restrict both frames to their shared rows; set order is irrelevant
    # because each frame is sorted right after selection.
    reference_labels = set(outer_reference.index.values.tolist())
    actual_labels = set(outer_actual.index.values.tolist())
    shared = list(reference_labels & actual_labels)

    outer_reference = outer_reference.loc[shared].sort_index().sort_index(
        axis=1)
    outer_actual = outer_actual.loc[shared].sort_index().sort_index(axis=1)
    npt.assert_allclose(outer_reference, outer_actual, rtol=1e-4)

    paths_reference = pd.read_csv(
        "file:tests/data/seminr-mobi-hoc-ts-paths.csv",
        index_col=0).transpose()
    paths_actual = model.path_coefficients()
    npt.assert_allclose(paths_reference.sort_index().sort_index(axis=1),
                        paths_actual.sort_index().sort_index(axis=1),
                        rtol=1e-6)
def test_plspm_russa():
    """Compare the russa model against stored fixtures for all three schemes.

    With the centroid scheme the scores, the inner model for POLINS,
    the outer model, crossloadings, inner summary and goodness of fit
    are checked. With the path and factorial schemes only the outer
    model is checked.
    """

    def _canonical(frame):
        # Column order via util.sort_cols, then row order via the index.
        return util.sort_cols(frame).sort_index()

    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)
    cfg = c.Config(russa_path_matrix(), default_scale=Scale.NUM)
    blocks = (
        ("POLINS", ("ecks", "death", "demo", "inst")),
        ("AGRI", ("gini", "rent", "farm")),
        ("IND", ("gnpr", "labo")),
    )
    for lv_name, mv_names in blocks:
        cfg.add_lv(lv_name, Mode.A, *[c.MV(mv) for mv in mv_names])

    outer_columns = ["weight", "loading", "communality", "redundancy"]
    centroid = Plspm(data, cfg, Scheme.CENTROID, 100, 0.0000001)

    # Scores
    scores_reference = pd.read_csv("file:tests/data/russa.scores.csv",
                                   index_col=0)
    npt.assert_allclose(util.sort_cols(scores_reference),
                        util.sort_cols(centroid.scores()))

    # Inner model, restricted to rows whose "to" is POLINS
    inner_reference = pd.read_csv("file:tests/data/russa.inner_model.csv",
                                  index_col=0)
    inner_all = centroid.inner_model()
    to_polins = inner_all["to"].isin(["POLINS"])
    inner_actual = inner_all[to_polins].drop(["to"], axis=1)
    npt.assert_allclose(
        _canonical(inner_reference),
        _canonical(inner_actual.set_index(["from"], drop=True)))

    # Outer model
    outer_reference = pd.read_csv("file:tests/data/russa.outer_model.csv",
                                  index_col=0)
    npt.assert_allclose(_canonical(outer_reference.filter(outer_columns)),
                        _canonical(centroid.outer_model()))

    # Crossloadings
    cross_reference = pd.read_csv("file:tests/data/russa.crossloadings.csv",
                                  index_col=0)
    npt.assert_allclose(
        _canonical(cross_reference.filter(["AGRI", "IND", "POLINS"])),
        _canonical(centroid.crossloadings()))

    # Inner summary: numeric columns approximately, "type" exactly
    summary_reference = pd.read_csv(
        "file:tests/data/russa.inner_summary.csv", index_col=0)
    npt.assert_allclose(
        _canonical(summary_reference.drop(["type"], axis=1)),
        _canonical(centroid.inner_summary().drop(
            ["type", "r_squared_adj"], axis=1)))
    pt.assert_series_equal(
        summary_reference.loc[:, "type"].sort_index(),
        centroid.inner_summary().loc[:, "type"].sort_index())

    assert math.isclose(0.643594505232204, centroid.goodness_of_fit())

    # Remaining schemes: outer model only
    other_schemes = (
        (Scheme.PATH, "file:tests/data/russa.outer_model_path.csv"),
        (Scheme.FACTORIAL, "file:tests/data/russa.outer_model_factorial.csv"),
    )
    for scheme, fixture in other_schemes:
        scheme_model = Plspm(data, cfg, scheme, 100, 0.0000001)
        scheme_reference = _canonical(
            pd.read_csv(fixture, index_col=0).filter(outer_columns))
        npt.assert_allclose(scheme_reference,
                            _canonical(scheme_model.outer_model()))
def test_plspm_satisfaction():
    """Compare the satisfaction model against stored fixtures.

    With the default ``Plspm`` arguments the scores, the inner model for
    SAT, the outer model, crossloadings, inner summary, effects,
    unidimensionality and goodness of fit are checked. With the path
    and factorial schemes only the outer model is checked.
    """

    def _canonical(frame):
        # Column order via util.sort_cols, then row order via the index.
        return util.sort_cols(frame).sort_index()

    data = pd.read_csv("file:tests/data/satisfaction.csv", index_col=0)
    cfg = c.Config(satisfaction_path_matrix(), scaled=False)
    lv_specs = (
        ("IMAG", "imag"),
        ("EXPE", "expe"),
        ("VAL", "val"),
        ("QUAL", "qual"),
        ("SAT", "sat"),
        ("LOY", "loy"),
    )
    for lv_name, col_name in lv_specs:
        cfg.add_lv_with_columns_named(lv_name, Mode.A, data, col_name)

    default_model = Plspm(data, cfg)

    # Scores (this fixture is read without index_col)
    scores_reference = pd.read_csv("file:tests/data/satisfaction.scores.csv")
    npt.assert_allclose(util.sort_cols(scores_reference),
                        util.sort_cols(default_model.scores()))

    # Inner model, restricted to rows whose "to" is SAT
    inner_reference = pd.read_csv(
        "file:tests/data/satisfaction.inner-model.csv", index_col=0)
    inner_all = default_model.inner_model()
    to_sat = inner_all["to"].isin(["SAT"])
    inner_actual = inner_all[to_sat].drop(["to"], axis=1)
    npt.assert_allclose(
        _canonical(inner_reference),
        _canonical(inner_actual.set_index(["from"], drop=True)))

    # Outer model: column labels exactly, values approximately
    outer_reference = pd.read_csv(
        "file:tests/data/satisfaction.outer-model.csv",
        index_col=0).drop(["block"], axis=1)
    pt.assert_index_equal(outer_reference.columns,
                          default_model.outer_model().columns)
    npt.assert_allclose(util.sort_cols(outer_reference.sort_index()),
                        _canonical(default_model.outer_model()))

    # Crossloadings
    cross_reference = pd.read_csv(
        "file:tests/data/satisfaction.crossloadings.csv", index_col=0)
    npt.assert_allclose(
        _canonical(cross_reference.drop(["block"], axis=1)),
        _canonical(default_model.crossloadings()))

    # Inner summary: numeric columns approximately, "type" exactly
    summary_reference = pd.read_csv(
        "file:tests/data/satisfaction.inner-summary.csv", index_col=0)
    npt.assert_allclose(
        _canonical(summary_reference.drop(["type"], axis=1)),
        _canonical(default_model.inner_summary().drop(
            ["type", "r_squared_adj"], axis=1)))
    pt.assert_series_equal(
        summary_reference.loc[:, "type"].sort_index(),
        default_model.inner_summary().loc[:, "type"].sort_index())

    # Effects: endpoint labels exactly, remaining columns approximately
    effects_reference = pd.read_csv(
        "file:tests/data/satisfaction.effects.csv", index_col=0)
    pt.assert_frame_equal(
        effects_reference.loc[:, ["from", "to"]].sort_index(),
        default_model.effects().loc[:, ["from", "to"]].sort_index())
    npt.assert_allclose(
        effects_reference.drop(["from", "to"], axis=1).sort_index(),
        default_model.effects().drop(["from", "to"], axis=1).sort_index())

    # Unidimensionality
    unidim_reference = pd.read_csv(
        "file:tests/data/satisfaction_unidim.csv", index_col=0)
    npt.assert_allclose(
        _canonical(unidim_reference.drop(["mode"], axis=1)),
        _canonical(default_model.unidimensionality().drop(["mode"], axis=1)))

    assert math.isclose(0.609741624338411, default_model.goodness_of_fit())

    # Remaining schemes: outer model only
    other_schemes = (
        (Scheme.PATH, "file:tests/data/satisfaction.outer-model-path.csv"),
        (Scheme.FACTORIAL,
         "file:tests/data/satisfaction.outer-model-factorial.csv"),
    )
    for scheme, fixture in other_schemes:
        scheme_model = Plspm(data, cfg, scheme)
        scheme_reference = _canonical(
            pd.read_csv(fixture, index_col=0).drop(["block"], axis=1))
        npt.assert_allclose(scheme_reference,
                            _canonical(scheme_model.outer_model()))
def test_config_rejects_adding_lv_not_present_in_path():
    """``add_lv`` raises ``ValueError`` for the LV name "POO".

    Presumably "POO" is absent from the matrix returned by
    ``config_test_path_matrix`` -- verify against that helper.
    """
    cfg = c.Config(config_test_path_matrix())
    unknown_lv = "POO"
    with pytest.raises(ValueError):
        cfg.add_lv(unknown_lv, Mode.A, c.MV("test"))
def test_config_returns_correct_mode_and_mvs():
    """``mode`` and ``mvs`` report what was registered for AGRI."""
    agri_mvs = ["gini", "farm", "rent"]

    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("AGRI", Mode.A, *[c.MV(name) for name in agri_mvs])

    assert cfg.mode("AGRI") == Mode.A
    npt.assert_array_equal(cfg.mvs("AGRI"), ["gini", "farm", "rent"])
def test_config_rejects_missing_mvs():
    """``Config.filter`` raises ``ValueError`` for the configured MV "poo".

    Presumably "poo" is not a column of ``russa.csv`` -- verify against
    the fixture.
    """
    data = pd.read_csv("file:tests/data/russa.csv", index_col=0)

    cfg = c.Config(config_test_path_matrix())
    cfg.add_lv("AGRI", Mode.A,
               *[c.MV(name) for name in ("gini", "farm", "poo")])

    with pytest.raises(ValueError):
        cfg.filter(data)