Example #1
0
def test_getframe_smoke():
    """Smoke test ``get_loadings_frame`` with the raw, strings and display styles.

    The test returns early (without failing) on pandas older than 0.17 and
    when jinja2 cannot be imported.
    """
    #  mostly smoke tests for now
    mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
    res = mod.fit()

    df = res.get_loadings_frame(style='raw')
    assert_(isinstance(df, pd.DataFrame))

    # Compare (major, minor) as integers: a plain string comparison is
    # lexicographic and would rank e.g. '0.9' above '0.17'.
    pd_major_minor = tuple(int(part)
                           for part in pd.__version__.split('.')[:2])
    if pd_major_minor < (0, 17):
        return
    lds = res.get_loadings_frame(style='strings', decimals=3, threshold=0.3)

    # The Styler option require jinja2, skip if not available
    try:
        from jinja2 import Template  # noqa:F401
    except ImportError:
        return

    # Run the latex smoke test only after the jinja2 guard: newer pandas
    # builds DataFrame.to_latex on top of the Styler machinery.
    lds.to_latex()

    # The formats package moved from ``pandas.formats`` to
    # ``pandas.io.formats``; try the new location first.
    try:
        from pandas.io import formats as pd_formats
    except ImportError:
        from pandas import formats as pd_formats

    ldf = res.get_loadings_frame(style='display')
    assert_(isinstance(ldf, pd_formats.style.Styler))
    assert_(isinstance(ldf.data, pd.DataFrame))

    res.get_loadings_frame(style='display', decimals=3, threshold=0.2)

    res.get_loadings_frame(style='display', decimals=3, color_max='GAINSBORO')

    res.get_loadings_frame(style='display',
                           decimals=3,
                           threshold=0.45,
                           highlight_max=False,
                           sort_=False)
Example #2
0
def test_plots():
    """Smoke test the scree and loadings plots after an oblimin rotation.

    All figures opened by the test are closed on exit, whether or not the
    assertion passes, so they do not accumulate across the test session.
    """
    # Imported locally so the cleanup below does not depend on a
    # module-level ``plt`` name being available.
    import matplotlib.pyplot as plt

    mod = Factor(X.iloc[:, 1:], 3)
    results = mod.fit()
    results.rotate('oblimin')
    try:
        results.plot_scree()
        fig_loadings = results.plot_loadings()
        assert_equal(3, len(fig_loadings))
    finally:
        plt.close('all')
def fa_neighbors(vecs,
                 d,
                 num_neighbors=5,
                 rotation=None,
                 method='pa',
                 rotate_args=()):
    """Fit a factor model to ``vecs.vectors`` and look up neighbors of each factor.

    Parameters
    ----------
    vecs : object
        Must expose a 2-d ``vectors`` attribute; it is also handed to
        ``compute_nn`` unchanged.
    d : int
        Number of factors requested from ``Factor``; the returned loadings
        always have ``d`` columns.
    num_neighbors : int, optional
        Forwarded to ``compute_nn``.
    rotation : optional
        If not None, the loadings are passed through
        ``rotate_factors(loadings, rotation, *rotate_args)``.
    method : str, optional
        Forwarded to ``Factor`` as ``method``.
    rotate_args : sequence, optional
        Extra positional arguments for ``rotate_factors``.

    Returns
    -------
    tuple
        ``(neighbors, loadings)`` where ``neighbors`` is the value returned
        by ``compute_nn`` and ``loadings`` is the (possibly rotated) loading
        matrix.
    """
    # find latent factors
    fa = Factor(vecs.vectors, d, method=method)
    loadings = fa.fit().loadings
    # Pad with zero columns so the loading matrix has exactly d columns even
    # if the fit returned fewer.
    padding = np.zeros((vecs.vectors.shape[1], d - loadings.shape[1]))
    loadings = np.hstack([loadings, padding])

    # rotate factors
    if rotation is not None:
        # only the rotated loadings are needed; the transformation is dropped
        loadings, _ = rotate_factors(loadings, rotation, *rotate_args)

    # find neighbors; each factor is labelled by its column index
    labels = np.arange(d)
    neighbors = compute_nn(vecs,
                           loadings.T,
                           labels,
                           num_neighbors,
                           whole_matrix=True)

    return neighbors, loadings
Example #4
0
def test_getframe_smoke():
    """Smoke test ``get_loadings_frame`` with the raw, strings and display styles.

    The test returns early (without failing) on pandas older than 0.17 and
    when jinja2 cannot be imported.
    """
    #  mostly smoke tests for now
    mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
    res = mod.fit()

    df = res.get_loadings_frame(style='raw')
    assert_(isinstance(df, pd.DataFrame))

    # Compare (major, minor) as integers: a plain string comparison is
    # lexicographic and would rank e.g. '0.9' above '0.17'.
    pd_major_minor = tuple(int(part)
                           for part in pd.__version__.split('.')[:2])
    if pd_major_minor < (0, 17):
        return
    lds = res.get_loadings_frame(style='strings', decimals=3, threshold=0.3)

    # The Styler option require jinja2, skip if not available
    try:
        from jinja2 import Template  # noqa:F401
    except ImportError:
        return

    # Run the latex smoke test only after the jinja2 guard: newer pandas
    # builds DataFrame.to_latex on top of the Styler machinery.
    lds.to_latex()

    # The formats package lives in ``pandas.io`` on newer pandas and
    # directly under ``pandas`` on older releases.
    try:
        from pandas.io import formats as pd_formats
    except ImportError:
        from pandas import formats as pd_formats

    ldf = res.get_loadings_frame(style='display')
    assert_(isinstance(ldf, pd_formats.style.Styler))
    assert_(isinstance(ldf.data, pd.DataFrame))

    res.get_loadings_frame(style='display', decimals=3, threshold=0.2)

    res.get_loadings_frame(style='display', decimals=3, color_max='GAINSBORO')

    res.get_loadings_frame(style='display',
                           decimals=3,
                           threshold=0.45,
                           highlight_max=False,
                           sort_=False)
Example #5
0
def test_plots(close_figures):
    """Smoke test the scree and loadings plots after an oblimin rotation.

    ``close_figures`` is requested as an argument but never referenced in
    the body.
    """
    results = Factor(X.iloc[:, 1:], 3).fit()
    results.rotate('oblimin')
    scree_fig = results.plot_scree()

    loading_figs = results.plot_loadings()
    assert_equal(3, len(loading_figs))
Example #6
0
def test_factor_scoring():
    """Compare factor scores with the stored Stata results.

    Scores are checked after a varimax and after an oblimin rotation; the
    second factor is sign-flipped before comparing with the reference file.
    """
    results_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'results')
    y = pd.read_csv(os.path.join(results_dir, 'factor_data.csv'))
    f_s = pd.read_csv(os.path.join(results_dir, 'factors_stata.csv'))
    sign_flip = [1, -1]

    #  mostly smoke tests for now
    res = Factor(y, 2).fit(maxiter=1)

    def check_scores(reg_cols, bart_cols, ols_bound):
        # regression and default scores against the reference columns
        reg = res.factor_scoring(method='reg')
        assert_allclose(reg * sign_flip, f_s[reg_cols].values,
                        atol=1e-4, rtol=1e-3)
        bart = res.factor_scoring()
        assert_allclose(bart * sign_flip, f_s[bart_cols].values,
                        atol=1e-4, rtol=1e-3)

        # check we have high correlation to ols and gls
        ols = res.factor_scoring(method='ols')
        gls = res.factor_scoring(method='gls')
        reg_z = _zscore(reg)
        ols_z = _zscore(ols)
        gls_z = _zscore(gls)
        assert_array_less(ols_bound, (ols_z * reg_z).mean(0))
        assert_array_less(0.999, (gls_z * reg_z).mean(0))
        return ols

    res.rotate('varimax')
    check_scores(["f1", 'f2'], ["f1b", 'f2b'], 0.98)

    # with oblique rotation
    res.rotate('oblimin')
    # Note: Stata has second factor with flipped sign compared to statsmodels
    assert_allclose(res._corr_factors()[0, 1], (-1) * 0.25651037, rtol=1e-3)
    f_ols = check_scores(["f1o", 'f2o'], ["f1ob", 'f2ob'], 0.97)

    # check provided endog
    f_ols2 = res.factor_scoring(method='ols', endog=res.model.endog)
    assert_allclose(f_ols2, f_ols, rtol=1e-13)
Example #7
0
def test_loglike():
    """Check that ``loglike`` agrees for both accepted parameter forms."""
    uniq, load, corr, par = _toy()
    model = Factor(n_factor=2, corr=corr)

    # The same parameters are passed once as a (load, uniq) tuple and once
    # as the ``par`` value returned by ``_toy``.
    from_tuple = model.loglike((load, uniq))
    from_par = model.loglike(par)

    assert_allclose(from_tuple, from_par)
Example #8
0
def test_loglike():
    """Evaluate ``loglike`` via a (load, uniq) tuple and via ``par``; both must match."""
    uniq, load, corr, par = _toy()
    factor_model = Factor(n_factor=2, corr=corr)

    # Two ways of passing the parameters to loglike
    values = [factor_model.loglike(params) for params in ((load, uniq), par)]

    assert_allclose(values[0], values[1])
Example #9
0
def test_plots():
    """Smoke test the scree and loadings plots, closing every figure created."""
    results = Factor(X.iloc[:, 1:], 3).fit()
    results.rotate('oblimin')
    scree_fig = results.plot_scree()
    plt.close(scree_fig)

    loading_figs = results.plot_loadings()
    assert_equal(3, len(loading_figs))
    # close all but the last figure one by one, then everything still open
    for loading_fig in loading_figs[:-1]:
        plt.close(loading_fig)
    plt.close('all')
Example #10
0
def test_plots():
    """Plot scree and loadings for a rotated 3-factor fit and close the figures."""
    model = Factor(X.iloc[:, 1:], 3)
    fitted = model.fit()
    fitted.rotate('oblimin')
    plt.close(fitted.plot_scree())

    figures = fitted.plot_loadings()
    assert_equal(3, len(figures))
    # every figure except the last is closed explicitly; the final
    # close('all') takes care of whatever is left
    for figure in figures[:-1]:
        plt.close(figure)
    plt.close('all')
Example #11
0
def test_score():
    """Compare ``score`` with the gradient of ``loglike`` from ``approx_fprime``."""
    uniq, load, corr, par = _toy()
    model = Factor(n_factor=2, corr=corr)

    other_point = np.r_[0.1, 0.2, 0.3, 0.4, 0.3, 0.1, 0.2, -0.2, 0, 0.8, 0.5, 0]

    # check at the toy parameters and at a second, hand-picked point
    for point in (par, other_point):
        numeric_grad = approx_fprime(point, lambda p: model.loglike(p), 1e-8)
        analytic_grad = model.score(point)
        assert_allclose(numeric_grad, analytic_grad, atol=1e-3)
Example #12
0
def test_factor_scoring():
    """Check factor scores against the Stata reference file for two rotations.

    The second factor is multiplied by -1 before each comparison with the
    reference columns.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    y = pd.read_csv(os.path.join(here, 'results', 'factor_data.csv'))
    f_s = pd.read_csv(os.path.join(here, 'results', 'factors_stata.csv'))

    #  mostly smoke tests for now
    mod = Factor(y, 2)
    res = mod.fit(maxiter=1)

    # (rotation, reference columns for 'reg', reference columns for the
    #  default method, lower bound for the ols/reg correlation)
    cases = (
        ('varimax', ["f1", 'f2'], ["f1b", 'f2b'], 0.98),
        ('oblimin', ["f1o", 'f2o'], ["f1ob", 'f2ob'], 0.97),
    )
    for rotation, reg_cols, bart_cols, ols_bound in cases:
        res.rotate(rotation)
        if rotation == 'oblimin':
            # Note: Stata has second factor with flipped sign compared to
            # statsmodels
            assert_allclose(res._corr_factors()[0, 1],
                            (-1) * 0.25651037, rtol=1e-3)

        f_reg = res.factor_scoring(method='reg')
        assert_allclose(f_reg * [1, -1], f_s[reg_cols].values,
                        atol=1e-4, rtol=1e-3)
        f_bart = res.factor_scoring()
        assert_allclose(f_bart * [1, -1], f_s[bart_cols].values,
                        atol=1e-4, rtol=1e-3)

        # check we have high correlation to ols and gls
        f_ols = res.factor_scoring(method='ols')
        f_gls = res.factor_scoring(method='gls')
        f_reg_z = _zscore(f_reg)
        f_ols_z = _zscore(f_ols)
        f_gls_z = _zscore(f_gls)
        assert_array_less(ols_bound, (f_ols_z * f_reg_z).mean(0))
        assert_array_less(0.999, (f_gls_z * f_reg_z).mean(0))

    # check provided endog (f_ols is the value from the oblimin pass)
    f_ols2 = res.factor_scoring(method='ols', endog=res.model.endog)
    assert_allclose(f_ols2, f_ols, rtol=1e-13)
Example #13
0
def test_score():
    """Check ``score`` against ``approx_fprime`` applied to ``loglike`` at two points."""
    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    def loglike_at(params):
        return fa.loglike(params)

    second_point = np.r_[0.1, 0.2, 0.3, 0.4, 0.3, 0.1,
                         0.2, -0.2, 0, 0.8, 0.5, 0]

    for params in (par, second_point):
        expected = approx_fprime(params, loglike_at, 1e-8)
        actual = fa.score(params)
        assert_allclose(expected, actual, atol=1e-3)
Example #14
0
def test_em():
    """Compare the covariance implied by ``_fit_ml_em`` with ``fit``'s ``fitted_cov``."""
    corr = np.asarray([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]])

    model = Factor(corr=corr, n_factor=1, method='ml')
    ml_result = model.fit(opt={'gtol': 1e-3})
    ml_loadings = ml_result.loadings
    ml_uniqueness = ml_result.uniqueness

    em_loadings, em_uniqueness = model._fit_ml_em(1000)
    # loadings times their transpose, with the uniquenesses added on the
    # diagonal (stride n + 1 through the flattened square matrix)
    em_cov = np.dot(em_loadings, em_loadings.T)
    diag_step = em_cov.shape[0] + 1
    em_cov.flat[::diag_step] += em_uniqueness

    assert_allclose(em_cov, ml_result.fitted_cov, rtol=1e-2, atol=1e-2)
Example #15
0
def test_em():
    """Check that 1000 EM iterations give a covariance close to ``fit``'s result."""
    n_factor = 1
    cor = np.asarray([
        [1, 0.5, 0.3],
        [0.5, 1, 0],
        [0.3, 0, 1],
    ])

    fa = Factor(corr=cor, n_factor=n_factor, method='ml')
    fit_result = fa.fit(opt={'gtol': 1e-3})
    loadings_fit = fit_result.loadings
    uniqueness_fit = fit_result.uniqueness

    loadings_em, uniqueness_em = fa._fit_ml_em(1000)
    implied = np.dot(loadings_em, loadings_em.T)
    # add the uniquenesses to the diagonal entries in place
    implied.flat[::implied.shape[0] + 1] += uniqueness_em

    assert_allclose(implied, fit_result.fitted_cov, rtol=1e-2, atol=1e-2)
Example #16
0
def test_auto_col_name():
    """Check the variable names generated when ``endog_names`` is None."""
    mod = Factor(None, 2, corr=np.eye(11), endog_names=None, smc=False)
    # 'var00' ... 'var10': zero-padded to two digits
    expected_names = ['var%02d' % idx for idx in range(11)]
    assert_array_equal(mod.endog_names, expected_names)
Example #17
0
def test_2factor():
    """
    Compare a two-factor ML fit with reference values.

    # R code:
    r = 0.4
    p = 6
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    factanal(covmat=cm, factors=2)
    """

    rho = 0.4
    n_var = 6
    idx = np.arange(n_var)
    # correlation between variables i and j is rho ** |i - j|
    corr = rho**np.abs(idx[:, None] - idx[None, :])

    model = Factor(corr=corr, n_factor=2, nobs=100, method='ml')
    rslt = model.fit()

    # flip columns so the first-row loadings are not negative
    for col in range(2):
        if rslt.loadings[0, col] < 0:
            rslt.loadings[:, col] *= -1

    expected_uniq = np.array([0.782, 0.367, 0.696, 0.696, 0.367, 0.782])
    assert_allclose(expected_uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    expected_loads = [
        np.array([0.323, 0.586, 0.519, 0.519, 0.586, 0.323]),
        np.array([0.337, 0.538, 0.187, -0.187, -0.538, -0.337]),
    ]
    for col in range(2):
        # align the sign of the reference column with the fitted column
        if np.dot(expected_loads[col], rslt.loadings[:, col]) < 0:
            expected_loads[col] *= -1
        assert_allclose(expected_loads[col], rslt.loadings[:, col],
                        rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 4)

    # Smoke test for standard errors
    uniq_se = np.asarray([
        0.11056836, 0.05191071, 0.09836349, 0.09836349, 0.05191071, 0.11056836
    ])
    assert_allclose(rslt.uniq_stderr, uniq_se, atol=1e-4)
    load_se = np.asarray([
        [0.08842151, 0.08842151],
        [0.06058582, 0.06058582],
        [0.08339874, 0.08339874],
        [0.08339874, 0.08339874],
        [0.06058582, 0.06058582],
        [0.08842151, 0.08842151],
    ])
    assert_allclose(rslt.load_stderr, load_se, atol=1e-4)
Example #18
0
def test_factor_missing():
    """Check the model's nobs/endog shape when one cell of the data is NaN."""
    data = X.iloc[:, 1:-1].copy()
    n_rows, n_cols = data.shape
    data.iloc[2, 2] = np.nan  # a single missing cell in row 2

    mod = Factor(data, 2)

    # one observation fewer, same number of variables
    assert_equal(mod.nobs, n_rows - 1)
    assert_equal(mod.k_endog, n_cols)
    assert_equal(mod.endog.shape, (n_rows - 1, n_cols))
Example #19
0
def test_direct_corr_matrix():
    """Fit from a directly supplied correlation matrix and exercise ``endog_names``."""
    data = X.iloc[:, 1:-1]

    # Test specifying the correlation matrix directly
    corr = np.corrcoef(data, rowvar=0)
    mod = Factor(None, 2, corr=corr, smc=False)
    results = mod.fit(tol=1e-10)
    expected = np.array([
        [0.965392158864, 0.225880658666255],
        [0.967587154301, 0.212758741910989],
        [0.929891035996, -0.000603217967568],
        [0.486822656362, -0.869649573289374],
    ])
    assert_array_almost_equal(results.loadings, expected, decimal=8)

    # Test set and get endog_names
    mod.endog_names = data.columns
    assert_array_equal(mod.endog_names, ['Basal', 'Occ', 'Max', 'id'])

    # Test set endog_names with the wrong number of elements
    too_few_names = X.iloc[:, :1].columns
    assert_raises(ValueError, setattr, mod, 'endog_names', too_few_names)
Example #20
0
def test_direct_corr_matrix():
    """Check loadings from a user-supplied ``corr`` and the ``endog_names`` setter."""
    # Test specifying the correlation matrix directly
    sample_corr = np.corrcoef(X.iloc[:, 1:-1], rowvar=0)
    model = Factor(None, 2, corr=sample_corr, smc=False)
    fitted = model.fit(tol=1e-10)
    reference = np.array([[0.965392158864, 0.225880658666255],
                          [0.967587154301, 0.212758741910989],
                          [0.929891035996, -0.000603217967568],
                          [0.486822656362, -0.869649573289374]])
    assert_array_almost_equal(fitted.loadings, reference, decimal=8)

    # Test set and get endog_names
    names = X.iloc[:, 1:-1].columns
    model.endog_names = names
    assert_array_equal(model.endog_names, ['Basal', 'Occ', 'Max', 'id'])

    # Test set endog_names with the wrong number of elements
    assert_raises(ValueError, setattr, model, 'endog_names',
                  X.iloc[:, :1].columns)
Example #21
0
def test_2factor():
    """
    Check a two-factor ML fit (6 variables, nobs=100) against reference values.

    # R code:
    r = 0.4
    p = 6
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    factanal(covmat=cm, factors=2)
    """

    r = 0.4
    p = 6
    positions = np.arange(p)
    lags = np.abs(np.subtract.outer(positions, positions))
    cm = r ** lags

    fa = Factor(corr=cm, n_factor=2, nobs=100, method='ml')
    rslt = fa.fit()

    # make the first-row loading of each factor non-negative
    for factor in (0, 1):
        if rslt.loadings[0, factor] < 0:
            rslt.loadings[:, factor] *= -1

    uniq_ref = np.r_[0.782, 0.367, 0.696, 0.696, 0.367, 0.782]
    assert_allclose(uniq_ref, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    loads_ref = [np.r_[0.323, 0.586, 0.519, 0.519, 0.586, 0.323],
                 np.r_[0.337, 0.538, 0.187, -0.187, -0.538, -0.337]]
    for factor in (0, 1):
        # the reference column is flipped when it points the opposite way
        if np.dot(loads_ref[factor], rslt.loadings[:, factor]) < 0:
            loads_ref[factor] *= -1
        assert_allclose(loads_ref[factor], rslt.loadings[:, factor],
                        rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 4)

    # Smoke test for standard errors
    uniq_se_ref = np.asarray([0.11056836, 0.05191071, 0.09836349,
                              0.09836349, 0.05191071, 0.11056836])
    assert_allclose(rslt.uniq_stderr, uniq_se_ref, atol=1e-4)
    load_se_ref = np.asarray([[0.08842151, 0.08842151],
                              [0.06058582, 0.06058582],
                              [0.08339874, 0.08339874],
                              [0.08339874, 0.08339874],
                              [0.06058582, 0.06058582],
                              [0.08842151, 0.08842151]])
    assert_allclose(rslt.load_stderr, load_se_ref, atol=1e-4)
Example #22
0
def test_exact():
    """Fit exactly factor-structured correlation matrices with method='ml'.

    Test if we can recover exact factor-structured matrices with default
    starting values.
    """
    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in (5, 10, 25):
        for n_factor in (1, 2, 3):
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)

            # covariance = load load' + diag(uniq), rescaled to unit diagonal
            cov = np.dot(load, load.T)
            cov[np.diag_indices_from(cov)] += uniq
            scale = np.sqrt(np.diag(cov))
            corr = cov / np.outer(scale, scale)

            rslt = Factor(corr=corr, n_factor=n_factor, method='ml').fit()
            assert_allclose(rslt.fitted_cov, corr, rtol=1e-4, atol=1e-4)
            rslt.summary()  # smoke test
Example #23
0
def test_exact():
    """Check ``fitted_cov`` reproduces correlation matrices built from a factor structure."""
    # Test if we can recover exact factor-structured matrices with
    # default starting values.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    sizes = (5, 10, 25)
    factor_counts = (1, 2, 3)
    for k_var in sizes:
        for n_factor in factor_counts:
            loadings = np.random.normal(size=(k_var, n_factor))
            uniqueness = np.linspace(1, 2, k_var)
            target = np.dot(loadings, loadings.T)
            # add the uniquenesses to the diagonal, then scale to a
            # correlation matrix
            target.flat[::k_var + 1] += uniqueness
            sd = np.sqrt(np.diag(target))
            target /= np.outer(sd, sd)
            fa = Factor(corr=target, n_factor=n_factor, method='ml')
            rslt = fa.fit()
            assert_allclose(rslt.fitted_cov, target, rtol=1e-4, atol=1e-4)
            rslt.summary()  # smoke test
Example #24
0
def test_getframe_smoke():
    """Smoke test ``get_loadings_frame`` with the raw, strings and display styles.

    Everything after the raw/strings frames is skipped (by returning) when
    jinja2 cannot be imported.
    """
    #  mostly smoke tests for now
    res = Factor(X.iloc[:, 1:-1], 2, smc=True).fit()

    raw = res.get_loadings_frame(style='raw')
    assert_(isinstance(raw, pd.DataFrame))

    lds = res.get_loadings_frame(style='strings', decimals=3, threshold=0.3)

    # The Styler option require jinja2, skip if not available
    try:
        from jinja2 import Template  # noqa:F401
    except ImportError:
        # TODO: separate this and do pytest.skip?
        return

    if not PD_LT_1_4:
        # Smoke test using new style to_latex
        lds.style.to_latex()
    else:
        # Old implementation that warns
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            lds.to_latex()

    try:
        from pandas.io import formats as pd_formats
    except ImportError:
        from pandas import formats as pd_formats

    styled = res.get_loadings_frame(style='display')
    assert_(isinstance(styled, pd_formats.style.Styler))
    assert_(isinstance(styled.data, pd.DataFrame))

    # remaining display-style calls only need to run without raising
    display_options = (
        dict(threshold=0.2),
        dict(color_max='GAINSBORO'),
        dict(threshold=0.45, highlight_max=False, sort_=False),
    )
    for options in display_options:
        res.get_loadings_frame(style='display', decimals=3, **options)
Example #25
0
def test_exact_em():
    """Run 200 iterations of ``_fit_ml_em`` on exactly factor-structured matrices.

    Test if we can recover exact factor-structured matrices with default
    starting values using the EM algorithm.
    """
    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in (5, 10, 25):
        for n_factor in (1, 2, 3):
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)

            # target: load load' + diag(uniq), rescaled to unit diagonal
            cov = np.dot(load, load.T)
            cov[np.diag_indices_from(cov)] += uniq
            scale = np.sqrt(np.diag(cov))
            corr = cov / np.outer(scale, scale)

            model = Factor(corr=corr, n_factor=n_factor, method='ml')
            em_load, em_uniq = model._fit_ml_em(200)

            # matrix implied by the EM estimates
            em_cov = np.dot(em_load, em_load.T)
            em_cov.flat[::em_cov.shape[0] + 1] += em_uniq
            assert_allclose(em_cov, corr, rtol=1e-4, atol=1e-4)
Example #26
0
def test_exact_em():
    """Check that 2000 EM iterations reproduce factor-structured correlation matrices."""
    # Test if we can recover exact factor-structured matrices with
    # default starting values using the EM algorithm.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    sizes = (5, 10, 25)
    factor_counts = (1, 2, 3)
    for k_var in sizes:
        for n_factor in factor_counts:
            loadings = np.random.normal(size=(k_var, n_factor))
            uniqueness = np.linspace(1, 2, k_var)
            target = np.dot(loadings, loadings.T)
            target.flat[::k_var + 1] += uniqueness
            sd = np.sqrt(np.diag(target))
            target /= np.outer(sd, sd)

            fa = Factor(corr=target, n_factor=n_factor, method='ml')
            loadings_em, uniqueness_em = fa._fit_ml_em(2000)
            implied = np.dot(loadings_em, loadings_em.T)
            # uniquenesses go on the diagonal of the implied matrix
            implied.flat[::implied.shape[0] + 1] += uniqueness_em
            assert_allclose(implied, target, rtol=1e-4, atol=1e-4)
Example #27
0
def test_1factor():
    """
    Smoke test a one-factor ML fit on a 4-variable correlation matrix.

    # R code:
    r = 0.4
    p = 4
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    fa = factanal(covmat=cm, factors=1)
    print(fa, digits=10)
    """

    rho = 0.4
    n_var = 4
    idx = np.arange(n_var)
    # correlation between variables i and j is rho ** |i - j|
    corr = rho ** np.abs(idx[:, None] - idx[None, :])

    rslt = Factor(corr=corr, n_factor=1, method='ml').fit()

    # flip the factor so the first loading is not negative
    if rslt.loadings[0, 0] < 0:
        rslt.loadings[:, 0] *= -1

    # R solution, but our likelihood is higher
    # uniq = np.r_[0.8392472054, 0.5820958187, 0.5820958187, 0.8392472054]
    # load = np.asarray([[0.4009399224, 0.6464550935, 0.6464550935,
    #                     0.4009399224]]).T
    # l1 = fa.loglike(fa._pack(load, uniq))
    # l2 = fa.loglike(fa._pack(rslt.loadings, rslt.uniqueness))

    # So use a smoke test
    expected_uniq = np.array([0.85290232, 0.60916033, 0.55382266, 0.82610666])
    expected_load = np.array(
        [0.38353316, 0.62517171, 0.66796508, 0.4170052]).reshape(-1, 1)

    assert_allclose(expected_load, rslt.loadings, rtol=1e-3, atol=1e-3)
    assert_allclose(expected_uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 2)
Example #28
0
def test_1factor():
    """
    Check loadings, uniqueness and df of a one-factor ML fit against stored values.

    # R code:
    r = 0.4
    p = 4
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    fa = factanal(covmat=cm, factors=1)
    print(fa, digits=10)
    """

    r = 0.4
    p = 4
    positions = np.arange(p)
    lags = np.abs(np.subtract.outer(positions, positions))
    cm = r ** lags

    fa = Factor(corr=cm, n_factor=1, method='ml')
    rslt = fa.fit()

    first_loading_negative = rslt.loadings[0, 0] < 0
    if first_loading_negative:
        rslt.loadings[:, 0] *= -1

    # R solution, but our likelihood is higher
    # uniq = np.r_[0.8392472054, 0.5820958187, 0.5820958187, 0.8392472054]
    # load = np.asarray([[0.4009399224, 0.6464550935, 0.6464550935,
    #                     0.4009399224]]).T
    # l1 = fa.loglike(fa._pack(load, uniq))
    # l2 = fa.loglike(fa._pack(rslt.loadings, rslt.uniqueness))

    # So use a smoke test
    uniq_ref = np.r_[0.85290232, 0.60916033, 0.55382266, 0.82610666]
    load_ref = np.asarray([
        [0.38353316],
        [0.62517171],
        [0.66796508],
        [0.4170052],
    ])

    assert_allclose(load_ref, rslt.loadings, rtol=1e-3, atol=1e-3)
    assert_allclose(uniq_ref, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 2)
Example #29
0
def test_fit_ml_em_random_state():
    """Check that an ML fit leaves numpy's global random state untouched.

    Ensure Factor._fit_ml_em doesn't change numpy's singleton random state,
    see #7357.
    """
    n_draws = 10
    epsilon = np.random.multivariate_normal(
        np.zeros(3), np.eye(3), size=n_draws).T

    state_before = np.random.get_state()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message='Fitting did not converge')
        Factor(endog=epsilon, n_factor=2, method='ml').fit()
    state_after = np.random.get_state()

    # element 1 of the state tuple is compared with assert_equal, the
    # remaining elements with plain ==
    assert state_before[0] == state_after[0]
    assert_equal(state_before[1], state_after[1])
    assert state_before[2:] == state_after[2:]
Example #30
0
def test_unknown_fa_method_error():
    """Check that ``fit`` raises ValueError for an unknown ``method`` value."""
    # The unknown method 'ab' is accepted by the constructor call below; the
    # error is expected from fit().
    model = Factor(X.iloc[:, 1:-1], 2, method='ab')
    assert_raises(ValueError, model.fit)
Example #31
0
def test_example_compare_to_R_output():
    """Compare loadings, rotations and the summary text with stored reference values.

    The same ``results`` object is rotated repeatedly; each ``rotate`` call
    is followed by a check of ``results.loadings`` where reference values
    are available, so the order of the statements matters.
    """
    # Testing basic functions and compare to R output

    # R code for producing the results:
    # library(psych)
    # library(GPArotation)
    # Basal = c(2.068,	2.068,	2.09,	2.097,	2.117,	2.14,	2.045,	2.076,	2.09,	2.111,	2.093,	2.1,	2.104)
    # Occ = c(2.07,	2.074,	2.09,	2.093,	2.125,	2.146,	2.054,	2.088,	2.093,	2.114,	2.098,	2.106,	2.101)
    # Max = c(1.58,	1.602,	1.613,	1.613,	1.663,	1.681,	1.58,	1.602,	1.643,	1.643,	1.653,	1.623,	1.653)
    # id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)
    # Y <- cbind(Basal, Occ, Max, id)
    # a <- fa(Y, nfactors=2, fm="pa", rotate="none", SMC=FALSE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], -a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="Promax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="Varimax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="quartimax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], -a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="oblimin", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b

    # No rotation without squared multiple correlations prior
    # produce same results as in R `fa`
    mod = Factor(X.iloc[:, 1:-1], 2, smc=False)
    results = mod.fit(tol=1e-10)
    a = np.array([[0.965392158864, 0.225880658666255],
                  [0.967587154301, 0.212758741910989],
                  [0.929891035996, -0.000603217967568],
                  [0.486822656362, -0.869649573289374]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    # No rotation WITH squared multiple correlations prior
    # produce same results as in R `fa`
    mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
    results = mod.fit()
    a = np.array([[0.97541115, 0.20280987],
                  [0.97113975, 0.17207499],
                  [0.9618705, -0.2004196],
                  [0.37570708, -0.45821379]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    # Same as R GPArotation
    results.rotate('varimax')
    a = np.array([[0.98828898, -0.12587155],
                  [0.97424206, -0.15354033],
                  [0.84418097, -0.502714],
                  [0.20601929, -0.55558235]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    results.rotate('quartimax')  # Same as R fa
    # reference values are stored factor-by-row, hence the transpose below
    a = np.array([[0.98935598, 0.98242714, 0.94078972, 0.33442284],
                  [0.117190049, 0.086943252, -0.283332952, -0.489159543]])
    assert_array_almost_equal(results.loadings, a.T, decimal=8)

    # The next three rotations are only run as smoke tests (no comparison).
    results.rotate('equamax')  # Not the same as R fa

    results.rotate('promax')  # Not the same as R fa

    results.rotate('biquartimin')  # Not the same as R fa

    results.rotate('oblimin')  # Same as R fa
    a = np.array([[1.02834170170, 1.00178840104, 0.71824931384,
                   -0.00013510048],
                  [0.06563421, 0.03096076, -0.39658839, -0.59261944]])
    assert_array_almost_equal(results.loadings, a.T, decimal=8)

    # Testing result summary string
    results.rotate('varimax')
    desired = (
"""   Factor analysis results
=============================
      Eigenvalues
-----------------------------
 Basal   Occ    Max      id
-----------------------------
 2.9609 0.3209 0.0000 -0.0000
-----------------------------

-----------------------------
      Communality
-----------------------------
  Basal   Occ    Max     id
-----------------------------
  0.9926 0.9727 0.9654 0.3511
-----------------------------

-----------------------------
   Pre-rotated loadings
-----------------------------------
            factor 0       factor 1
-----------------------------------
Basal         0.9754         0.2028
Occ           0.9711         0.1721
Max           0.9619        -0.2004
id            0.3757        -0.4582
-----------------------------

-----------------------------
   varimax rotated loadings
-----------------------------------
            factor 0       factor 1
-----------------------------------
Basal         0.9883        -0.1259
Occ           0.9742        -0.1535
Max           0.8442        -0.5027
id            0.2060        -0.5556
=============================
""")
    actual = results.summary().as_text()
    # strip trailing whitespace from every line before comparing
    actual = "\n".join(line.rstrip() for line in actual.splitlines()) + "\n"
    assert_equal(actual, desired)
Example #32
0
def test_example_compare_to_R_output():
    """Compare loadings, rotations and the summary text with stored reference values.

    The same ``results`` object is rotated repeatedly; each ``rotate`` call
    is followed by a check of ``results.loadings`` where reference values
    are available, so the order of the statements matters.
    """
    # Testing basic functions and compare to R output

    # R code for producing the results:
    # library(psych)
    # library(GPArotation)
    # Basal = c(2.068,	2.068,	2.09,	2.097,	2.117,	2.14,	2.045,	2.076,	2.09,	2.111,	2.093,	2.1,	2.104)
    # Occ = c(2.07,	2.074,	2.09,	2.093,	2.125,	2.146,	2.054,	2.088,	2.093,	2.114,	2.098,	2.106,	2.101)
    # Max = c(1.58,	1.602,	1.613,	1.613,	1.663,	1.681,	1.58,	1.602,	1.643,	1.643,	1.653,	1.623,	1.653)
    # id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)
    # Y <- cbind(Basal, Occ, Max, id)
    # a <- fa(Y, nfactors=2, fm="pa", rotate="none", SMC=FALSE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], -a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="Promax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="Varimax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="quartimax", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], -a$loadings[,2])
    # b
    # a <- fa(Y, nfactors=2, fm="pa", rotate="oblimin", SMC=TRUE, min.err=1e-10)
    # b <- cbind(a$loadings[,1], a$loadings[,2])
    # b

    # No rotation without squared multiple correlations prior
    # produce same results as in R `fa`
    mod = Factor(X.iloc[:, 1:-1], 2, smc=False)
    results = mod.fit(tol=1e-10)
    a = np.array([[0.965392158864, 0.225880658666255],
                  [0.967587154301, 0.212758741910989],
                  [0.929891035996, -0.000603217967568],
                  [0.486822656362, -0.869649573289374]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    # No rotation WITH squared multiple correlations prior
    # produce same results as in R `fa`
    mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
    results = mod.fit()
    a = np.array([[0.97541115, 0.20280987], [0.97113975, 0.17207499],
                  [0.9618705, -0.2004196], [0.37570708, -0.45821379]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    # Same as R GPArotation
    results.rotate('varimax')
    a = np.array([[0.98828898, -0.12587155], [0.97424206, -0.15354033],
                  [0.84418097, -0.502714], [0.20601929, -0.55558235]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    results.rotate('quartimax')  # Same as R fa
    # reference values are stored factor-by-row, hence the transpose below
    a = np.array([[0.98935598, 0.98242714, 0.94078972, 0.33442284],
                  [0.117190049, 0.086943252, -0.283332952, -0.489159543]])
    assert_array_almost_equal(results.loadings, a.T, decimal=8)

    # The next three rotations are only run as smoke tests (no comparison).
    results.rotate('equamax')  # Not the same as R fa

    results.rotate('promax')  # Not the same as R fa

    results.rotate('biquartimin')  # Not the same as R fa

    results.rotate('oblimin')  # Same as R fa
    a = np.array(
        [[1.02834170170, 1.00178840104, 0.71824931384, -0.00013510048],
         [0.06563421, 0.03096076, -0.39658839, -0.59261944]])
    assert_array_almost_equal(results.loadings, a.T, decimal=8)

    # Testing result summary string
    results.rotate('varimax')
    desired = ("""   Factor analysis results
=============================
      Eigenvalues
-----------------------------
 Basal   Occ    Max      id
-----------------------------
 2.9609 0.3209 0.0000 -0.0000
-----------------------------

-----------------------------
      Communality
-----------------------------
  Basal   Occ    Max     id
-----------------------------
  0.9926 0.9727 0.9654 0.3511
-----------------------------

-----------------------------
   Pre-rotated loadings
-----------------------------------
            factor 0       factor 1
-----------------------------------
Basal         0.9754         0.2028
Occ           0.9711         0.1721
Max           0.9619        -0.2004
id            0.3757        -0.4582
-----------------------------

-----------------------------
   varimax rotated loadings
-----------------------------------
            factor 0       factor 1
-----------------------------------
Basal         0.9883        -0.1259
Occ           0.9742        -0.1535
Max           0.8442        -0.5027
id            0.2060        -0.5556
=============================
""")
    actual = results.summary().as_text()
    # strip trailing whitespace from every line before comparing
    actual = "\n".join(line.rstrip() for line in actual.splitlines()) + "\n"
    assert_equal(actual, desired)
Example #33
0
def test_specify_nobs():
    """Check ``nobs`` handling: 10 is accepted for 10-row data, 9 raises ValueError."""
    # Test specifying nobs
    data = np.zeros([10, 3])
    Factor(data, 2, nobs=10)

    # nobs that disagrees with the number of rows is rejected
    mismatched_data = np.zeros([10, 3])
    assert_raises(ValueError, Factor, mismatched_data, 2, nobs=9)