def test_unique_elements():
    """Check ``unique_elements`` on a list that contains repeated values."""
    values_with_repeats = [1, 2, 3, 2, 1, 3, 4, 4, 3, 5, 5]
    # Each distinct value should appear exactly once in the result.
    eq_(unique_elements(values_with_repeats), [1, 2, 3, 4, 5])
def calculate_py_output_cfa(json_name,
                            data_name,
                            is_cov=False,
                            data_dir=None,
                            json_dir=None,
                            n_obs=None,
                            **kwargs):
    """
    Fit a ``ConfirmatoryFactorAnalyzer`` on a CSV data set and collect its results.

    Parameters
    ----------
    json_name : str
        Base name (without ``.json``) of the model specification file.
    data_name : str
        Base name (without ``.csv``) of the data file.
    is_cov : bool, optional
        Forwarded to the analyzer as ``is_cov_matrix``.
        Defaults to False.
    data_dir : str or None, optional
        Directory holding the CSV file; ``DATA_DIR`` is used when None.
        Defaults to None.
    json_dir : str or None, optional
        Directory holding the JSON file; ``JSON_DIR`` is used when None.
        Defaults to None.
    n_obs : int or None, optional
        Number of observations passed to the analyzer. When None and
        ``is_cov`` is False, the number of rows read from the CSV is used.
        Defaults to None.
    **kwargs
        Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns
    -------
    outputs : dict
        Copies of the fitted results under the keys ``'loadings'``,
        ``'errorvars'``, ``'factorcovs'``, ``'loadingsse'``,
        ``'errorvarsse'`` and ``'transform'``.
    n_factors : int
        The value of ``cfa.model.n_factors`` after fitting.
    """
    data_dir = DATA_DIR if data_dir is None else data_dir
    json_dir = JSON_DIR if json_dir is None else json_dir

    csv_path = join(data_dir, data_name + '.csv')
    json_path = join(json_dir, json_name + '.json')

    frame = pd.read_csv(csv_path, **kwargs)

    # Raw data: default the observation count to the number of rows read.
    if not is_cov and n_obs is None:
        n_obs = frame.shape[0]

    with open(json_path) as handle:
        model = json.load(handle)

    # Keep only the columns named in the model, each one once.
    wanted = unique_elements([name
                              for members in model.values()
                              for name in members])
    frame = frame[wanted].copy()

    spec = ModelSpecificationParser.parse_model_specification_from_dict(frame,
                                                                        model)
    cfa = ConfirmatoryFactorAnalyzer(spec,
                                     n_obs=n_obs,
                                     is_cov_matrix=is_cov,
                                     disp=False)
    cfa.fit(frame.values)
    scores = cfa.transform(frame.values)
    loadings_se, error_vars_se = cfa.get_standard_errors()

    # Copies keep the returned arrays independent of the analyzer's state.
    results = dict([
        ('loadings', cfa.loadings_.copy()),
        ('errorvars', cfa.error_vars_.copy()),
        ('factorcovs', cfa.factor_varcovs_.copy()),
        ('loadingsse', loadings_se.copy()),
        ('errorvarsse', error_vars_se.copy()),
        ('transform', scores.copy()),
    ])
    return results, cfa.model.n_factors
def parse_model_specification_from_dict(X, specification=None):
    """
    Build a ``ModelSpecification`` from a factor-to-variables mapping.

    Each key of ``specification`` is a factor name and each value is the
    collection of variable names that load on that factor. The factor
    names and the de-duplicated variable names are handed to the
    resulting ``ModelSpecification`` together with a 0/1 loading matrix.

    Parameters
    ----------
    X : array-like
        The data set that will be used for CFA. Only consulted (through
        ``X.shape[1]``) when ``specification`` is None.
    specification : dict or None
        Mapping of factor name to the variables loading on it.
        If None, a square all-ones loading matrix is created whose
        size is the number of columns in ``X``, and no factor or
        variable names are recorded.
        Defaults to None.

    Returns
    -------
    ModelSpecification
        A model specification object

    Raises
    ------
    ValueError
        If `specification` is neither a dict nor None.

    Examples
    --------
    >>> import pandas as pd
    >>> from factor_analyzer import (ConfirmatoryFactorAnalyzer,
    ...                              ModelSpecificationParser)
    >>> X = pd.read_csv('tests/data/test11.csv')
    >>> model_dict = {"F1": ["V1", "V2", "V3", "V4"],
    ...               "F2": ["V5", "V6", "V7", "V8"]}
    >>> model_spec = ModelSpecificationParser.parse_model_specification_from_dict(X, model_dict)
    """
    # Reject unsupported inputs first; only None and dict remain below.
    if specification is not None and not isinstance(specification, dict):
        raise ValueError('The model `specification` must be either a dict '
                         'or None, not {}'.format(type(specification)))

    if specification is None:
        factor_names = variable_names = None
        n_variables = n_factors = X.shape[1]
        loadings = np.ones((n_factors, n_factors), dtype=int)
    else:
        factor_names = list(specification)
        variable_names = unique_elements([variable
                                          for members in specification.values()
                                          for variable in members])

        # One 0/1 column per factor: 1 where the variable is listed
        # under that factor, 0 otherwise.
        indicator_columns = {
            name: pd.Series(variable_names).isin(specification[name]).astype(int)
            for name in factor_names
        }
        loadings = pd.DataFrame(indicator_columns).values
        n_variables, n_factors = loadings.shape

    return ModelSpecification(loadings=loadings,
                              n_variables=n_variables,
                              n_factors=n_factors,
                              factor_names=factor_names,
                              variable_names=variable_names)