def run_model(model_def: str, df: pd.DataFrame, pos_def_warnings: str = 'ignore', **opt_kwargs) -> Optimizer:
    """Fit a semopy model to ``df`` and record positive-definiteness problems.

    Args:
        model_def: Model description handed to ``Model``.
        df: Dataset loaded into the model.
        pos_def_warnings: Handling mode, first passed through
            ``_validate_pos_def``. ``'error'`` raises when a Cholesky
            factorisation of the sample or model-implied covariance matrix
            fails; ``'print'`` echoes the optimizer's captured stdout; any
            other accepted value keeps the output silent.
        **opt_kwargs: Forwarded unchanged to ``Optimizer.optimize``.

    Returns:
        The optimizer, with ``_sample_cov_pos_df`` and ``_model_cov_pos_df``
        set to ``False`` when the matching warning text appeared in the
        optimizer's stdout and ``True`` otherwise.

    Raises:
        SampleCovMatrixNotPositiveDefiniteException: In ``'error'`` mode,
            when ``model.mx_cov`` has no Cholesky factorisation.
        ModelCovMatrixNotPositiveDefiniteException: In ``'error'`` mode,
            when the fitted sigma matrix has no Cholesky factorisation.
    """
    pos_def_warnings = _validate_pos_def(pos_def_warnings)
    strict = pos_def_warnings == 'error'

    model = Model(model_def)
    model.load_dataset(df)

    if strict:
        # Cholesky succeeds only for positive-definite input.
        try:
            np.linalg.cholesky(model.mx_cov)
        except np.linalg.LinAlgError:
            raise SampleCovMatrixNotPositiveDefiniteException

    # Everything the optimizer prints is captured so it can be scanned below.
    with io.StringIO() as captured:
        with redirect_stdout(captured):
            opt = Optimizer(model)
            opt.optimize(**opt_kwargs)
        optimizer_output = captured.getvalue()

    if strict:
        implied_cov = opt.get_sigma()[0]
        try:
            np.linalg.cholesky(implied_cov)
        except np.linalg.LinAlgError:
            raise ModelCovMatrixNotPositiveDefiniteException

    if pos_def_warnings == 'print':
        print(optimizer_output)

    # The marker texts (including the "postive" spelling) are matched verbatim
    # against the captured output -- presumably they mirror the optimizer's own
    # messages, so they must not be "corrected" here.
    opt._sample_cov_pos_df = (
        'a sample covariance matrix is not positive-definite' not in optimizer_output
    )
    opt._model_cov_pos_df = (
        'resulting model-implied covariance matrix is not postive-definite' not in optimizer_output
    )
    return opt
def Bootstrapwrapper(self, chi):
    '''Run the ADF chi-square bootstrap and derive the corrected statistics.

    ``chi`` is the chi-square value the bootstrap result is compared against.
    Returns a tuple ``(chi_b, chi_corr, bias_chi2)`` where ``chi_b`` is the
    first element of ``stats.calc_chi2`` for the refitted model,
    ``chi_corr = 2 * chi - chi_b`` and ``bias_chi2 = chi_b - chi``.
    '''
    # Bootstrap step: yields a covariance matrix and a weighting matrix.
    boot_cov, gamma = self.BootstrapChiADF()
    observed = self.mod_semopy.vars['observed']
    boot_cov = pd.DataFrame(boot_cov, index=observed, columns=observed)

    # Refit on the bootstrap covariance (the inspector is not needed here).
    # NOTE: as of May 2021 semopy could not handle custom weighting matrices
    # in its WLS procedure; semopy must be patched so that `custom_w` works.
    refit = Model(
        self.equation,
        mimic_lavaan=self.mimic_lavaan,
        baseline=self.bool_baseline,
    )
    refit.fit(
        self.data,
        cov=boot_cov,
        obj=self.obj,
        solver=self.solver,
        custom_w=gamma,
    )

    # Bootstrap chi2, corrected chi2 and the chi2 bias.
    chi_b = stats.calc_chi2(refit)[0]
    return chi_b, 2 * chi - chi_b, chi_b - chi
def sem_function(mod, Agg_Trips_tem, Start_date):
    """Fit the model description ``mod`` to ``Agg_Trips_tem`` and tag the result.

    Returns whatever ``inspect`` produces for the optimised model, with a
    ``'Date'`` entry set to ``Start_date``.
    """
    sem_model = Model(mod)
    sem_model.load_dataset(Agg_Trips_tem)

    optimizer = Optimizer(sem_model)
    optimizer.optimize()  # the objective value is not used

    estimates = inspect(optimizer)
    estimates['Date'] = Start_date
    return estimates
def sem(name, data):
    """Fit the hierarchical "integration" SEM to ``data`` and export the results.

    Writes three files whose names are derived from ``name``:
    ``../result/inspect_<name>.xlsx`` (output of ``Model.inspect``),
    ``../result/stat_<name>.xlsx`` (transposed ``semopy.calc_stats`` output)
    and ``../plots/<name>.pdf`` (``semopy.semplot`` diagram).

    Args:
        name: Suffix used in the output file names.
        data: Dataset passed to ``Model.fit``; it has to provide the
            indicator columns named in the model description below.

    Returns:
        None.
    """
    # One statement per line: a '#' starts a comment that runs to the end of
    # the line, so statements must not share a line with a preceding comment.
    desc = '''
# measurement model
# *** DOMAINS ***
employment =~ number_of_jobs + income_from_jobs # + sex
housing =~ housing_space + sanitation_of_housing # + relocation_to_bhasanchar
education =~ quality_of_education + lt_12_education + between_12_18_education + gt_18_education
health =~ psychological_healthcare + number_of_healthcare_facilities + quality_of_healthcare
social_links =~ help_from_ngo + trust_on_ngo + trust_on_law_enforcement
social_bridges =~ possibility_of_friendship_with_host + friendship_with_host + marriage_to_host
social_bonds =~ bond_with_rohingyas_outside + bond_with_neighbors + bond_with_majhis # + number_of_friends + bond_with_imams
language_and_cultural_knowledge =~ number_of_religious_facilities + removal_of_religious_barriers + cultural_mixability
safety_and_stability =~ intention_to_leave + improvement_in_6mos + feeling_about_future + number_of_violence + fear_of_children_safety # + stress + knowledge_of_missing_children + number_of_sexual_harassment + fear_of_leaving_again + discussion_about_violent_groups
rights_and_citizenship =~ repatriation_in_home + return_to_home + rights_in_home
# *** THEMES ***
markers_and_means =~ education + employment + housing + health
social_connections =~ social_bonds + social_bridges + social_links
facilitators =~ language_and_cultural_knowledge + safety_and_stability
foundation =~ rights_and_citizenship
integration =~ markers_and_means + social_connections + facilitators + foundation
# regressions
number_of_jobs ~ sex # should we use it?
# residual correlations
removal_of_religious_barriers ~~ number_of_religious_facilities
'''
    mod = Model(desc)
    mod.fit(data)

    # Parameter table.
    ins = mod.inspect()
    ins.to_excel('../result/inspect_' + name + '.xlsx', index=False)

    # Fit statistics, transposed before export.
    stat = semopy.calc_stats(mod).T
    stat.to_excel('../result/stat_' + name + '.xlsx')

    # Path diagram.
    semopy.semplot(mod, '../plots/' + name + '.pdf')
def structural_equation_modeling(data, mod):
    """Fit ``mod`` to ``data`` and return six model matrices as nested dicts.

    The matrices are entries 0-5 of ``inspect(opt, mode='mx')``; for each
    entry the element at index 1 is converted with ``to_dict('dict')``.
    The caller-facing order is ``(beta, lam, psi, theta, sigma, cov)`` --
    that naming presumes semopy's matrix ordering; verify against the
    installed semopy version.
    """
    sem_model = Model(mod)
    sem_model.load_dataset(data)

    optimizer = Optimizer(sem_model)
    optimizer.optimize()  # the objective value is not used

    matrices = inspect(optimizer, mode='mx')
    return tuple(matrices[idx][1].to_dict('dict') for idx in range(6))
def semopyFit(self, dataframe):
    '''Helper: build a semopy ``Model`` from this object's settings and fit it.

    Returns ``(model, est, res)``: the fitted model, the value returned by
    ``model.fit`` and the ``model.inspect`` output requested with
    standardised estimates and ``se_robust=self.bool_robust``.
    '''
    # Model configuration comes entirely from instance attributes.
    sem_model = Model(
        self.equation,
        mimic_lavaan=self.mimic_lavaan,
        baseline=self.bool_baseline,
    )

    # Estimation.
    fit_result = sem_model.fit(dataframe, obj=self.obj, solver=self.solver)

    # Result table.
    estimates = sem_model.inspect(std_est=True, se_robust=self.bool_robust)

    return sem_model, fit_result, estimates
def getSemopyEdgeFeedback(variables):
    """Fit a semopy regression model built from an edge list and report fit statistics.

    Args:
        variables: Comma-separated edges, each written ``first-second``.
            Every edge contributes ``first`` as a right-hand-side term of the
            regression ``second ~ ...``.

    Returns:
        A JSON string encoding a dict with, for every ``second`` node, the
        string ``"N/A,<first>,<first>,..."``, plus the keys ``"BIC"``,
        ``"CHI2"``, ``"PVALUE"`` and ``"RMSEA"``. Each statistic is itself
        JSON-encoded and rounded to three decimals, or ``-1`` when the
        statistics could not be gathered (the legacy ``"Chi2"`` key is also
        written in that case).

    Raises:
        IndexError: If an edge does not contain ``-``.
    """
    print("APIString2: ")
    print(variables)

    # Group the right-hand-side terms by regression target. The "N/A" prefix
    # is part of the returned payload, so it is kept as-is.
    edgeDict = {}
    for edge in variables.split(","):
        parts = edge.split("-")
        source, target = parts[1], parts[0]
        edgeDict[source] = edgeDict.get(source, "N/A") + "," + target

    # One "<node> ~ a+b+c" line per grouped node.
    model_lines = []
    for key, joined in edgeDict.items():
        terms = [node for node in joined.split(",") if node != "N/A"]
        model_lines.append("\n" + key + " ~ " + "+".join(terms))
    SemopyString = "".join(model_lines)
    print(SemopyString)

    SemopyModel = Model(SemopyString)
    SemopyModel.load_dataset(GLOBAL_dataframe)
    opt = Optimizer(SemopyModel)
    opt.optimize()

    try:
        stats = gather_statistics(opt)
        print(stats)
        print(stats.bic)
        edgeDict["BIC"] = json.dumps(round(stats.bic, 3))
        edgeDict["CHI2"] = json.dumps(round(stats.chi2[0], 3))
        # NOTE(review): "PVALUE" carries the CFI value, not a p-value --
        # confirm with the consumer before renaming or changing it.
        edgeDict["PVALUE"] = json.dumps(round(stats.cfi, 3))
        edgeDict["RMSEA"] = json.dumps(round(stats.rmsea, 3))
    except Exception:
        # Best effort: report sentinel values instead of failing the request.
        # "BIC" is what the success path writes; "Chi2" is retained only for
        # consumers that relied on the earlier failure payload.
        edgeDict["BIC"] = json.dumps(-1)
        edgeDict["Chi2"] = json.dumps(-1)
        edgeDict["CHI2"] = json.dumps(-1)
        edgeDict["PVALUE"] = json.dumps(-1)
        edgeDict["RMSEA"] = json.dumps(-1)
        print('ERROR')

    return json.dumps(edgeDict)
def get_structure_connected(data: Data, loading_cutoff=None, use_kinship=True):
    """Extend the unconnected model with the best single factor-to-factor regression.

    The base description is the ``'mod_full'`` entry returned by
    ``get_structure_unconnect``. Every ordered pair of latent variables
    ``(f1, f2)`` is tried as an extra ``f1 ~ f2`` regression; each candidate is
    fitted and scored with ``calc_reduced_ml`` and the candidate with the
    lowest score is kept.

    Args:
        data: Project data container; ``d_all``, ``d_kinship`` and ``phens``
            are read from it.
        loading_cutoff: Forwarded to ``get_structure_unconnect``.
        use_kinship: When true, the base model is fitted with the
            effects-based models using ``data.d_kinship``; otherwise a plain
            ``Model`` is fitted.

    Returns:
        ``dict(mod_connected=...)`` holding the best extended description, or
        the unchanged base description when no candidate scores below the
        initial ``10e10`` threshold.
    """
    mod = get_structure_unconnect(data,
                                  loading_cutoff=loading_cutoff,
                                  get_mod_full=True)['mod_full']

    # get sem model and estimate sem
    if use_kinship:
        # NOTE(review): this first fit is overwritten by the generalized
        # model right below -- confirm whether it is still needed.
        sem = ModelEffects(mod)
        sem.fit(data.d_all, group='group', k=data.d_kinship)
        sem_inspect = sem.inspect()
        # print(sem_inspect.loc[1:10, 'Estimate'])

        sem = ModelGeneralizedEffects(mod, effects='group')
        sem.fit(data.d_all, group='group', k=data.d_kinship)
        sem_inspect = sem.inspect()
        # print(sem_inspect.loc[1:10, 'Estimate'])
    else:
        sem = Model(mod)
        sem.fit(data.d_all)
        sem_inspect = sem.inspect()
        # print(sem_inspect.loc[1:10, 'Estimate'])

    # Fix parameters
    # add influences from one factor to another
    lat_vars = sem.vars['latent']

    # TODO while to add more relations, use hyperparameters for stability
    ml_min = 10e10
    mod_min = mod
    for f1, f2 in permutations(lat_vars, 2):
        mod_tmp = f'{mod}\n{f1} ~ {f2}'
        sem = Model(mod_tmp, cov_diag=True)
        sem.fit(data.d_all)
        res = calc_reduced_ml(sem, data.phens)
        if ml_min > res:
            # Track the running minimum; without this update the last
            # candidate below the threshold would win instead of the best.
            ml_min = res
            mod_min = mod_tmp

    return dict(mod_connected=mod_min)
# Minimal semopy walkthrough using the bundled political_democracy example.
import semopy
from semopy import Model
import pandas as pd
from pprint import pprint

# Fetch the example's model description and echo it.
desc = semopy.examples.political_democracy.get_model()
print(desc)
# Fetch the dataset that ships with the same example.
data = semopy.examples.political_democracy.get_data()
# print(type(data), data.columns)
# Build the model from the description and fit it to the example data.
mod = Model(desc)
res = mod.fit(data)
# print(res)
# Result of Model.inspect() for the fitted model (presumably the
# parameter-estimate table -- uncomment the pprint below to view it).
ins = mod.inspect()
# pprint(ins)
files = [f for f in files if f.endswith('.txt')] n_subjects = len(files) # num subjects n_models = len(descs) # num models logliks = np.zeros((n_subjects, n_models)) # likelihoods lmes = np.zeros((n_subjects, n_models)) # LMEs = -0.5 * BICs bics = np.zeros((n_subjects, n_models)) # BICs ks = np.zeros((n_subjects, n_models)) # # params ns = np.zeros((n_subjects, n_models)) # # data points for i in range(n_subjects): for j in range(n_models): model = Model(descs[j]) filepath = os.path.join(dirname, files[i]) data = pd.read_csv(filepath, sep='\t') opt_res = model.fit(data) # from /Users/momchil/anaconda3/lib/python3.7/site-packages/semopy/stats.py: calc_bic() # WARNING: all of these are up to a proportionality constant that DIFFERS ACROSS SUBJECTS => do not use for BMS # ''' logliks[i,j] = stats.calc_likelihood(model) # note up to proportionality constant, b/c w.r.t. saturated model, but that's the same for all models so it's fine ks[i,j], ns[i,j] = len(model.param_vals), model.mx_data.shape[0] bic = stats.calc_bic(model) lmes[i,j] = -0.5 * bic '''