예제 #1
0
def run_model(model_def: str, df: pd.DataFrame, pos_def_warnings: str = 'ignore',
              **opt_kwargs) -> Optimizer:
    """Fit ``model_def`` to ``df`` and return the optimizer with covariance flags.

    Everything printed while the optimizer is constructed and run is captured
    rather than written to stdout. The captured text is then scanned for the
    two "not positive-definite" messages to set the flags on the result.

    Args:
        model_def: Model description handed to ``Model``.
        df: Dataset loaded into the model.
        pos_def_warnings: Handling mode, first normalised by
            ``_validate_pos_def``. ``'error'`` raises when a Cholesky
            factorisation of the sample or model-implied covariance matrix
            fails; ``'print'`` echoes the captured optimizer output; any
            other value does neither.
        **opt_kwargs: Forwarded unchanged to ``Optimizer.optimize``.

    Returns:
        The optimizer, with ``_sample_cov_pos_df`` and ``_model_cov_pos_df``
        set to ``False`` when the matching message was found in the captured
        output and ``True`` otherwise.

    Raises:
        SampleCovMatrixNotPositiveDefiniteException: In ``'error'`` mode, when
            ``model.mx_cov`` cannot be Cholesky-factorised.
        ModelCovMatrixNotPositiveDefiniteException: In ``'error'`` mode, when
            the first element of ``opt.get_sigma()`` cannot be
            Cholesky-factorised.
    """
    mode = _validate_pos_def(pos_def_warnings)
    raise_on_failure = mode == 'error'

    model = Model(model_def)
    model.load_dataset(df)

    # Cholesky only succeeds for positive-definite input, so it doubles as the check.
    if raise_on_failure:
        try:
            np.linalg.cholesky(model.mx_cov)
        except np.linalg.LinAlgError:
            raise SampleCovMatrixNotPositiveDefiniteException

    # Keep the optimizer's console output so it can be inspected below.
    with io.StringIO() as sink, redirect_stdout(sink):
        opt = Optimizer(model)
        opt.optimize(**opt_kwargs)
        captured = sink.getvalue()

    if raise_on_failure:
        implied_cov = opt.get_sigma()[0]
        try:
            np.linalg.cholesky(implied_cov)
        except np.linalg.LinAlgError:
            raise ModelCovMatrixNotPositiveDefiniteException

    if mode == 'print':
        print(captured)

    # NOTE(review): "postive" is part of the text being matched; presumably it
    # mirrors the optimizer's own message, so confirm before correcting it.
    opt._sample_cov_pos_df = (
        'a sample covariance matrix is not positive-definite' not in captured)
    opt._model_cov_pos_df = (
        'resulting model-implied covariance matrix is not postive-definite' not in captured)

    return opt
예제 #2
0
    def Bootstrapwrapper(self, chi):
        '''Run one ADF chi-square bootstrap and derive the corrected statistic.

        Calls ``self.BootstrapChiADF()``, refits a fresh ``Model`` using the
        first returned matrix as ``cov`` and the second as ``custom_w``, and
        compares the resulting chi-square with ``chi``.

        Parameters
        ----------
        chi : chi-square value the bootstrap result is compared against
            (presumably the statistic of the original fit; confirm with caller).

        Returns
        -------
        tuple ``(chi_b, chi_corr, bias_chi2)`` where ``chi_b`` is the first
        element of ``stats.calc_chi2(model)``, ``chi_corr = 2 * chi - chi_b``
        and ``bias_chi2 = chi_b - chi``.
        '''
        # Run bootstrap
        boot_cov, boot_weights = self.BootstrapChiADF()

        # Label rows and columns with the observed variable names
        observed = self.mod_semopy.vars['observed']
        boot_cov = pd.DataFrame(boot_cov, index=observed, columns=observed)

        # Refit on the bootstrap matrices (no need to run inspector)
        # NOTE: in May 2021 semopy cannot handle custom weighting matrices for
        # its WLS procedure. Make sure you mod semopy such that it can handle them
        model = Model(self.equation,
                      mimic_lavaan=self.mimic_lavaan,
                      baseline=self.bool_baseline)
        model.fit(self.data,
                  cov=boot_cov,
                  obj=self.obj,
                  solver=self.solver,
                  custom_w=boot_weights)

        # Bootstrap chi2, then the corrected chi2 and the chi2 bias
        chi_b = stats.calc_chi2(model)[0]
        return chi_b, 2 * chi - chi_b, chi_b - chi
def sem_function(mod, Agg_Trips_tem, Start_date):
    """Fit the model ``mod`` on ``Agg_Trips_tem`` and return date-tagged estimates.

    Args:
        mod: Model description passed to ``Model``.
        Agg_Trips_tem: Dataset loaded into the model.
        Start_date: Value written to the ``'Date'`` column of the result.

    Returns:
        The object returned by ``inspect(opt)`` with an added ``'Date'``
        column holding ``Start_date``.
    """
    sem_model = Model(mod)
    sem_model.load_dataset(Agg_Trips_tem)

    optimizer = Optimizer(sem_model)
    optimizer.optimize()  # the returned objective value is not needed here

    estimates = inspect(optimizer)
    estimates['Date'] = Start_date
    return estimates
예제 #4
0
def sem(name, data):
    """Fit the hard-coded integration model to ``data`` and export the results.

    Three files are written, each named after ``name``:
    ``../result/inspect_<name>.xlsx`` (output of ``Model.inspect``),
    ``../result/stat_<name>.xlsx`` (transposed output of ``semopy.calc_stats``)
    and ``../plots/<name>.pdf`` (output of ``semopy.semplot``).

    Args:
        name: String used to build the output file names.
        data: Dataset passed to ``Model.fit``.

    Returns:
        None.
    """
    desc = '''
    
    # measurement model
    
    # ***   DOMAINS   ***
    employment  =~ number_of_jobs + income_from_jobs
                # + sex
    housing     =~ housing_space + sanitation_of_housing
                # + relocation_to_bhasanchar
    education   =~ quality_of_education + lt_12_education + between_12_18_education + gt_18_education
    health      =~ psychological_healthcare + number_of_healthcare_facilities + quality_of_healthcare
    
    social_links    =~ help_from_ngo + trust_on_ngo + trust_on_law_enforcement
    social_bridges  =~ possibility_of_friendship_with_host + friendship_with_host + marriage_to_host
    social_bonds    =~ bond_with_rohingyas_outside + bond_with_neighbors + bond_with_majhis
                    # + number_of_friends + bond_with_imams
    
    language_and_cultural_knowledge =~ number_of_religious_facilities + removal_of_religious_barriers + cultural_mixability 
    safety_and_stability            =~ intention_to_leave + improvement_in_6mos + feeling_about_future + number_of_violence + fear_of_children_safety 
                                    # + stress + knowledge_of_missing_children + number_of_sexual_harassment + fear_of_leaving_again + discussion_about_violent_groups
    
    rights_and_citizenship  =~ repatriation_in_home + return_to_home + rights_in_home
    
    # ***   THEMES   ***
    markers_and_means   =~ education + employment + housing + health
    social_connections  =~ social_bonds + social_bridges + social_links
    facilitators        =~ language_and_cultural_knowledge + safety_and_stability
    foundation          =~ rights_and_citizenship  
    integration         =~ markers_and_means + social_connections + facilitators + foundation
    
    
    # regressions
    number_of_jobs  ~ sex    # should we use it?
    
    # residual correlations
    removal_of_religious_barriers   ~~ number_of_religious_facilities
    
    '''

    fitted = Model(desc)
    fitted.fit(data)

    # Parameter table -> spreadsheet (row index omitted)
    fitted.inspect().to_excel('../result/inspect_' + name + '.xlsx', index=False)

    # Statistics table, transposed before export
    semopy.calc_stats(fitted).T.to_excel('../result/stat_' + name + '.xlsx')

    # Diagram of the fitted model
    semopy.semplot(fitted, '../plots/' + name + '.pdf')
예제 #5
0
def structural_equation_modeling(data, mod):
    """Fit ``mod`` on ``data`` and return six inspected matrices as nested dicts.

    ``inspect(opt, mode='mx')`` is indexed at positions 0 through 5; for each
    position the element at index 1 is converted with ``to_dict('dict')``.

    Args:
        data: Dataset loaded into the model.
        mod: Model description passed to ``Model``.

    Returns:
        Tuple ``(beta, lam, psi, theta, sigma, cov)`` taken from positions
        0..5 of the inspection result, in that order. The names follow the
        original code; that each position really holds the matrix of that
        name is assumed. TODO confirm against the inspector's output order.
    """
    sem_model = Model(mod)
    sem_model.load_dataset(data)

    optimizer = Optimizer(sem_model)
    optimizer.optimize()  # the returned objective value is not needed here

    matrices = inspect(optimizer, mode='mx')
    beta, lam, psi, theta, sigma, cov = (
        matrices[position][1].to_dict('dict') for position in range(6)
    )

    return beta, lam, psi, theta, sigma, cov
예제 #6
0
    def semopyFit(self, dataframe):
        '''Fit the model with semopy. Helper function.

        Builds a ``Model`` from ``self.equation`` (with ``self.mimic_lavaan``
        and ``self.bool_baseline``), fits it on ``dataframe`` using
        ``self.obj`` and ``self.solver``, and inspects the result with
        ``std_est=True`` and ``se_robust=self.bool_robust``.

        Returns
        -------
        tuple ``(model, est, res)``: the model, the value returned by
        ``model.fit`` and the value returned by ``model.inspect``.
        '''
        # Set model
        fitted = Model(self.equation,
                       mimic_lavaan=self.mimic_lavaan,
                       baseline=self.bool_baseline)

        # Estimate, then collect the inspection output
        outcome = fitted.fit(dataframe, obj=self.obj, solver=self.solver)
        table = fitted.inspect(std_est=True, se_robust=self.bool_robust)

        return fitted, outcome, table
예제 #7
0
def getSemopyEdgeFeedback(variables):
    """Fit a regression-only semopy model built from an edge list and report fit statistics.

    Args:
        variables: Comma-separated edges, each written ``"<target>-<source>"``.
            The part before the dash becomes a right-hand-side term of the
            equation whose left-hand side is the part after the dash.

    Returns:
        JSON string of a dict that maps every source to ``"N/A,<t1>,<t2>,..."``
        and additionally holds ``"BIC"``, ``"CHI2"``, ``"PVALUE"`` and
        ``"RMSEA"``. Each statistic is itself JSON-encoded, rounded to three
        decimals, or ``-1`` when the statistics could not be gathered.

    Raises:
        IndexError: If an edge contains no ``"-"``.
    """
    print("APIString2: ")
    print(variables)

    # Group targets by source. The "N/A," prefix is part of the returned
    # payload, so the string representation is kept as-is.
    edgeDict = {}
    for edge in variables.split(","):
        parts = edge.split("-")
        target, source = parts[0], parts[1]
        edgeDict[source] = edgeDict.get(source, "N/A") + "," + target

    # One "source ~ t1+t2+..." equation per source, each on its own line.
    equations = []
    for source, targets in edgeDict.items():
        terms = [node for node in targets.split(",") if node != "N/A"]
        equations.append("\n" + source + " ~ " + "+".join(terms))
    SemopyString = "".join(equations)
    print(SemopyString)

    SemopyModel = Model(SemopyString)
    SemopyModel.load_dataset(GLOBAL_dataframe)

    opt = Optimizer(SemopyModel)
    opt.optimize()
    try:
        stats = gather_statistics(opt)
        print(stats)
        print(stats.bic)
        edgeDict["BIC"] = json.dumps(round(stats.bic, 3))
        edgeDict["CHI2"] = json.dumps(round(stats.chi2[0], 3))
        # NOTE(review): "PVALUE" is filled from stats.cfi, not from a p-value.
        # Kept because consumers may rely on the current payload; confirm.
        edgeDict["PVALUE"] = json.dumps(round(stats.cfi, 3))
        edgeDict["RMSEA"] = json.dumps(round(stats.rmsea, 3))
    except Exception:
        # Report the same four keys as the success path. The previous code
        # wrote "Chi2" here instead of "BIC", leaving "BIC" missing on failure.
        for stat_key in ("BIC", "CHI2", "PVALUE", "RMSEA"):
            edgeDict[stat_key] = json.dumps(-1)
        print('ERROR')
    return json.dumps(edgeDict)
예제 #8
0
def get_structure_connected(data: Data,
                            loading_cutoff=None,
                            use_kinship=True):
    """Extend the unconnected model with the single best factor-to-factor regression.

    The base description comes from ``get_structure_unconnect(...)['mod_full']``.
    For every ordered pair of latent variables a candidate ``f1 ~ f2`` is
    appended, the candidate model is fitted on ``data.d_all`` and scored with
    ``calc_reduced_ml``; the candidate with the smallest score is kept.

    Args:
        data: Project data container; this function reads ``d_all``,
            ``d_kinship`` and ``phens`` from it.
        loading_cutoff: Forwarded to ``get_structure_unconnect``.
        use_kinship: When true, the base model is fitted with
            ``ModelEffects`` / ``ModelGeneralizedEffects`` using
            ``data.d_kinship``; otherwise with a plain ``Model``.

    Returns:
        ``dict(mod_connected=<description>)``. The description is the base
        model when no candidate scores below the initial threshold of 10e10.
    """
    mod = get_structure_unconnect(data, loading_cutoff=loading_cutoff, get_mod_full=True)['mod_full']

    # Fit the base model; only its latent-variable list is used afterwards.
    if use_kinship:
        # NOTE(review): this first fit is immediately replaced by the
        # generalized-effects model below. It is kept in case fitting has
        # side effects that are not visible from here; confirm and remove.
        sem = ModelEffects(mod)
        sem.fit(data.d_all, group='group', k=data.d_kinship)

        sem = ModelGeneralizedEffects(mod, effects='group')
        sem.fit(data.d_all, group='group', k=data.d_kinship)
    else:
        sem = Model(mod)
        sem.fit(data.d_all)

    lat_vars = sem.vars['latent']
    # TODO while to add more relations, use hyperparameters for stability

    ml_min = 10e10
    mod_min = mod
    for f1, f2 in permutations(lat_vars, 2):
        mod_tmp = f'{mod}\n{f1} ~ {f2}'
        sem = Model(mod_tmp, cov_diag=True)
        sem.fit(data.d_all)
        res = calc_reduced_ml(sem, data.phens)

        if ml_min > res:
            # Track the running minimum. Without this update every candidate
            # under the initial threshold overwrote the previous pick, so the
            # last such candidate was returned rather than the best one.
            ml_min = res
            mod_min = mod_tmp

    return dict(mod_connected=mod_min)
예제 #9
0
import semopy
from semopy import Model
import pandas as pd
from pprint import pprint

# Minimal semopy walkthrough using the bundled political_democracy example:
# fetch the model description and data, fit the model, inspect the result.

# Model description shipped with the example; printed for reference.
desc = semopy.examples.political_democracy.get_model()
print(desc)

# Dataset shipped with the same example.
data = semopy.examples.political_democracy.get_data()
# Debug aid: uncomment to see the data's type and columns.
# print(type(data), data.columns)

# Build the model from the description and fit it to the data.
mod = Model(desc)
res = mod.fit(data)
# Debug aid: uncomment to see what fit() returned.
# print(res)

# Collect the inspection output of the fitted model.
ins = mod.inspect()
# Debug aid: uncomment to pretty-print the inspection output.
# pprint(ins)
예제 #10
0
    files = [f for f in files if f.endswith('.txt')]

    n_subjects = len(files)  # num subjects
    n_models = len(descs)  # num models

    logliks = np.zeros((n_subjects, n_models))  # likelihoods
    lmes = np.zeros((n_subjects, n_models))  # LMEs = -0.5 * BICs
    bics = np.zeros((n_subjects, n_models))  # BICs
    ks = np.zeros((n_subjects, n_models))  # # params
    ns = np.zeros((n_subjects, n_models))  # # data points

    for i in range(n_subjects):

        for j in range(n_models):

            model = Model(descs[j])

            filepath = os.path.join(dirname, files[i])
            data = pd.read_csv(filepath, sep='\t')

            opt_res = model.fit(data)

            # from /Users/momchil/anaconda3/lib/python3.7/site-packages/semopy/stats.py: calc_bic()
            # WARNING: all of these are up to a proportionality constant that DIFFERS ACROSS SUBJECTS => do not use for BMS
            #
            '''
            logliks[i,j] = stats.calc_likelihood(model) # note up to proportionality constant, b/c w.r.t. saturated model, but that's the same for all models so it's fine
            ks[i,j], ns[i,j] = len(model.param_vals), model.mx_data.shape[0]
            bic = stats.calc_bic(model)
            lmes[i,j] = -0.5 * bic
            '''