def crude_mixedMLbayse(df_merged, x_feature, y_feature, covars='False', logit=False):
    """Fit a Bayesian mixed model with a random intercept per ``CohortType``.

    The sentinel codes ``-9``, ``'-9'``, ``999`` and ``888`` are replaced by
    NaN, a formula string is assembled, the model is fitted through bambi
    (``bmb``) and the ArviZ summary of the fit is returned.

    Args:
        df_merged: DataFrame containing ``x_feature``, ``y_feature`` and a
            ``'CohortType'`` column.
        x_feature: Name of the predictor column.
        y_feature: Name of the response column.
        covars: ``'False'`` (or ``False``) fits ``y_feature ~ x_feature`` on
            the rows that are complete in the three columns used.
            ``'True'`` (or ``True``) takes the frame returned by
            ``add_confound`` and uses every column that is not in the
            exclusion list below as a fixed effect.
        logit: When truthy the model is fitted with ``family='bernoulli'``
            and ``link='logit'``; otherwise ``fit`` is called with its
            defaults.

    Returns:
        The object returned by ``az.summary`` for the fitted model.

    Raises:
        ValueError: If ``covars`` is not one of ``'True'``, ``'False'``,
            ``True`` or ``False``.
    """
    # TODO: Replace covars variable with actual selection of individual features

    # Validate the flag first: previously any unexpected value (including a
    # real boolean) skipped both branches and crashed later with an
    # UnboundLocalError on ``fit_string``.
    if covars in ('True', True):
        use_covars = True
    elif covars in ('False', False):
        use_covars = False
    else:
        raise ValueError(
            "covars must be 'True' or 'False' (or a bool), got %r" % (covars,)
        )

    # Recode the missing-value sentinels one at a time, exactly as the
    # original chained ``replace`` calls did.
    for sentinel in (-9, '-9', 999, 888):
        df_merged = df_merged.replace(sentinel, np.nan)

    if not use_covars:
        data = df_merged[[x_feature, y_feature, 'CohortType']].dropna(
            how='any', axis='rows'
        )
        fit_string = y_feature + '~' + x_feature
    else:
        # ``add_confound`` is defined elsewhere in the project; presumably it
        # returns the analysis frame including confounder columns -- verify.
        data = add_confound(df_merged, x_feature, y_feature)

        # Filter out target, at-birth, and reference dummy variables.
        excluded = {
            'birthWt', 'Outcome_weeks', 'Outcome', 'PIN_Patient', 'SGA',
            'LGA', 'birthLen', 'CohortType', 'race', 'race_1', 'smoking',
            'smoking_3', 'education_5', 'education',
        }
        terms = [col for col in data.columns if col not in excluded]

        # Same spacing as before: the first term is ' name ', every later
        # term is ' + name '.
        fit_string = y_feature + '~' + ''.join(
            (' ' if position == 0 else ' + ') + col + ' '
            for position, col in enumerate(terms)
        )

    print('mixedML string:')
    print(fit_string)

    # Mixed model with a random intercept for the grouping variable CohortType.
    fit_string += '+ (1|CohortType)'

    # The formula is handed to ``fit`` (not to ``Model``), as in the original.
    model = bmb.Model(data)
    fit_kwargs = {'family': 'bernoulli', 'link': 'logit'} if logit else {}
    results = model.fit(fit_string, **fit_kwargs)

    return az.summary(results)
#
# #         data.loc[:, ('employment_rate', idx)] = c2emp[(c, y)]
# #
#         data['foreign_born'][idx] = c2fb[(c, y)]
#         data['net_migration'][idx] = c2nm[(c, y)]
#         data['expenditures'][idx] = c2ex[(c, y)]
#         data['employment_rate'][idx] = c2emp[(c, y)]
#     except KeyError:
#         pass
# data.to_pickle("data_dump")

# Load the previously pickled frame and show it together with its columns.
# NOTE: unpickling can execute arbitrary code; only load a "data_dump" file
# that this script produced itself.
data = pd.read_pickle("data_dump")
print(data)
print(data.columns.tolist())

# Table 4. Fit a two-way fixed effects model. Percent foreign born on welfare
# state attitudes, controlling for social welfare expenditures and the
# employment rate.
model = bambi.Model(data, dropna=True)
model.add('deps_jobs ~ 0')
# print(model)
# Add the predictors one term at a time, in the same order as before.
for predictor in ('foreign_born', 'expenditures', 'employment_rate'):
    model.add(predictor)

results = model.fit(link="logit")
print(results)
print(results.summary())
# results.plot()

# Table 5. Fit a two-way fixed effects model. Net migration on welfare state
# attitudes, controlling for social welfare expenditures, employment rate,
# and percent foreign born.
'interval': intervals }) # Read the comma-separated values file containing metadata of our corpus corpus = pd.read_csv("corpus/corpus.csv") # Concatenate all data from the corpus into one big pandas DataFrame # To do so, loop over all rows of the corpus, get the syllable intervals DataFrames, and concatenate them all data = pd.concat( [syllable_intervals_data(row) for _, row in corpus.iterrows()]) # Maximum likelihood (ML/REML) estimation of mixed-effects linear model import statsmodels.formula.api as smf # Construct and fit the StatsModels model, with 'native' a fixed effect and 'speaker' a random effect # For details, see http://www.statsmodels.org/devel/mixed_linear.html model = smf.mixedlm('interval ~ native', data, groups=data['speaker']) results = model.fit() # Print the results (or they could be saved, or further queried) print(results.summary()) # Bayesian estimation of mixed-effects linear model import bambi # Construct and fit a Bayesian version of the same mixed-effect model # For details, see https://github.com/bambinos/bambi#user-guide model = bambi.Model(data) results = model.fit('interval ~ native', random=['1|speaker']) # And again print those results print(results.summary())
def build_model(self, bids_folder='/data/ds-tmsrisk'):
    """Create the bambi model for the configured model type.

    Looks up the formula registered under ``self.model_type`` in
    ``self.models`` and stores a Bernoulli model with a probit link in
    ``self.model``. The model is built on the ``chose_risky``, ``x``,
    ``risky_first`` and ``n_safe`` columns of ``self.data``, with the index
    reset into regular columns.

    Args:
        bids_folder: Not used by this method; kept in the signature for
            callers that pass it.
    """
    model_columns = ['chose_risky', 'x', 'risky_first', 'n_safe']

    # Resolve the formula before touching the data, as the original did.
    formula = self.models[self.model_type]
    frame = self.data[model_columns].reset_index()

    self.model = bmb.Model(formula, frame, family="bernoulli", link='probit')
# Count rows per 'hour' (result is not stored) and show a histogram of the
# 'hour' values.
sleep_st.groupby('hour').count()
start_fig = px.histogram(y=sleep_st.hour)
start_fig.show()

## Exploratory regression, mainly to practise the workflow.
## FIXME: scale values with zscores()
regression = data.loc[:, [
    "duration",
    "maxAvgHeartRate",
    "start_time",
    "wake_time",
    "awakeSleepSeconds",
    "restingHeartRate",
    "activeKilocalories",
    "activeSeconds",
]]

## Simple additive model: awakeSleepSeconds explained by active kilocalories,
## duration and resting heart rate.
formula = 'awakeSleepSeconds ~ activeKilocalories + duration + restingHeartRate'

# Rows with missing values are dropped by bambi (dropna=True).
model = bmb.Model(formula=formula, data=regression, dropna=True)
results = model.fit(draws=2000, chains=4)

# Posterior predictive samples, then the usual summaries and diagnostics.
model.predict(results, kind="pps", draws=1000)
results.posterior.mean()
az.plot_trace(results)
az.summary(results)
az.plot_ppc(results)
# TODO: do partial r squared correlation