Example #1
import numpy as np
import bambi as bmb
import arviz as az


def crude_mixedMLbayse(df_merged,
                       x_feature,
                       y_feature,
                       covars='False',
                       logit=False):

    # TODO: replace the covars flag with an explicit selection of individual features

    ## recode sentinel missing-value codes (-9, '-9', 999, 888) as NaN
    df_merged = df_merged.replace(-9, np.nan).replace('-9', np.nan).replace(
        999, np.nan).replace(888, np.nan)

    if covars == 'False':

        ## crude model: outcome regressed on the single exposure only
        data = df_merged[[x_feature, y_feature,
                          'CohortType']].dropna(how='any', axis='rows')

        fit_string = y_feature + ' ~ ' + x_feature

    elif covars == 'True':

        ## adjusted model: add_confound (defined elsewhere in this project)
        ## returns the outcome, the exposure and the candidate confounders
        data = add_confound(df_merged, x_feature, y_feature)

        ## build the model string, excluding targets, at-birth measures,
        ## identifiers and the reference dummy variables
        exclude = {
            'birthWt', 'Outcome_weeks', 'Outcome', 'PIN_Patient', 'SGA', 'LGA',
            'birthLen', 'CohortType', 'race', 'race_1', 'smoking', 'smoking_3',
            'education_5', 'education'
        }
        predictors = [x for x in data.columns if x not in exclude]
        fit_string = y_feature + ' ~ ' + ' + '.join(predictors)

    ## group-specific (random) intercept for each cohort
    fit_string += ' + (1|CohortType)'
    print('mixedML string:')
    print(fit_string)

    ## mixed model with grouping variable CohortType:
    ## logistic (Bernoulli/logit) when logit=True, linear otherwise
    if logit:
        model = bmb.Model(fit_string, data, family='bernoulli', link='logit')
    else:
        model = bmb.Model(fit_string, data)
    results = model.fit()

    mdf = az.summary(results)
    return mdf
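
# Hypothetical usage, purely illustrative: df_merged and the feature names below
# ('UTAS' as the exposure, 'Outcome_weeks' as the outcome) are placeholders and
# are not confirmed by the source.
summary_df = crude_mixedMLbayse(df_merged,
                                x_feature='UTAS',
                                y_feature='Outcome_weeks',
                                covars='True',
                                logit=False)
print(summary_df)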
Example #2
#         # # data.loc[:, ('employment_rate', idx)] = c2emp[(c, y)]
#
#         data['foreign_born'][idx] = c2fb[(c, y)]
#         data['net_migration'][idx] = c2nm[(c, y)]
#         data['expenditures'][idx] = c2ex[(c, y)]
#         data['employment_rate'][idx] = c2emp[(c, y)]
#     except KeyError:
#         pass

# data.to_pickle("data_dump")
data = pd.read_pickle("data_dump")
print(data)
print(data.columns.tolist())
# Table 4. Fit a two-way fixed effects model. Percent foreign born on welfare
# state attitudes, controlling for social welfare expenditures and the
# employment rate.
model = bambi.Model(data, dropna=True)
model.add('deps_jobs ~ 0')
# print(model)
model.add('foreign_born')
model.add('expenditures')
model.add('employment_rate')
results = model.fit(link="logit")
print(results)
print(results.summary())
# results.plot()

# Table 5. Fit a two-way fixed effects model. Net migration on welfare state
# attitudes, controlling for social welfare expenditures, employment rate,
# and percent foreign born.
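
# A hedged sketch of the Table 5 fit described above, mirroring the Table 4 code.
# It assumes the same dependent variable (deps_jobs) and reuses the same (older)
# bambi add()/fit() interface used above; neither is confirmed by the source.
model = bambi.Model(data, dropna=True)
model.add('deps_jobs ~ 0')
model.add('net_migration')
model.add('expenditures')
model.add('employment_rate')
model.add('foreign_born')
results = model.fit(link="logit")
print(results.summary())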
Example #3
        'interval': intervals
    })


# Read the comma-separated values file containing metadata of our corpus
corpus = pd.read_csv("corpus/corpus.csv")
# Concatenate all data from the corpus into one big pandas DataFrame
# To do so, loop over all rows of the corpus, get the syllable intervals DataFrames, and concatenate them all
data = pd.concat(
    [syllable_intervals_data(row) for _, row in corpus.iterrows()])

# Maximum likelihood (ML/REML) estimation of mixed-effects linear model
import statsmodels.formula.api as smf

# Construct and fit the StatsModels model, with 'native' a fixed effect and 'speaker' a random effect
# For details, see http://www.statsmodels.org/devel/mixed_linear.html
model = smf.mixedlm('interval ~ native', data, groups=data['speaker'])
results = model.fit()
# Print the results (or they could be saved, or further queried)
print(results.summary())
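
# As an illustrative extra (not part of the original script), the fixed-effect
# estimates and the per-speaker random intercepts can also be inspected directly
# via standard attributes of statsmodels' MixedLMResults
print(results.fe_params)
print(results.random_effects)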

# Bayesian estimation of mixed-effects linear model
import bambi

# Construct and fit a Bayesian version of the same mixed-effect model
# For details, see https://github.com/bambinos/bambi#user-guide
model = bambi.Model(data)
results = model.fit('interval ~ native', random=['1|speaker'])
# And again print those results
print(results.summary())
Example #4
    def build_model(self, bids_folder='/data/ds-tmsrisk'):

        ## look up the formula for the selected model type and build a
        ## Bernoulli/probit model for the binary choice of the risky option
        formula = self.models[self.model_type]

        self.model = bmb.Model(formula,
                               self.data[['chose_risky', 'x', 'risky_first',
                                          'n_safe']].reset_index(),
                               family='bernoulli',
                               link='probit')
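
# A hedged illustration of the kind of entry self.models might contain; the model
# name and formula below are assumptions for demonstration only and are not taken
# from the source.
models = {
    'base': 'chose_risky ~ x + risky_first + n_safe',
}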
Example #5
import plotly.express as px
import bambi as bmb
import arviz as az

## count sleep records by starting hour
sleep_st.groupby('hour').count()

start_fig = px.histogram(y=sleep_st.hour)
start_fig.show()

## fit a quick exploratory regression, mostly to show how it's done
## FIXME: scale the predictors with z-scores (see the sketch after this block)
regression = data[[
    "duration", "maxAvgHeartRate", "start_time", "wake_time",
    "awakeSleepSeconds", "restingHeartRate", "activeKilocalories",
    "activeSeconds"
]]
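
# One hedged way to address the FIXME above: z-score the numeric columns before
# modelling. The column list is an assumption, and the model below would have to
# be pointed at regression_z instead of regression to actually use it.
zcols = ["duration", "maxAvgHeartRate", "awakeSleepSeconds",
         "restingHeartRate", "activeKilocalories", "activeSeconds"]
regression_z = regression.copy()
regression_z[zcols] = (regression_z[zcols] -
                       regression_z[zcols].mean()) / regression_z[zcols].std()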

## specify a simple additive model to try to predict
## how long I spent awake during the night
formula = 'awakeSleepSeconds ~ activeKilocalories + duration + restingHeartRate'

# see whether exercise load, sleep duration and resting heart rate affected time spent awake
model = bmb.Model(formula=formula, data=regression, dropna=True)

results = model.fit(draws=2000, chains=4)
model.predict(results, kind="pps", draws=1000)
results.posterior.mean()

az.plot_trace(results)
az.summary(results)

az.plot_ppc(results)

# TODO: compute partial R-squared / partial correlations for the predictors
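
# A hedged sketch of one way the TODO above could be done for a single predictor,
# using the residual-on-residual definition of partial correlation (its square is
# the partial R-squared). Column names follow the formula above; using plain OLS
# for the residualization is an assumption, not part of the original analysis.
import numpy as np
import statsmodels.formula.api as smf

clean = regression.dropna(subset=["awakeSleepSeconds", "activeKilocalories",
                                  "duration", "restingHeartRate"])
# residualize the outcome and the predictor of interest on the other covariates
res_y = smf.ols("awakeSleepSeconds ~ duration + restingHeartRate", clean).fit().resid
res_x = smf.ols("activeKilocalories ~ duration + restingHeartRate", clean).fit().resid
partial_r = np.corrcoef(res_y, res_x)[0, 1]
print("partial r:", partial_r, "partial R^2:", partial_r ** 2)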