Python interaction_plot 예제들, statsmodels.graphics.api.interaction_plot Python 예제들

예제 #1

0

파일 보기

파일: statsMOOC.py 프로젝트: StrategicC/mooc-file-processing

def plot_interaction(data_lastDV):
    """
    Plot the interaction of the given data next to a grouped boxplot.

    The left panel is an interaction plot of the third column against the
    first column, traced by the second column. The right panel is a boxplot
    of the third column grouped by the first two columns. Both panels are
    drawn on the same figure, which is then shown.

    :param data_lastDV: data frame containing the independent variables in
        the first two columns and the dependent variable in the third
    :return: None
    """
    col_names = data_lastDV.columns.values  # get the columns' names
    # Rows with a missing value in any column are excluded from both panels.
    factor_groups = data_lastDV[col_names].dropna()

    x_name = col_names[0]
    trace_name = col_names[1]
    response_name = col_names[2]

    # Create both axes up front and hand them to the plotting calls
    # explicitly. A grouped DataFrame.boxplot (``by=...``) that is not given
    # an ``ax`` opens its own figure, which is why the boxplot used to end up
    # in a separate window instead of the second subplot.
    _, (ax_interaction, ax_box) = plt.subplots(1, 2)

    interaction_plot(
        factor_groups[x_name],
        factor_groups[trace_name],
        factor_groups[response_name],
        colors=['red', 'blue'],
        markers=['D', '^'],
        ms=10,
        ax=ax_interaction,
    )

    factor_groups.boxplot(
        return_type='axes',
        column=response_name,
        by=[x_name, trace_name],
        ax=ax_box,
    )
    plt.show()

예제 #2

0

파일 보기

파일: interactions_anova.py 프로젝트: murphyec/statsmodels

plt.ylabel('Salary')

# From our first look at the data, the difference between Master's and PhD
# in the management group is different than in the non-management group.
# This is an interaction between the two qualitative variables management, M,
# and education, E. We can visualize this by first removing the effect of
# experience, then plotting the means within each of the 6 groups using
# interaction_plot (the counterpart of R's interaction.plot).

# Salary with the fitted linear effect of experience (X) subtracted.
U = S - X * interX_lm32.params['X']

plt.figure(figsize=(6, 6))
interaction_plot(
    E,
    M,
    U,
    colors=['red', 'blue'],
    markers=['^', 'D'],
    markersize=10,
    ax=plt.gca(),
)

# ## Minority Employment Data

try:
    jobtest_table = pd.read_table('jobtest.table')
except OSError:
    # No readable local copy, so download the data instead. Only I/O
    # failures trigger the fallback; a bare ``except`` would also swallow
    # KeyboardInterrupt and hide genuine parsing errors in a local file.
    url = 'http://stats191.stanford.edu/data/jobtest.table'
    jobtest_table = pd.read_table(url)

factor_group = jobtest_table.groupby(['MINORITY'])

fig, ax = plt.subplots(figsize=(6, 6))

예제 #3

0

파일 보기

파일: interactions_anova.py 프로젝트: B-Rich/statsmodels

    idx = group.index
    plt.scatter(X[idx], S[idx], marker=symbols[j], color=colors[i-1],
                s=144, edgecolors='black')
    # drop NA because there is no idx 32 in the final model
    plt.plot(mf.X[idx].dropna(), lm_final.fittedvalues[idx].dropna(),
            ls=lstyle[j], color=colors[i-1])
plt.xlabel('Experience')
plt.ylabel('Salary')


# From our first look at the data, the difference between Master's and PhD
# in the management group is different than in the non-management group.
# This is an interaction between the two qualitative variables management, M,
# and education, E. We can visualize this by first removing the effect of
# experience, then plotting the means within each of the 6 groups using
# interaction_plot (the counterpart of R's interaction.plot).

# Salary with the fitted linear effect of experience (X) subtracted.
U = S - X * interX_lm32.params['X']

plt.figure(figsize=(6, 6))
interaction_plot(E, M, U, colors=['red', 'blue'], markers=['^', 'D'],
                 markersize=10, ax=plt.gca())


# ## Minority Employment Data

try:
    minority_table = pandas.read_table('minority.table')
except (IOError, OSError):
    # No readable local copy, so download the data instead. Only I/O
    # failures trigger the fallback; a bare ``except`` would also swallow
    # KeyboardInterrupt. IOError is listed explicitly so the handler also
    # works on interpreters where it is not an alias of OSError.
    url = 'http://stats191.stanford.edu/data/minority.table'
    minority_table = pandas.read_table(url)

factor_group = minority_table.groupby(['ETHN'])

plt.figure(figsize=(6, 6))
colors = ['purple', 'green']
markers = ['o', 'v']

예제 #4

0

파일 보기

파일: example_interactions.py 프로젝트: dmcdougall/statsmodels

    i, j = values
    idx = group.index
    plt.scatter(X[idx], S[idx], marker=symbols[j], color=colors[i - 1], s=144, edgecolors="black")
    # drop NA because there is no idx 32 in the final model
    plt.plot(mf.X[idx].dropna(), lm_final.fittedvalues[idx].dropna(), ls=lstyle[j], color=colors[i - 1])
plt.xlabel("Experience")
# @savefig fitted_drop32.png align=center
plt.ylabel("Salary")

# From our first look at the data, the difference between Master's and PhD
# in the management group is different than in the non-management group.
# This is an interaction between the two qualitative variables management, M,
# and education, E. We can visualize this by first removing the effect of
# experience, then plotting the means within each of the 6 groups using
# interaction_plot (the counterpart of R's interaction.plot).

# Salary with the fitted linear effect of experience (X) subtracted.
U = S - X * interX_lm32.params["X"]

plt.figure(figsize=(6, 6))
# @savefig interaction_plot.png align=center
interaction_plot(
    E,
    M,
    U,
    colors=["red", "blue"],
    markers=["^", "D"],
    markersize=10,
    ax=plt.gca(),
)

# Minority Employment Data
# ------------------------

try:
    minority_table = pandas.read_table("minority.table")
except (IOError, OSError):
    # No readable local copy, so download the data instead. Only I/O
    # failures trigger the fallback; a bare ``except`` would also swallow
    # KeyboardInterrupt. IOError is listed explicitly so the handler also
    # works on interpreters where it is not an alias of OSError.
    url = "http://stats191.stanford.edu/data/minority.table"
    minority_table = pandas.read_table(url)

factor_group = minority_table.groupby(["ETHN"])

plt.figure(figsize=(6, 6))
colors = ["purple", "green"]
markers = ["o", "v"]

예제 #5

0

파일 보기

파일: anova_example3.py 프로젝트: DoosanJung/tf_prob_study

       Days  Duration  Weight  ID
    0   0.0         1       1   1
    1   2.0         1       1   2
    2   1.0         1       1   3
    3   3.0         1       1   4
    4   0.0         1       1   5

    r = 3 (weight gain)
    m = 2 (duration of treatment)
    n_ij = 10 for all (i, j)
    """
    print("Balanced panel" + "\n")
    fig = interaction_plot(kidney_table['Weight'],
                           kidney_table['Duration'],
                           np.log(kidney_table['Days'] + 1),
                           colors=['red', 'blue'],
                           markers=['D', '^'],
                           ms=10,
                           ax=plt.gca())
    plt.show()

    formula = "np.log(Days+1) ~ C(Duration)"
    lm = fit_linear_model(formula, data=kidney_table)

    formula2 = "np.log(Days+1) ~ C(Weight)"
    lm2 = fit_linear_model(formula2, data=kidney_table)

    formula3 = "np.log(Days+1) ~ C(Duration) + C(Weight)"
    lm3 = fit_linear_model(formula3, data=kidney_table)

    formula4 = "np.log(Days+1) ~ C(Duration) * C(Weight)"

예제 #6

0

파일 보기

파일: interactions_anova.py 프로젝트: timgates42/statsmodels

plt.ylabel("Salary")

# From our first look at the data, the difference between Master's and PhD
# in the management group is different than in the non-management group.
# This is an interaction between the two qualitative variables management, M,
# and education, E. We can visualize this by first removing the effect of
# experience, then plotting the means within each of the 6 groups using
# interaction_plot (the counterpart of R's interaction.plot).

# Salary with the fitted linear effect of experience (X) subtracted.
U = S - X * interX_lm32.params["X"]

plt.figure(figsize=(6, 6))
interaction_plot(
    E,
    M,
    U,
    colors=["red", "blue"],
    markers=["^", "D"],
    markersize=10,
    ax=plt.gca(),
)

# ## Minority Employment Data

try:
    jobtest_table = pd.read_table("jobtest.table")
except OSError:
    # No readable local copy, so download the data instead. Only I/O
    # failures trigger the fallback; a bare ``except`` would also swallow
    # KeyboardInterrupt and hide genuine parsing errors in a local file.
    url = "http://stats191.stanford.edu/data/jobtest.table"
    jobtest_table = pd.read_table(url)

factor_group = jobtest_table.groupby(["MINORITY"])

fig, ax = plt.subplots(figsize=(6, 6))

예제 #7

0

파일 보기

#
# Measurement of fetal head circumference **hs**, by four observers in three fetuses.

# In[55]:

# Load the measurements from a local copy of the Altman data set; the file
# has no header row, so the column names are supplied here.
# Upstream source of the file:
# https://raw.githubusercontent.com/thomas-haslwanter/statsintro_python/master/ipynb/Data/data_altman/altman_12_6.txt
df = pd.read_csv('../data/altman_12_6.txt', names=['hs', 'fetus', 'observer'])
# NOTE(review): the "In[..]" markers suggest a notebook export, where this
# bare expression displayed the first rows; as a plain script its result is
# discarded.
df.head()

# In[56]:

# Interaction plot with 'fetus' on the x-axis, one trace per 'observer',
# and 'hs' as the response, drawn on the current axes of a new 8x6 figure.
from statsmodels.graphics.api import interaction_plot
plt.figure(figsize=(8, 6))
fig = interaction_plot(df['fetus'],
                       df['observer'],
                       df['hs'],
                       ms=10,
                       ax=plt.gca())

# In[169]:

# Fit a model with both main effects and their interaction term, then print
# its ANOVA table.
# NOTE(review): `ols` and `anova_lm` are not imported in this excerpt;
# presumably they come from statsmodels -- confirm against the full file.
formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
lm = ols(formula, df).fit()
print(anova_lm(lm))

#
# ### A chi-squared test
#
# https://en.wikipedia.org/wiki/Chi-squared_test
#
#

예제 #8

0

파일 보기

            if len(groups) == 2:

                X = data[X]
                Y = data[S]

                s = 100

                plt.figure(figsize=(8, 6))

                groups = data.groupby(data[E])

                for key, group in groups:  # ERROR (working on it)
                    interaction_plot(X,
                                     group,
                                     np.log(Y + 1),
                                     colors=['r', 'b'],
                                     markers=['D', '^'],
                                     ms=10,
                                     ax=plt.gca())

                    plt.show()  #?

            else:

                fig, ax = plt.subplots(figsize=(8, 6))

                s = 100

                for key, group in groups:  # ERROR (working on it)

                    group.plot(ax=ax,

예제 #9

0

파일 보기

파일: gese_oddmilk.py 프로젝트: oddmilk/IMS

# Recode labels: rows with more than 10 stressful life events get
# events_cat = 10.
gese.loc[gese.stressful_life_events > 10, 'events_cat'] = 10

#############################################################################################
# Simple plotting
import matplotlib.pyplot as plt
import seaborn

# Histograms
plt.hist(gese.depression)  # slightly right skewed
plt.hist(gese.gene)  # unequally distributed
plt.hist(gese.stressful_life_events)  # heavily right skewed

#############################################################################################
# Testing for interaction
from statsmodels.graphics.api import interaction_plot
fig = interaction_plot(gese.stressful_life_events, gese.gene, gese.depression)
# The plot does indicate an interaction between gene and stressful life events
plt.show()

# Correlation
# p-value (<.05) indicates a significant correlation between stressful life
# events and depression outcome
pearsoncorr = stats.pearsonr(gese.stressful_life_events, gese.depression)

# t test comparing depression between the two genotypes (gene == 1 vs. 0)
t_test = stats.ttest_ind(gese.depression[gese.gene == 1],
                         gese.depression[gese.gene == 0])
# p-value (>.05) indicates mean of depression is not significantly different
# in these two genotypes.
# The result is bound to `t_test`; the previous `print(t_test_gene)` referred
# to a name that is never assigned and raised NameError.
print(t_test)

예제 #10

0

파일 보기

    anova_lm_check(res_lm_subset, res_lm_interaction_M_subset)
    """
        df_resid           ssr  df_diff       ss_diff            F        Pr(>F)
    0      40.0  4.320910e+07      0.0           NaN          NaN           NaN
    1      38.0  1.711881e+05      2.0  4.303791e+07  4776.734853  2.291239e-46
    """

    resid_studentized_subset = plot_residuals_studentized(
        result=res_lm_interaction_M_subset, data=salary_table)

    # fitted value plotting
    plot_fitted_values(formula=formula_interaction_M,
                       data=salary_table,
                       drop_idx=drop_idx)

    # the difference between Master's and PhD in the management group is different
    # than in the non-management group. (interaction between the two qualitative variables M and E)
    # => first remove the effect of experience,
    # => then plot the means within each of the 6 groups using interaction.plot.
    U = salary_table.S - salary_table.X * res_lm_interaction_X_subset.params[
        'X']

    # Interaction plot for factor level statistics.
    interaction_plot(x=salary_table.E,
                     trace=salary_table.M,
                     response=U,
                     colors=['red', 'blue'],
                     markers=['^', 'D'],
                     markersize=10,
                     ax=plt.gca())
    plt.show()