Python OLS Beispiele, scikits.statsmodels.api.OLS Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: extra.py Projekt: rgerkin/trillion

def fit(results, components):
    """Basic OLS model to predict test results."""
    X = np.zeros((len(results),26+len(components)*2))
    Y = np.zeros((len(results),1))
    for i,result in enumerate(results):
        Y[i] = result.correct
        x = np.zeros(len(components))
        test_components = result.test.double.components+result.test.single.components
        for component in test_components:
            try:
                index = components.index(component)
            except ValueError:
                print "Couldn't find %s" % component
                sys.exit(0)
            else:
                x[index] += 1
        for j in range(len(components)):
            X[i,26+2*j] = x[j]==1
            X[i,26+2*j+1] = x[j]==2
        X[i,result.subject_id] = 1             
    from scikits.statsmodels.api import OLS
    from sklearn import linear_model as lm
    clf = lm.Lasso(0.001)#alpha = alpha)
    results = clf.fit(X,Y)
    for i,beta in enumerate(results.coef_):
        result = round(1000*beta)
        if abs(result) > 0.1:
            print i,result
    print Y.shape,X.shape
    model = OLS(Y,X)
    results = model.fit()
    print results.summary()
    return Y,X

Beispiel #2

0

Datei anzeigen

Datei: ancova.py Projekt: kyuhwas/formula

def typeIII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    with type III sum of squares
    from a given ANCOVA formula.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    X = ancova.formula.design(recarray, return_float=True)
    Y = recarray[response]
    model = OLS(Y, X)

    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    fs = []
    dfs = []
    sss = []
    pvals = []
    for contrast in ancova.contrast_names:
        r = results.f_test(ancova.contrast_matrices[contrast])
        names.append(contrast)
        fs.append(r.fvalue)
        dfs.append(r.df_num)
        pvals.append(r.pvalue)
        sss.append(r.fvalue * results.scale * r.df_num)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result

Beispiel #3

0

Datei anzeigen

Datei: ancova.py Projekt: statsmodels/formula

def typeIII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    with type III sum of squares
    from a given ANCOVA formula.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    X = ancova.formula.design(recarray, return_float=True)
    Y = recarray[response]
    model = OLS(Y, X)

    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    fs = []
    dfs = []
    sss = []
    pvals = []
    for contrast in ancova.contrast_names:
        r = results.f_test(ancova.contrast_matrices[contrast])
        names.append(contrast)
        fs.append(r.fvalue)
        dfs.append(r.df_num)
        pvals.append(r.pvalue)
        sss.append(r.fvalue * results.scale * r.df_num)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result

Beispiel #4

0

Datei anzeigen

Datei: ancova.py Projekt: kyuhwas/formula

def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names, ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]
        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C': C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()

        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid
        ftest = resultsF.f_test(contrast_matrices['C'])

        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR - dfF, df_F))
        names.append(name)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result

Beispiel #5

0

Datei anzeigen

Datei: ancova.py Projekt: kyuhwas/formula

def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type I sums of squares
    where the order is based on the order of terms
    in the contrast_names of ancova.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    model = OLS(Y, ancova.formulae[0].design(recarray, return_float=True))
    results = model.fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    names.append(ancova.contrast_names[0])
    fs.append(
        ((np.sum(Y**2) - SSE_old) / (Y.shape[0] - df_old)) / (SSE_F / df_F))
    sss.append((np.sum(Y**2) - SSE_old))
    dfs.append(Y.shape[0] - df_old)
    pvals.append(f_dbn.sf(fs[-1], Y.shape[0] - df_old, df_F))

    for d in range(1, len(ancova.formulae)):
        terms = []
        for f in ancova.formulae[:(d + 1)]:
            terms += list(f.terms)

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X

        X = Formula(terms).design(recarray, return_float=True)
        model = OLS(Y, X)
        results = model.fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        sss.append(SSE_old - SSE_new)
        dfs.append(df_old - df_new)
        fs.append(((SSE_old - SSE_new) / (df_old - df_new)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], df_old - df_new, df_new))
        names.append(ancova.contrast_names[d])
        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row"

    sss.append(SSE_new)
    dfs.append(df_new)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result

Beispiel #6

0

Datei anzeigen

Datei: ancova.py Projekt: statsmodels/formula

def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names,
                                  ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]
        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C':C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()

        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid
        ftest = resultsF.f_test(contrast_matrices['C'])

        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR-dfF, df_F))
        names.append(name)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result

Beispiel #7

0

Datei anzeigen

Datei: ancova.py Projekt: statsmodels/formula

def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type I sums of squares
    where the order is based on the order of terms
    in the contrast_names of ancova.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    model = OLS(Y, ancova.formulae[0].design(recarray, return_float=True))
    results = model.fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    names.append(ancova.contrast_names[0])
    fs.append(((np.sum(Y**2) - SSE_old) / (Y.shape[0] - df_old)) / (SSE_F / df_F))
    sss.append((np.sum(Y**2) - SSE_old))
    dfs.append(Y.shape[0] - df_old)
    pvals.append(f_dbn.sf(fs[-1], Y.shape[0]-df_old, df_F))

    for d in range(1,len(ancova.formulae)):
        terms = []
        for f in ancova.formulae[:(d+1)]:
            terms += list(f.terms)

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X

        X = Formula(terms).design(recarray, return_float=True)
        model = OLS(Y, X)
        results = model.fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        sss.append(SSE_old - SSE_new)
        dfs.append(df_old - df_new)
        fs.append(((SSE_old-SSE_new) / (df_old - df_new)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], df_old-df_new, df_new))
        names.append(ancova.contrast_names[d])
        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row"

    sss.append(SSE_new)
    dfs.append(df_new)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result