Esempio n. 1
0
def _side_summary(db, side):
    """Return ``(proportion, interval)`` for shots matching ``{side: 0}``.

    The filter is built with ``shot_filters.merge_filters``, the matching
    documents are loaded from ``db.shots`` into a DataFrame, and the
    proportion is the number of rows whose ``'made'`` value is true divided
    by the total number of rows. The interval is whatever
    ``utils.ci(proportion, n)`` returns.
    """
    query = shot_filters.merge_filters({side: 0})
    shots = DataFrame(list(db.shots.find(query)))
    total = len(shots)
    proportion = len(shots[shots['made']]) / float(total)
    return proportion, utils.ci(proportion, total)


def left_right():
    """Write made-shot proportions for each side to ``left-right.json``.

    For the ``'left'`` and ``'right'`` filters the proportion of made shots
    and its interval (from ``utils.ci``) are computed, then dumped as a JSON
    list with one object per side (``name``, ``value``, ``l``, ``u``) to
    ``static/data/left-right.json``. Nothing is returned.
    """
    db = utils.get_db()

    # Query order (left, then right) matches the original implementation.
    left_p, left_ci = _side_summary(db, 'left')
    right_p, right_ci = _side_summary(db, 'right')

    summary = [
        {
            'name': 'Right',
            'value': right_p,
            'l': right_ci[0],
            'u': right_ci[1],
        },
        {
            'name': 'Left',
            'value': left_p,
            'l': left_ci[0],
            'u': left_ci[1],
        },
    ]

    # The context manager guarantees the file is flushed and closed even if
    # serialisation fails part-way through.
    with open('static/data/left-right.json', 'w') as out:
        json.dump(summary, out)
Esempio n. 2
0
def quarterly():
    """Write per-quarter made-shot proportions to ``quarterly.json``.

    All documents in ``db.shots`` are loaded into a DataFrame. For each of
    the quarter labels ``'1'`` to ``'4'`` (compared as strings against the
    ``'qtr'`` column) the proportion of rows whose ``'made'`` value is true
    is computed, together with the interval returned by
    ``utils.ci(proportion, n)``. The result is dumped as a JSON list of
    objects (``name``, ``value``, ``l``, ``u``) named ``Q1`` .. ``Q4`` to
    ``static/data/quarterly.json``. Nothing is returned.
    """
    db = utils.get_db()
    shots_df = DataFrame(list(db.shots.find()))
    made = shots_df['made']

    rows = []
    for label in ('1', '2', '3', '4'):
        in_quarter = shots_df['qtr'] == label
        attempts = len(shots_df[in_quarter])
        # np.all over the stacked masks is an element-wise AND.
        hits = len(shots_df[np.all([in_quarter, made], 0)])
        proportion = hits / float(attempts)
        interval = utils.ci(proportion, attempts)
        rows.append({
            'name': 'Q' + label,
            'value': proportion,
            'l': interval[0],
            'u': interval[1],
        })

    # The context manager guarantees the output file is closed.
    with open('static/data/quarterly.json', 'w') as out:
        json.dump(rows, out)
Esempio n. 3
0
# hypothesis test for difference in means between left
# and right side of net for proportion of shots made
import utils
import shot_filters
from pandas import DataFrame, Series
import numpy as np

# Shared database handle for both queries below.
db = utils.get_db()

# Left side: load the matching shots, then the made-shot proportion and the
# interval reported by utils.ci.
left_f = shot_filters.merge_filters({'left': 0})
left_df = DataFrame(list(db.shots.find(left_f)))
n_l = left_df.shape[0]
left_p = left_df[left_df['made']].shape[0] / float(n_l)
left_ci = utils.ci(left_p, n_l)

# Right side: same computation with the 'right' filter.
right_f = shot_filters.merge_filters({'right': 0})
right_df = DataFrame(list(db.shots.find(right_f)))
n_r = right_df.shape[0]
right_p = right_df[right_df['made']].shape[0] / float(n_r)
right_ci = utils.ci(right_p, n_r)

# Two-proportion z statistic using the pooled proportion of both samples.
pooled_p = (right_p * n_r + left_p * n_l) / (n_l + n_r)
pooled_se = np.sqrt(
    pooled_p * (1 - pooled_p) * (1.0 / n_l + 1.0 / n_r)
)
pooled_z_stat = (right_p - left_p) / pooled_se
Esempio n. 4
0
    'q1': [
        len(shots_df[np.all([q1, shots_df['made']], 0)]) /
        float(len(shots_df[q1])),
        len(shots_df[q1])
    ],
    'q2': [
        len(shots_df[np.all([q2, shots_df['made']], 0)]) /
        float(len(shots_df[q2])),
        len(shots_df[q2])
    ],
    'q3': [
        len(shots_df[np.all([q3, shots_df['made']], 0)]) /
        float(len(shots_df[q3])),
        len(shots_df[q3])
    ],
    'q4': [
        len(shots_df[np.all([q4, shots_df['made']], 0)]) /
        float(len(shots_df[q4])),
        len(shots_df[q4])
    ],
    'q1q2': [
        len(shots_df[np.all([q1q2, shots_df['made']], 0)]) /
        float(len(shots_df[q1q2])),
        len(shots_df[q1q2])
    ]
}

# Build one interval per qtr_ps entry, keyed the same way. Each visible entry
# is a two-item list [proportion made, row count], passed to ci in that order.
cis = {}
for q in qtr_ps:
    cis[q] = ci(qtr_ps[q][0], qtr_ps[q][1])
Esempio n. 5
0
from utils import get_db
from pandas import DataFrame
from utils import ci
import numpy as np

# Load every shot document into a DataFrame.
db = get_db()
shots = list(db.shots.find())
shots_df = DataFrame(shots)

# Boolean masks per quarter; the 'qtr' column is compared against strings.
q1 = shots_df['qtr'] == '1'
q2 = shots_df['qtr'] == '2'
q3 = shots_df['qtr'] == '3'
q4 = shots_df['qtr'] == '4'
# Element-wise OR of the first two quarters (first half).
q1q2 = np.any([q1, q2], 0)

# Each entry is [proportion of rows with a true 'made' value, row count].
qtr_ps = {
    'q1': [
        float(len(shots_df[np.all([q1, shots_df['made']], 0)]))
        / len(shots_df[q1]),
        len(shots_df[q1]),
    ],
    'q2': [
        float(len(shots_df[np.all([q2, shots_df['made']], 0)]))
        / len(shots_df[q2]),
        len(shots_df[q2]),
    ],
    'q3': [
        float(len(shots_df[np.all([q3, shots_df['made']], 0)]))
        / len(shots_df[q3]),
        len(shots_df[q3]),
    ],
    'q4': [
        float(len(shots_df[np.all([q4, shots_df['made']], 0)]))
        / len(shots_df[q4]),
        len(shots_df[q4]),
    ],
    'q1q2': [
        float(len(shots_df[np.all([q1q2, shots_df['made']], 0)]))
        / len(shots_df[q1q2]),
        len(shots_df[q1q2]),
    ],
}

# Interval for every entry, keyed like qtr_ps.
cis = {}
for q in qtr_ps:
    cis[q] = ci(*qtr_ps[q])
Esempio n. 6
0
def test_ci():
    """Check ``utils.ci`` against SciPy's 95% Student-t interval of the mean.

    ``x`` is the sample taken from the enclosing scope. The reference
    interval is centred on the sample mean with the standard error of the
    mean as scale and ``n - 1`` degrees of freedom.
    """
    n = len(x)
    # np.mean always yields a float mean; np.sum(x) / n would truncate for
    # integer data under Python 2 division semantics.
    mu = np.mean(x)
    expected = st.t.interval(0.95, n - 1, loc=mu, scale=st.sem(x))
    # Compare with a tolerance: exact float equality is brittle, and "=="
    # is ambiguous inside ``assert`` if utils.ci returns an array.
    np.testing.assert_allclose(utils.ci(x), expected)
Esempio n. 7
0
def resample(models, lmd, X, z, nboots, split_size=0.2):
    """Bootstrap every model and collect error statistics on a fixed split.

    The data are split once into train and test parts. For each entry of
    ``models`` an estimator is created with ``model(lmd[name])`` and refit on
    ``nboots`` bootstrap samples of the training part (``bootstrap`` is
    called with the iteration index as its third argument). Every refit is
    evaluated on the same test part and on the full training part.

    Parameters
    ----------
    models : dict
        Maps a model name to a callable that builds an estimator exposing
        ``fit``, ``predict`` and ``coef_``.
    lmd : dict
        Maps each model name to the value handed to that callable.
    X : array
        Design matrix; ``X.shape[1]`` is used as the number of coefficients.
    z : array
        Targets; the split parts must support ``reshape``.
    nboots : int
        Number of bootstrap refits per model.
    split_size : float, optional
        Passed to ``train_test_split`` as ``test_size``.

    Returns
    -------
    tuple
        ``(z_test, z_pred_test, bias, var, beta, mse_test, mse_train,
        ci_beta)``. The first seven are dicts keyed by model name (they start
        with empty lists under ``"ridge"``, ``"lasso"`` and ``"ols"``).
        ``bias`` holds the mean squared difference between the test targets
        and the mean prediction over the bootstrap refits; ``var`` holds the
        mean variance of the predictions over the refits. ``ci_beta`` is a
        ``(2, n_coefficients)`` array for the LAST model iterated only, or
        ``None`` when ``models`` is empty.
    """
    # Dictionaries that keep track of the results.
    z_test = {"ridge": [], "lasso": [], "ols": []}
    z_pred_test = {"ridge": [], "lasso": [], "ols": []}

    bias = {"ridge": [], "lasso": [], "ols": []}
    var = {"ridge": [], "lasso": [], "ols": []}
    beta = {"ridge": [], "lasso": [], "ols": []}

    mse_test = {"ridge": [], "lasso": [], "ols": []}
    mse_train = {"ridge": [], "lasso": [], "ols": []}

    # Seeds NumPy's global random state before the split below.
    np.random.seed(2018)

    # Split the data into train and test parts.
    X_train, X_test, z_train, z_test_ = train_test_split(X,
                                                         z,
                                                         test_size=split_size)

    # Column-vector views used only for broadcasting against the
    # (n_samples, nboots) prediction matrices. z_train itself is left
    # untouched so every model is fit on the same target shape; previously
    # it was rebound to 2-D inside the loop, so only the first model saw the
    # original shape.
    z_test_col = z_test_.reshape((z_test_.shape[0], 1))
    z_train_col = z_train.reshape((z_train.shape[0], 1))

    # Stays None when there are no models, instead of being unbound at return.
    ci_beta = None

    for name, model in models.items():
        # Create a model with the previously determined best lambda.
        estimator = model(lmd[name])
        # NOTE(review): this fit is overwritten by the bootstrap refits
        # below; kept in case the estimator relies on it - confirm.
        estimator.fit(X_train, z_train)

        z_pred_test_ = np.empty((z_test_col.shape[0], nboots))
        z_pred_train_ = np.empty((z_train_col.shape[0], nboots))
        beta_ = np.empty((X.shape[1], nboots))
        for i in range(nboots):
            # i is also handed to bootstrap (used as its random state).
            X_, z_ = bootstrap(X_train, z_train, i)

            estimator.fit(X_, z_)
            # Evaluate the refit model on the same test data each time.
            z_pred_test_[:, i] = np.squeeze(estimator.predict(X_test))
            z_pred_train_[:, i] = np.squeeze(estimator.predict(X_train))
            beta_[:, i] = np.squeeze(estimator.coef_)

        beta[name] = beta_
        z_pred_test[name] = z_pred_test_
        z_test[name] = z_test_col

        mse_test[name] = np.mean(
            np.mean((z_test_col - z_pred_test_)**2, axis=1, keepdims=True))
        bias[name] = np.mean(
            (z_test_col - np.mean(z_pred_test_, axis=1, keepdims=True))**2)
        var[name] = np.mean(np.var(z_pred_test_, axis=1, keepdims=True))

        mse_train[name] = np.mean(
            np.mean((z_train_col - z_pred_train_)**2, axis=1, keepdims=True))

        # Confidence interval of every coefficient across the refits.
        # NOTE(review): assumes ci returns two values per coefficient; which
        # row is the lower/upper bound depends on ci - confirm.
        ci_beta = np.empty((2, beta_.shape[0]))
        for p in range(beta_.shape[0]):
            ci_beta[:, p] = np.array(ci(beta_[p, :])).T

    return z_test, z_pred_test, bias, var, beta, mse_test, mse_train, ci_beta
Esempio n. 8
0
# hypothesis test for difference in means between left
# and right side of net for proportion of shots made
import utils
import shot_filters
from pandas import DataFrame, Series
import numpy as np

# Database handle used by both side queries.
db = utils.get_db()

# Shots matching the 'left' filter: sample size, proportion made, interval.
left_f = shot_filters.merge_filters({'left': 0})
left_df = DataFrame(list(db.shots.find(left_f)))
n_l = len(left_df.index)
left_p = float(len(left_df[left_df['made']])) / n_l
left_ci = utils.ci(left_p, n_l)

# Shots matching the 'right' filter: same three quantities.
right_f = shot_filters.merge_filters({'right': 0})
right_df = DataFrame(list(db.shots.find(right_f)))
n_r = len(right_df.index)
right_p = float(len(right_df[right_df['made']])) / n_r
right_ci = utils.ci(right_p, n_r)

# Pooled two-proportion test statistic for the difference between sides.
pooled_p = (left_p * n_l + right_p * n_r) / (n_l + n_r)
pooled_se = np.sqrt(
    pooled_p * (1 - pooled_p) * ((1.0 / n_l) + (1.0 / n_r))
)
pooled_z_stat = (right_p - left_p) / pooled_se