def poisson_regression(self, endog, exog, clean_data="greedy"):
        """Fit a Poisson regression on cleaned sheet data and describe the fit.

        The sheet is looked up from ``endog`` via ``self.map_column_to_sheet``,
        the requested columns are cleaned with the sheet's ``cleanData`` using
        the ``clean_data`` strategy, a constant is added to the regressors and
        a ``Poisson`` model is fitted.

        Returns a ``QueryResult`` holding the fit summary and a text message
        that names the variables and embeds the printed summary.
        """
        sheet = self.map_column_to_sheet(endog)

        # All columns taking part in the regression: regressors, then response.
        columns = np.append(np.copy(exog), endog)
        cleaned = sheet.cleanData(columns, clean_data)

        design = sm.add_constant(cleaned[exog])
        response = cleaned[endog]

        fit = Poisson(response, design).fit()

        # The message uses the labels exactly as the caller passed them in.
        pieces = [
            "Here are the results of a Poisson regression with endogenous variables ",
            str(endog),
            " and exogenous variables ",
            str(exog),
            ".\n",
            str(fit.summary()),
        ]
        utterance = "".join(pieces)

        return QueryResult(fit.summary(), utterance)
예제 #2
0
    def setup_class(cls):
        """Simulate two-group Poisson counts and fit a Poisson model with bfgs.

        Stores the design matrix (``exog``), the simulated counts (``endog``),
        the fit results (``res``) and the sample size (``nobs``) on the class.
        """
        true_params = [1, 1, 0.5]
        np.random.seed(987123)
        n_obs = 500

        # Column of ones plus a second column that is zero for the first half.
        design = np.ones((n_obs, 2))
        design[:n_obs // 2, 1] = 0

        # A nonzero offset would misspecify the model for the predicted
        # probabilities conditional moment test; it is switched off here.
        offset = 0
        linear_part = design.dot(true_params[:-1]) + offset
        mu_true = np.exp(linear_part)

        counts = np.random.poisson(mu_true / 5)

        fitted = Poisson(counts, design).fit(
            method='bfgs', maxiter=5000, maxfun=5000)

        cls.nobs = n_obs
        cls.exog = design
        cls.endog = counts
        cls.res = fitted
예제 #3
0
    def setup_class(cls):
        """Build simulated Poisson data with a group dummy and fit the model.

        The class receives ``exog``, ``endog``, ``res`` (bfgs fit results) and
        ``nobs`` for use by the test methods.
        """
        np.random.seed(987123)
        sample_size = 500
        params_true = [1, 1, 0.5]

        # Design: constant and a dummy that is 0 in the first half of the rows.
        regressors = np.ones((sample_size, 2))
        half = sample_size // 2
        regressors[:half, 1] = 0

        # Offset is kept at zero; a random offset could be used to create
        # misspecification for the predicted-probabilities moment test.
        offset = 0
        mean_true = np.exp(regressors.dot(params_true[:-1]) + offset)

        simulated = np.random.poisson(mean_true / 5)

        model = Poisson(simulated, regressors)
        results = model.fit(method='bfgs', maxiter=5000, maxfun=5000)

        cls.exog, cls.endog = regressors, simulated
        cls.res = results
        cls.nobs = sample_size
예제 #4
0
    def _initialize(cls):
        """Fit a plain Poisson and a penalized Poisson with zero penalty weight.

        ``res2`` holds the unpenalized reference fit, ``res1`` the penalized
        fit (bfgs, at most 100 iterations); ``atol`` is set to 5e-6.
        """
        endog, exog = cls.y, cls.x

        # Reference: ordinary Poisson fit without printed convergence output.
        cls.res2 = Poisson(endog, exog).fit(disp=0)

        # Penalized model with the penalty weight set to zero.
        penalized = PoissonPenalized(endog, exog)
        penalized.pen_weight = 0
        cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)

        cls.atol = 5e-6
예제 #5
0
    def _initialize(cls):
        """Create the two fits compared by the tests.

        ``cls.res2`` is the unpenalized Poisson fit and ``cls.res1`` the
        ``PoissonPenalized`` fit whose ``pen_weight`` was set to 0.
        """
        response = cls.y
        design = cls.x

        reference_model = Poisson(response, design)
        cls.res2 = reference_model.fit(disp=0)

        zero_weight_model = PoissonPenalized(response, design)
        zero_weight_model.pen_weight = 0
        fit_options = dict(method='bfgs', maxiter=100, disp=0)
        cls.res1 = zero_weight_model.fit(**fit_options)

        # absolute tolerance stored for the comparisons
        cls.atol = 5e-6
예제 #6
0
    def setup_class(cls):
        """Fit a Poisson model and compute marginal effects with ``dummy=True``.

        The fit is started from fixed ``start_params`` because convergence from
        the default starting values is not what is being checked here.
        """
        initial = [14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299,
                   -5.0529]
        fitted = Poisson(endog, exog).fit(start_params=initial)
        margins = fitted.get_margeff(dummy=True)

        cls.res = fitted
        cls.margeff = margins

        # Reference results and the positions used when comparing to them.
        cls.res1 = res_stata.results_poisson_margins_dummy
        cls.res1_slice = [0, 1, 2, 3, 5, 6]
예제 #7
0
    def setup_class(cls):
        """Fit the same Poisson specification with GLM and with ``Poisson``.

        ``infl0`` is the influence object of the GLM fit and ``infl1`` that of
        the discrete-model ``Poisson`` fit.
        """
        from statsmodels.discrete.discrete_model import Poisson

        frame = data_bin
        regressor_names = ['const', 'log_rate', 'log_volumne']

        # GLM with Poisson family.
        glm_model = GLM(frame['constrict'], frame[regressor_names],
                        family=families.Poisson())
        glm_results = glm_model.fit(attach_wls=True, atol=1e-10)

        # Discrete-model Poisson on the same columns.
        discrete_model = Poisson(frame['constrict'], frame[regressor_names])
        discrete_results = discrete_model.fit(tol=1e-10)

        cls.infl0 = glm_results.get_influence()
        cls.infl1 = discrete_results.get_influence()
예제 #8
0
    def setup_class(cls):
        """Prepare Poisson fit results and dummy-aware marginal effects.

        Convergence from the default ``start_params`` is not under test, so
        explicit starting values are supplied.
        """
        starting_values = [14.1709, 0.7085, -3.4548, -0.539,
                           3.2368, -7.9299, -5.0529]
        poisson_results = Poisson(endog, exog).fit(
            start_params=starting_values)

        cls.margeff = poisson_results.get_margeff(dummy=True)
        cls.res = poisson_results

        cls.res1_slice = [0, 1, 2, 3, 5, 6]
        cls.res1 = res_stata.results_poisson_margins_dummy
예제 #9
0
    def _initialize(cls):
        """Fit a reduced-design Poisson model and a penalized full-design model.

        ``res2`` uses only the first ``k_nonzero`` columns of ``x``; ``res1``
        uses all columns with the class penalty, a penalty weight scaled by
        1.5 and ``penal.tau`` set to 0.05.
        """
        endog, exog = cls.y, cls.x
        n_kept = cls.k_nonzero

        # Unpenalized fit restricted to the leading columns.
        cls.res2 = Poisson(endog, exog[:, :n_kept]).fit(disp=0)

        # Penalized fit on the full design.
        penalized = PoissonPenalized(endog, exog, penal=cls.penalty)
        penalized.pen_weight *= 1.5
        penalized.penal.tau = 0.05
        cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)

        # Selects the leading ``k_nonzero`` positions.
        cls.exog_index = slice(n_kept)

        cls.atol = 5e-3
예제 #10
0
    def _initialize(cls):
        """Set up the reduced Poisson fit and the penalized fit to compare.

        Stores ``res2`` (Poisson on the first ``k_nonzero`` columns), ``res1``
        (``PoissonPenalized`` on all columns), ``exog_index`` and ``atol``.
        """
        response = cls.y
        design = cls.x

        reduced_design = design[:, :cls.k_nonzero]
        reduced_model = Poisson(response, reduced_design)
        cls.res2 = reduced_model.fit(disp=0)

        full_model = PoissonPenalized(response, design, penal=cls.penalty)
        # Increase the default penalty weight by half and set the penalty's tau.
        full_model.pen_weight *= 1.5
        full_model.penal.tau = 0.05
        cls.res1 = full_model.fit(method='bfgs', maxiter=100, disp=0)

        cls.exog_index = slice(None, cls.k_nonzero, None)
        cls.atol = 5e-3
예제 #11
0
    def setup_class(cls):
        """Fit a Poisson model and compute its default marginal effects.

        Explicit ``start_params`` are used because convergence from the
        default starting values is not checked here.
        """
        initial = [14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299,
                   -5.0529]
        fitted = Poisson(endog, exog).fit(start_params=initial)

        cls.res = fitted
        cls.margeff = fitted.get_margeff()

        cls.rtol_fac = 1
        # Full slice: every position of the reference results is compared.
        cls.res1_slice = slice(None)
        cls.res1 = res_stata.results_poisson_margins_cont
예제 #12
0
    def setup_class(cls):
        """Prepare Poisson results and marginal effects for the tests.

        The fit starts from fixed values; convergence from the default
        ``start_params`` does not need to be checked in this class.
        """
        starting_values = [14.1709, 0.7085, -3.4548, -0.539,
                           3.2368, -7.9299, -5.0529]
        model = Poisson(endog, exog)
        results = model.fit(start_params=starting_values)
        margins = results.get_margeff()

        cls.res, cls.margeff = results, margins

        cls.res1 = res_stata.results_poisson_margins_cont
        cls.res1_slice = slice(None, None, None)
        cls.rtol_fac = 1
예제 #13
0
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from statsmodels.discrete.discrete_model import Poisson
# Fit a Poisson model with Infections as the response and CYPOPDENS (plus a
# constant) as the regressor.
# NOTE(review): `doi` and `add_constant` are not defined in this snippet;
# presumably they come from an earlier notebook cell - confirm.
model =Poisson(endog=doi.Infections.astype(float), exog=add_constant(doi.CYPOPDENS.astype(float))) # endog is the dependent variable here
results = model.fit()
print(results.summary())  


# In[ ]:


# NOTE(review): `np` is used here but `import numpy as np` only appears further
# down; this relies on numpy having been imported in an earlier cell.
# NOTE(review): CYPOPDENS is passed as `exog` above, so .0007 is presumably the
# fitted coefficient of that exogenous regressor - confirm against the summary.
# NOTE(review): DENSCOEF is 1 - exp(coef) and is exponentiated again in the
# print below; verify that this is the intended quantity.
DENSCOEF = 1 - np.exp(.0007)    # .0007: coefficient of the variable of interest (see notes above)
print('CYPOPDENS coefficent exponetiated: {} '.format(np.exp(DENSCOEF)))  # intended to output a workable percentage

import numpy as np

# Distinct values of V and how often each occurs (result is not stored).
np.unique(V, return_counts=True)

# In[84]:

import statsmodels

# Add a constant column to the regressors U.
U_Const = statsmodels.tools.add_constant(U)

# In[85]:

from statsmodels.discrete.discrete_model import Poisson

# Poisson regression of V on U_Const using the discrete-model class.
mpr = Poisson(V, U_Const)
res_mpr = mpr.fit()

# In[93]:

from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families

# Same specification fitted as a GLM with Poisson family.
mod = GLM(V, U_Const, family=families.Poisson())
res = mod.fit()
print(res.summary())

# ### Overdispersion

# In[95]:

# Overdispersion
예제 #15
0
    # Add a constant column to the regressors before fitting.
    X = sm.add_constant(X)

    # The commented-out lines below are alternative estimators that can be
    # swapped in for the Poisson model; only one `model = ...` should be active.

    # general OLS
    # https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLS.html
    # model=sm.OLS(Y, X.astype(float))

    # robust regression
    # https://www.statsmodels.org/stable/generated/statsmodels.robust.robust_linear_model.RLM.html
    # model=sm.RLM(Y, X.astype(float))

    # probit model
    # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Probit.html
    # model = Probit(Y, X.astype(float))

    # logit model
    # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Logit.html
    # model = Logit(Y, X.astype(float))

    # poisson model
    # https://www.statsmodels.org/stable/generated/statsmodels.formula.api.poisson.html
    # NOTE(review): the link above is for the formula interface, while the code
    # uses the `Poisson` class directly - confirm which reference is intended.
    model = Poisson(Y, X.astype(float))

    final_model = model.fit()
    results_summary = final_model.summary()
    print(results_summary)
    # Take the second summary table, round-trip it through HTML into a
    # DataFrame, and print that DataFrame as LaTeX.
    results_as_html = results_summary.tables[1].as_html()
    # NOTE(review): recent pandas versions may warn when read_html is given a
    # literal HTML string instead of a file-like object - confirm if upgrading.
    result_df = pd.read_html(results_as_html, header=0, index_col=0)[0]

    print(result_df.to_latex())
예제 #16
0
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    """ % {'params' : base._model_params_doc,
           'extra_params' : _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        # The main (count) part is a plain Poisson model built on the data
        # held by this model after the base-class initialisation.
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        """Second derivatives (``d2l/dp2``) for the main-model parameters.

        Parameters
        ----------
        params : array_like
            Full parameter vector; the first ``k_inflate`` entries belong to
            the inflation model, the remaining ones to the main model.

        Returns
        -------
        ndarray
            Array of shape ``(k_exog, k_exog)``. Only entries with column
            index ``j <= i`` are filled; the rest stay zero.
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        # keep the inflation probability strictly inside (0, 1)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        #d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                # first sum: observations with y == 0,
                # second sum: observations with y != 0
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                    self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset,
                      y_values=None):
        """Evaluate the distribution's pmf at ``y_values`` for each observation.

        Parameters
        ----------
        params : array_like
            Full parameter vector (inflation parameters first).
        exog : array_like
            Explanatory variables passed to the main model's ``predict``.
        exog_infl : array_like
            Explanatory variables passed to the inflation model's ``predict``.
            If it has fewer than two dimensions, only the first row of the
            result is returned.
        exposure : array_like
            Accepted for interface compatibility; it is not forwarded to the
            main model's ``predict`` in this method.
        offset : array_like
            Forwarded to the main model's ``predict``.
        y_values : array_like, optional
            Values at which the pmf is evaluated. Defaults to
            ``0, 1, ..., max(endog)``.

        Returns
        -------
        ndarray
            Values of ``self.distribution.pmf``.
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        if y_values is None:
            y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        # keep the inflation probability strictly inside (0, 1)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
            offset=offset)[:, None]
        result = self.distribution.pmf(y_values, mu, w)
        return result[0] if transform else result

    def _predict_var(self, params, mu, prob_infl):
        """predict values for conditional variance V(endog | exog)

        Parameters
        ----------
        params : array_like
            The model parameters. This is only used to extract extra params
            like dispersion parameter.
        mu : array_like
            Array of mean predictions for main model.
        prob_infl : array_like
            Array of predicted probabilities of zero-inflation `w`.

        Returns
        -------
        Predicted conditional variance.
        """
        w = prob_infl
        var_ = (1 - w) * mu * (1 + w * mu)
        return var_

    def _get_start_params(self):
        """Return starting values for the full parameter vector.

        The main-model part comes from a Nelder-Mead (``method="nm"``) fit of
        the Poisson main model; each inflation parameter starts at 0.1.
        """
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params

    def get_distribution(self, params, exog=None, exog_infl=None,
                         exposure=None, offset=None):
        """Get frozen instance of distribution based on predicted parameters.

        Parameters
        ----------
        params : array_like
            The parameters of the model.
        exog : ndarray, optional
            Explanatory variables for the main count model.
            If ``exog`` is None, then the data from the model will be used.
        exog_infl : ndarray, optional
            Explanatory variables for the zero-inflation model.
            ``exog_infl`` has to be provided if ``exog`` was provided unless
            ``exog_infl`` in the model is only a constant.
        offset : ndarray, optional
            Offset is added to the linear predictor of the mean function with
            coefficient equal to 1.
            Default is zero if exog is not None, and the model offset if exog
            is None.
        exposure : ndarray, optional
            Log(exposure) is added to the linear predictor  of the mean
            function with coefficient equal to 1. If exposure is specified,
            then it will be logged by the method. The user does not need to
            log it first.
            Default is one if exog is not None, and it is the model exposure
            if exog is None.

        Returns
        -------
        Instance of frozen scipy distribution subclass.
        """
        mu = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="mean-main")
        w = self.predict(params, exog=exog, exog_infl=exog_infl,
                         exposure=exposure, offset=offset, which="prob-main")

        # the distribution is parameterised with 1 - w, where w is the
        # "prob-main" prediction
        distr = self.distribution(mu[:, None], 1 - w[:, None])
        return distr
예제 #17
0
class PredictPlayerStats(ConvertMixin):
    """Fit a Poisson model on processed player stats and predict one stat.

    Constructing an instance loads the processed player stats (refreshing the
    SQL tables through ``engine`` when needed), extracts the most recent
    predictor row for ``player_name`` and fits a Poisson model for
    ``stat_to_predict``. ``predict_player_stat`` then evaluates the fitted
    model on that most recent predictor row.
    """

    def __init__(self, engine, player_name, stat_to_predict, opposing_team_name,
                 predictor_stats=('csum_min_kills', 'csum_min_minions_killed'),
                 defense_predictor_stats=('csum_prev_min_allowed_kills', 'csum_prev_min_allowed_assists'),
                 game_range=None):
        self.engine = engine
        self.player_name = player_name
        self.stat_to_predict = stat_to_predict
        # NOTE(review): both branches of the former ``if predictor_stats`` set
        # the same tuple, so the ``predictor_stats`` argument has never had an
        # effect. That behaviour is kept; confirm whether it is intended.
        self.predictor_stats = ('csum_prev_min_kills', 'csum_prev_min_minions_killed')
        role_stats = ('Jungler', 'Mid', 'Coach', 'Support', 'AD', 'Sub', 'Top')
        # tuple() lets callers pass any sequence for the defense predictors.
        self.predictor_stats = self.predictor_stats + tuple(defense_predictor_stats) + role_stats
        self.opposing_team_name = opposing_team_name
        self.player_stats_table_name = 'player_stats_df'
        self.processed_player_stars_table_name = 'processed_player_stats_df'
        self.key_stats = ('kills', 'deaths', 'assists', 'minions_killed', 'gold',
                          'k_a', 'a_over_k')
        self.game_range = game_range
        self._process_player_stats_and_train()

    def _process_player_stats_and_train(self):
        """Load processed stats, remember the latest predictors, fit the model."""
        processed_player_stats_df = self._get_processed_player_stats_in_df()
        self.latest_predictor_numpy_array = self._get_latest_player_stats_numpy_array(processed_player_stats_df)
        print('latest predictors numpy array {}'.format(self.latest_predictor_numpy_array))
        predictors, y_array = self._get_predictors_in_numpy_arrays(processed_player_stats_df)
        self._train_model(predictors, y_array)

    def _get_latest_player_stats_numpy_array(self, processed_player_stats_df):
        """Return a 1-row array with the predictor values of the player's latest game.

        "Latest" is the row with the highest ``game_id`` among the rows of
        ``self.player_name``. Raises ``IndexError`` when the player has no rows.
        """
        player_id = self._get_player_id_by_player_name(self.player_name)
        player_stats_df = processed_player_stats_df[processed_player_stats_df['player_id'] == player_id]
        # DataFrame.sort was removed from pandas; sort_values is its replacement.
        latest_player_stats_df = player_stats_df.sort_values(['game_id'], ascending=False).head(1)
        dict_player = latest_player_stats_df.to_dict('records')[0]
        player_predictor_stats = [dict_player[predictor_stat]
                                  for predictor_stat in self.predictor_stats]
        return numpy.array([player_predictor_stats])

    def _get_predictors_in_numpy_arrays(self, processed_player_stats_df):
        """Build the predictor matrix and the target array for training.

        Records are skipped when ``csum_prev_min_kills`` or
        ``csum_prev_min_allowed_kills`` is NaN, or when
        ``csum_prev_min_assists`` equals 0.

        Returns
        -------
        tuple
            ``(predictors, y_array)`` where ``y_array`` wraps the targets in
            an extra leading dimension.
        """
        player_game_records = self._get_predictors(processed_player_stats_df)
        game_list = []
        y_array_list = []
        for player_game_record in player_game_records:
            if (numpy.isnan(player_game_record['csum_prev_min_kills'])
                    or numpy.isnan(player_game_record['csum_prev_min_allowed_kills'])):
                continue
            if player_game_record['csum_prev_min_assists'] == 0:
                continue
            prev_predictor_stats = self._convert_predictors_to_prev_csum(self.predictor_stats)
            game_list.append([player_game_record[prev_predictor_stat]
                              for prev_predictor_stat in prev_predictor_stats])
            y_array_list.append(player_game_record['y_element'])
        predictors = numpy.array(game_list)
        y_array = numpy.array([y_array_list])
        return predictors, y_array

    def _get_predictors(self, processed_player_stats_df):
        """Return the rows as dicts sorted by ``game_id`` with ``y_element`` added."""
        player_game_records = processed_player_stats_df.to_dict('records')
        player_game_records.sort(key=itemgetter('game_id'))
        for player_game_record in player_game_records:
            player_game_record['y_element'] = player_game_record[self.stat_to_predict]
        return player_game_records

    def _train_model(self, predictors, y_array):
        """Fit the Poisson model with bfgs and keep model and results on self."""
        y_1darray = numpy.squeeze(y_array)
        self.poisson = Poisson(y_1darray, predictors)
        self.pos_result = self.poisson.fit(method='bfgs')

    def _get_game_ids_from_database(self):
        """Return all ``Game`` ids as a list."""
        return list(Game.objects.values_list('id', flat=True))

    def _get_lastest_processed_team_stats_by_name(self):
        """Return the ``ProcessedTeamStatsDf`` row with the highest id for the opposing team."""
        return ProcessedTeamStatsDf.objects.filter(name=self.opposing_team_name).order_by('-id').first()

    def _get_game_by_ids(self, game_ids):
        """Return the ``Game`` queryset restricted to ``game_ids``."""
        return Game.objects.filter(id__in=game_ids)

    def _get_player_id_by_player_name(self, player_name):
        """Return the id of the first ``Player`` named ``player_name``.

        Raises ``IndexError`` when no such player exists.
        """
        player = Player.objects.filter(name=player_name)
        return player[0].id

    def _get_processed_player_stats_in_df(self):
        """Refresh the cached stats tables if needed and load the processed stats.

        When the processed table exists, only games from the highest cached
        ``game_id`` onwards are fetched and appended; otherwise every game is
        fetched. The returned frame is read from a table/view chosen by
        ``self.game_range`` (``'5'``, ``'10'`` or anything else for the full
        processed table).

        Raises ``IndexError`` when there are no games at all.
        """
        game_ids = self._get_game_ids_from_database()
        last_game_number = game_ids[-1]
        has_processed_team_stats_table = self.engine.has_table(self.processed_player_stars_table_name)
        if has_processed_team_stats_table:
            df_game_stats = pandas.read_sql(self.player_stats_table_name, self.engine)
            df_game_stats_all = df_game_stats[df_game_stats.game_id.isin(game_ids)]
            # Using game_numbers here since we need the last few games to check.
            max_game_id_cached = df_game_stats_all['game_id'].max()
            max_index_cached = df_game_stats_all['index'].max()
            if pandas.isnull(max_game_id_cached):
                max_game_id_cached = game_ids[0]
            if pandas.isnull(max_index_cached):
                # No cached rows: a NaN start index would make range() fail
                # below, so start at 0 like the "table did not exist" branch.
                max_index_cached = 0
            # Check if all the game numbers have been cached,
            # if not work out which games still have to be retrieved.
            if max_game_id_cached != last_game_number:
                # Get the index of the max_game_id
                max_game_id_index = game_ids.index(max_game_id_cached)
                # NOTE(review): the slice starts AT the highest cached game id,
                # so that game is fetched again; an older comment here said it
                # should start after it - confirm which is intended.
                game_ids_to_find = game_ids[max_game_id_index:]
                games = self._get_game_by_ids(game_ids_to_find)
                player_stats_df = self._get_player_stats_in_df(games, max_index_cached)
                self._insert_into_player_stats_df_tables(player_stats_df)
            else:
                # Everything is cached already; nothing to fetch.
                print("everything cached no need to retrieve from api")
        else:
            # Table did not exist, have to get all games starting at index 0.
            start_index = 0
            games = self._get_game_by_ids(game_ids)
            player_stats_df = self._get_player_stats_in_df(games, start_index)
            print('table does not exist inserting full table')
            self._insert_into_player_stats_df_tables(player_stats_df)
            print('table inserted')
        if self.game_range == '5':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_5',
                                                        con=self.engine)
        elif self.game_range == '10':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_10',
                                                        con=self.engine)
        else:
            processed_player_stats_df = pandas.read_sql_table(self.processed_player_stars_table_name, self.engine)
        return processed_player_stats_df

    def _process_player_stats_df(self, player_stats_df):
        """Add derived, role and cumulative columns to the raw player stats.

        Adds ``k_a`` and ``a_over_k``, one 0/1-style column per role (from a
        pivot on ``player_name``/``role``), and for every key stat the
        ``csum_``, ``csum_prev_``, ``per_min_`` and, except for
        ``game_length_minutes``, ``csum_min_`` / ``csum_prev_min_`` columns.
        """
        # DataFrame.sort / .ix were removed from pandas; use sort_values / .loc.
        player_stats_df = player_stats_df.sort_values(['game_id', 'player_id'])
        key_stats = ['game_length_minutes'] + (list(self.key_stats))
        # clean_kills replaces 0 kills by 1 so it can be used as a divisor.
        player_stats_df['clean_kills'] = player_stats_df['kills']
        player_stats_df.loc[player_stats_df.clean_kills == 0, 'clean_kills'] = 1
        player_stats_df['k_a'] = \
            player_stats_df['kills'] + player_stats_df['assists']
        player_stats_df['a_over_k'] = \
            player_stats_df['assists'] / player_stats_df['clean_kills']
        # Work on a copy so that adding 'value' does not write into a slice
        # of player_stats_df.
        player_stats_for_pivot = player_stats_df[['player_name', 'role']].copy()
        player_stats_for_pivot['value'] = 1
        player_pivot_df = player_stats_for_pivot.pivot_table(index='player_name', columns='role', values='value')
        player_pivot_df.fillna(0, inplace=True)
        player_pivot_df.reset_index(inplace=True)
        player_stats_df = pandas.merge(player_stats_df, player_pivot_df, on='player_name')
        for key_stat in key_stats:
            print('doing key stats {}'.format(key_stat))
            csum_col = 'csum_{}'.format(key_stat)
            csum_prev_col = 'csum_prev_{}'.format(key_stat)
            player_stats_df[csum_col] = player_stats_df.groupby(by='player_id')[key_stat].cumsum()
            # cumulative total before the current game
            player_stats_df[csum_prev_col] = player_stats_df[csum_col] - player_stats_df[key_stat]
            player_stats_df['per_min_{}'.format(key_stat)] = \
                player_stats_df[key_stat] / player_stats_df['game_length_minutes']
            if key_stat not in ['game_number', 'game_length_minutes']:
                print('doing stats not game_number {}'.format(key_stat))
                csum_prev_min_col = 'csum_prev_min_{}'.format(key_stat)
                player_stats_df['csum_min_{}'.format(key_stat)] = \
                    player_stats_df[csum_col] / player_stats_df['csum_game_length_minutes']
                player_stats_df[csum_prev_min_col] = \
                    player_stats_df[csum_prev_col] / player_stats_df['csum_prev_game_length_minutes']
                # Assign the result back: an in-place fillna on a column
                # selection is not guaranteed to modify the frame.
                player_stats_df[csum_prev_min_col] = player_stats_df[csum_prev_min_col].fillna(0)
            player_stats_df = player_stats_df.sort_values(['game_id'])
        return player_stats_df

    def _get_player_stats_in_df(self, games, max_index_cached):
        """Build one frame with the player stats of all ``games``.

        Each game gets the next block of 10 index values starting at
        ``max_index_cached``. Returns ``None`` when ``games`` is empty.
        """
        # NOTE(review): the fixed block of 10 index values presumably assumes
        # exactly 10 player rows per game - confirm.
        frames = []
        for game in games:
            players_stats = self._convert_game_to_player_stats_df(game)
            game_index = list(range(max_index_cached, (max_index_cached + 10)))
            frames.append(pandas.DataFrame(players_stats, index=game_index))
            max_index_cached += 10
        if not frames:
            return None
        # DataFrame.append was removed from pandas; concatenate once instead.
        return pandas.concat(frames)

    def _convert_game_to_player_stats_df(self, game):
        """Return one dict per player-stats row of ``game`` with extra fields added."""
        players_stats = game.playerstats_set.all()
        players_stats_dict = game.playerstats_set.all().values()
        player_stats_list = []
        # NOTE(review): zipping two separate queries assumes both return the
        # rows in the same order - confirm.
        for player_stats, player_stats_dict in zip(players_stats, players_stats_dict):
            player_stats_dict['game_length_minutes'] = float(game.game_length_minutes)
            player_stats_dict['gold'] = float(player_stats_dict['gold'])
            player_stats_dict['player_name'] = player_stats.player.name
            self._populate_player_stats_with_defense_stats(player_stats_dict, player_stats, game)
            player_stats_list.append(player_stats_dict)
        return player_stats_list

    def _populate_player_stats_with_defense_stats(self, player_stats_dict, player_stats, game):
        """Copy the other team's ``allowed`` stats of this game into ``player_stats_dict``."""
        current_team = player_stats.team
        processed_team_stats_dict = game.processedteamstatsdf_set.exclude(team_name=current_team).values()[0]
        for key_stat in self.key_stats:
            for prefix in ('csum_prev_min_allowed_{}', 'csum_min_allowed_{}'):
                column = prefix.format(key_stat)
                player_stats_dict[column] = processed_team_stats_dict[column]

    def _insert_into_player_stats_df_tables(self, player_stats_df):
        """Append the raw stats to SQL, then rebuild and append the processed stats."""
        player_stats_df.to_sql(self.player_stats_table_name, self.engine, if_exists='append')
        # Could be optimized kinda a hack
        player_stats_df = pandas.read_sql("select ps.*, p.role, p.image from player_stats_df ps, player p "
                                          "where ps.player_id = p.id", self.engine)
        processed_team_stats_df = self._process_player_stats_df(player_stats_df)
        processed_team_stats_df.to_sql(self.processed_player_stars_table_name, self.engine, if_exists='append')

    def predict_player_stat(self):
        """Return ``{player_name: prediction}`` for the latest predictor row."""
        probability_in_numpy_array = self.poisson.predict(self.pos_result.params, self.latest_predictor_numpy_array)
        return {self.player_name: probability_in_numpy_array}
예제 #18
0
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated model for count data

    %(params)s
    %(extra_params)s

    Attributes
    -----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    exog_infl: array
        A reference to the zero-inflated exogenous design.
    """ % {'params' : base._model_params_doc,
           'extra_params' : _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        # The count part of the model is an ordinary Poisson model.
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        """Second derivatives (``d2l/dp2``) for the main-model parameters.

        ``params`` holds the ``k_inflate`` inflation parameters first and the
        main-model parameters after them. The returned array has shape
        ``(k_exog, k_exog)``; only entries with column index ``j <= i`` are
        filled, the others stay zero.
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        # keep the inflation probability strictly inside (0, 1)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        #d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                # first sum: observations with y == 0,
                # second sum: observations with y != 0
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                    self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        """Evaluate the pmf at the counts ``0, 1, ..., max(endog)``.

        ``exog`` and ``offset`` are passed to the main model's ``predict`` and
        ``exog_infl`` to the inflation model's ``predict``; ``exposure`` is
        accepted but not forwarded in this method. If ``exog_infl`` has fewer
        than two dimensions only the first row of the result is returned.
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        # keep the inflation probability strictly inside (0, 1)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
            offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, w)
        return result[0] if transform else result

    def _get_start_params(self):
        """Return starting values for the full parameter vector.

        The main-model part comes from a Nelder-Mead (``method="nm"``) fit of
        the Poisson main model; each inflation parameter starts at 0.1.
        """
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params
예제 #19
0
# Demo script: simulate a sparse Poisson design, fit plain and penalized
# Poisson models, then fit Probit models on a thresholded latent variable.
nobs, k_vars = 500, 20
k_nonzero = 4
# regressors share a common random component (second term) per row
x = (np.random.rand(nobs, k_vars) + 0.5 *
     (np.random.rand(nobs, 1) - 0.5)) * 2 - 1
x *= 1.2
x[:, 0] = 1  # first column is the constant
# only the first k_nonzero coefficients are nonzero: 1, 1/2, 1/3, ...
beta = np.zeros(k_vars)
beta[:k_nonzero] = 1. / np.arange(1, k_nonzero + 1)
linpred = x.dot(beta)
mu = np.exp(linpred)
y = np.random.poisson(mu)
import os
# ``raw_input`` exists only on Python 2; fall back to ``input`` on Python 3 so
# the pause for attaching a debugger does not fail with NameError.
try:
    _read_line = raw_input
except NameError:
    _read_line = input
debug = _read_line("please attach to pid:{},then press any key".format(
    os.getpid()))
modp = Poisson(y, x)
resp = modp.fit()
print(resp.params)

mod = PoissonPenalized(y, x)
res = mod.fit(method='bfgs', maxiter=1000)
print(res.params)

############### Penalized Probit
# binary outcome: noisy linear predictor thresholded at 0.75
y_star = linpred + 0.25 * np.random.randn(nobs)
y2 = (y_star > 0.75).astype(float)
# interactive leftover: the means are computed but not stored or printed
y_star.mean(), y2.mean()

res0 = Probit(y2, x).fit()
print(res0.summary())
# "oracle" fit that uses only the truly nonzero columns
res_oracle = Probit(y2, x[:, :k_nonzero]).fit()
print(res_oracle.params)