Example #1
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    exog_infl : array
        A reference to the zero-inflated exogenous design.
    """ % {'params' : base._model_params_doc,
           'extra_params' : _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        # d2l/dp2 (only the lower triangle is filled)
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                    self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
            offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, w)
        return result[0] if transform else result

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params
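
A minimal usage sketch for the class above, driven through statsmodels' public API. The simulated data, seed, and variable names below are illustrative assumptions, not part of the original source.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
n = 500
x = rng.normal(size=(n, 1))
exog = sm.add_constant(x)

# Simulate zero-inflated Poisson counts: extra zeros with probability 0.3.
mu = np.exp(0.5 + 0.8 * x[:, 0])
y = rng.poisson(mu)
y[rng.uniform(size=n) < 0.3] = 0

# A constant-only inflation design; exog_infl defaults to this anyway.
zip_mod = sm.ZeroInflatedPoisson(y, exog, exog_infl=np.ones((n, 1)))
zip_res = zip_mod.fit(disp=0)
print(zip_res.summary())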
Example #2
import numpy as np
from statsmodels.discrete.discrete_model import Poisson

features_list = [
    "station_diur_temp_rng_c", "precipitation_amt_mm",
    "reanalysis_dew_point_temp_k", "reanalysis_air_temp_k",
    "reanalysis_relative_humidity_percent",
    "reanalysis_specific_humidity_g_per_kg", "reanalysis_precip_amt_kg_per_m2",
    "reanalysis_max_air_temp_k", "reanalysis_min_air_temp_k",
    "reanalysis_avg_temp_k", "reanalysis_tdtr_k", "ndvi_se", "ndvi_sw",
    "ndvi_ne", "ndvi_nw"
]
n_features = len(features_list)

df_train_features = df_train_features.fillna(df_train_features.mean())
df_test_features = df_test_features.fillna(df_test_features.mean())

X_train = df_train_features[features_list].values
X_test = df_test_features[features_list].values

y_train = df_train_labels["total_cases"].values

# Model:
poisson_mod = Poisson(endog=y_train, exog=X_train).fit(maxiter=61)

print(poisson_mod.summary())

predictions = poisson_mod.predict(X_test)
predictions_rounded = np.rint(predictions).astype(np.int64)
print(predictions_rounded)

write_result(predictions_rounded,
             "/poisson.csv",
             sample_source=sample_submission_path,
             write_source=predictions_path)
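
Note that statsmodels' Poisson does not add an intercept automatically, so the fit above is a no-constant model. Below is a sketch of a variant with a constant term, assuming an intercept was intended; the `_c` names are hypothetical.

import statsmodels.api as sm

X_train_c = sm.add_constant(X_train)
X_test_c = sm.add_constant(X_test, has_constant='add')

poisson_mod_c = Poisson(endog=y_train, exog=X_train_c).fit(maxiter=61)
predictions_c = np.rint(poisson_mod_c.predict(X_test_c)).astype(np.int64)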
Example #3
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    """ % {'params' : base._model_params_doc,
           'extra_params' : _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        # d2l/dp2 (only the lower triangle is filled)
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                    self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset,
                      y_values=None):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        if y_values is None:
            y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
            offset=offset)[:, None]
        result = self.distribution.pmf(y_values, mu, w)
        return result[0] if transform else result

    def _predict_var(self, params, mu, prob_infl):
        """predict values for conditional variance V(endog | exog)

        Parameters
        ----------
        params : array_like
            The model parameters. This is only used to extract extra params
            like dispersion parameter.
        mu : array_like
            Array of mean predictions for main model.
        prob_infl : array_like
            Array of predicted probabilities of zero-inflation `w`.

        Returns
        -------
        Predicted conditional variance.
        """
        w = prob_infl
        var_ = (1 - w) * mu * (1 + w * mu)
        return var_

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params

    def get_distribution(self, params, exog=None, exog_infl=None,
                         exposure=None, offset=None):
        """Get frozen instance of distribution based on predicted parameters.

        Parameters
        ----------
        params : array_like
            The parameters of the model.
        exog : ndarray, optional
            Explanatory variables for the main count model.
            If ``exog`` is None, then the data from the model will be used.
        exog_infl : ndarray, optional
            Explanatory variables for the zero-inflation model.
            ``exog_infl`` has to be provided if ``exog`` was provided unless
            ``exog_infl`` in the model is only a constant.
        offset : ndarray, optional
            Offset is added to the linear predictor of the mean function with
            coefficient equal to 1.
            Default is zero if exog is not None, and the model offset if exog
            is None.
        exposure : ndarray, optional
            Log(exposure) is added to the linear predictor of the mean
            function with coefficient equal to 1. If exposure is specified,
            then it will be logged by the method. The user does not need to
            log it first.
            Default is one if exog is not None, and it is the model exposure
            if exog is None.

        Returns
        -------
        Instance of frozen scipy distribution subclass.
        """
        mu = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="mean-main")
        w = self.predict(params, exog=exog, exog_infl=exog_infl,
                         exposure=exposure, offset=offset, which="prob-main")

        distr = self.distribution(mu[:, None], 1 - w[:, None])
        return distr
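
A small numeric check of the conditional-variance formula implemented in _predict_var above: for a zero-inflated Poisson with inflation probability w and Poisson mean mu, the first two moments collapse to Var(Y) = (1 - w) * mu * (1 + w * mu). The values below are illustrative.

import numpy as np

w, mu = 0.3, 2.0
# E[Y] = (1 - w) * mu and E[Y**2] = (1 - w) * (mu + mu**2), so
# Var(Y) = E[Y**2] - E[Y]**2 reduces to the closed form used above.
mean = (1 - w) * mu
second_moment = (1 - w) * (mu + mu**2)
var_direct = second_moment - mean**2
var_formula = (1 - w) * mu * (1 + w * mu)
assert np.isclose(var_direct, var_formula)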
Example #4
from datetime import date
from math import cos, pi

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from statsmodels.discrete.discrete_model import Poisson
class PoissonModel(object):
    def __init__(self, data):
        # data is path for csv file
        self.data = data
        self.df = pd.read_csv(self.data, low_memory=False)
        # Drop extra column
        self.df.drop(['Unnamed: 0'], axis=1, inplace=True)
        # Create X and y
        self.y = self.df.pop('freq')
        self.X = self.df
        # Busy travel times (2010-2020)
        self.trav_hol = [
            '2010-11-23', '2010-11-25', '2010-11-26', '2010-12-24',
            '2010-12-25', '2010-12-26', '2011-11-23', '2011-11-24',
            '2011-11-25', '2011-12-24', '2011-12-25', '2011-12-26',
            '2012-11-21', '2012-11-22', '2012-11-23', '2012-12-24',
            '2012-12-25', '2012-12-26', '2013-11-27', '2013-11-28',
            '2013-11-29', '2013-12-24', '2013-12-25', '2013-12-26',
            '2014-11-26', '2014-11-27', '2014-11-28', '2014-12-24',
            '2014-12-25', '2014-12-26', '2015-11-25', '2015-11-26',
            '2015-11-27', '2015-12-24', '2015-12-25', '2015-12-26',
            '2016-11-23', '2016-11-24', '2016-11-25', '2016-12-24',
            '2016-12-25', '2016-12-26', '2017-11-22', '2017-11-23',
            '2017-11-24', '2017-12-24', '2017-12-25', '2017-12-26',
            '2018-11-21', '2018-11-22', '2018-11-23', '2018-12-24',
            '2018-12-25', '2018-12-26', '2019-11-27', '2019-11-28',
            '2019-11-29', '2019-12-24', '2019-12-25', '2019-12-26',
            '2020-11-25', '2020-11-26', '2020-11-27', '2020-12-24',
            '2020-12-25', '2020-12-26'
        ]

        # Dangerous holidays are New Years day and July 4th (2010-2020)
        self.dang_hol = [
            '2010-01-01', '2010-07-04', '2011-01-01', '2011-07-04',
            '2012-01-01', '2012-07-04', '2013-01-01', '2013-07-04',
            '2014-01-01', '2014-07-04', '2015-01-01', '2015-07-04',
            '2016-01-01', '2016-07-04', '2017-01-01', '2017-07-04',
            '2018-01-01', '2018-07-04', '2019-01-01', '2019-07-04',
            '2020-01-01', '2020-07-04'
        ]

        # Create empty DataFrame
        zeros = np.zeros((7, 21))
        self.columns = [
            u'mariners_home',
            u'seahawks_home',
            u'sounders_home',
            u'trav_holiday',
            u'dang_holiday',
            u'night',
            u'Monday',
            u'Saturday',
            u'Sunday',
            u'Thursday',
            u'Tuesday',
            u'Wednesday',
            u'day_num',
            u'zone1',
            u'zone2',
            u'zone3',
            u'zone4',
            u'zone5',
            u'zone6',
            u'zone7',
            u'seasonality',
        ]
        self.X_test = pd.DataFrame(zeros, columns=self.columns)

    def fit(self):
        # Create scaler and scale X
        self.scaler = StandardScaler(with_mean=False)
        self.X = self.scaler.fit_transform(self.X)
        # Fit Poisson model to data
        self.poisson_model = Poisson(self.y, self.X).fit()

    def query_to_X(self, query):
        # Set zones
        self.X_test.loc[0, 'zone1'] = 1
        self.X_test.loc[1, 'zone2'] = 1
        self.X_test.loc[2, 'zone3'] = 1
        self.X_test.loc[3, 'zone4'] = 1
        self.X_test.loc[4, 'zone5'] = 1
        self.X_test.loc[5, 'zone6'] = 1
        self.X_test.loc[6, 'zone7'] = 1

        # Set home games
        if self.query['home_game'] == 'mariners':
            self.X_test['mariners_home'] = 1
        if self.query['home_game'] == 'seahawks':
            self.X_test['seahawks_home'] = 1
        if self.query['home_game'] == 'sounders':
            self.X_test['sounders_home'] = 1

        # Set holidays
        self.X_test['trav_holiday'] = int(
            self.query['date_input'] in self.trav_hol)
        self.X_test['dang_holiday'] = int(
            self.query['date_input'] in self.dang_hol)

        # Set night:
        self.X_test['night'] = self.query['time_range']

        # Set weekday
        date_input = pd.to_datetime(self.query['date_input'])
        weekday = date_input.day_name()
        if weekday in self.columns:
            self.X_test[weekday] = 1

        # Set day_num using timedelta with earliest date in dataset
        day0 = date(2010, 6, 29)
        self.X_test['day_num'] = (date_input.date() - day0).days

        # Set seasonality
        f = lambda x: 1 + cos(((2 * pi) / 365.25) * (x - 35))
        self.X_test['seasonality'] = self.X_test.day_num.apply(f)

        return self.X_test

    def predict(self, query):
        # Scale data and feed query DataFrame to model
        self.query = query
        self.X_test = self.query_to_X(self.query)
        self.X_scale = self.scaler.transform(self.X_test)
        self.preds = self.poisson_model.predict(self.X_scale)
        self.zones = [
            'zone1', 'zone2', 'zone3', 'zone4', 'zone5', 'zone6', 'zone7'
        ]
        self.results = list(zip(self.zones, self.preds))
        # Return a list of tuples
        return self.results
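
A hypothetical usage sketch for PoissonModel; the CSV path is an assumption, while the query keys ('home_game', 'date_input', 'time_range') follow how query_to_X reads the query dict.

model = PoissonModel('collisions.csv')  # path is an assumption
model.fit()
query = {'home_game': 'mariners', 'date_input': '2016-07-04', 'time_range': 1}
for zone, pred in model.predict(query):
    print(zone, pred)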
Example #5
from operator import itemgetter

import numpy
import pandas
from statsmodels.discrete.discrete_model import Poisson
# Game, Player, ProcessedTeamStatsDf, and ConvertMixin are
# project-specific Django models/mixins (not shown here).
class PredictPlayerStats(ConvertMixin):

    def __init__(self, engine, player_name, stat_to_predict, opposing_team_name,
                 predictor_stats=('csum_min_kills', 'csum_min_minions_killed'),
                 defense_predictor_stats=('csum_prev_min_allowed_kills', 'csum_prev_min_allowed_assists'),
                 game_range=None):
        self.engine = engine
        self.player_name = player_name
        self.stat_to_predict = stat_to_predict
        # The prev-game cumulative per-minute stats are always used as the
        # base predictors; the predictor_stats argument is currently unused.
        self.predictor_stats = ('csum_prev_min_kills', 'csum_prev_min_minions_killed')
        role_stats = ('Jungler', 'Mid', 'Coach', 'Support', 'AD', 'Sub', 'Top')
        self.predictor_stats = self.predictor_stats + defense_predictor_stats + role_stats
        self.opposing_team_name = opposing_team_name
        self.player_stats_table_name = 'player_stats_df'
        self.processed_player_stats_table_name = 'processed_player_stats_df'
        self.key_stats = ('kills', 'deaths', 'assists', 'minions_killed', 'gold',
                          'k_a', 'a_over_k')
        self.game_range = game_range
        self._process_player_stats_and_train()

    def _process_player_stats_and_train(self):
        processed_player_stats_df = self._get_processed_player_stats_in_df()
        self.latest_predictor_numpy_array = self._get_latest_player_stats_numpy_array(processed_player_stats_df)
        print('latest predictors numpy array {}'.format(self.latest_predictor_numpy_array))
        predictors, y_array = self._get_predictors_in_numpy_arrays(processed_player_stats_df)
        self._train_model(predictors, y_array)

    def _get_latest_player_stats_numpy_array(self, processed_player_stats_df):
        player_id = self._get_player_id_by_player_name(self.player_name)
        player_stats_df = processed_player_stats_df[processed_player_stats_df['player_id'] == player_id]
        latest_player_stats_df = player_stats_df.sort_values('game_id', ascending=False).head(1)
        dict_player = latest_player_stats_df.to_dict('records')[0]
        player_predictor_stats = []
        for predictor_stat in self.predictor_stats:
            # print('processing predictor stat {}'.format(predictor_stat))
            player_predictor_stats.append(dict_player[predictor_stat])
        latest_predictor_numpy_array = numpy.array([player_predictor_stats])
        return latest_predictor_numpy_array

    def _get_predictors_in_numpy_arrays(self, processed_player_stats_df):
        player_game_records = self._get_predictors(processed_player_stats_df)
        game_list = []
        y_array_list = []
        for player_game_record in player_game_records:
            game_predictor_stats = []
            if not (numpy.isnan(player_game_record['csum_prev_min_kills'])
                    or numpy.isnan(player_game_record['csum_prev_min_allowed_kills'])):
                if player_game_record['csum_prev_min_assists'] != 0:
                    prev_predictor_stats = self._convert_predictors_to_prev_csum(self.predictor_stats)
                    for prev_predictor_stat in prev_predictor_stats:
                        game_predictor_stats.append(player_game_record[prev_predictor_stat])
                    game_list.append(game_predictor_stats)
                    y_array_list.append(player_game_record['y_element'])
        predictors = numpy.array(game_list)
        y_array = numpy.array([y_array_list])
        return predictors, y_array

    def _get_predictors(self, processed_player_stats_df):
        player_game_records = processed_player_stats_df.to_dict('records')
        player_game_records.sort(key=itemgetter('game_id'))
        for player_game_record in player_game_records:
            player_game_record['y_element'] = player_game_record[self.stat_to_predict]
        return player_game_records

    def _train_model(self, predictors, y_array):
        y_1darray = numpy.squeeze(y_array)
        self.poisson = Poisson(y_1darray, predictors)
        self.pos_result = self.poisson.fit(method='bfgs')

    def _get_game_ids_from_database(self):
        game_ids_row = Game.objects.values_list('id', flat=True)
        game_ids = [game for game in game_ids_row]
        return game_ids

    def _get_lastest_processed_team_stats_by_name(self):
        return ProcessedTeamStatsDf.objects.filter(name=self.opposing_team_name).order_by('-id').first()

    def _get_game_by_ids(self, game_ids):
        return Game.objects.filter(id__in=game_ids)

    def _get_player_id_by_player_name(self, player_name):
        player = Player.objects.filter(name=player_name)
        return player[0].id

    def _get_processed_player_stats_in_df(self):
        game_ids = self._get_game_ids_from_database()
        last_game_number = game_ids[-1]
        has_processed_player_stats_table = self.engine.has_table(self.processed_player_stats_table_name)
        if has_processed_player_stats_table:
            df_game_stats = pandas.read_sql(self.player_stats_table_name, self.engine)
            df_game_stats_all = df_game_stats[df_game_stats.game_id.isin(game_ids)]
            # Using game_numbers here since we need the last few games to check.
            max_game_id_cached = df_game_stats_all['game_id'].max()
            max_index_cached = df_game_stats_all['index'].max()
            if pandas.isnull(max_game_id_cached):
                max_game_id_cached = game_ids[0]
            # Check if all the game numbers have been cached,
            # if not return what game to start form and what game to end from.
            if max_game_id_cached != last_game_number:
                # Get the index of the max_game_id
                max_game_id_index = game_ids.index(max_game_id_cached)
                # Trim down the list to only the games that need to be retrieved,
                # start from the max_id + 1 because we don't
                # want to count max_id we already have it
                game_ids_to_find = game_ids[max_game_id_index:]
                games = self._get_game_by_ids(game_ids_to_find)
                player_stats_df = self._get_player_stats_in_df(games, max_index_cached)
                self._insert_into_player_stats_df_tables(player_stats_df)
            else:
                # If everything was cached return cached as true and just return the last numbers
                # I could do this part better.
                print("everything cached no need to retrieve from api")
        else:
            # Table did not exist; retrieve everything starting at index 0
            start_index = 0
            games = self._get_game_by_ids(game_ids)
            player_stats_df = self._get_player_stats_in_df(games, start_index)
            print('table does not exist inserting full table')
            self._insert_into_player_stats_df_tables(player_stats_df)
            print('table inserted')
        if self.game_range == '5':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_5',
                                                              con=self.engine)
        elif self.game_range == '10':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_10',
                                                              con=self.engine)
        else:
            processed_player_stats_df = pandas.read_sql_table(self.processed_player_stats_table_name, self.engine)
        return processed_player_stats_df

    def _process_player_stats_df(self, player_stats_df):
        player_stats_df = player_stats_df.sort_values(['game_id', 'player_id'])
        key_stats = ['game_length_minutes'] + (list(self.key_stats))
        player_stats_df['clean_kills'] = player_stats_df['kills']
        player_stats_df.loc[player_stats_df.clean_kills == 0, 'clean_kills'] = 1
        player_stats_df['k_a'] = \
            player_stats_df['kills'] + player_stats_df['assists']
        player_stats_df['a_over_k'] = \
            player_stats_df['assists'] / player_stats_df['clean_kills']
        player_stats_for_pivot = player_stats_df[['player_name', 'role']].copy()
        player_stats_for_pivot['value'] = 1
        player_pivot_df = player_stats_for_pivot.pivot_table(index='player_name', columns='role', values='value')
        player_pivot_df.fillna(0, inplace=True)
        player_pivot_df.reset_index(inplace=True)
        player_stats_df = pandas.merge(player_stats_df, player_pivot_df, on='player_name')
        for key_stat in key_stats:
            print('doing key stats {}'.format(key_stat))
            player_stats_df['csum_{}'.format(key_stat)] = player_stats_df.groupby(by='player_id')[key_stat].cumsum()
            player_stats_df['csum_prev_{}'.format(key_stat)] = \
                player_stats_df['csum_{}'.format(key_stat)] - player_stats_df[key_stat]
            # player_stats_df['csum_prev_avg_{}'.format(key_stat)] = \
            #     player_stats_df['csum_prev_{}'.format(key_stat)] / player_stats_df['csum_prev_game_number']
            player_stats_df['per_min_{}'.format(key_stat)] = player_stats_df[key_stat] / \
                                                             player_stats_df['game_length_minutes']
            if key_stat not in ['game_number', 'game_length_minutes']:
                print('doing stats not game_number {}'.format(key_stat))
                player_stats_df['csum_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_{}'.format(key_stat)] / player_stats_df['csum_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_prev_{}'.format(key_stat)] / player_stats_df['csum_prev_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)].fillna(0, inplace=True)
            player_stats_df = player_stats_df.sort_values('game_id')
        return player_stats_df

    def _get_player_stats_in_df(self, games, max_index_cached):
        player_stats_df = None
        for game in games:
            players_stats = self._convert_game_to_player_stats_df(game)
            if player_stats_df is None:
                player_stats_df = pandas.DataFrame(players_stats, index=list(range(max_index_cached, (max_index_cached + 10))))
            else:
                single_game_player_stats_df = pandas.DataFrame(players_stats, index=list(range(max_index_cached, (max_index_cached + 10))))
                player_stats_df = pandas.concat([player_stats_df, single_game_player_stats_df])
            max_index_cached += 10
        return player_stats_df

    def _convert_game_to_player_stats_df(self, game):
        players_stats = game.playerstats_set.all()
        players_stats_dict = game.playerstats_set.all().values()
        player_stats_list = []
        for player_stats, player_stats_dict in zip(players_stats, players_stats_dict):
            player_stats_dict['game_length_minutes'] = float(game.game_length_minutes)
            player_stats_dict['gold'] = float(player_stats_dict['gold'])
            player_stats_dict['player_name'] = player_stats.player.name
            self._populate_player_stats_with_defense_stats(player_stats_dict, player_stats, game)
            player_stats_list.append(player_stats_dict)
        return player_stats_list

    def _populate_player_stats_with_defense_stats(self, player_stats_dict, player_stats, game):
        current_team = player_stats.team
        processed_team_stats_dict = game.processedteamstatsdf_set.exclude(team_name=current_team).values()[0]
        for key_stat in self.key_stats:
            player_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)]
            player_stats_dict['csum_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_min_allowed_{}'.format(key_stat)]

    def _insert_into_player_stats_df_tables(self, player_stats_df):
        player_stats_df.to_sql(self.player_stats_table_name, self.engine, if_exists='append')
        # Could be optimized kinda a hack
        player_stats_df = pandas.read_sql("select ps.*, p.role, p.image from player_stats_df ps, player p "
                                          "where ps.player_id = p.id", self.engine)
        processed_player_stats_df = self._process_player_stats_df(player_stats_df)
        processed_player_stats_df.to_sql(self.processed_player_stats_table_name, self.engine, if_exists='append')

    def predict_player_stat(self):
        # Poisson.predict returns the expected count, not a probability.
        predicted_stat_array = self.poisson.predict(self.pos_result.params, self.latest_predictor_numpy_array)
        return {self.player_name: predicted_stat_array}
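
A hypothetical usage sketch for PredictPlayerStats; the database URL, player name, team, and stat below are assumptions, not values from the source.

from sqlalchemy import create_engine

engine = create_engine('sqlite:///lol_stats.db')  # assumed database
predictor = PredictPlayerStats(engine,
                               player_name='Faker',       # assumed
                               stat_to_predict='kills',
                               opposing_team_name='SKT',  # assumed
                               game_range='10')
print(predictor.predict_player_stat())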
Example #6
import copy as cp
from typing import Any, Optional

import numpy as np
from statsmodels.discrete.discrete_model import Poisson
# Learner and RecordSet are project-specific (not shown here).
class PoissonRegression(Learner):
    """
	The poisson regression learning algorithm. Given data, this class
	constructs and stores a probability unit regression mdl that can
	be used to quantify the probability of testing data-points taking
	on certain class values.
	"""
    def __init__(self, alpha: float, **params: Any):
        """
        Initialises the Poisson regression algorithm.

        :param alpha: regularization term alpha.
        :param params: Ignored.
        """
        super().__init__(**params)
        self.name = 'Poisson Regression'
        self.alpha = alpha
        self.gamma = 0.5
        self.add_intercept = True
        self.binary_points = True

        self.beta = list()
        self.data: Optional[RecordSet] = None
        self.model: Optional[Poisson] = None  # will be set during fit

    def fit(self, rs: RecordSet) -> None:
        """
        Fit a Poisson regression model.

        :param rs: The record set to fit with.
        """
        # set params
        self.data = cp.deepcopy(rs)
        patterns = self.data.entries[:, :-1]
        out = self.data.entries[:, -1:]

        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # a positive alpha is required for a reliable regularized fit
        if self.alpha == 0:
            raise ValueError("alpha is too low to obtain reliable results")

        self.model = Poisson(endog=out.ravel(), exog=patterns)
        self.model = self.model.fit_regularized(alpha=self.alpha,
                                                maxiter=10**8,
                                                disp=False)

    def predict(self, rs: RecordSet) -> np.ndarray:
        """
		Assigns a predicted class label to the given record sets.

		:param rs: The record set to assign predictions to.
		:return: A column vector of predictions corresponding to the record set's rows.
		"""
        # set params
        patterns = rs.entries[:, :-1]

        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # predict
        predictions = self.model.predict(exog=patterns)

        if self.binary_points:
            predictions = self.discrete_points(predictions=predictions)

        # return 2d
        predictions = np.reshape(predictions, (-1, 1))
        return predictions

    def discrete_points(self, predictions):
        """
		Turns probabilities into discrete classes

		:param predictions: The predicted class probabilities
		:return: A vector with discrete classes
		"""
        n = predictions.shape[0]
        for i in range(0, n):
            if predictions[i] >= self.gamma:
                predictions[i] = 1
            else:
                predictions[i] = 0
        return predictions
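
Since the Learner/RecordSet framework above is project-specific, here is a self-contained sketch of the same technique on toy data: an L1-regularized Poisson fit whose predictions are thresholded at gamma = 0.5, mirroring discrete_points. All data below are simulated assumptions.

import numpy as np
from statsmodels.discrete.discrete_model import Poisson

rng = np.random.default_rng(1)
X = np.hstack([np.ones((200, 1)), rng.normal(size=(200, 2))])  # intercept + 2 features
y = rng.poisson(np.exp(X @ np.array([-1.0, 0.7, -0.4])))

res = Poisson(y, X).fit_regularized(alpha=0.1, disp=False)
preds = res.predict(X)
labels = (preds >= 0.5).astype(int)  # same thresholding rule as discrete_points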