class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    exog_infl : array
        A reference to the zero-inflated exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        # d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) *
                    (1 / coeff -
                     w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) / coeff**2)
                    ).sum() -
                    (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                     self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        counts = np.atleast_2d(np.arange(0, np.max(self.endog) + 1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
                                     offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, w)
        return result[0] if transform else result

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params
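# A minimal standalone sketch (not statsmodels code) of the zero-inflated Poisson pmf
# that `zipoisson` evaluates inside `_predict_prob` above: with inflation probability w
# and Poisson mean mu, P(Y=0) = w + (1 - w) * exp(-mu) and P(Y=k) = (1 - w) * Poisson(k; mu)
# for k >= 1.  The helper name `zip_pmf` is illustrative only.
import numpy as np
from scipy import stats


def zip_pmf(k, mu, w):
    """Zero-inflated Poisson pmf for counts k, Poisson mean mu, inflation probability w."""
    k = np.asarray(k)
    pmf = (1 - w) * stats.poisson.pmf(k, mu)
    return np.where(k == 0, w + pmf, pmf)


# Sanity check: the probabilities over a wide support sum to ~1.
assert abs(zip_pmf(np.arange(200), mu=3.0, w=0.25).sum() - 1.0) < 1e-9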
"station_diur_temp_rng_c", "precipitation_amt_mm", "reanalysis_dew_point_temp_k", "reanalysis_air_temp_k", "reanalysis_relative_humidity_percent", "reanalysis_specific_humidity_g_per_kg", "reanalysis_precip_amt_kg_per_m2", "reanalysis_max_air_temp_k", "reanalysis_min_air_temp_k", "reanalysis_avg_temp_k", "reanalysis_tdtr_k", "ndvi_se", "ndvi_sw", "ndvi_ne", "ndvi_nw" ] n_features = len(features_list) df_train_features = df_train_features.fillna(df_train_features.mean()) df_test_features = df_test_features.fillna(df_test_features.mean()) X_train = df_train_features[features_list].values X_test = df_test_features[features_list].values y_train = df_train_labels["total_cases"].values # Model: poisson_mod = Poisson(endog=y_train, exog=X_train).fit(maxiter=61) print(poisson_mod.summary()) predictions = poisson_mod.predict(X_test) predictions_rounded = np.rint(predictions).astype(np.int64) print(predictions_rounded) write_result(predictions_rounded, "/poisson.csv", sample_source=sample_submission_path, write_source=predictions_path)
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        # d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) *
                    (1 / coeff -
                     w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) / coeff**2)
                    ).sum() -
                    (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                     self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset,
                      y_values=None):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        if y_values is None:
            y_values = np.atleast_2d(np.arange(0, np.max(self.endog) + 1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
                                     offset=offset)[:, None]
        result = self.distribution.pmf(y_values, mu, w)
        return result[0] if transform else result

    def _predict_var(self, params, mu, prob_infl):
        """predict values for conditional variance V(endog | exog)

        Parameters
        ----------
        params : array_like
            The model parameters. This is only used to extract extra params
            like dispersion parameter.
        mu : array_like
            Array of mean predictions for main model.
        prob_infl : array_like
            Array of predicted probabilities of zero-inflation `w`.

        Returns
        -------
        Predicted conditional variance.
        """
        w = prob_infl
        var_ = (1 - w) * mu * (1 + w * mu)
        return var_

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params

    def get_distribution(self, params, exog=None, exog_infl=None,
                         exposure=None, offset=None):
        """Get frozen instance of distribution based on predicted parameters.

        Parameters
        ----------
        params : array_like
            The parameters of the model.
        exog : ndarray, optional
            Explanatory variables for the main count model.
            If ``exog`` is None, then the data from the model will be used.
        exog_infl : ndarray, optional
            Explanatory variables for the zero-inflation model.
            ``exog_infl`` has to be provided if ``exog`` was provided unless
            ``exog_infl`` in the model is only a constant.
        offset : ndarray, optional
            Offset is added to the linear predictor of the mean function with
            coefficient equal to 1.
            Default is zero if exog is not None, and the model offset if exog
            is None.
        exposure : ndarray, optional
            Log(exposure) is added to the linear predictor of the mean
            function with coefficient equal to 1. If exposure is specified,
            then it will be logged by the method. The user does not need to
            log it first.
            Default is one if exog is not None, and it is the model exposure
            if exog is None.

        Returns
        -------
        Instance of frozen scipy distribution subclass.
        """
        mu = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="mean-main")
        w = self.predict(params, exog=exog, exog_infl=exog_infl,
                         exposure=exposure, offset=offset, which="prob-main")

        distr = self.distribution(mu[:, None], 1 - w[:, None])
        return distr
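# A usage sketch for the class above via the released statsmodels packaging
# (statsmodels.discrete.count_model.ZeroInflatedPoisson).  The simulated data, the 30%
# structural-zero rate, and the constant-only inflation design are illustrative
# assumptions, not part of the source.
import numpy as np
import statsmodels.api as sm
from statsmodels.discrete.count_model import ZeroInflatedPoisson

rng = np.random.default_rng(1)
n = 1000
x = sm.add_constant(rng.normal(size=(n, 1)))
mu = np.exp(x @ np.array([0.5, 0.3]))
structural_zero = rng.random(n) < 0.3
y = np.where(structural_zero, 0, rng.poisson(mu))

zip_mod = ZeroInflatedPoisson(y, x, exog_infl=np.ones((n, 1)), inflation='logit')
zip_res = zip_mod.fit(disp=0)
print(zip_res.summary())
# Conditional mean E[y | x], combining the logit zero-inflation and Poisson parts.
print(zip_res.predict(x, exog_infl=np.ones((n, 1)))[:5])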
# Module-level imports needed by PoissonModel (standard scientific stack).
from datetime import date
from math import cos, pi

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from statsmodels.discrete.discrete_model import Poisson


class PoissonModel(object):

    def __init__(self, data):
        # data is the path to a csv file
        self.data = data
        self.df = pd.read_csv(self.data, low_memory=False)

        # Drop the extra index column left over from the csv export
        self.df.drop(['Unnamed: 0'], axis=1, inplace=True)

        # Create X and y
        self.y = self.df.pop('freq')
        self.X = self.df

        # Busy travel times (2010-2020)
        self.trav_hol = [
            '2010-11-23', '2010-11-25', '2010-11-26', '2010-12-24',
            '2010-12-25', '2010-12-26', '2011-11-23', '2011-11-24',
            '2011-11-25', '2011-12-24', '2011-12-25', '2011-12-26',
            '2012-11-21', '2012-11-22', '2012-11-23', '2012-12-24',
            '2012-12-25', '2012-12-26', '2013-11-27', '2013-11-28',
            '2013-11-29', '2013-12-24', '2013-12-25', '2013-12-26',
            '2014-11-26', '2014-11-27', '2014-11-28', '2014-12-24',
            '2014-12-25', '2014-12-26', '2015-11-25', '2015-11-26',
            '2015-11-27', '2015-12-24', '2015-12-25', '2015-12-26',
            '2016-11-23', '2016-11-24', '2016-11-25', '2016-12-24',
            '2016-12-25', '2016-12-26', '2017-11-22', '2017-11-23',
            '2017-11-24', '2017-12-24', '2017-12-25', '2017-12-26',
            '2018-11-21', '2018-11-22', '2018-11-23', '2018-12-24',
            '2018-12-25', '2018-12-26', '2019-11-27', '2019-11-28',
            '2019-11-29', '2019-12-24', '2019-12-25', '2019-12-26',
            '2020-11-25', '2020-11-26', '2020-11-27', '2020-12-24',
            '2020-12-25', '2020-12-26'
        ]

        # Dangerous holidays are New Year's Day and July 4th (2010-2020)
        self.dang_hol = [
            '2010-01-01', '2010-07-04', '2011-01-01', '2011-07-04',
            '2012-01-01', '2012-07-04', '2013-01-01', '2013-07-04',
            '2014-01-01', '2014-07-04', '2015-01-01', '2015-07-04',
            '2016-01-01', '2016-07-04', '2017-01-01', '2017-07-04',
            '2018-01-01', '2018-07-04', '2019-01-01', '2019-07-04',
            '2020-01-01', '2020-07-04'
        ]

        # Create empty test DataFrame with one row per zone
        zeros = np.zeros((7, 21))
        self.columns = [
            u'mariners_home', u'seahawks_home', u'sounders_home',
            u'trav_holiday', u'dang_holiday', u'night', u'Monday',
            u'Saturday', u'Sunday', u'Thursday', u'Tuesday', u'Wednesday',
            u'day_num', u'zone1', u'zone2', u'zone3', u'zone4', u'zone5',
            u'zone6', u'zone7', u'seasonality',
        ]
        self.X_test = pd.DataFrame(zeros, columns=self.columns)

    def fit(self):
        # Create scaler and scale X
        self.scaler = StandardScaler(with_mean=False)
        self.X = self.scaler.fit_transform(self.X)

        # Fit Poisson model to data
        self.poisson_model = Poisson(self.y, self.X).fit()

    def query_to_X(self, query):
        # Set zones (`.loc` replaces the removed pandas `.ix` indexer)
        self.X_test.loc[0, 'zone1'] = 1
        self.X_test.loc[1, 'zone2'] = 1
        self.X_test.loc[2, 'zone3'] = 1
        self.X_test.loc[3, 'zone4'] = 1
        self.X_test.loc[4, 'zone5'] = 1
        self.X_test.loc[5, 'zone6'] = 1
        self.X_test.loc[6, 'zone7'] = 1

        # Set home games
        if self.query['home_game'] == 'mariners':
            self.X_test['mariners_home'] = 1
        if self.query['home_game'] == 'seahawks':
            self.X_test['seahawks_home'] = 1
        if self.query['home_game'] == 'sounders':
            self.X_test['sounders_home'] = 1

        # Set holidays
        self.X_test['trav_holiday'] = int(
            self.query['date_input'] in self.trav_hol)
        self.X_test['dang_holiday'] = int(
            self.query['date_input'] in self.dang_hol)

        # Set night
        self.X_test['night'] = self.query['time_range']

        # Set weekday (`day_name()` replaces the removed `weekday_name` attribute)
        date_input = pd.to_datetime(self.query['date_input'])
        weekday = date_input.day_name()
        if weekday in self.columns:
            self.X_test[weekday] = 1

        # Set day_num using timedelta with earliest date in dataset
        day0 = date(2010, 6, 29)
        self.X_test['day_num'] = (date_input.date() - day0).days

        # Set seasonality
        f = lambda x: 1 + cos(((2 * pi) / 365.25) * (x - 35))
        self.X_test['seasonality'] = self.X_test.day_num.apply(f)

        return self.X_test

    def predict(self, query):
        # Scale data and feed query DataFrame to model
        self.query = query
        self.X_test = self.query_to_X(self.query)
        self.X_scale = self.scaler.transform(self.X_test)
        self.preds = self.poisson_model.predict(self.X_scale)
        self.zones = [
            'zone1', 'zone2', 'zone3', 'zone4', 'zone5', 'zone6', 'zone7'
        ]
        self.results = list(zip(self.zones, self.preds))
        # Return a list of (zone, predicted rate) tuples
        return self.results
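# A hypothetical usage sketch for PoissonModel.  The csv path and the query keys
# ('home_game', 'date_input', 'time_range') follow the attribute accesses above; the
# concrete values are assumptions for illustration only.
if __name__ == '__main__':
    model = PoissonModel('data/incidents.csv')   # illustrative path
    model.fit()
    query = {'home_game': 'mariners', 'date_input': '2015-07-04', 'time_range': 1}
    for zone, rate in model.predict(query):
        print(zone, rate)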
class PredictPlayerStats(ConvertMixin):

    def __init__(self, engine, player_name, stat_to_predict, opposing_team_name,
                 predictor_stats=('csum_min_kills', 'csum_min_minions_killed'),
                 defense_predictor_stats=('csum_prev_min_allowed_kills',
                                          'csum_prev_min_allowed_assists'),
                 game_range=None):
        self.engine = engine
        self.player_name = player_name
        self.stat_to_predict = stat_to_predict
        # Both branches currently select the same default predictor columns.
        if predictor_stats:
            self.predictor_stats = ('csum_prev_min_kills',
                                    'csum_prev_min_minions_killed')
        else:
            self.predictor_stats = ('csum_prev_min_kills',
                                    'csum_prev_min_minions_killed')
        role_stats = ('Jungler', 'Mid', 'Coach', 'Support', 'AD', 'Sub', 'Top')
        self.predictor_stats = (self.predictor_stats + defense_predictor_stats +
                                role_stats)
        self.opposing_team_name = opposing_team_name
        self.player_stats_table_name = 'player_stats_df'
        self.processed_player_stars_table_name = 'processed_player_stats_df'
        self.key_stats = ('kills', 'deaths', 'assists', 'minions_killed', 'gold',
                          'k_a', 'a_over_k')
        self.game_range = game_range
        self._process_player_stats_and_train()

    def _process_player_stats_and_train(self):
        processed_player_stats_df = self._get_processed_player_stats_in_df()
        self.latest_predictor_numpy_array = \
            self._get_latest_player_stats_numpy_array(processed_player_stats_df)
        print('latest predictors numpy array {}'.format(
            self.latest_predictor_numpy_array))
        predictors, y_array = self._get_predictors_in_numpy_arrays(
            processed_player_stats_df)
        self._train_model(predictors, y_array)

    def _get_latest_player_stats_numpy_array(self, processed_player_stats_df):
        player_id = self._get_player_id_by_player_name(self.player_name)
        player_stats_df = processed_player_stats_df[
            processed_player_stats_df['player_id'] == player_id]
        # `sort_values` replaces the removed DataFrame.sort API
        latest_player_stats_df = player_stats_df.sort_values(
            ['game_id'], ascending=False).head(1)
        dict_player = latest_player_stats_df.to_dict('records')[0]
        player_predictor_stats = []
        for predictor_stat in self.predictor_stats:
            # print('processing predictor stat {}'.format(predictor_stat))
            player_predictor_stats.append(dict_player[predictor_stat])
        latest_predictor_numpy_array = numpy.array([player_predictor_stats])
        return latest_predictor_numpy_array

    def _get_predictors_in_numpy_arrays(self, processed_player_stats_df):
        player_game_records = self._get_predictors(processed_player_stats_df)
        game_list = []
        y_array_list = []
        for player_game_record in player_game_records:
            game_predictor_stats = []
            if not (numpy.isnan(player_game_record['csum_prev_min_kills']) or
                    numpy.isnan(player_game_record['csum_prev_min_allowed_kills'])):
                if player_game_record['csum_prev_min_assists'] != 0:
                    prev_predictor_stats = self._convert_predictors_to_prev_csum(
                        self.predictor_stats)
                    for prev_predictor_stat in prev_predictor_stats:
                        game_predictor_stats.append(
                            player_game_record[prev_predictor_stat])
                    game_list.append(game_predictor_stats)
                    y_array_list.append(player_game_record['y_element'])
        predictors = numpy.array(game_list)
        y_array = numpy.array([y_array_list])
        return predictors, y_array

    def _get_predictors(self, processed_player_stats_df):
        player_game_records = processed_player_stats_df.to_dict('records')
        player_game_records.sort(key=itemgetter('game_id'))
        for player_game_record in player_game_records:
            player_game_record['y_element'] = \
                player_game_record[self.stat_to_predict]
        return player_game_records

    def _train_model(self, predictors, y_array):
        y_1darray = numpy.squeeze(y_array)
        self.poisson = Poisson(y_1darray, predictors)
        self.pos_result = self.poisson.fit(method='bfgs')

    def _get_game_ids_from_database(self):
        game_ids_row = Game.objects.values_list('id', flat=True)
        game_ids = [game for game in game_ids_row]
        return game_ids

    def _get_lastest_processed_team_stats_by_name(self):
        return ProcessedTeamStatsDf.objects.filter(
            name=self.opposing_team_name).order_by('-id').first()

    def _get_game_by_ids(self, game_ids):
        return Game.objects.filter(id__in=game_ids)

    def _get_player_id_by_player_name(self, player_name):
        player = Player.objects.filter(name=player_name)
        return player[0].id

    def _get_processed_player_stats_in_df(self):
        game_ids = self._get_game_ids_from_database()
        last_game_number = game_ids[-1]
        has_processed_team_stats_table = self.engine.has_table(
            self.processed_player_stars_table_name)
        if has_processed_team_stats_table:
            df_game_stats = pandas.read_sql(self.player_stats_table_name,
                                            self.engine)
            df_game_stats_all = df_game_stats[df_game_stats.game_id.isin(game_ids)]
            # Using game numbers here since we need the last few games to check.
            max_game_id_cached = df_game_stats_all['game_id'].max()
            max_index_cached = df_game_stats_all['index'].max()
            if pandas.isnull(max_game_id_cached):
                max_game_id_cached = game_ids[0]
            # Check whether all the game numbers have been cached; if not,
            # work out which games still need to be retrieved.
            if max_game_id_cached != last_game_number:
                # Get the index of the max_game_id
                max_game_id_index = game_ids.index(max_game_id_cached)
                # Trim the list down to the games from the cached max_game_id onward
                game_ids_to_find = game_ids[max_game_id_index:]
                games = self._get_game_by_ids(game_ids_to_find)
                player_stats_df = self._get_player_stats_in_df(games,
                                                               max_index_cached)
                self._insert_into_player_stats_df_tables(player_stats_df)
            else:
                # Everything was already cached; nothing new to retrieve.
                print("everything cached no need to retrieve from api")
        else:
            # Table did not exist, have to get all games
            max_index_cached = 0
            games = self._get_game_by_ids(game_ids)
            player_stats_df = self._get_player_stats_in_df(games, max_index_cached)
            print('table does not exist inserting full table')
            self._insert_into_player_stats_df_tables(player_stats_df)
            print('table inserted')

        if self.game_range == '5':
            processed_player_stats_df = pandas.read_sql(
                'select * from processed_player_stats_df_limit_5', con=self.engine)
        elif self.game_range == '10':
            processed_player_stats_df = pandas.read_sql(
                'select * from processed_player_stats_df_limit_10', con=self.engine)
        else:
            processed_player_stats_df = pandas.read_sql_table(
                self.processed_player_stars_table_name, self.engine)
        return processed_player_stats_df

    def _process_player_stats_df(self, player_stats_df):
        player_stats_df = player_stats_df.sort_values(['game_id', 'player_id'])
        key_stats = ['game_length_minutes'] + list(self.key_stats)
        player_stats_df['clean_kills'] = player_stats_df['kills']
        player_stats_df.loc[player_stats_df.clean_kills == 0, 'clean_kills'] = 1
        player_stats_df['k_a'] = \
            player_stats_df['kills'] + player_stats_df['assists']
        player_stats_df['a_over_k'] = \
            player_stats_df['assists'] / player_stats_df['clean_kills']

        player_stats_for_pivot = player_stats_df[['player_name', 'role']].copy()
        player_stats_for_pivot['value'] = 1
        player_pivot_df = player_stats_for_pivot.pivot_table(
            index='player_name', columns='role', values='value')
        player_pivot_df.fillna(0, inplace=True)
        player_pivot_df.reset_index(inplace=True)
        player_stats_df = pandas.merge(player_stats_df, player_pivot_df,
                                       on='player_name')

        for key_stat in key_stats:
            print('doing key stats {}'.format(key_stat))
            player_stats_df['csum_{}'.format(key_stat)] = \
                player_stats_df.groupby(by='player_id')[key_stat].cumsum()
            player_stats_df['csum_prev_{}'.format(key_stat)] = \
                player_stats_df['csum_{}'.format(key_stat)] - player_stats_df[key_stat]
            # player_stats_df['csum_prev_avg_{}'.format(key_stat)] = \
            #     player_stats_df['csum_prev_{}'.format(key_stat)] / player_stats_df['csum_prev_game_number']
            player_stats_df['per_min_{}'.format(key_stat)] = \
                player_stats_df[key_stat] / player_stats_df['game_length_minutes']
            if key_stat not in ['game_number', 'game_length_minutes']:
                print('doing stats not game_number {}'.format(key_stat))
                player_stats_df['csum_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_{}'.format(key_stat)] / \
                    player_stats_df['csum_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_prev_{}'.format(key_stat)] / \
                    player_stats_df['csum_prev_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)].fillna(
                    0, inplace=True)

        player_stats_df = player_stats_df.sort_values(['game_id'])
        return player_stats_df

    def _get_player_stats_in_df(self, games, max_index_cached):
        player_stats_df = None
        for game in games:
            players_stats = self._convert_game_to_player_stats_df(game)
            if player_stats_df is None:
                player_stats_df = pandas.DataFrame(
                    players_stats,
                    index=list(range(max_index_cached, max_index_cached + 10)))
            else:
                single_game_player_stats_df = pandas.DataFrame(
                    players_stats,
                    index=list(range(max_index_cached, max_index_cached + 10)))
                # pandas.concat replaces the removed DataFrame.append API
                player_stats_df = pandas.concat(
                    [player_stats_df, single_game_player_stats_df])
            max_index_cached += 10
        return player_stats_df

    def _convert_game_to_player_stats_df(self, game):
        players_stats = game.playerstats_set.all()
        players_stats_dict = game.playerstats_set.all().values()
        player_stats_list = []
        for player_stats, player_stats_dict in zip(players_stats,
                                                   players_stats_dict):
            player_stats_dict['game_length_minutes'] = float(
                game.game_length_minutes)
            player_stats_dict['gold'] = float(player_stats_dict['gold'])
            player_stats_dict['player_name'] = player_stats.player.name
            self._populate_player_stats_with_defense_stats(player_stats_dict,
                                                           player_stats, game)
            player_stats_list.append(player_stats_dict)
        return player_stats_list

    def _populate_player_stats_with_defense_stats(self, player_stats_dict,
                                                  player_stats, game):
        current_team = player_stats.team
        processed_team_stats_dict = game.processedteamstatsdf_set.exclude(
            team_name=current_team).values()[0]
        for key_stat in self.key_stats:
            player_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)]
            player_stats_dict['csum_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_min_allowed_{}'.format(key_stat)]

    def _insert_into_player_stats_df_tables(self, player_stats_df):
        player_stats_df.to_sql(self.player_stats_table_name, self.engine,
                               if_exists='append')
        # Could be optimized; kind of a hack
        player_stats_df = pandas.read_sql(
            "select ps.*, p.role, p.image from player_stats_df ps, player p "
            "where ps.player_id = p.id", self.engine)
        processed_team_stats_df = self._process_player_stats_df(player_stats_df)
        processed_team_stats_df.to_sql(self.processed_player_stars_table_name,
                                       self.engine, if_exists='append')

    def predict_player_stat(self):
        # reshaped_numpy_array = numpy.reshape(self.latest_predictor_numpy_array, 3, 1)
        probability_in_numpy_array = self.poisson.predict(
            self.pos_result.params, self.latest_predictor_numpy_array)
        return {self.player_name: probability_in_numpy_array}
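# A hypothetical call pattern for PredictPlayerStats.  It assumes a configured Django
# project providing the Game, Player, and ProcessedTeamStatsDf models, the ConvertMixin
# base class, and a populated database; the connection string and names below are
# illustrative only.
from sqlalchemy import create_engine

engine = create_engine('postgresql://localhost/lol_stats')   # illustrative DSN
predictor = PredictPlayerStats(engine,
                               player_name='ExamplePlayer',
                               stat_to_predict='kills',
                               opposing_team_name='ExampleTeam',
                               game_range='10')
print(predictor.predict_player_stat())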
class PoissonRegression(Learner):
    """
    The Poisson regression learning algorithm.

    Given data, this class constructs and stores a Poisson regression model
    that can be used to quantify the probability of testing data-points
    taking on certain class values.
    """

    def __init__(self, alpha: float, **params: any):
        """
        Initialises the Poisson regression algorithm.

        :param alpha: Regularisation term alpha.
        :param params: Ignored.
        """
        super().__init__(**params)
        self.name = 'Poisson Regression'
        self.alpha = alpha
        self.gamma = 0.5
        self.add_intercept = True
        self.binary_points = True
        self.beta = list()
        self.data: Optional[RecordSet] = None
        self.model: Optional[Poisson] = None  # will be set during fit

    def fit(self, rs: RecordSet) -> None:
        """
        Fit a Poisson regression model.

        :param rs: The record set to fit with.
        """
        # set params
        self.data = cp.deepcopy(rs)
        patterns = self.data.entries[:, :-1]
        out = self.data.entries[:, -1:]
        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # avoid an unregularised fit
        if self.alpha == 0:
            raise Exception("Alpha too low to obtain reliable results")
        self.model = Poisson(endog=out.ravel(), exog=patterns)
        self.model = self.model.fit_regularized(alpha=self.alpha, maxiter=10e8,
                                                disp=False)

    def predict(self, rs: RecordSet) -> np.ndarray:
        """
        Assigns a predicted class label to the given record sets.

        :param rs: The record set to assign predictions to.
        :return: A column vector of predictions corresponding to the record
            set's rows.
        """
        # set params
        patterns = rs.entries[:, :-1]
        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # predict
        predictions = self.model.predict(exog=patterns)
        if self.binary_points:
            predictions = self.discrete_points(predictions=predictions)

        # return as a 2d column vector
        predictions = np.reshape(predictions, (-1, 1))
        return predictions

    def discrete_points(self, predictions):
        """
        Turns probabilities into discrete classes.

        :param predictions: The predicted class probabilities.
        :return: A vector with discrete classes.
        """
        n = predictions.shape[0]
        for i in range(0, n):
            if predictions[i] >= self.gamma:
                predictions[i] = 1
            else:
                predictions[i] = 0
        return predictions
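# A hypothetical smoke test for PoissonRegression.  `Learner` and `RecordSet` come from
# the surrounding project and are assumed here to require no constructor arguments; the
# stand-in below only mimics the single attribute the class reads (`entries`, with the
# binary label in the last column).  All names and data are illustrative.
import numpy as np


class _FakeRecordSet:
    def __init__(self, entries):
        self.entries = entries


rng = np.random.default_rng(2)
X = rng.normal(size=(200, 3))
y = (X[:, 0] + rng.normal(scale=0.5, size=200) > 0).astype(float)
rs = _FakeRecordSet(np.hstack([X, y[:, None]]))

learner = PoissonRegression(alpha=0.1)
learner.fit(rs)
print(learner.predict(rs)[:5])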