def predict(self, x):
    """
    Predicts values for x using the fitted coefficients.

    The input is passed through ``core.enhance_matrix`` and validated with
    ``reg_eval.regression_data_check`` (``width`` set to the number of
    fitted coefficients) before being multiplied by the coefficients.

    :param x: Matrix of predictors
    :return: Product of the enhanced matrix and ``self.coefficients``
    :raises Exception: If ``self.coefficients`` is None (model not fitted)
    """
    coefficients = self.coefficients
    if coefficients is None:
        raise Exception('Model has not been fitted yet')

    design = core.enhance_matrix(x)
    expected_width = len(self.coefficients)
    reg_eval.regression_data_check(design, width=expected_width)
    return design * self.coefficients
def evaluate(self, x, y_true):
    """
    Evaluates the performance of the trained model on a global and variable level.

    For global, RSE, R^2 and the F-statistic (with its p-value) are reported.
    For variables the SE, t-statistic and two-sided p-value are reported.
    Both tables are printed with ``tabulate`` and also returned.

    :param x: Matrix of predictors
    :param y_true: Vector of true y values
    :return: Tuple ``(glob_outcomes, var_outcomes)``; ``glob_outcomes`` is a
        dict with parallel ``'Metric'`` / ``'Value'`` lists, ``var_outcomes``
        is a dict of per-column lists keyed by ``'Column'``,
        ``'Coefficient'``, ``'SE'``, ``'t-statistic'`` and ``'p-value'``
    :raises Exception: Propagated from ``predict`` if the model is not fitted
    """
    x = core.enhance_matrix(x)
    y_pred = self.predict(x)

    n_obs = len(y_pred)
    n_params = x.n_cols
    # Residual degrees of freedom, shared by the F-test and the t-tests.
    residual_df = n_obs - n_params

    global_metrics = [
        ('RSE', reg_eval.residual_standard_error),
        ('R^2', reg_met.r_squared),
        ('F-statistic', reg_eval.f_statistic),
    ]

    glob_outcomes = {'Metric': [], 'Value': []}
    for name, metric in global_metrics:
        glob_outcomes['Metric'].append(name)
        glob_outcomes['Value'].append(
            metric(x=x, y_true=y_true, y_pred=y_pred, num_predictors=n_params))

    # Look the F-statistic up by name instead of by a hard-coded position so
    # that reordering the metric list cannot silently pick the wrong value.
    f_stat = glob_outcomes['Value'][glob_outcomes['Metric'].index('F-statistic')]

    # The F distribution takes the model degrees of freedom as numerator and
    # the residual degrees of freedom as denominator. The previous code passed
    # the number of observations as the numerator df, which is not a valid
    # parameterisation of the overall F-test.
    # NOTE(review): assumes x already contains the intercept column, so the
    # number of tested predictors is n_cols - 1 -- consistent with the
    # residual df (n - n_cols) used for the t-tests below; confirm against
    # reg_eval.f_statistic.
    glob_outcomes['Metric'].append('p-value')
    glob_outcomes['Value'].append(
        f.sf(f_stat, dfn=n_params - 1, dfd=residual_df))

    var_metrics = [
        ('SE', reg_eval.standard_error_coefs),
        ('t-statistic', reg_eval.t_statistic),
    ]

    var_outcomes = {
        'Column': list(range(n_params)),
        'Coefficient': self.coefficients.data
    }
    for name, metric in var_metrics:
        var_outcomes[name] = metric(
            x=x, y_true=y_true, y_pred=y_pred,
            coefs=var_outcomes['Coefficient'])

    # Two-sided p-value from the t distribution's survival function.
    var_outcomes['p-value'] = [
        2 * t.sf(abs(float(t_score)), residual_df)
        for t_score in var_outcomes['t-statistic']
    ]

    print(tabulate(glob_outcomes, headers='keys'))
    print(tabulate(var_outcomes, headers='keys'))
    return glob_outcomes, var_outcomes
def fit(self, x, y_true):
    """
    Fits the coefficients for X to predict y using the model criterion.

    Only the ``'LS'`` criterion is supported; the coefficients are computed
    as ``(X^T X)^-1 X^T y`` and stored on ``self.coefficients``.

    :param x: Matrix with equal amounts of rows as y
    :param y_true: Column Vector with length equal to rows in X
    :raises Exception: If ``self.criterion`` is not ``'LS'``
    """
    if not self.criterion == 'LS':
        raise Exception('Only Least Squares is implemented')

    reg_eval.regression_data_check(x, y_true)
    design = core.enhance_matrix(x)

    # Normal equations, evaluated left to right as in a single expression.
    gram = design.transpose() * design
    projector = gram.inverse() * design.transpose()
    self.coefficients = projector * y_true
def t_statistic(x, y_true, y_pred, coefs, number_type=float, **kwargs):
    """
    Computes the t-statistic of every coefficient.

    Each coefficient is divided by its standard error as returned by
    ``standard_error_coefs``.

    :param x: Matrix of predictors
    :param y_true: Vector of true y values
    :param y_pred: Vector of predicted y values
    :param coefs: Indexable sequence of fitted coefficients
    :param number_type: Forwarded to ``standard_error_coefs``
    :param kwargs: Accepted and ignored
    :return: List with one t-statistic per coefficient
    """
    design = core.enhance_matrix(x)
    std_errors = standard_error_coefs(design, y_true, y_pred, number_type)
    return [coef / std_errors[idx] for idx, coef in enumerate(coefs)]
def standard_error_coefs(x, y_true, y_pred, number_type=float, **kwargs):
    """
    Computes the standard error of every coefficient.

    The sum of squared residuals is divided by ``len(y_true) - x.n_cols``
    and multiplied by the matching diagonal entry of ``(X^T X)^-1``; the
    square root of that product is the standard error.

    :param x: Matrix of predictors
    :param y_true: Vector of true y values
    :param y_pred: Vector of predicted y values
    :param number_type: Accepted but not used by this function
    :param kwargs: Accepted and ignored
    :return: List with one standard error per column of x
    """
    design = core.enhance_matrix(x)

    squared_residuals = (y_true - y_pred) ** 2
    mse = squared_residuals.sum() / (len(y_true) - design.n_cols)

    gram_inverse = (design.transpose() * design).inverse()

    std_errors = []
    for col in range(design.n_cols):
        variance = mse * gram_inverse.data[col][col]
        std_errors.append(variance ** 0.5)
    return std_errors
def score(self, x, y_true, metric=reg_met.r_squared, number_type=float):
    """
    Scores the fitted model's predictions for x against y_true.

    :param x: Matrix of predictors
    :param y_true: Vector of true y values
    :param metric: Callable invoked as ``metric(y_true, y_pred, number_type)``;
        defaults to ``reg_met.r_squared``
    :param number_type: Passed through to ``metric`` as third argument
    :return: Whatever ``metric`` returns
    :raises Exception: If ``self.coefficients`` is None (model not fitted)
    """
    design = core.enhance_matrix(x)

    not_fitted = self.coefficients is None
    if not_fitted:
        raise Exception('Model has not been fitted yet')

    return metric(y_true, self.predict(design), number_type)