def get_fit_model(score_list, label_list):
    """Fit an ``IR`` model mapping scores to labels and return the fitted model.

    Both inputs are converted to NumPy arrays before fitting.
    """
    scores = np.array(score_list)
    labels = np.array(label_list)
    model = IR()
    model.fit(scores, labels)
    return model
def test_isotonic_regression():
    """Check ``isotonic_regression`` outputs and the estimator fit/transform API."""
    # Known inputs paired with their expected isotonic fits.
    cases = (
        ([3, 7, 5, 9, 8, 7, 10], [3, 6, 6, 8, 8, 8, 10]),
        ([10, 0, 2], [4, 4, 4]),
    )
    for raw, expected in cases:
        y = np.array(raw)
        y_ = np.array(expected)
        assert_array_equal(y_, isotonic_regression(y))

    # From here on ``y`` is the last case above ([10, 0, 2]).
    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    via_fit = ir.fit(x, y).transform(x)
    via_fit_transform = ir.fit_transform(x, y)
    assert_array_equal(via_fit, via_fit_transform)
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    shuffled_fit = ir.fit_transform(x[perm], y[perm])
    reference_fit = ir.fit_transform(x, y)[perm]
    assert_array_equal(shuffled_fit, reference_fit)
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    constant_x = np.ones(len(x))
    assert_array_equal(ir.fit_transform(constant_x, y), np.mean(y))
class HTMLTime(object):
    """Map a frame number to a timestamp using an ``.idx`` file.

    The ``.idx`` file is read as a whitespace-separated table with the
    columns ``frame_number``, ``frame_type``, ``bytes`` and ``seconds``.
    An isotonic regression from frame number to seconds is fitted on it.

    >>> htmlTime = HTMLTime(pathToIDX)
    >>> t = htmlTime(frameNumber)
    """

    def __init__(self, idx):
        super(HTMLTime, self).__init__()
        self.idx = idx

        # load .idx file using pandas (raw string: ``\s`` is a regex escape,
        # not a Python string escape)
        df = read_table(
            self.idx,
            sep=r'\s+',
            names=['frame_number', 'frame_type', 'bytes', 'seconds'],
        )
        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from NumPy; the builtin gives the same dtype.
        x = np.array(df['frame_number'], dtype=float)
        y = np.array(df['seconds'], dtype=float)

        # train isotonic regression
        self.ir = IsotonicRegression(y_min=np.min(y), y_max=np.max(y))
        self.ir.fit(x, y)

        # frame number support
        self.xmin = np.min(x)
        self.xmax = np.max(x)

    def __call__(self, frameNumber):
        """Return the fitted time for ``frameNumber``, clamped to the known frame range."""
        clamped = min(self.xmax, max(self.xmin, frameNumber))
        return self.ir.transform([clamped])[0]
def test_isotonic_regression_ties_secondary_():
    """
    Test isotonic regression fit, transform and fit_transform
    against the "secondary" ties method and "pituitary" data from R
    "isotone" package, as detailed in: J. d. Leeuw, K. Hornik, P. Mair,
    Isotone Optimization in R: Pool-Adjacent-Violators Algorithm
    (PAVA) and Active Set Methods

    Set values based on pituitary example and
    the following R command detailed in the paper above:
    > library("isotone")
    > data("pituitary")
    > res1 <- gpava(pituitary$age, pituitary$size, ties="secondary")
    > res1$x

    `isotone` version: 1.0-2, 2014-09-07
    R version: R version 3.1.1 (2014-07-10)
    """
    x = [8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14]
    y = [21, 23.5, 23, 24, 21, 25, 21.5, 22, 19, 23.5, 25]
    # Reference solution: nine pooled values followed by two.
    y_true = [22.22222] * 9 + [24.25] * 2

    # Check fit, transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    transformed = ir.transform(x)
    assert_array_almost_equal(transformed, y_true, 4)
    fit_transformed = ir.fit_transform(x, y)
    assert_array_almost_equal(fit_transformed, y_true, 4)
def test_fast_predict():
    """Out-of-sample predictions must not change with the faster prediction path."""
    # test that the faster prediction change doesn't
    # affect out-of-sample predictions:
    # https://github.com/scikit-learn/scikit-learn/pull/6206
    rng = np.random.RandomState(123)
    n_samples = 10 ** 3

    # X values over the -10,10 range
    X_train = 20.0 * rng.rand(n_samples) - 10
    draws = rng.rand(n_samples)
    y_train = np.less(draws, expit(X_train)).astype('int64').astype('float64')

    weights = rng.rand(n_samples)
    # we also want to test that everything still works when some weights are 0
    zeroed = rng.rand(n_samples) < 0.1
    weights[zeroed] = 0

    model_kwargs = dict(y_min=0, y_max=1, out_of_bounds="clip")
    slow_model = IsotonicRegression(**model_kwargs)
    fast_model = IsotonicRegression(**model_kwargs)

    # Build interpolation function with ALL input data, not just the
    # non-redundant subset. The following 2 lines are taken from the
    # .fit() method, without removing unnecessary points
    X_train_fit, y_train_fit = slow_model._build_y(
        X_train, y_train, sample_weight=weights, trim_duplicates=False
    )
    slow_model._build_f(X_train_fit, y_train_fit)

    # fit with just the necessary data
    fast_model.fit(X_train, y_train, sample_weight=weights)

    X_test = 20.0 * rng.rand(n_samples) - 10
    y_pred_slow = slow_model.predict(X_test)
    y_pred_fast = fast_model.predict(X_test)

    assert_array_equal(y_pred_slow, y_pred_fast)
def _gspv_interpolate_cloud(powers, velocities):
    """Return a monotone piecewise-linear interpolant of velocity over power.

    An isotonic regression is fitted on the (power, velocity) cloud, sampled
    on a ``np.linspace`` grid between the smallest and largest power, and the
    samples are wrapped in a degree-1 ``InterpolatedUnivariateSpline``
    created with ``ext=3``.
    """
    from scipy.interpolate import InterpolatedUnivariateSpline
    from sklearn.isotonic import IsotonicRegression

    monotone_fit = IsotonicRegression()
    monotone_fit.fit(powers, velocities)

    grid = np.linspace(min(powers), max(powers))
    fitted = monotone_fit.predict(grid)
    return InterpolatedUnivariateSpline(grid, fitted, k=1, ext=3)
def test_isotonic_duplicate_min_entry():
    """Predictions must all be finite when the smallest x value is duplicated."""
    x, y = [0, 0, 1], [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)

    predictions = ir.predict(x)
    assert_true(np.all(np.isfinite(predictions)))
def test_isotonic_mismatched_dtype(y_dtype):
    """Predictions keep the dtype of X when y was given with a different dtype."""
    # regression test for #15004
    # check that data are converted when X and y dtype differ
    y = np.array([2, 1, 4, 3, 5], dtype=y_dtype)
    X = np.arange(len(y), dtype=np.float32)

    reg = IsotonicRegression()
    reg.fit(X, y)

    predicted = reg.predict(X)
    assert predicted.dtype == X.dtype
def test_isotonic_duplicate_min_entry():
    """Fitting with a duplicated minimum x must not yield non-finite predictions."""
    x = [0, 0, 1]
    y = [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)

    finite_mask = np.isfinite(ir.predict(x))
    assert np.all(finite_mask)
def predict_probs(model, train_class, train_features, test_features,
                  normalize_probs=None):
    """
    Fit a given binary classification model to training sample features
    and return predicted probabilities for the positive class for
    the training and test samples.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict_proba(X)``
    train_class : numpy array of 0/1 training labels (indexed with an
        integer array in the "ROCSlope" branch)
    train_features, test_features : feature sets passed to ``model``
    normalize_probs : None, "ROCSlope", "LogShift" or "IsoReg"
        Optional post-processing applied to both probability vectors.

    Returns
    -------
    (train_prob, test_prob) : tuple of arrays

    Any other ``normalize_probs`` value terminates via ``sys.exit`` with an
    explanatory message.
    """
    model.fit(train_features, train_class)
    train_prob, test_prob = [model.predict_proba(f)[:, 1]
                             for f in (train_features, test_features)]

    if normalize_probs == "ROCSlope":
        # calibrate probabilities based on the estimated local slope
        # of the ROC curve
        chunk_size = 10  # number of instances for slope estimation
        n_train_pos = 301  # total number of positive (preictal) instances
        n_train_neg = 3766  # total negative (interictal)
        n_chunk_tot = 4000.0 / float(chunk_size)  # estimated total in test data

        # sort training data classes by predicted probability
        sort_order = train_prob.argsort()
        p_sorted = train_prob[sort_order]
        c_sorted = train_class[sort_order]

        # loop over chunks; floor division keeps the chunk count an int
        # (true division yields a float, which ``range`` rejects)
        n_chunks = 1 + (len(train_prob) - 1) // chunk_size
        for i_ch in range(n_chunks):
            lo = i_ch * chunk_size
            hi = lo + chunk_size
            p_chunk = p_sorted[lo:hi]
            c_chunk = c_sorted[lo:hi]
            pmin = np.min(p_chunk)
            pmax = np.max(p_chunk)

            # compute TPR/FPR (relative to the entire training set)
            tpr = np.sum(c_chunk) / float(n_train_pos)
            fpr = np.sum(1 - c_chunk) / float(n_train_neg)

            # compute probability transformation for this chunk
            qc = (2.0 / np.pi) * np.arctan(
                tpr / (fpr + 1.0e-3 / float(n_train_neg)))
            qmin = np.max((0.0, qc - 0.5 / float(n_chunk_tot)))
            qmax = np.min((1.0, qc + 0.5 / float(n_chunk_tot)))

            # transform probabilities (in place, as before)
            scale = (qmax - qmin) / (pmax - pmin)
            tr_p_ch = np.where((train_prob > pmin) & (train_prob <= pmax))[0]
            train_prob[tr_p_ch] = qmin + (train_prob[tr_p_ch] - pmin) * scale
            te_p_ch = np.where((test_prob > pmin) & (test_prob <= pmax))[0]
            test_prob[te_p_ch] = qmin + (test_prob[te_p_ch] - pmin) * scale

    elif normalize_probs == "LogShift":
        # shift probabilities in log(p/(1-p)) so that a fraction f_pre
        # of the samples has probability > 0.5, where f_pre is the
        # fraction of preictal samples in the training data
        f_pre = len(np.where(train_class)[0]) / float(len(train_class))
        # Clamp the index so f_pre == 0 picks the largest probability
        # instead of indexing one past the end.
        train_th, test_th = [
            sorted(p)[min(int((1.0 - f_pre) * len(p)), len(p) - 1)]
            for p in (train_prob, test_prob)
        ]
        train_prob, test_prob = [
            (1.0 - pth) * p / (pth + p - 2.0 * pth * p)
            for (pth, p) in zip((train_th, test_th), (train_prob, test_prob))
        ]

    elif normalize_probs == "IsoReg":
        # fit an isotonic regression model to training probabilities
        # and use the model to transform all probabilities
        prob_model = IsotonicRegression(out_of_bounds="clip")
        prob_model.fit(train_prob, train_class)
        train_prob, test_prob = [prob_model.transform(p)
                                 for p in (train_prob, test_prob)]

    elif normalize_probs is not None:
        # sys.exit accepts a single argument; build one message string.
        sys.exit("Invalid value of normalize_probs: " + str(normalize_probs))

    return (train_prob, test_prob)
def calibrate_row(row):
    """Calibrate column ``row`` of ``lab``, ``amb``, ``unl`` and ``scr`` in place.

    An isotonic regression bounded to [0, 1] is fitted on the non-NaN
    entries of ``lab[row]`` against ``lab['labels']``, then each of the four
    tables has its ``row`` column replaced by the calibrated values.
    """
    observed = ~np.isnan(lab[row])
    labelled = lab[observed]
    x = labelled[row].values
    y = labelled['labels'].values

    calibrator = IsotonicRegression(y_min=0, y_max=1)
    calibrator.fit(x, y)

    # Same order as the original assignments: lab, amb, unl, scr.
    for table in (lab, amb, unl, scr):
        table[row] = calibrator.predict(table[row].values)
def do_cv_pred(train, test, files):
    """Run 5-fold stratified isotonic regression and write submission/OOF files.

    The feature columns are the second element of each entry in ``files``.
    Folds are stratified on an outlier flag (``target < -30``). Test
    predictions are averaged over the folds and written to
    ``path_const.OUTPUT_SUB``; out-of-fold predictions are clipped and written
    to ``path_const.OUTPUT_OOF``. Per-fold and overall RMSE are printed.
    """
    print("------- do preds --------")
    ensemble_col = [entry[1] for entry in files]
    train_x = train[ensemble_col]
    test_x = test[ensemble_col].values.reshape(-1)
    train_y = train["target"]

    submission = pd.DataFrame()
    submission["card_id"] = test["card_id"]
    submission["target"] = 0

    outliers = (train["target"] < -30).astype(int).values
    split_num = 5
    skf = model_selection.StratifiedKFold(
        n_splits=split_num, shuffle=True, random_state=4590
    )

    train_preds = []
    for fit_rows, held_out_rows in skf.split(train, outliers):
        X_train = train_x.iloc[fit_rows].values.reshape(-1)
        X_test = train_x.iloc[held_out_rows].values.reshape(-1)
        y_train = train_y.iloc[fit_rows]
        y_test = train_y.iloc[held_out_rows]

        reg = IsotonicRegression()
        reg.fit(X_train, y_train)
        valid_set_pred = reg.predict(X_test)

        # Fold diagnostics: target summary, prediction summary, RMSE.
        print(y_test.describe())
        print(pd.DataFrame(valid_set_pred).describe())
        print(evaluator.rmse(y_test, valid_set_pred))

        # Accumulate test predictions; averaged after the loop.
        submission["target"] = submission["target"] + reg.predict(test_x)

        fold_frame = pd.DataFrame()
        fold_frame["card_id"] = train.iloc[held_out_rows]["card_id"]
        fold_frame["cv_pred"] = valid_set_pred
        train_preds.append(fold_frame)

    train_output = pd.concat(train_preds, axis=0)

    submission["target"] = submission["target"] / split_num
    submission.to_csv(path_const.OUTPUT_SUB, index=False)

    train_output["cv_pred"] = np.clip(train_output["cv_pred"], -33.219281, 18.0)
    train_output.to_csv(path_const.OUTPUT_OOF, index=False)

    df_pred = pd.merge(train[["card_id", "target"]], train_output, on="card_id")
    print(evaluator.rmse(df_pred["target"], df_pred["cv_pred"]))
def isotonic(self):
    """Fit an isotonic regression on the training data and predict the test set.

    ``self.train_x``, ``self.train_y`` and ``self.test_x`` are converted with
    ``to_list()`` before being handed to the regressor.
    """
    features = self.train_x.to_list()
    targets = self.train_y.to_list()
    queries = self.test_x.to_list()

    regressor = IsotonicRegression()
    regressor.fit(features, targets)
    return regressor.predict(queries)
def fit(self, A, Y, weights, fit_init=None, refit=False, increasing=True):
    """Fit a clipped, [0, 1]-bounded isotonic regression of ``Y`` on ``A``.

    The fitted model is stored on ``self.model_obj``. ``fit_init`` and
    ``refit`` are accepted but not used by this implementation.

    Returns
    -------
    int
        Always ``0``.
    """
    # fit isotonic regression model.
    settings = dict(
        increasing=increasing,
        out_of_bounds="clip",
        y_min=0.0,
        y_max=1.0,
    )
    self.model_obj = IsotonicRegression(**settings)
    self.model_obj.fit(X=A, y=Y, sample_weight=weights)
    return 0
def linear_regression(self, exp1, exp2, min_samples=5):
    """Robustly regress ``exp1`` values on ``exp2`` values over their shared keys.

    Keys present in both mappings are visited in sorted order. With fewer
    than ``min_samples`` shared keys the result is ``(0, 0, False)``.
    Otherwise an isotonic fit over the keys is used to pick a starting index,
    the leading samples before it are dropped, and a ``HuberRegressor`` is
    fitted on the remainder.

    Returns
    -------
    (rscore, slope, warning)
        ``rscore`` and ``slope`` are 0 when too few inliers remain;
        ``warning`` is the first value returned by ``check_increasing``.
    """
    shared = [key for key in sorted(exp1) if key in exp2]
    X = np.r_[[exp2[key] for key in shared]]
    Y = np.r_[[exp1[key] for key in shared]]
    Xi = np.r_[shared]

    if X.size < min_samples:
        return 0, 0, False

    # clean the inputs by isotonic regression
    warning, increasing_bool = check_increasing(Xi, Y)
    iso = IsotonicRegression(increasing=increasing_bool)
    iso.fit(Xi, Y)
    fitted = iso.predict(Xi)

    # Indices where the fit decreases, grouped into runs of consecutive indices.
    vi = np.where(np.diff(fitted) < 0)[0]
    pieces = np.split(vi, np.where(np.diff(vi) != 1)[0] + 1)

    si = 0
    for current, following in zip(pieces, pieces[1:]):
        if current.size / (following[0] - current[0]) > 0.5:
            si = current[0]
            break

    # Fall back when the chosen start would discard too much data.
    if si / X.size > 0.3:
        si = vi[0]
    if si / X.size > 0.3:
        si = 0

    X = X[si:][:, np.newaxis]
    Y = Y[si:]

    huber = HuberRegressor().fit(X, Y)
    inlier_mask = np.logical_not(huber.outliers_)
    if inlier_mask.sum() < min_samples:
        return 0, 0, warning

    rscore = huber.score(X[inlier_mask], Y[inlier_mask])
    slope = huber.coef_[0]
    return rscore, slope, warning
def train_rcir_cv(training_class, training_scores, validation_class,
                  validation_scores, credible_level=.95, y_min=0, y_max=1,
                  merge_criterion='auc_roc'):
    """Train a sequence of bin-merged isotonic models and return the best one.

    An isotonic regression is fitted on the training data and converted to an
    ``interp1d`` model. Bins are then merged one step at a time with
    ``merge_bin`` until at most two remain. Every intermediate model is scored
    on the validation data with ``estimate_performance``, and the model with
    the highest ``'auc_roc'`` metric is returned (the earliest one on ties).
    """
    base_model = IsotonicRegression(y_min=y_min, y_max=y_max,
                                    out_of_bounds='clip')
    base_model.fit(X=training_scores, y=training_class)

    # Extract the interpolation model we need and apply corrections (if any).
    corrected = correct_for_point_bins(base_model.f_.x, base_model.f_.y)
    x = corrected['x']
    y = corrected['y']

    # Use new boundaries to create an interpolation model that does the heavy
    # lifting of reliably calibrated isotonic regression:
    interpolation_model = interp1d(x=x, y=y, bounds_error=False)
    interpolation_model._fill_value_below = min(y)
    interpolation_model._fill_value_above = max(y)
    training_probabilities = interpolation_model(training_scores)

    # The following array contains all information defining the IR transformation
    bin_summary = np.unique(training_probabilities, return_counts=True)
    credible_intervals = [
        credible_interval(np.round(p * n), n)
        for (p, n) in zip(bin_summary[0], bin_summary[1])
    ]
    width_of_intervals = np.array(
        [row['p_max'] - row['p_min'] for row in credible_intervals])

    rcir_model = {
        'model': interpolation_model,
        'credible level': credible_level,
        'credible intervals': credible_intervals,
        'width of intervals': width_of_intervals,
        'bin summary': bin_summary,
        'd': -1
    }

    models = []
    metrics = estimate_performance(rcir_model['model'], validation_class,
                                   validation_scores)
    models.append([0, rcir_model['model'], metrics])

    # Keep merging while there are still bins to merge.
    while len(rcir_model['width of intervals']) > 2:
        rcir_model = merge_bin(rcir_model, training_class, training_scores,
                               merge_criterion)
        metrics = estimate_performance(rcir_model['model'], validation_class,
                                       validation_scores)
        models.append([0, rcir_model['model'], metrics])

    auc_values = [entry[2]['auc_roc'] for entry in models]
    best_model_idx = auc_values.index(max(auc_values))
    return models[best_model_idx][1]
def test_isotonic_regression_oob_raise():
    """``out_of_bounds="raise"`` must raise ValueError for out-of-range inputs."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    outside_range = [min(x) - 10, max(x) + 10]
    assert_raises(ValueError, ir.predict, outside_range)
def fit(self, X, y):
    """Fit ``self.n_paloboost`` PaloBoost models on random subsamples of the data.

    Each round draws a boolean mask (per block when ``self.block_size`` is
    set, otherwise per row), fits a ``PaloBoost`` on the masked rows and, for
    calibrated Bernoulli models, fits an isotonic calibrator on the remaining
    rows. Feature importances are averaged over all rounds.

    Returns ``None``. For the Bernoulli distribution, an error is logged and
    nothing is fitted when fewer than three examples of either class exist.

    NOTE(review): a round whose split leaves one side with a single class is
    retried without a limit; on extreme data this loop may not terminate.
    """
    np.random.seed(self.random_state)
    n = X.shape[0]

    # Reset all fitted state together so a refit does not accumulate
    # calibrators or importances from an earlier call.
    self.estimators = []
    self.calibrators = []
    self.feature_importances_ = None

    is_bernoulli = self.distribution == "bernoulli"
    if is_bernoulli and (np.sum(y) < 3 or np.sum(y) > n - 3):
        logging.error(("the target (y) needs to have "
                       "at least three examples on each class"))
        return None

    i = 0
    while i < self.n_paloboost:
        if self.block_size is not None:
            # Draw one decision per block and expand it to row level.
            n_block = int(n / self.block_size) + 1
            mask_block = (np.random.rand(n_block) < self.subsample0)
            mask = np.repeat(mask_block, self.block_size)[:n]
        else:
            mask = (np.random.rand(n) < self.subsample0)

        X_i, y_i = X[mask, :], y[mask]
        X_j, y_j = X[~mask, :], y[~mask]

        # Both sides need both classes; otherwise draw a new mask.
        if is_bernoulli and (np.unique(y_i).shape[0] == 1
                             or np.unique(y_j).shape[0] == 1):
            continue

        est = PaloBoost(distribution=self.distribution,
                        learning_rate=self.learning_rate,
                        max_depth=self.max_depth,
                        n_estimators=self.n_estimators,
                        subsample=self.subsample1,
                        subsample_splts=self.subsample2,
                        random_state=i * self.n_estimators)
        est.fit(X_i, y_i)
        self.estimators.append(est)

        if self.feature_importances_ is None:
            # Copy so the in-place updates below never write into the
            # first estimator's own importance array.
            self.feature_importances_ = np.copy(est.feature_importances_)
        else:
            self.feature_importances_ += est.feature_importances_

        if is_bernoulli and self.calibrate:
            z_j = est.predict_proba(X_j)[:, 1]
            clb = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
            clb.fit(z_j, y_j)
            self.calibrators.append(clb)

        i += 1

    self.feature_importances_ /= self.n_paloboost
def test_isotonic_regression_ties_max():
    """Tied x values at the maximum are pooled to a common fitted value."""
    # Setup examples with ties on maximum
    x = [1, 2, 3, 4, 5, 5]
    y = [1, 2, 3, 4, 5, 6]
    y_true = [1, 2, 3, 4, 5.5, 5.5]

    # Check that we get identical results for fit/transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    two_step = ir.fit(x, y).transform(x)
    one_step = ir.fit_transform(x, y)
    assert_array_equal(two_step, one_step)
    assert_array_equal(y_true, ir.fit_transform(x, y))
def isotonicFit(thr, prec, maxThr=999):
    """Fit an isotonic regression of precision on threshold.

    Only samples with ``thr <= maxThr`` are used, and the fitted values are
    bounded to [0, 1].

    Returns
    -------
    (predict, "isotonic")
        ``predict`` is a callable mapping thresholds to fitted precision;
        the second element is the literal model name.
    """
    thr = np.array(thr)
    prec = np.array(prec)

    keep = thr <= maxThr
    prec = prec[keep]
    thr = thr[keep]

    isoReg = IsotonicRegression(y_min=0, y_max=1)
    isoReg.fit(thr, prec)
    return lambda x: isoReg.predict(x), "isotonic"
def test_isotonic_regression_pickle():
    """A fitted model must predict identically after a pickle round trip."""
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    restored = pickle.loads(pickle.dumps(ir, pickle.HIGHEST_PROTOCOL))
    np.testing.assert_array_equal(ir.predict(x), restored.predict(x))
def test_isotonic_regression_oob_bad():
    """An unknown ``out_of_bounds`` value must make ``predict`` raise ValueError."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz")
    ir.fit(x, y)

    # Make sure that we throw an error for bad out_of_bounds value
    queries = [min(x)-10, max(x)+10]
    assert_raises(ValueError, ir.predict, queries)
def isotonic_calibration_learner(df: pd.DataFrame,
                                 target_column: str = "target",
                                 prediction_column: str = "prediction",
                                 output_column: str = "calibrated_prediction",
                                 y_min: float = 0.0,
                                 y_max: float = 1.0) -> LearnerReturnType:
    """
    Fits a single feature isotonic regression to the dataset.

    Parameters
    ----------

    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
        The model will be trained to predict the target column
        from the features.

    target_column : str
        The name of the column in `df` that should be used as target for the model.
        This column should be binary, since this is a classification model.

    prediction_column : str
        The name of the column with the uncalibrated predictions from the model.

    output_column : str
        The name of the column with the calibrated predictions from the model.

    y_min: float
        Lower bound of Isotonic Regression

    y_max: float
        Upper bound of Isotonic Regression

    Returns
    -------
    A tuple of the prediction function, ``df`` with the calibrated column
    added, and a log dictionary describing the training run.
    """
    calibrator = IsotonicRegression(y_min=y_min, y_max=y_max, out_of_bounds='clip')
    calibrator.fit(df[prediction_column], df[target_column])

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        calibrated = calibrator.predict(new_df[prediction_column])
        return new_df.assign(**{output_column: calibrated})

    p.__doc__ = learner_pred_fn_docstring("isotonic_calibration_learner")

    learner_info = {
        'output_column': output_column,
        'target_column': target_column,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sklearn.__version__,
        'training_samples': len(df),
    }
    log = {'isotonic_calibration_learner': learner_info, 'object': calibrator}

    return p, p(df), log
def test_isotonic_regression_oob_bad():
    """Predicting with an invalid ``out_of_bounds`` setting raises ValueError."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz")
    ir.fit(x, y)

    # Make sure that we throw an error for bad out_of_bounds value
    below, above = min(x) - 10, max(x) + 10
    assert_raises(ValueError, ir.predict, [below, above])
def fit(self, p_input, y):
    """Fit the calibrator selected by ``self.method`` on ``(p_input, y)``.

    ``'isotonic'`` uses a clipped ``IsotonicRegression``; ``'sigmoid'`` uses
    ``_SigmoidCalibration`` and also copies its ``a_`` / ``b_`` coefficients
    to ``self.a`` / ``self.b``. The fitted calibrator is stored on
    ``self.calibrator``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.method`` is neither ``'isotonic'`` nor ``'sigmoid'``.
    """
    if self.method == 'isotonic':
        calibrator = IsotonicRegression(out_of_bounds='clip')
    elif self.method == 'sigmoid':
        calibrator = _SigmoidCalibration()
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "method must be 'isotonic' or 'sigmoid'; got %r" % (self.method,)
        )

    calibrator.fit(p_input, y)

    if self.method == 'sigmoid':
        self.a = calibrator.a_
        self.b = calibrator.b_
    self.calibrator = calibrator
    return self
def test_isotonic_regression_oob_bad():
    """``fit`` must reject an invalid ``out_of_bounds`` value with a clear message."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing="auto", out_of_bounds="xyz")

    # Make sure that we throw an error for bad out_of_bounds value
    expected_message = (
        "The argument ``out_of_bounds`` must be in 'nan', 'clip', 'raise'; got xyz"
    )
    with pytest.raises(ValueError, match=expected_message):
        ir.fit(x, y)
def isotonicFit(thr, prec, maxThr=999):
    """Fit an isotonic regression of precision on threshold, optionally saving it.

    Only samples with ``thr <= maxThr`` are used, and the fitted values are
    bounded to [0, 1]. When the module-level ``save_isotonic_modelFname`` is
    truthy, the fitted model is dumped there with ``joblib``.

    Returns
    -------
    (predict, "isotonic")
        ``predict`` is a callable mapping thresholds to fitted precision;
        the second element is the literal model name.
    """
    thr = np.array(thr)
    prec = np.array(prec)

    keep = thr <= maxThr
    prec = prec[keep]
    thr = thr[keep]

    isoReg = IsotonicRegression(y_min=0, y_max=1)
    isoReg.fit(thr, prec)

    if save_isotonic_modelFname:
        joblib.dump(isoReg, save_isotonic_modelFname)
    return lambda x: isoReg.predict(x), "isotonic"
def test_isotonic_regression_oob_bad_after():
    """An ``out_of_bounds`` value corrupted after fitting is rejected by ``transform``."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Make sure that we throw an error for bad out_of_bounds value in transform
    ir.out_of_bounds = "xyz"
    assert_raises(ValueError, ir.transform, x)
def test_isotonic_regression_oob_nan():
    """``out_of_bounds="nan"`` yields NaN for queries outside the training range."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    queries = [min(x) - 10, max(x) + 10]
    y1 = ir.predict(queries)
    nan_count = sum(np.isnan(y1))
    assert nan_count == 2
def test_isotonic_regression_oob_nan():
    """Both out-of-range queries must come back as NaN with ``out_of_bounds="nan"``."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    below, above = min(x) - 10, max(x) + 10
    y1 = ir.predict([below, above])
    assert_equal(sum(np.isnan(y1)), 2)
class RankScoreIsoRegression():
    """
    Pairwise rank classifier followed by an isotonic regression on its scores.

    References
    ----------
    `Predicting Self-reported Customer Satisfaction of Interactions with a
    Corporate Call Center <http://ecmlpkdd2017.ijs.si/papers/paperID598.pdf>`,
    """

    def __init__(self, mask_size=100, pr_args=None, ir_args=None):
        """
        Parameters
        ----------
        mask_size : int, (default=100)
            Length of the mask for smoothing rank scores
        pr_args : dict or None, (default=None, meaning {})
            Keyword arguments to PairwiseRankClf constructor
        ir_args : dict or None, (default=None, meaning {"out_of_bounds":"clip"})
            Keyword arguments to IsotonicRegression constructor
        """
        # Build fresh dicts per instance: a dict literal in the signature
        # would be one shared object stored on every instance.
        if pr_args is None:
            pr_args = {}
        if ir_args is None:
            ir_args = {"out_of_bounds": "clip"}

        self.ir_args = ir_args
        self.pr_args = pr_args
        self.pr_clf = PairwiseRankClf(**pr_args)
        self.ir_model = IsotonicRegression(**ir_args)
        self.mask_size = mask_size

    def fit(self, X, y):
        """Fit the rank classifier, then the isotonic model on its scores.

        With ``mask_size=None`` the raw rank scores and ``y`` are used.
        Otherwise both are sorted by rank score and smoothed with a
        moving average of length ``mask_size`` before fitting.
        """
        self.pr_clf.fit(X, y)
        rank_scores = self.pr_clf.decision_function(X)

        if self.mask_size is None:
            self.ir_model.fit(rank_scores, y)
            return self

        mask = 1 + np.zeros((self.mask_size, ))
        idx = np.argsort(rank_scores)
        rank_scores_ordered = rank_scores[idx]
        y_ordered = y[idx]

        # Moving average over windows of ``mask_size`` consecutive samples.
        rank_scores_smoothed = np.convolve(
            rank_scores_ordered, mask, mode="valid") / float(mask.size)
        y_smoothed = np.convolve(
            y_ordered, mask, mode="valid") / float(mask.size)
        self.ir_model.fit(rank_scores_smoothed, y_smoothed)
        return self

    def rank_scores(self, X):
        """Return the rank classifier's decision-function scores for ``X``."""
        return self.pr_clf.decision_function(X)

    def predict(self, X):
        """Return isotonic-regression predictions for the rank scores of ``X``."""
        return self.ir_model.predict(self.rank_scores(X))
def test_isotonic_regression_oob_raise():
    """``out_of_bounds="raise"`` reports queries below the interpolation range."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    queries = [min(x) - 10, max(x) + 10]
    msg = 'A value in x_new is below the interpolation range'
    with pytest.raises(ValueError, match=msg):
        ir.predict(queries)
def biasCorrection(model, tune, target):
    """Fit an isotonic regression that calibrates a pre-trained model's output.

    :param model: The pre-trained model to calibrate
    :param tune: A Pandas dataframe with the data to use for calibration
    :param target: True values; its ``.values`` attribute is used for fitting
    :return: The fitted isotonic regression (predictions outside the fitted
        range are clipped)
    """
    raw_predictions = model.predict(tune)
    calibrator = IsotonicRegression(out_of_bounds='clip')
    calibrator.fit(raw_predictions, target.values)
    return calibrator
class RC30(ClassifierMixin, BaseEstimator):
    """Random forest whose positive-class probability is calibrated on a hold-out split.

    ``ctype`` selects the calibrator: ``"logistic"`` (a ``LogisticRegression``
    with ``C=1e20``) or ``"isotonic"`` (an ``IsotonicRegression`` bounded to
    [0, 1] with clipping).
    """

    _CTYPES = ("logistic", "isotonic")

    def __init__(self, n_estimators=30, max_depth=3, min_samples_split=2,
                 min_samples_leaf=1, ctype="isotonic"):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.ctype = ctype

    def _check_ctype(self):
        """Raise ValueError when ``ctype`` is not a supported calibrator name."""
        if self.ctype not in self._CTYPES:
            raise ValueError(
                "ctype must be one of %r; got %r" % (self._CTYPES, self.ctype)
            )

    def fit(self, X, y):
        """Fit the forest on 70% of the data and the calibrator on the other 30%.

        Returns ``self``. Raises ValueError for an unsupported ``ctype``.
        """
        # Validate up front: otherwise an unknown ctype would fit the
        # forest, mark the estimator fitted, and leave no calibrator.
        self._check_ctype()
        X, y = check_X_y(X, y)
        self.model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf)
        if self.ctype == "logistic":
            self.calibrator = LogisticRegression(C=1e20, solver="lbfgs")
        else:
            self.calibrator = IsotonicRegression(y_min=0, y_max=1,
                                                 out_of_bounds="clip")

        X0, X1, y0, y1 = train_test_split(X, y, test_size=0.3)
        self.model.fit(X0, y0)

        if self.ctype == "logistic":
            # LogisticRegression needs a 2-D feature matrix: keep the column axis.
            y_est = self.model.predict_proba(X1)[:, [1]]
        else:
            # IsotonicRegression takes a 1-D input.
            y_est = self.model.predict_proba(X1)[:, 1]
        self.calibrator.fit(y_est, y1)

        self.is_fitted_ = True
        return self

    def predict_proba(self, X):
        """Return calibrated class probabilities as an ``(n_samples, 2)`` array.

        Raises ValueError for an unsupported ``ctype`` instead of returning None.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')
        self._check_ctype()

        if self.ctype == "logistic":
            return self.calibrator.predict_proba(
                self.model.predict_proba(X)[:, [1]])

        n = X.shape[0]
        y = np.zeros((n, 2))
        y[:, 1] = self.calibrator.predict(
            self.model.predict_proba(X)[:, 1])
        y[:, 0] = 1 - y[:, 1]
        return y
class IsotonicCalibrator(BaseEstimator, TransformerMixin):
    """
    Calculates a likelihood ratio of a score value, provided it is from one of
    two distributions. Uses isotonic regression for interpolation.
    """

    def __init__(self, add_one=False, add_misleading=0):
        """
        Arguments:
            add_one: deprecated (same as add_misleading=1)
            add_misleading: int: add misleading data points on both sides (default: 0)
        """
        extra = 0
        if add_one:
            warnings.warn(
                'parameter `add_one` is deprecated; use `add_misleading=1` instead'
            )
            extra = 1

        self.add_misleading = extra + add_misleading
        self._ir = IsotonicRegression()

    def fit(self, X, y, **fit_params):
        """Fit the isotonic model with class-balancing sample weights.

        ``fit_params`` may override the number of misleading points through
        ``add_misleading`` or the deprecated ``add_one``; ``add_misleading``
        takes precedence when both are given.
        """
        # prevent extreme LRs
        if 'add_misleading' in fit_params:
            n_misleading = fit_params['add_misleading']
        elif 'add_one' in fit_params:
            warnings.warn(
                'parameter `add_one` is deprecated; use `add_misleading=1` instead'
            )
            n_misleading = 1 if fit_params['add_one'] else 0
        else:
            n_misleading = self.add_misleading

        if n_misleading > 0:
            # Label-0 points just above the largest score and label-1 points
            # just below the smallest score.
            high_side = np.ones(n_misleading) * (X.max() + 1)
            low_side = np.ones(n_misleading) * (X.min() - 1)
            X = np.concatenate([X, high_side, low_side])
            y = np.concatenate(
                [y, np.zeros(n_misleading), np.ones(n_misleading)])

        # Weight each class by the other class's share of the data.
        prior = np.sum(y) / y.size
        weight = y * (1 - prior) + (1 - y) * prior
        self._ir.fit(X, y, sample_weight=weight)

        return self

    def transform(self, X):
        """Return odds for ``X``; also stores ``self.p1`` and ``self.p0``."""
        posterior = self._ir.transform(X)
        self.p1 = posterior
        self.p0 = 1 - self.p1
        return to_odds(self.p1)
class IsotonicCalibration(BaseEstimator, TransformerMixin):
    """
    Fit an isotonic regression model on observations: y_pred -> y_target
    """

    def __init__(self):
        # Predictions outside the fitted range are clipped.
        self.calibration = IsotonicRegression(out_of_bounds="clip")

    def fit(self, y_pred: pd.Series, y_true: pd.Series):
        """Fit the calibration model on predictions and true targets; return ``self``."""
        model = self.calibration
        model.fit(y_pred, y_true)
        return self

    def transform(self, y_pred):
        """Return the calibrated values for ``y_pred``."""
        calibrated = self.calibration.transform(y_pred)
        return calibrated
def bootstrap_calibrate_prob(labels, weights, probs, n_calibrations=30,
                             threshold=0., symmetrize=False):
    """
    Bootstrap isotonic calibration (borrowed from tata-antares/tagging_LHCb):
     * randomly divide data into train-test
     * on train isotonic is fitted and applied to test
     * on test using calibrated probs p(B+) D2 and auc are calculated

    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param n_calibrations: int, number of random train/test repetitions
    :param threshold: float, to set labels 0/1
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-

    :return: D2 array and auc array
    """
    import numpy as np
    from sklearn.isotonic import IsotonicRegression
    from sklearn.metrics import roc_auc_score
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split

    aucs = []
    D2_array = []
    labels = (labels > threshold) * 1

    for _ in range(n_calibrations):
        (train_probs, test_probs,
         train_labels, test_labels,
         train_weights, test_weights) = train_test_split(
            probs, labels, weights, train_size=0.5)

        iso_reg = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if symmetrize:
            # Fit on the sample plus its mirror image (1 - p, flipped label).
            iso_reg.fit(np.r_[train_probs, 1 - train_probs],
                        np.r_[train_labels > 0, train_labels <= 0],
                        np.r_[train_weights, train_weights])
        else:
            iso_reg.fit(train_probs, train_labels, train_weights)

        probs_calib = iso_reg.transform(test_probs)
        alpha = (1 - 2 * probs_calib)**2
        # AUC is computed on the uncalibrated test probabilities.
        aucs.append(
            roc_auc_score(test_labels, test_probs, sample_weight=test_weights))
        D2_array.append(np.average(alpha, weights=test_weights))

    return np.array(D2_array), np.array(aucs)
def test_isotonic_regression_oob_clip():
    """With ``out_of_bounds="clip"`` out-of-range queries map to the fitted extremes."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    outside = [min(x) - 10, max(x) + 10]
    y1 = ir.predict(outside)
    y2 = ir.predict(x)
    assert max(y1) == max(y2)
    assert min(y1) == min(y2)
def test_isotonic_regression_oob_clip():
    """Clipped out-of-range predictions share min and max with in-range predictions."""
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    below, above = min(x) - 10, max(x) + 10
    y1 = ir.predict([below, above])
    y2 = ir.predict(x)
    assert_equal(max(y1), max(y2))
    assert_equal(min(y1), min(y2))
def bootstrap_calibrate_prob(labels, weights, probs, n_calibrations=30,
                             group_column=None, threshold=0.,
                             symmetrize=False, plot=False):
    """
    Bootstrap isotonic calibration:
     * randomly divide data into train-test
     * on train isotonic is fitted and applied to test
     * on test using calibrated probs p(B+) D2 and auc are calculated

    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param n_calibrations: int, number of random train/test repetitions
    :param group_column: if not None, the split is made with
        ``train_test_split_group`` using this column
    :param threshold: float, to set labels 0/1
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    :param plot: bool, show the fitted calibration curve for every repetition

    :return: D2 list and auc list
    """
    aucs = []
    D2_array = []
    labels = (labels > threshold) * 1

    for _ in range(n_calibrations):
        if group_column is None:
            split = train_test_split(probs, labels, weights, train_size=0.5)
        else:
            split = train_test_split_group(
                group_column, probs, labels, weights, train_size=0.5)
        (train_probs, test_probs,
         train_labels, test_labels,
         train_weights, test_weights) = split

        iso_est = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if not symmetrize:
            iso_est.fit(train_probs, train_labels, train_weights)
        else:
            # Each sample appears twice (original and mirrored), at half weight.
            train_weights = 0.5 * train_weights
            mirrored_probs = numpy.r_[train_probs, 1 - train_probs]
            mirrored_labels = numpy.r_[train_labels > 0, train_labels <= 0]
            doubled_weights = numpy.r_[train_weights, train_weights]
            iso_est.fit(mirrored_probs, mirrored_labels, doubled_weights)

        probs_calib = iso_est.transform(test_probs)

        if plot:
            plt.figure(1, figsize=(6, 5))
            plt.scatter(train_probs, train_labels, color='black', zorder=20)
            X_test = numpy.linspace(0.001, 0.999, 500)
            y_test = iso_est.transform(X_test)
            plt.plot(X_test, y_test, color='blue', linewidth=3)
            plt.show()

        alpha = (1 - 2 * probs_calib) ** 2
        # AUC is computed on the uncalibrated test probabilities.
        aucs.append(roc_auc_score(test_labels, test_probs,
                                  sample_weight=test_weights))
        D2_array.append(numpy.average(alpha, weights=test_weights))

    return D2_array, aucs
def train_rcir(training_class, training_scores,
               credible_level=.95, d=1, y_min=0, y_max=1,
               merge_criterion='auc_roc'):
    """Train a reliably calibrated isotonic regression (RCIR) model.

    An ordinary isotonic regression is fitted and converted to an ``interp1d``
    model. Bins are then merged with ``merge_bin`` until the widest credible
    interval is no wider than ``d``.

    Returns
    -------
    dict
        The RCIR model with keys ``'model'``, ``'credible level'``,
        ``'credible intervals'``, ``'width of intervals'``, ``'bin summary'``
        and ``'d'``.
    """
    # First, create an ordinary isotonic regression model
    base_model = IsotonicRegression(y_min=y_min, y_max=y_max,
                                    out_of_bounds='clip')
    base_model.fit(X=training_scores, y=training_class)

    # Extract the interpolation model we need and apply corrections (if any).
    corrected = correct_for_point_bins(base_model.f_.x, base_model.f_.y)
    x = corrected['x']
    y = corrected['y']

    # Use new boundaries to create an interpolation model that does the heavy
    # lifting of reliably calibrated isotonic regression:
    interpolation_model = interp1d(x=x, y=y, bounds_error=False)
    interpolation_model._fill_value_below = min(y)
    interpolation_model._fill_value_above = max(y)
    training_probabilities = interpolation_model(training_scores)

    # The following array contains all information defining the IR transformation
    bin_summary = np.unique(training_probabilities, return_counts=True)
    bin_values, bin_counts = bin_summary[0], bin_summary[1]
    credible_intervals = [
        credible_interval(np.round(p * n), n)
        for (p, n) in zip(bin_values, bin_counts)
    ]
    width_of_intervals = np.array(
        [row['p_max'] - row['p_min'] for row in credible_intervals])

    rcir_model = {
        'model': interpolation_model,
        'credible level': credible_level,
        'credible intervals': credible_intervals,
        'width of intervals': width_of_intervals,
        'bin summary': bin_summary,
        'd': d
    }

    # Merge one more bin while any interval is still wider than ``d``.
    while max(rcir_model['width of intervals']) > d:
        rcir_model = merge_bin(rcir_model, training_class, training_scores,
                               merge_criterion)

    return rcir_model
class Isotonic(Calibrator):
    """Calibrator backed by an isotonic regression clipped to [0, 1]."""

    def __init__(self):
        self.clf = IsotonicRegression(y_min=0.0, y_max=1.0, out_of_bounds='clip')

    def fit(self, y_pred, y_true):
        """Fit the isotonic model mapping `y_pred` to `y_true`.

        Both arguments go through ``Calibrator.validate`` first;
        `y_true` must not be None.
        """
        assert y_true is not None
        checked_pred, checked_true = Calibrator.validate(y_pred, y_true)
        self.clf.fit(checked_pred, checked_true)

    def predict(self, y_pred):
        """Return the calibrated values for `y_pred`."""
        checked_pred, _unused = Calibrator.validate(y_pred)
        return self.clf.predict(checked_pred)
def test_isotonic_regression():
    """Check `isotonic_regression` output and IsotonicRegression API consistency.

    Verifies a known PAVA solution, that fit/transform/fit_transform/predict
    agree with each other, and that the fit is immune to permuting the
    samples.
    """
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation; the generator is seeded so that
    # a failing permutation can be reproduced instead of depending on the
    # global NumPy random state.
    rng = np.random.RandomState(0)
    perm = rng.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
class LLRIsotonicRegression(LLR):
    """Log-likelihood ratio estimation by isotonic regression"""

    def __init__(self, equal_priors=False):
        super(LLRIsotonicRegression, self).__init__()
        self.equal_priors = equal_priors

    def fit(self, X, Y):
        """Fit an isotonic regression from scores to ratios.

        The (scores, ratios) pairs come from ``self._get_scores_ratios`` and
        the prior from ``self._get_prior``. The regression output is bounded
        by the smallest and largest training ratio.

        Returns
        -------
        self
        """
        self.prior = self._get_prior(X, Y)
        scores, ratios = self._get_scores_ratios(X, Y)

        self.ir = IsotonicRegression(y_min=np.min(ratios), y_max=np.max(ratios))
        self.ir.fit(scores, ratios)

        # Remember the range of the training scores ourselves instead of
        # relying on the regressor's `X_` attribute, which newer scikit-learn
        # releases no longer expose.
        self.x_min_ = np.min(scores)
        self.x_max_ = np.max(scores)
        return self

    def _score_support(self):
        """Return (x_min, x_max) of the scores seen during `fit`."""
        try:
            return self.x_min_, self.x_max_
        except AttributeError:
            # Instance fitted before x_min_/x_max_ were recorded: fall back
            # to the training data kept by the regressor.
            return np.min(self.ir.X_), np.max(self.ir.X_)

    def toLogLikelihoodRatio(self, scores):
        """Get log-likelihood ratio given scores

        Scores below (resp. above) the training range are mapped to the
        regression's `y_min` (resp. `y_max`); the others go through the
        fitted isotonic regression.

        Parameters
        ----------
        scores : numpy array
            Test scores

        Returns
        -------
        llr : numpy array
            Log-likelihood ratio array with same shape as input `scores`
        """
        x_min, x_max = self._score_support()

        oob_min = np.where(scores < x_min)
        oob_max = np.where(scores > x_max)
        ok = np.where((scores >= x_min) * (scores <= x_max))

        calibrated = np.zeros(scores.shape)
        calibrated[ok] = self.ir.transform(scores[ok])
        calibrated[oob_min] = self.ir.y_min
        calibrated[oob_max] = self.ir.y_max
        return calibrated
def test_isotonic_dtype():
    """Output dtype must match what `check_array` yields for the input.

    Exercised for int32/int64/float32/float64 targets combined with no
    weights, float32 weights and float64 weights, for both the
    `isotonic_regression` function and the `IsotonicRegression` estimator.
    """
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        weight_variants = (None, weights.astype(np.float32), weights)
        for sample_weight in weight_variants:
            targets = np.array(y, dtype=dtype)
            validated = check_array(targets, dtype=[np.float64, np.float32],
                                    ensure_2d=False)
            expected_dtype = validated.dtype

            # Function interface
            fitted = isotonic_regression(targets, sample_weight=sample_weight)
            assert_equal(fitted.dtype, expected_dtype)

            # Estimator interface
            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, targets, sample_weight=sample_weight)
            predicted = reg.predict(X)
            assert_equal(predicted.dtype, expected_dtype)
def calibrate_col(col):
    """Calibrate column `col` of the module-level frames with isotonic regression.

    The regression (bounded to [0, 1]) is fitted on the rows of `lab` whose
    `col` value is not NaN, using `lab['labels']` as target. The fitted model
    then replaces `col` in `lab`, `amb`, `unl` and `scr` with its predictions.

    Predictions for all four frames are computed before any column is
    overwritten. If a ValueError triggers the 1-D fallback, the fallback
    therefore always sees the original, uncalibrated values, and a failure
    never leaves the frames half-updated.
    """
    # isotonic not the best here, and faces numerical issues
    calibrator = IsotonicRegression(y_min=0, y_max=1)
    known = ~np.isnan(lab[col])
    x = lab[known][col].values
    y = lab[known]['labels'].values

    frames = (lab, amb, unl, scr)
    try:
        # Old sklearn: column-vector input
        calibrator.fit(x.reshape(-1, 1), y)
        calibrated = [calibrator.predict(frame[col].values.reshape(-1, 1))
                      for frame in frames]
    except ValueError:
        # Newer sklearn: flat 1-D input
        calibrator.fit(x.ravel(), y)
        calibrated = [calibrator.predict(frame[col].values.ravel())
                      for frame in frames]

    # Write back only once every prediction succeeded.
    for frame, values in zip(frames, calibrated):
        frame[col] = values
def test_isotonic_zero_weight_loop():
    """Fitting twice with some zero sample weights must terminate.

    Test from @ogrisel's issue:
    https://github.com/scikit-learn/scikit-learn/issues/4297
    """
    # Deterministic RNG
    rng = np.random.RandomState(42)

    # Noisy increasing samples
    n_samples = 50
    x = np.linspace(-3, 3, n_samples)
    y = x + rng.uniform(size=n_samples)

    # Random weights with a few of them zeroed out
    w = rng.uniform(size=n_samples)
    w[5:8] = 0

    regression = IsotonicRegression()
    # In the failure case the second fit hangs.
    for _ in range(2):
        regression.fit(x, y, sample_weight=w)
class IDXHack(object):
    """Map OpenCV frame numbers to timestamps read from an .idx file.

    Without an .idx file the OpenCV time is returned unchanged.

    Usage
    =====
    >>> from mediaeval_util.repere import IDXHack
    >>> frame2time = IDXHack(args['--idx'])
    >>> trueTime = frame2time(opencvFrame, opencvTime)
    """

    def __init__(self, idx=None):
        super(IDXHack, self).__init__()
        self.idx = idx

        if self.idx:

            # load .idx file using pandas
            df = read_table(
                self.idx, sep=r'\s+',
                names=['frame_number', 'frame_type', 'bytes', 'seconds']
            )
            # builtin float: the `np.float` alias was removed from NumPy
            x = np.array(df['frame_number'], dtype=float)
            y = np.array(df['seconds'], dtype=float)

            # train isotonic regression
            self.ir = IsotonicRegression(y_min=np.min(y), y_max=np.max(y))
            self.ir.fit(x, y)

            # frame number support
            self.xmin = np.min(x)
            self.xmax = np.max(x)

    def __call__(self, opencvFrame, opencvTime):
        """Return the time of `opencvFrame`.

        `opencvTime` is returned as is when no .idx file was given. Otherwise
        the frame number is clamped to the range seen in the .idx file and
        converted with the fitted isotonic regression.
        """
        # Same truthiness test as in __init__: a falsy idx (None or '')
        # means that no regression was trained.
        if not self.idx:
            return opencvTime

        clamped = min(self.xmax, max(self.xmin, opencvFrame))
        return self.ir.transform([clamped])[0]
def test_permutation_invariance():
    """Fitting must give the same result whatever the order of the samples.

    Regression test for sample weights that were not sorted along with the
    data.
    """
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [1, 41, 51, 1, 2, 5, 24]
    sample_weight = [1, 2, 3, 4, 5, 6, 7]

    ir = IsotonicRegression()
    x_s, y_s, sample_weight_s = shuffle(x, y, sample_weight, random_state=0)

    # Reference: fit on the data in its original order.
    reference = ir.fit_transform(x, y, sample_weight=sample_weight)

    # Same data, shuffled consistently, evaluated on the original x.
    refitted = ir.fit(x_s, y_s, sample_weight=sample_weight_s)
    from_shuffled = refitted.transform(x)

    assert_array_equal(reference, from_shuffled)
def bootstrap_calibrate_prob(labels, weights, probs, n_calibrations=30,
                             threshold=0., symmetrize=False):
    """
    Bootstrap isotonic calibration (borrowed from tata-antares/tagging_LHCb).

    Every round:
    * splits the data 50/50 into a fitting half and an evaluation half;
    * fits an isotonic regression, clipped to [0, 1], on the fitting half;
    * calibrates the evaluation probabilities and records D2, the weighted
      mean of ``(1 - 2 * p_calibrated) ** 2``, together with the weighted
      ROC AUC of the uncalibrated evaluation probabilities.

    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param probs: probabilities, numpy.array of shape [n_samples]
    :param n_calibrations: int, number of split/fit/evaluate rounds
    :param threshold: float, labels above it become 1, all others 0
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    :return: D2 array and auc array, one entry per round
    """
    binary_labels = (labels > threshold) * 1
    d2_values = []
    auc_values = []

    for _ in range(n_calibrations):
        split = train_test_split(probs, binary_labels, weights, train_size=0.5)
        (fit_probs, eval_probs, fit_labels, eval_labels,
         fit_weights, eval_weights) = split

        calibrator = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if not symmetrize:
            calibrator.fit(fit_probs, fit_labels, fit_weights)
        else:
            # Mirror every sample: p -> 1 - p with the opposite label.
            calibrator.fit(np.r_[fit_probs, 1 - fit_probs],
                           np.r_[fit_labels > 0, fit_labels <= 0],
                           np.r_[fit_weights, fit_weights])

        calibrated = calibrator.transform(eval_probs)

        # AUC is measured on the raw probabilities, D2 on the calibrated ones.
        auc_values.append(roc_auc_score(eval_labels, eval_probs,
                                        sample_weight=eval_weights))
        d2_values.append(np.average((1 - 2 * calibrated) ** 2,
                                    weights=eval_weights))

    return np.array(d2_values), np.array(auc_values)
def test_isotonic_regression_with_ties_in_differently_sized_groups():
    """
    Non-regression test for tied x values in groups of different sizes.

    Issue 9432:
    https://github.com/scikit-learn/scikit-learn/issues/9432

    Expected values come from R:
    > library("isotone")
    > x <- c(0, 1, 1, 2, 3, 4)
    > y <- c(0, 0, 1, 0, 0, 1)
    > res1 <- gpava(x, y, ties="secondary")
    > res1$x

    `isotone` version: 1.1-0, 2015-07-24
    R version: R version 3.3.2 (2016-10-31)
    """
    x = np.array([0, 1, 1, 2, 3, 4])
    y = np.array([0, 0, 1, 0, 0, 1])
    expected = np.array([0., 0.25, 0.25, 0.25, 0.25, 1.])

    ir = IsotonicRegression()

    # fit followed by transform
    ir.fit(x, y)
    via_transform = ir.transform(x)
    assert_array_almost_equal(via_transform, expected)

    # fit_transform in one go
    via_fit_transform = ir.fit_transform(x, y)
    assert_array_almost_equal(via_fit_transform, expected)
class IsotonicCalibrator(BaseEstimator, RegressorMixin):
    """Probability calibration with isotonic regression.

    Note
    ----
    This class backports and extends `sklearn.isotonic.IsotonicRegression`.
    """

    def __init__(self, y_min=None, y_max=None, increasing=True,
                 interpolation=False):
        """Constructor.

        Parameters
        ----------
        * `y_min` [optional]:
            If not `None`, set the lowest value of the fit to `y_min`.

        * `y_max` [optional]:
            If not `None`, set the highest value of the fit to `y_max`.

        * `increasing` [boolean or string, default=`True`]:
            If boolean, whether or not to fit the isotonic regression with `y`
            increasing or decreasing.
            The string value `"auto"` determines whether `y` should increase or
            decrease based on the Spearman correlation estimate's sign.

        * `interpolation` [boolean, default=`False`]:
            Whether linear interpolation is enabled or not.
        """
        self.y_min = y_min
        self.y_max = y_max
        self.increasing = increasing
        self.interpolation = interpolation

    def fit(self, T, y, sample_weight=None):
        """Fit using `T`, `y` as training data.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Training data. It does not need to be sorted.

        * `y` [array-like, shape=(n_samples,)]:
            Training target.

        * `sample_weight` [array-like, shape=(n_samples,), optional]:
            Weights. If set to None, all weights will be set to 1.

        Returns
        -------
        * `self` [object]:
            `self`.

        Notes
        -----
        When `interpolation` is enabled, two linear interpolators are built
        from the fitted values at the training points; `predict` averages
        them.
        """
        # Check input
        T = column_or_1d(T)

        # Fit isotonic regression
        self.ir_ = IsotonicRegression(y_min=self.y_min,
                                      y_max=self.y_max,
                                      increasing=self.increasing,
                                      out_of_bounds="clip")
        self.ir_.fit(T, y, sample_weight=sample_weight)

        # Interpolators
        if self.interpolation:
            # The masks below compare each fitted value with its neighbour,
            # which only makes sense when the samples are ordered by T. Work
            # on a sorted copy so that unsorted training data is handled too
            # (already sorted input gives exactly the same result as before).
            T_sorted = np.sort(T)
            p = self.ir_.transform(T_sorted)

            # mask1 marks the first point of every step, mask2 the last point
            # of the preceding step; both always keep the two end points.
            change_mask1 = (p - np.roll(p, 1)) > 0
            change_mask2 = np.roll(change_mask1, -1)

            change_mask1[0] = True
            change_mask1[-1] = True
            change_mask2[0] = True
            change_mask2[-1] = True

            # NOTE(review): the out-of-range fill values are fixed to 0 and 1
            # whatever `y_min`, `y_max` and `increasing` are -- confirm.
            self.interp1_ = interp1d(T_sorted[change_mask1],
                                     p[change_mask1],
                                     bounds_error=False,
                                     fill_value=(0., 1.))
            self.interp2_ = interp1d(T_sorted[change_mask2],
                                     p[change_mask2],
                                     bounds_error=False,
                                     fill_value=(0., 1.))

        return self

    def predict(self, T):
        """Calibrate data.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        if self.interpolation:
            T = column_or_1d(T)
            # Average of the two piecewise-linear interpolators built in fit
            return 0.5 * (self.interp1_(T) + self.interp2_(T))

        else:
            return self.ir_.transform(T)
def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
         dataset='mnist.pkl.gz', batch_size=20, n_hidden=[500, 500]):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron, with optional isotonic-regression calibration of its scores.

    This is demonstrated on MNIST.

    Before training and after every epoch, the training and validation scores
    are collected, optionally calibrated with an isotonic regression fitted on
    the training scores, and the resulting loss/accuracy are logged in `diary`
    and plotted.

    Besides its arguments the function reads names that are not assigned in
    its body: `add_noise`, `noise_proportion`, `nb_classes`, `binarize`,
    `output_activation`, `loss`, `_EPSILON`, `score_lin` and `diary`.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :param batch_size: number of samples in each minibatch

    :param n_hidden: passed to `MLP` as its `n_hidden` argument
    """
    # NOTE(review): the nesting of the per-epoch evaluation and plotting code
    # inside the `while` loop below is inferred from the statement order --
    # confirm it against the upstream version of this script.
    if add_noise==True:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize,
                             noise_prop=noise_proportion)
    else:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    print('Showing data samples')
    if nb_classes == 2:
        labels = ['odd', 'even']
    else:
        labels = [0,1,2,3,4,5,6,7,8,9]
    imshow_samples(train_set_x.get_value(), train_set_y,
                   valid_set_x.get_value(), valid_set_y, num_samples=4,
                   labels=labels)

    plt.pause(0.0001)
    diary.save_figure(plt, filename='samples', extension='svg')

    # compute number of minibatches for training, validation and testing
    # (samples beyond the last full minibatch are dropped by the floor division)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=nb_classes
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling Theano functions that evaluate `classifier.errors` on one
    # minibatch of the test, validation and training sets
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_error_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling Theano functions that evaluate `classifier.loss` on one
    # minibatch of each set
    test_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling Theano functions that evaluate `classifier.accuracy` on one
    # minibatch of each set
    test_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling Theano functions that compute `classifier.predictions` on one
    # minibatch of the training and validation data
    training_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # compiling Theano functions that compute `classifier.scores` on one
    # minibatch of the training and validation data
    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    # per-epoch history; index 0 holds the values measured before training
    error_tra = np.zeros(n_epochs+1)
    error_val = np.zeros(n_epochs+1)
    accuracy_tra = np.zeros(n_epochs+1)
    accuracy_val = np.zeros(n_epochs+1)
    epoch = 0
    # Error in accuracy
    Y_train = train_set_y.eval()
    Y_valid = valid_set_y.eval()

    print('Model predict training scores')
    # column 1 of the scores once reshaped to (-1, nb_classes)
    score_train = np.asarray([training_scores_model(i) for i
                              in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]
    if output_activation == 'isotonic_regression':
        # 4. Calibrate the network with isotonic regression in the full training
        ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                                y_min=_EPSILON, y_max=(1-_EPSILON))
        #   b. Calibrate the scores
        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(score_train, Y_train)

    # 5. Evaluate the performance with probabilities
    #   b. Evaluation on validation set
    print('Model predict validation scores')
    score_val = np.asarray([validation_scores_model(i) for i
                            in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
    if output_activation == 'isotonic_regression':
        prob_train = ir.predict(score_train)
        print('IR predict validation probabilities')
        prob_val = ir.predict(score_val)
    else:
        # without calibration the raw scores are used as probabilities
        prob_train = score_train
        prob_val = score_val

    error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
    error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
    accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
    accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

    diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
    diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

        # Error in accuracy
        # (disabled alternative: average the per-minibatch Theano loss and
        # accuracy instead of recomputing them from the collected scores)
        #training_loss = [training_loss_model(i) for i
        #                     in range(n_train_batches)]
        #validation_loss = [validation_loss_model(i) for i
        #                     in range(n_valid_batches)]
        #error_tra[epoch] = numpy.mean(training_loss)
        #error_val[epoch] = numpy.mean(validation_loss)
        #training_acc = [training_accuracy_model(i) for i
        #                     in range(n_train_batches)]
        #validation_acc = [validation_accuracy_model(i) for i
        #                     in range(n_valid_batches)]
        #accuracy_tra[epoch] = numpy.mean(training_acc)
        #accuracy_val[epoch] = numpy.mean(validation_acc)

        print('Model predict training scores')
        score_train = np.asarray([training_scores_model(i) for i
                                  in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]
        if output_activation == 'isotonic_regression':
            # 4. Calibrate the network with isotonic regression in the full training
            #   b. Calibrate the scores
            # (the regression created before the loop is refitted)
            print('Learning Isotonic Regression from TRAINING set')
            ir.fit(score_train, Y_train)

        # 5. Evaluate the performance with probabilities
        #   b. Evaluation on validation set
        print('Model predict validation scores')
        score_val = np.asarray([validation_scores_model(i) for i
                                in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
        if output_activation == 'isotonic_regression':
            prob_train = ir.predict(score_train)
            print('IR predict validation probabilities')
            prob_val = ir.predict(score_val)
        else:
            prob_train = score_train
            prob_val = score_val

        error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
        error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
        accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
        accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

        diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        plot_error(error_tra, error_val, epoch, 'loss')
        diary.save_figure(plt, filename='error', extension='svg')
        plot_accuracy(accuracy_tra, accuracy_val, epoch)
        diary.save_figure(plt, filename='accuracy', extension='svg')
        if nb_classes == 2:
            #prob_train = np.asarray([training_scores_model(i) for i
            #                        in range(n_train_batches)]).reshape(-1,nb_classes)
            #prob_val = np.asarray([validation_scores_model(i) for i
            #                        in range(n_valid_batches)]).reshape(-1,nb_classes)
            if output_activation == 'isotonic_regression':
                # NOTE(review): `score_lin` is not assigned anywhere in this
                # function; presumably a module-level grid of scores -- verify.
                prob_lin = ir.predict(score_lin)
                plot_reliability_diagram(prob_train, Y_train, prob_val, Y_valid,
                                         epoch, prob_lin, score_lin)
            else:
                plot_reliability_diagram(prob_train, Y_train, prob_val, Y_valid,
                                         epoch)
            diary.save_figure(plt, filename='reliability_diagram', extension='svg')
            plot_histogram_scores(prob_train, prob_val, epoch=epoch)
            diary.save_figure(plt, filename='histogram_scores', extension='svg')

        #from IPython import embed
        #embed()
        plt.pause(0.0001)

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
from sklearn.pipeline import Pipeline
""" Predictions """
from keras.callbacks import History

# Keras history callback; it is created here but not passed to any training
# call within this snippet.
hist = History()

# Fully connected network: 16 inputs -> 21 -> 80 -> 80 -> 1 sigmoid output.
model = Sequential()
model.add(Dense(21, input_dim=16, init='uniform', activation='relu'))
model.add(Dense(80, init='uniform', activation='relu'))
model.add(Dense(80, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE(review): this gradient-boosting classifier is overwritten by the
# random forest on the next statement and is never used.
clf = GradientBoostingClassifier(n_estimators=100, verbose=2, learning_rate=0.05, max_depth=3, min_samples_leaf=1, random_state=1)
clf = RandomForestClassifier(n_estimators=100, verbose=2)

# AdaBoost ensemble built on top of the random forest.
bdt = AdaBoostClassifier(base_estimator=clf, n_estimators=100)
bdt.fit(x2, training_target)
proba = bdt.predict_proba(y2)

ir = IsotonicRegression()
# NOTE(review): fit() is called without arguments, whereas the other
# IsotonicRegression.fit calls in this file pass training data and targets;
# this statement looks unfinished -- confirm what should be fitted.
proba1 = ir.fit()
class InterpolatedIsotonicRegression(BaseEstimator, TransformerMixin,
                                     RegressorMixin):
    """Interpolated Isotonic Regression model.

    apply linear interpolation to transform piecewise constant isotonic
    regression model into piecewise linear model
    """

    def __init__(self, y_min=None, y_max=None, increasing=True,
                 out_of_bounds='nan'):
        self.y_min = y_min
        self.y_max = y_max
        self.increasing = increasing
        self.out_of_bounds = out_of_bounds

    def fit(self, X, y, sample_weight=None):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like, shape=(n_samples,)
            Training data. It does not need to be sorted or to be a NumPy
            array.

        y : array-like, shape=(n_samples,)
            Training target.

        sample_weight : array-like, shape=(n_samples,), optional, default: None
            Weights. If set to None, all weights will be set to 1 (equal
            weights).

        Returns
        -------
        self : object
            Returns an instance of self.

        Notes
        -----
        Two linear interpolators are built from the isotonic fit evaluated at
        the training points; `transform` averages them.
        """
        self.iso_ = IsotonicRegression(y_min=self.y_min,
                                       y_max=self.y_max,
                                       increasing=self.increasing,
                                       out_of_bounds=self.out_of_bounds)
        self.iso_.fit(X, y, sample_weight=sample_weight)

        # The masks below compare each fitted value with its neighbour, which
        # only makes sense when the samples are ordered by X. Work on a
        # sorted, flat array so that unsorted or list input is handled too
        # (already sorted array input gives exactly the same result as before).
        x_sorted = np.sort(np.asarray(X), axis=None)
        p = self.iso_.transform(x_sorted)

        # mask1 marks the first point of every step, mask2 the last point of
        # the preceding step; both always keep the two end points.
        change_mask1 = (p - np.roll(p, 1)) > 0
        change_mask2 = np.roll(change_mask1, -1)
        change_mask1[0] = True
        change_mask1[-1] = True
        change_mask2[0] = True
        change_mask2[-1] = True

        # NOTE(review): the out-of-range fill values are fixed to 0 and 1
        # whatever `y_min`, `y_max`, `increasing` and `out_of_bounds` are --
        # confirm.
        self.iso_interp1_ = interp1d(x_sorted[change_mask1], p[change_mask1],
                                     bounds_error=False, fill_value=(0., 1.))
        self.iso_interp2_ = interp1d(x_sorted[change_mask2], p[change_mask2],
                                     bounds_error=False, fill_value=(0., 1.))

        return self

    def transform(self, T):
        """Transform new data by linear interpolation

        Parameters
        ----------
        T : array-like, shape=(n_samples,)
            Data to transform.

        Returns
        -------
        T_ : array, shape=(n_samples,)
            The transformed data
        """
        # Average of the two piecewise-linear interpolators built in fit
        return 0.5 * (self.iso_interp1_(T) + self.iso_interp2_(T))

    def predict(self, T):
        """Predict new data by linear interpolation.

        Parameters
        ----------
        T : array-like, shape=(n_samples,)
            Data to transform.

        Returns
        -------
        T_ : array, shape=(n_samples,)
            Transformed data.
        """
        return self.transform(T)
# NOTE: the original author tagged this script with "#error".
# NOTE(review): the feature frames passed to IsotonicRegression below keep
# every column except 'tag'; isotonic regression is documented for 1-D input,
# which is presumably the reported error -- confirm.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.isotonic import IsotonicRegression

df = pd.read_csv('newtest.csv')
df1 = pd.read_csv('newtest1.csv')

# Features: everything but 'tag'. Target: what is left after dropping the
# listed feature columns.
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X = df1.drop(['tag'], axis=1)
Y = df1.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)

X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

ir = IsotonicRegression()
ir.fit(X_train, Y_train)
# Function-call form works on both Python 2 and Python 3.
print(ir.score(X_test, Y_test))