Example #1
def isotonicPredict(trainData, evalData):
    """
    Based on http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
    
    Fits linear regression (PMD ~ HT) between iniDate and endDate and then predicts values for fcDate.
    Requires training and evaluation datasets to be input
    
    Train-test split with a test size of 25 %
    
    :trainData: pandas dataframe
    :evalData: pandas dataframe
    """
    algo = 'isotonic'

    X_train, X_test, y_train, y_test = train_test_split(
        trainData['HT'].values.reshape(-1, 1),
        trainData['600'].values.reshape(-1, 1),
        test_size=.25,
        random_state=42)

    # regression
    iso = IsotonicRegression(out_of_bounds='clip')
    iso.fit(X_train.flatten(), y_train.flatten())
    testPred = iso.predict(X_test.flatten())
    r2_test = r2_score(y_test, testPred)
    isoPred = iso.predict(evalData['HT'].values)

    # results
    dicResults = {'r2': r2_test, 'pred': isoPred.flatten(), 'name': algo}

    return dicResults
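A minimal usage sketch for the function above (not from the original source): it assumes the snippet's implied imports and fabricates toy 'HT'/'600' columns purely for illustration.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import r2_score

# toy, roughly monotone data: 'HT' predicts the '600' column
rng = np.random.RandomState(0)
ht = np.sort(rng.uniform(0, 10, 200))
trainData = pd.DataFrame({'HT': ht, '600': ht + rng.normal(0, 0.5, 200)})
evalData = pd.DataFrame({'HT': np.linspace(0, 10, 25)})

results = isotonicPredict(trainData, evalData)
print(results['name'], round(results['r2'], 3))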
Example #2
def test_fast_predict():
    # test that the faster prediction change doesn't
    # affect out-of-sample predictions:
    # https://github.com/scikit-learn/scikit-learn/pull/6206
    rng = np.random.RandomState(123)
    n_samples = 10**3
    # X values over the -10,10 range
    X_train = 20.0 * rng.rand(n_samples) - 10
    y_train = np.less(rng.rand(n_samples),
                      expit(X_train)).astype('int64').astype('float64')

    weights = rng.rand(n_samples)
    # we also want to test that everything still works when some weights are 0
    weights[rng.rand(n_samples) < 0.1] = 0

    slow_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
    fast_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")

    # Build interpolation function with ALL input data, not just the
    # non-redundant subset. The following 2 lines are taken from the
    # .fit() method, without removing unnecessary points
    X_train_fit, y_train_fit = slow_model._build_y(X_train,
                                                   y_train,
                                                   sample_weight=weights,
                                                   trim_duplicates=False)
    slow_model._build_f(X_train_fit, y_train_fit)

    # fit with just the necessary data
    fast_model.fit(X_train, y_train, sample_weight=weights)

    X_test = 20.0 * rng.rand(n_samples) - 10
    y_pred_slow = slow_model.predict(X_test)
    y_pred_fast = fast_model.predict(X_test)

    assert_array_equal(y_pred_slow, y_pred_fast)
Example #4
    def do_preds(train, test, files):
        train = train.sort_values(by="bin", ascending=False)
        test = test.sort_values(by="bin", ascending=False)
        train["bin"] = train["bin"] * -33.219281
        test["bin"] = test["bin"] * -33.219281
        print(train.head())
        print(train.tail())

        print("------- do preds --------")
        ensemble_col = [f[1] for f in files]
        train_x = train[ensemble_col].values.reshape(-1)
        reg = IsotonicRegression()
        reg.fit(train_x, train["target"])
        y_pred = reg.predict(train_x)
        score = evaluator.rmse(train["target"], y_pred)
        print(score)

        test_x = test[ensemble_col].values.reshape(-1)
        y_pred = reg.predict(test_x)
        sub = pd.DataFrame()
        sub["card_id"] = test["card_id"]
        sub["target"] = y_pred
        print(train["target"].describe())
        # print(train["big"].describe())
        print(sub["target"].describe())
        sub.to_csv(path_const.OUTPUT_ENS, index=False)
Example #5
 def calibrate_row(row):
     calibrator = IsotonicRegression(y_min=0, y_max=1)
     x = lab[~np.isnan(lab[row])][row].values
     y = lab[~np.isnan(lab[row])]['labels'].values
     calibrator.fit(x, y)
     lab[row] = calibrator.predict(lab[row].values)
     amb[row] = calibrator.predict(amb[row].values)
     unl[row] = calibrator.predict(unl[row].values)
     scr[row] = calibrator.predict(scr[row].values)
Example #6
    def do_cv_pred(train, test, files):
        print("------- do preds --------")
        ensemble_col = [f[1] for f in files]
        train_x = train[ensemble_col]
        test_x = test[ensemble_col].values.reshape(-1)
        train_y = train["target"]

        submission = pd.DataFrame()
        submission["card_id"] = test["card_id"]
        submission["target"] = 0

        outliers = (train["target"] < -30).astype(int).values
        split_num = 5
        skf = model_selection.StratifiedKFold(n_splits=split_num,
                                              shuffle=True,
                                              random_state=4590)
        train_preds = []
        for idx, (train_index,
                  test_index) in enumerate(skf.split(train, outliers)):
            X_train, X_test = train_x.iloc[train_index], train_x.iloc[
                test_index]
            y_train, y_test = train_y.iloc[train_index], train_y.iloc[
                test_index]

            reg = IsotonicRegression()
            X_train = X_train.values.reshape(-1)
            X_test = X_test.values.reshape(-1)
            reg.fit(X_train, y_train)
            valid_set_pred = reg.predict(X_test)
            print(y_test.describe())
            temp = pd.DataFrame(valid_set_pred)
            print(temp.describe())
            score = evaluator.rmse(y_test, valid_set_pred)
            print(score)

            y_pred = reg.predict(test_x)
            submission["target"] = submission["target"] + y_pred
            train_id = train.iloc[test_index]
            train_cv_prediction = pd.DataFrame()
            train_cv_prediction["card_id"] = train_id["card_id"]
            train_cv_prediction["cv_pred"] = valid_set_pred
            train_preds.append(train_cv_prediction)

        train_output = pd.concat(train_preds, axis=0)

        submission["target"] = submission["target"] / split_num
        submission.to_csv(path_const.OUTPUT_SUB, index=False)

        train_output["cv_pred"] = np.clip(train_output["cv_pred"], -33.219281,
                                          18.0)
        train_output.to_csv(path_const.OUTPUT_OOF, index=False)

        df_pred = pd.merge(train[["card_id", "target"]],
                           train_output,
                           on="card_id")
        rmse_score = evaluator.rmse(df_pred["target"], df_pred["cv_pred"])
        print(rmse_score)
Example #7
def test_isotonic_regression_oob_raise():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    msg = 'A value in x_new is below the interpolation range'
    with pytest.raises(ValueError, match=msg):
        ir.predict([min(x) - 10, max(x) + 10])
Example #8
def test_isotonic_regression_oob_clip():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    y2 = ir.predict(x)
    assert_equal(max(y1), max(y2))
    assert_equal(min(y1), min(y2))
Example #9
def test_isotonic_regression_oob_clip():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    y2 = ir.predict(x)
    assert max(y1) == max(y2)
    assert min(y1) == min(y2)
Example #10
class Recalibrator:
    def __init__(self, model, data, args):
        self.args = args
        self.model = model

        inputs, labels = data[0].to(args.device), data[1].to(args.device)
        with torch.no_grad():
            outputs = model(inputs)

        labels = torch.sort(labels.flatten())[0].cpu().numpy()
        outputs = torch.sort(outputs.flatten())[0].cpu().numpy()
        #         plt.scatter(outputs, labels)
        #         plt.show()

        #         plt.hist(outputs, bins=30, alpha=0.5, color='r')
        #         plt.hist(labels, bins=30, alpha=0.5, color='g')
        #         plt.show()
        # print(labels.shape, outputs.shape)
        self.iso = IsotonicRegression(out_of_bounds='clip', increasing=True)
        self.iso = self.iso.fit(outputs, labels)

    def adjust(self, original_y):
        original_shape = original_y.shape
        return torch.from_numpy(self.iso.predict(
            original_y.cpu().flatten())).view(original_shape).to(
                self.args.device)
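A hedged usage sketch for Recalibrator (not part of the original): the class only needs a callable torch model and an args object exposing .device, so a SimpleNamespace and a toy linear layer stand in here.

import types
import torch

model = torch.nn.Linear(4, 1)                     # toy stand-in model
data = (torch.randn(256, 4), torch.rand(256, 1))  # (inputs, labels)
args = types.SimpleNamespace(device='cpu')        # hypothetical args object

recal = Recalibrator(model, data, args)
y_adj = recal.adjust(torch.randn(8, 1))           # same shape back, recalibrated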
Example #11
def cali(fname, predict_name, out_name, mode='ctr'):
    if mode == 'ctr':
        true_col = 'actual_click'
        prob_col = 'ctr'
    if mode == 'cvr':
        true_col = 'actual_purchase'
        prob_col = 'cvr'
    pred_df = pd.read_csv(predict_name, names=columns)
    nn = pred_df.shape[0]
    df = pd.read_csv(fname, names=columns)
    n = df.shape[0]
    y_true = df[true_col].values
    y_prob = df[prob_col].values
    #fraction_of_positives, mean_predicted_value = cali.calibration_curve(y_true, y_prob, normalize=False, n_bins=10)
    #plt.figure()
    #plt.plot(mean_predicted_value,fraction_of_positives)
    #plt.show()
    #plt.close()
    ir = IsotonicRegression()
    y = ir.fit_transform(y_prob, y_true)
    y_pred = ir.predict(pred_df[prob_col].values)
    with open(out_name, 'w') as h:
        h.write('\n'.join(str(v) for v in y_pred))
Example #12
def regression_model(freqs, deg=2, same=False, method='isotonic'):
    '''
    Fits a monotone (isotonic) or polynomial model to -log10 transition
    frequencies as a function of quality score (1..40).

    - freqs: proportion of transitions observed at each quality score;
      NaN where no transition was measured
    '''

    observed_transitions = (~np.isnan(freqs)) & (freqs>0)

    x = np.arange(1, 41)[observed_transitions]
    y = -np.log10(freqs[observed_transitions])

    if len(x) == 0:
        return np.zeros(40)

    if method == 'polynomial':
        z = np.polyfit(x, y, 3)
        polynom = np.poly1d(z)
        y_interp = 10**-polynom(np.arange(1, 41))
    elif method == 'isotonic':
        ir = IsotonicRegression(y_min=0, out_of_bounds='clip', increasing=not same)
        ir.fit(x, y)
        y_interp = 10**-ir.predict(np.arange(1, 41))
    else:
        raise ValueError('Unknown method: {}. Aborting.'.format(method))
    return y_interp
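A small illustrative call (assumption: freqs is a length-40 array of transition frequencies indexed by quality score 1..40, NaN where nothing was observed), using the same numpy/IsotonicRegression imports the snippet relies on.

import numpy as np

# toy frequencies observed at a handful of quality scores
freqs = np.full(40, np.nan)
freqs[4::5] = 10.0 ** -np.linspace(1, 4, 8)

y_interp = regression_model(freqs, method='isotonic')
print(y_interp.shape)  # (40,) monotone, smoothed frequency estimates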
Example #13
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    y = np.array([10, 0, 2])
    y_ = np.array([4, 4, 4])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
Example #14
def test_isotonic_2darray_more_than_1_feature():
    # Ensure IsotonicRegression raises error if input has more than 1 feature
    X = np.arange(10)
    X_2d = np.c_[X, X]
    y = np.arange(10)

    msg = "should be a 1d array or 2d array with 1 feature"
    with pytest.raises(ValueError, match=msg):
        IsotonicRegression().fit(X_2d, y)

    iso_reg = IsotonicRegression().fit(X, y)
    with pytest.raises(ValueError, match=msg):
        iso_reg.predict(X_2d)

    with pytest.raises(ValueError, match=msg):
        iso_reg.transform(X_2d)
Example #15
def isotonic_regression(scores_dev: torch.Tensor, scores_test: torch.Tensor,
                        labels_dev: torch.Tensor, labels_test: torch.Tensor):
    """Calibrates confidence scores using scikit-learn implementation of isotonic regression.

        Isotonic regression does not require bins for recalibration.

    Args:
        scores_dev: [n, t] Tensor of confidence scores (e.g. softmaxed logits) for dev set.
        scores_test: [n, t] Tensor of confidence scores (e.g. softmaxed logits) for test set.
        labels_dev: [n, t] One-hot tensor of labels for dev set.
        labels_test: [n, t] One-hot tensor of labels for test set.
    """

    logger.info("Starting isotonic regression...")

    # Scores need to be moved to CPU for sklearn model
    flattened_scores_dev = scores_dev.reshape(-1).cpu()
    flattened_labels_dev = labels_dev.reshape(-1).cpu()
    flattened_scores_test = (scores_test.reshape(-1)).cpu()

    model = IsotonicRegression(y_min=0, y_max=1)
    model.fit(X=flattened_scores_dev, y=flattened_labels_dev)
    predictions = torch.Tensor(model.predict(flattened_scores_test)).cuda()

    calibrated_scores_test = predictions

    return calibrated_scores_test
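A hedged usage sketch (not from the source): the function expects a module-level logger and moves predictions back with .cuda(), so a CUDA device must be available.

import logging
import torch

logger = logging.getLogger(__name__)  # the function above assumes this name exists

n, t = 128, 10
scores_dev = torch.rand(n, t)
scores_test = torch.rand(n, t)
labels_dev = torch.nn.functional.one_hot(torch.randint(0, t, (n,)), t).float()
labels_test = torch.nn.functional.one_hot(torch.randint(0, t, (n,)), t).float()

calibrated = isotonic_regression(scores_dev, scores_test, labels_dev, labels_test)
print(calibrated.shape)  # flattened [n * t] calibrated scores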
Example #16
def eval(scores_dir):
    y_preds = []
    y_trues = []

    for fn in os.listdir(scores_dir):
        print(fn)
        y_pred, y_true = np.loadtxt(os.path.join(scores_dir, fn),
                                    delimiter=',',
                                    usecols=(3, 4),
                                    unpack=True)  ##, max_rows=10
        y_preds.extend(y_pred)
        y_trues.extend(y_true)
    y_preds = np.array(y_preds)
    y_trues = np.array(y_trues)
    iso_reg = IsotonicRegression().fit(y_preds, y_trues)
    y_probs = iso_reg.predict(y_preds)
    auc_roc = roc_auc_score(y_trues, y_probs)
    auc_pr = average_precision_score(y_trues, y_probs)

    y_predicts = np.where(y_probs > 0.5, 1.0, 0.0)
    accuracy = accuracy_score(y_trues, y_predicts)
    f1 = f1_score(y_trues, y_predicts)
    print(
        f'total length is {len(y_trues)} \n auc score for roc is {auc_roc} and the auc for pr is {auc_pr}, f1 score is {f1}, accuracy is {accuracy}'
    )
    res = np.vstack((y_preds, y_trues, y_probs)).T
    np.savetxt('/home/yh1844/inference-2019/eval/eval.txt', res)
Example #17
class IsotonicRegressionCalibrator:
    def __init__(self):
        self.Calibrator = IsotonicRegression(out_of_bounds='clip')

    def train(self, h0, h1):
        # Fits the IR model, self.Calibrator, to scores h0 and h1. Assumes h0
        # carries label 0 (normal) and h1 carries label 1 (anomalous).
        # Expects single-dimensional arrays of any form (list, numpy, mx1, 1xm, etc.)

        n0 = np.size(h0)
        n1 = np.size(h1)
        H = np.append(np.reshape(h0, n0), np.reshape(h1, n1))

        y = np.append(np.zeros(n0), np.ones(n1))  #labels

        self.Calibrator.fit(H, y)

    def test(self, H):
        # Returns the predicted posterior probabilities of the list of scores H
        # using the fitted IR model in self.Calibrator.
        # Expects a single-dimensional array of any form (list, numpy, mx1, 1xm, etc.)
        #   Keyword arguments:
        #       H - classifier scores

        return self.Calibrator.predict(H)

    def toString(self):
        return "Isotonic"
Example #19
def test_calibration_ensemble_false(data, method):
    # Test that `ensemble=False` is the same as using predictions from
    # `cross_val_predict` to train calibrator.
    X, y = data
    clf = LinearSVC(random_state=7)

    cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False)
    cal_clf.fit(X, y)
    cal_probas = cal_clf.predict_proba(X)

    # Get probas manually
    unbiased_preds = cross_val_predict(clf,
                                       X,
                                       y,
                                       cv=3,
                                       method="decision_function")
    if method == "isotonic":
        calibrator = IsotonicRegression(out_of_bounds="clip")
    else:
        calibrator = _SigmoidCalibration()
    calibrator.fit(unbiased_preds, y)
    # Use `clf` fit on all data
    clf.fit(X, y)
    clf_df = clf.decision_function(X)
    manual_probas = calibrator.predict(clf_df)
    assert_allclose(cal_probas[:, 1], manual_probas)
Example #20
 def fnIsotonicRegression(self, year, avgTemp, predictYear):
     feature_train, feature_test, target_train, target_test = train_test_split(
         year, avgTemp, test_size=0.1, random_state=42)
     isoReg = IsotonicRegression()
     isoReg.fit(feature_train, target_train)
     return (isoReg.score(feature_test,
                          target_test), isoReg.predict(predictYear))
Example #21
def _gspv_interpolate_cloud(powers, velocities):
    from sklearn.isotonic import IsotonicRegression
    from scipy.interpolate import InterpolatedUnivariateSpline
    regressor = IsotonicRegression()
    regressor.fit(powers, velocities)
    x = np.linspace(min(powers), max(powers))
    y = regressor.predict(x)
    return InterpolatedUnivariateSpline(x, y, k=1, ext=3)
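A usage sketch under assumed toy data; the returned k=1 spline uses constant boundary extrapolation (ext=3), so out-of-range power queries are clamped rather than extrapolated.

import numpy as np

rng = np.random.RandomState(0)
powers = rng.uniform(0.0, 100.0, 300)
velocities = 0.5 * powers + rng.normal(0.0, 5.0, 300)  # noisy monotone cloud

spline = _gspv_interpolate_cloud(powers, velocities)
print(spline([0.0, 50.0, 200.0]))  # 200.0 is clamped to the boundary value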
Example #22
def test_isotonic_mismatched_dtype(y_dtype):
    # regression test for #15004
    # check that data are converted when X and y dtype differ
    reg = IsotonicRegression()
    y = np.array([2, 1, 4, 3, 5], dtype=y_dtype)
    X = np.arange(len(y), dtype=np.float32)
    reg.fit(X, y)
    assert reg.predict(X).dtype == X.dtype
Example #23
def test_isotonic_non_regression_inf_slope():
    # Non-regression test to ensure that inf values are not returned
    # see: https://github.com/scikit-learn/scikit-learn/issues/10903
    X = np.array([0., 4.1e-320, 4.4e-314, 1.])
    y = np.array([0.42, 0.42, 0.44, 0.44])
    ireg = IsotonicRegression().fit(X, y)
    y_pred = ireg.predict(np.array([0, 2.1e-319, 5.4e-316, 1e-10]))
    assert np.all(np.isfinite(y_pred))
Example #24
def test_isotonic_duplicate_min_entry():
    x = [0, 0, 1]
    y = [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)
    all_predictions_finite = np.all(np.isfinite(ir.predict(x)))
    assert_true(all_predictions_finite)
Example #25
def test_isotonic_duplicate_min_entry():
    x = [0, 0, 1]
    y = [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)
    all_predictions_finite = np.all(np.isfinite(ir.predict(x)))
    assert all_predictions_finite
Example #26
    def _configure_classifier(self, X1, test_size=0.35):
        """ The neural network below captures the residual non-linearity after the transformations above.

        :param X1: array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points to be modelled.  Each row
            corresponds to a single data point.
        :param float test_size: fraction of X1 used for probability calibration. Default is 0.35.
        """
        # fitting uniform data sample vs observed data

        # set random state
        np.random.seed(self.random_state)

        # make training sample and labels
        # use test sample below for probability calibration.
        X1_train, X1_test, y1_train, y1_test = train_test_split(
            X1,
            np.ones(X1.shape[0]),
            test_size=test_size,
            random_state=self.random_state)
        X0_train = np.random.uniform(size=X1_train.shape)
        X_train = np.concatenate([X0_train, X1_train], axis=0)
        y_train = np.concatenate([np.zeros(X1_train.shape[0]), y1_train],
                                 axis=None)

        self.clf = self.clf.fit(X_train, y_train)
        # self.train_data = (X_train, y_train)
        # self.test_data = (X1_test, y1_test)

        # Calibrate probabilities manually. (Used for weights calculation.)
        X0_test = np.random.uniform(size=(1000000, X1.shape[1]))
        p0 = self.clf.predict_proba(X0_test)[:, 1]
        p1 = self.clf.predict_proba(X1_test)[:, 1]

        hist_p0, bin_edges = np.histogram(p0, bins=100, range=(0, 1))
        hist_p1, bin_edges = np.histogram(p1, bins=100, range=(0, 1))
        bin_centers = bin_edges[:-1] + 0.005

        hnorm_p0 = hist_p0 / sum(hist_p0)
        hnorm_p1 = hist_p1 / sum(hist_p1)
        hnorm_sum = hnorm_p0 + hnorm_p1
        p1cb = np.divide(hnorm_p1,
                         hnorm_sum,
                         out=np.zeros_like(hnorm_p1),
                         where=hnorm_sum != 0)
        # self.p1cb = p1cb, bin_centers

        # use isotonic regression to smooth out potential fluctuations in the p1 values
        # isotonic regression assumes that p1 can only be a rising function.
        # I’m assuming that if a classifier predicts a higher probability, the calibrated probability
        # will also be higher. This may not always be right, but I think generally it is a safe one.
        iso_reg = IsotonicRegression().fit(bin_centers, p1cb)
        p1pred = iso_reg.predict(bin_centers)
        self.p1f_ = interpolate.interp1d(bin_edges[:-1],
                                         p1pred,
                                         kind='previous',
                                         bounds_error=False,
                                         fill_value="extrapolate")
Example #27
def mir_calibrate(logit, label, logit_eval):
    p = np.exp(logit)/np.sum(np.exp(logit),1)[:,None] 
    p_eval = np.exp(logit_eval)/np.sum(np.exp(logit_eval),1)[:,None]
    ir = IsotonicRegression(out_of_bounds='clip')
    y_ = ir.fit_transform(p.flatten(), (label.flatten()))
    yt_ = ir.predict(p_eval.flatten())
    
    p = yt_.reshape(logit_eval.shape)+1e-9*p_eval
    return p
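A hedged usage sketch for mir_calibrate (illustrative data only): logits are softmaxed inside the function, and labels must be one-hot.

import numpy as np
from sklearn.isotonic import IsotonicRegression

rng = np.random.RandomState(0)
logit = rng.normal(size=(500, 3))            # dev-set logits
label = np.eye(3)[rng.randint(0, 3, 500)]    # one-hot dev labels
logit_eval = rng.normal(size=(200, 3))       # eval-set logits

p_cal = mir_calibrate(logit, label, logit_eval)
print(p_cal.shape)  # (200, 3) calibrated probabilities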
Example #28
    def isotonic(self):

        clf = IsotonicRegression()
        train_x = self.train_x.to_list()
        train_y = self.train_y.to_list()
        test_x = self.test_x.to_list()
        clf.fit(train_x, train_y)
        test_y_pred = clf.predict(test_x)
        return test_y_pred
Example #29
    def linear_regression(self, exp1, exp2, min_samples=5):

        X = []
        Y = []
        Xi = []
        for i in sorted(exp1):
            if i in exp2:
                Xi.append(i)
                X.append(exp2[i])
                Y.append(exp1[i])
        X = np.r_[X]
        Y = np.r_[Y]
        Xi = np.r_[Xi]

        if X.size < min_samples:
            rscore = 0
            slope = 0
            warning = False
        else:
            # clean the inputs by isotonic regression
            warning, increasing_bool = check_increasing(Xi, Y)
            IR = IsotonicRegression(increasing=increasing_bool)
            IR.fit(Xi, Y)
            Y1 = IR.predict(Xi)
            vi = np.where(np.diff(Y1) < 0)[0]
            pieces = np.split(vi, np.where(np.diff(vi) != 1)[0] + 1)
            si = 0
            for i in range(len(pieces) - 1):
                p1 = pieces[i]
                p2 = pieces[i + 1]
                if p1.size / (p2[0] - p1[0]) > 0.5:
                    si = p1[0]
                    break

            if si / X.size > 0.3:  # if more than 30% of the data would be discarded
                si = vi[0]
                if si / X.size > 0.3:
                    si = 0

            X = X[si:]
            Y = Y[si:]

            X = X[:, np.newaxis]
            huber = HuberRegressor().fit(X, Y)
            inlier_mask = np.logical_not(huber.outliers_)
            if inlier_mask.sum() < min_samples:
                rscore = 0
                slope = 0
            else:
                sX = X[inlier_mask]
                sY = Y[inlier_mask]
                rscore = huber.score(sX, sY)
                slope = huber.coef_[0]

        return rscore, slope, warning
Example #30
 def calibrate_col(col):
     # isotonic not the best here, and faces numerical issues
     calibrator = IsotonicRegression(y_min=0, y_max=1)
     x = lab[~np.isnan(lab[col])][col].values
     y = lab[~np.isnan(lab[col])]['labels'].values
     # This worked with old sklearn
     try:
         # Old sklearn
         calibrator.fit(x.reshape(-1, 1), y)
         lab[col] = calibrator.predict(lab[col].values.reshape(-1, 1))
         amb[col] = calibrator.predict(amb[col].values.reshape(-1, 1))
         unl[col] = calibrator.predict(unl[col].values.reshape(-1, 1))
         scr[col] = calibrator.predict(scr[col].values.reshape(-1, 1))
     except ValueError:
         # Newer sklearn
         calibrator.fit(x.ravel(), y)
         lab[col] = calibrator.predict(lab[col].values.ravel())
         amb[col] = calibrator.predict(amb[col].values.ravel())
         unl[col] = calibrator.predict(unl[col].values.ravel())
         scr[col] = calibrator.predict(scr[col].values.ravel())
Example #31
def test_isotonic_make_unique_tolerance():
    # Check that averaging of targets for duplicate X is done correctly,
    # taking into account tolerance
    X = np.array([0, 1, 1 + 1e-16, 2], dtype=np.float64)
    y = np.array([0, 1, 2, 3], dtype=np.float64)
    ireg = IsotonicRegression().fit(X, y)
    y_pred = ireg.predict([0, 0.5, 1, 1.5, 2])

    assert_array_equal(y_pred, np.array([0, 0.75, 1.5, 2.25, 3]))
    assert_array_equal(ireg.X_thresholds_, np.array([0., 1., 2.]))
    assert_array_equal(ireg.y_thresholds_, np.array([0., 1.5, 3.]))
Example #32
def irova_calibrate(logit, label, logit_eval):
    p = np.exp(logit)/np.sum(np.exp(logit),1)[:,None] 
    p_eval = np.exp(logit_eval)/np.sum(np.exp(logit_eval),1)[:,None]
    

    for ii in range(p_eval.shape[1]):
        ir = IsotonicRegression(out_of_bounds='clip')
        y_ = ir.fit_transform(p[:,ii], label[:,ii])
        p_eval[:,ii] = ir.predict(p_eval[:,ii])+1e-9*p_eval[:,ii]
    return p_eval
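The one-vs-all variant can be exercised the same way; a hedged sketch with toy logits:

import numpy as np
from sklearn.isotonic import IsotonicRegression

rng = np.random.RandomState(0)
logit = rng.normal(size=(500, 3))
label = np.eye(3)[rng.randint(0, 3, 500)]
logit_eval = rng.normal(size=(200, 3))

p_ova = irova_calibrate(logit, label, logit_eval)  # (200, 3), calibrated per class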
Example #33
def test_input_shape_validation():
    # Test from #15012
    # Check that IsotonicRegression can handle 2darray with only 1 feature
    X = np.arange(10)
    X_2d = X.reshape(-1, 1)
    y = np.arange(10)

    iso_reg = IsotonicRegression().fit(X, y)
    iso_reg_2d = IsotonicRegression().fit(X_2d, y)

    assert iso_reg.X_max_ == iso_reg_2d.X_max_
    assert iso_reg.X_min_ == iso_reg_2d.X_min_
    assert iso_reg.y_max == iso_reg_2d.y_max
    assert iso_reg.y_min == iso_reg_2d.y_min
    assert_array_equal(iso_reg.X_thresholds_, iso_reg_2d.X_thresholds_)
    assert_array_equal(iso_reg.y_thresholds_, iso_reg_2d.y_thresholds_)

    y_pred1 = iso_reg.predict(X)
    y_pred2 = iso_reg_2d.predict(X_2d)
    assert_allclose(y_pred1, y_pred2)
Example #34
def test_isotonic_regression_pickle():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    ir_ser = pickle.dumps(ir, pickle.HIGHEST_PROTOCOL)
    ir2 = pickle.loads(ir_ser)
    np.testing.assert_array_equal(ir.predict(x), ir2.predict(x))
Example #36
def isotonicFit(thr, prec, maxThr=999):
    thr = np.array(thr)
    prec = np.array(prec)
    prec = prec[thr <= maxThr]
    thr = thr[thr <= maxThr]
    objFun = lambda thr, alpha, beta: alpha * thr**beta

    isoReg = IsotonicRegression(y_min=0, y_max=1)
    isoReg.fit(thr, prec)
    #  joblib.dump(isoReg, "/home/rsanchez/Tesis/rriPredMethod/pyCode/webApp/rriPredWeb/media/scoreToPrecModel/mixed.isotonic.joblib")
    return lambda x: isoReg.predict(x), "isotonic"
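A small illustrative call (fabricated threshold/precision pairs); the first returned element is a callable score-to-precision map.

import numpy as np

rng = np.random.RandomState(0)
thr = np.linspace(0.0, 10.0, 20)
prec = np.clip(thr / 10.0 + rng.normal(0.0, 0.05, 20), 0.0, 1.0)

predictor, name = isotonicFit(thr, prec, maxThr=8)
print(name, predictor(np.array([1.0, 5.0, 7.5])))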
Example #37
def test_isotonic_regression_oob_nan():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    assert sum(np.isnan(y1)) == 2
Example #38
def test_isotonic_regression_oob_nan():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    assert_equal(sum(np.isnan(y1)), 2)
Example #39
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]), ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
Example #40
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = \
                check_array(y_np, dtype=[np.float64, np.float32],
                            ensure_2d=False).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert_equal(res.dtype, expected_dtype)

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert_equal(res.dtype, expected_dtype)
Example #41
def main_old():
    model = create_model(nb_classes, optimizer, loss)

    if keras_plot_available:
        plot(model, to_file='model.png')

    (X_train, Y_train), (X_val, Y_val) = get_mnist_data(
            train_size, binarize, add_noise, noise_proportion, test=False)

    print('Showing data samples')
    imshow_samples(X_train, Y_train, X_val, Y_val, 5)
    diary.save_figure(plt, filename='samples', extension='svg')

    print('Creating error and accuracy vectors')
    error_train  = np.zeros(num_epochs+1)
    error_val = np.zeros(num_epochs+1)
    accuracy_train = np.zeros(num_epochs+1)
    accuracy_val = np.zeros(num_epochs+1)

    print('Model predict training scores')
    score_train = model.predict(X_train).flatten()
    if output_activation == 'isotonic_regression':
        # 4. Calibrate the network with isotonic regression in the full training
        ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                                y_min=_EPSILON, y_max=(1-_EPSILON))
        #   b. Calibrate the scores
        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(score_train, Y_train)

    # 5. Evaluate the performance with probabilities
    #   b. Evaluation on validation set
    print('Model predict validation scores')
    score_val = model.predict(X_val).flatten()
    if output_activation == 'isotonic_regression':
        prob_train = ir.predict(score_train)
        print('IR predict validation probabilities')
        prob_val  = ir.predict(score_val)
    else:
        prob_train = score_train
        prob_val = score_val

    error_train[0] = compute_loss(prob_train, Y_train, loss)
    accuracy_train[0] = compute_accuracy(prob_train, Y_train)
    error_val[0] = compute_loss(prob_val, Y_val, loss)
    accuracy_val[0] = compute_accuracy(prob_val, Y_val)

    # SHOW INITIAL PERFORMANCE
    print(("train:  error = {}, acc = {}\n"
           "valid:  error = {}, acc = {}").format(
                        error_train[0], accuracy_train[0],
                        error_val[0], accuracy_val[0]))

    diary.add_entry('training', [error_train[0], accuracy_train[0]])
    diary.add_entry('validation', [error_val[0], accuracy_val[0]])

    num_minibatches = np.ceil(np.true_divide(train_size,batch_size)).astype('int')
    for epoch in range(1,num_epochs+1):
        for iteration in range(num_minibatches):
            # Given that the probabilities are calibrated
            # 1. Choose the next minibatch
            print('EPOCH {}'.format(epoch))
            minibatch_id = get_minibatch_id(train_size, batch_size,
                                             method=minibatch_method,
                                             iteration=iteration)
            X_train_mb = X_train[minibatch_id]
            Y_train_mb = Y_train[minibatch_id]

            if output_activation == 'isotonic_regression':
                # 2. Compute the new values for the labels on this minibatch
                #   a. Predict the scores using the network
                print('\tMODEL PREDICTING TRAINING SCORES')
                score_train_mb = model.predict(X_train_mb).flatten()
                #   b. Predict the probabilities using IR
                print('\tIR PREDICTING TRAINING PROBABILITIES')
                prob_train_mb = ir.predict(score_train_mb.flatten())
                #   c. Compute the gradients of IR
                g_prob_train_mb = isotonic_gradients(ir, prob_train_mb)
                #   c. Compute new values for the labels
                #Y_train_mb_new = prob_train_mb + Y_train_mb
                Y_train_mb_new = prob_train_mb + \
                                 np.divide(np.multiply(prob_train_mb - Y_train_mb,
                                                       g_prob_train_mb),
                                           np.multiply(prob_train_mb,
                                                       1 - prob_train_mb))
            else:
                Y_train_mb_new = Y_train_mb

            # 3. Train the network on this minibatch
            #    Be advised that the errors shown by Keras on the training
            #    set are really for this minibatch.
            print('\tTRAINING MODEL')
            model.fit(X_train_mb, Y_train_mb_new, nb_epoch=1,
                    batch_size=inner_batch_size, show_accuracy=True, verbose=1,
                    validation_data=(X_val,Y_val))

            if output_activation == 'isotonic_regression':
                # 4. Calibrate the network with isotonic regression in the full training
                #   a. Get the new scores from the model
                print('\tModel predict training scores')
                score_train = model.predict(X_train).flatten()
                #   b. Calibrate the scores
                print('\tLearning Isotonic Regression from TRAINING set')
                ir.fit(score_train, Y_train)

        # Evaluate epoch on the full training and validation set
        # 5. Evaluate the performance with the calibrated probabilities
        print('\tModel predict training scores')
        score_train = model.predict(X_train).flatten()
        print('\tModel predict validation scores')
        score_val = model.predict(X_val).flatten()
        if output_activation == 'isotonic_regression':
            #   a. Evaluation on TRAINING set
            print('\tIR predict training probabilities')
            prob_train = ir.predict(score_train.flatten())
            #   b. Evaluation on VALIDATION set
            print('\tIR predict validation probabilities')
            prob_val  = ir.predict(score_val.flatten())
        else:
            prob_train = score_train
            prob_val = score_val

        error_train[epoch] = compute_loss(prob_train, Y_train, loss)
        accuracy_train[epoch] = compute_accuracy(prob_train, Y_train)
        error_val[epoch] = compute_loss(prob_val, Y_val, loss)
        accuracy_val[epoch] = compute_accuracy(prob_val, Y_val)
        # SHOW PERFORMANCE ON MINIBATCH
        print(("\ttrain:  error = {}, acc = {}\n"
               "\tvalid:  error = {}, acc = {}").format(
                            error_train[epoch], accuracy_train[epoch],
                            error_val[epoch], accuracy_val[epoch]))

        # SAVE PERFORMANCE ON epoch
        diary.add_entry('training', [error_train[epoch], accuracy_train[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        # PLOTS
        print('\tUpdating all plots')
        if output_activation == 'isotonic_regression':
            prob_lin = ir.predict(score_lin)
            plot_reliability_diagram(prob_train, Y_train, prob_val, Y_val, epoch,
                                     score_lin=score_lin, prob_lin=prob_lin)
        else:
            plot_reliability_diagram(prob_train, Y_train, prob_val, Y_val, epoch)
        diary.save_figure(plt, filename='reliability_diagram', extension='svg')
        plot_histogram_scores(prob_train, epoch)
        diary.save_figure(plt, filename='histogram_scores', extension='svg')
        plot_accuracy(accuracy_train, accuracy_val, epoch)
        diary.save_figure(plt, filename='accuracy', extension='svg')
        plot_error(error_train, error_val, epoch, loss)
        diary.save_figure(plt, filename='error', extension='svg')
        plt.pause(0.0001)
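The training loop above calls isotonic_gradients(ir, ...), which is not defined in this snippet. Below is a hypothetical finite-difference sketch of what such a helper might compute (an assumption, not the original implementation): the local slope of the fitted isotonic map at the given points.

import numpy as np

def isotonic_gradients(ir, points, eps=1e-4):
    # Hypothetical helper: central finite difference of the fitted isotonic
    # mapping, evaluated inside the fitted range to avoid boundary issues.
    lo = ir.predict(np.clip(points - eps, ir.X_min_, ir.X_max_))
    hi = ir.predict(np.clip(points + eps, ir.X_min_, ir.X_max_))
    return (hi - lo) / (2.0 * eps)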
Example #42
def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=[500, 500]):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """
    if add_noise==True:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize,
                             noise_prop=noise_proportion)
    else:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    print('Showing data samples')
    if nb_classes == 2:
        labels = ['odd', 'even']
    else:
        labels = [0,1,2,3,4,5,6,7,8,9]
    imshow_samples(train_set_x.get_value(), train_set_y,
            valid_set_x.get_value(), valid_set_y, num_samples=4, labels=labels)
    plt.pause(0.0001)
    diary.save_figure(plt, filename='samples', extension='svg')

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=nb_classes
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_error_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the predictions on the
    # training data
    training_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # compiling a Theano function that computes the predictions on the
    # training data
    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    error_tra = np.zeros(n_epochs+1)
    error_val = np.zeros(n_epochs+1)
    accuracy_tra = np.zeros(n_epochs+1)
    accuracy_val = np.zeros(n_epochs+1)

    epoch = 0
    # Error in accuracy
    Y_train = train_set_y.eval()
    Y_valid = valid_set_y.eval()

    print('Model predict training scores')
    score_train = np.asarray([training_scores_model(i) for i
        in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]

    if output_activation == 'isotonic_regression':
        # 4. Calibrate the network with isotonic regression in the full training
        ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                                y_min=_EPSILON, y_max=(1-_EPSILON))
        #   b. Calibrate the scores
        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(score_train, Y_train)

    # 5. Evaluate the performance with probabilities
    #   b. Evaluation on validation set
    print('Model predict validation scores')
    score_val = np.asarray([validation_scores_model(i) for i
        in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
    if output_activation == 'isotonic_regression':
        prob_train = ir.predict(score_train)
        print('IR predict validation probabilities')
        prob_val  = ir.predict(score_val)
    else:
        prob_train = score_train
        prob_val = score_val

    error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
    error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
    accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
    accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

    diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
    diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

        # Error in accuracy
        #training_loss = [training_loss_model(i) for i
        #                     in range(n_train_batches)]
        #validation_loss = [validation_loss_model(i) for i
        #                     in range(n_valid_batches)]
        #error_tra[epoch] = numpy.mean(training_loss)
        #error_val[epoch] = numpy.mean(validation_loss)
        #training_acc = [training_accuracy_model(i) for i
        #                     in range(n_train_batches)]
        #validation_acc = [validation_accuracy_model(i) for i
        #                     in range(n_valid_batches)]
        #accuracy_tra[epoch] = numpy.mean(training_acc)
        #accuracy_val[epoch] = numpy.mean(validation_acc)

        print('Model predict training scores')
        score_train = np.asarray([training_scores_model(i) for i
            in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]

        if output_activation == 'isotonic_regression':
            # 4. Calibrate the network with isotonic regression in the full training
            #   b. Calibrate the scores
            print('Learning Isotonic Regression from TRAINING set')
            ir.fit(score_train, Y_train)

        # 5. Evaluate the performance with probabilities
        #   b. Evaluation on validation set
        print('Model predict validation scores')
        score_val = np.asarray([validation_scores_model(i) for i
            in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
        if output_activation == 'isotonic_regression':
            prob_train = ir.predict(score_train)
            print('IR predict validation probabilities')
            prob_val  = ir.predict(score_val)
        else:
            prob_train = score_train
            prob_val = score_val

        error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
        error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
        accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
        accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

        diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        plot_error(error_tra, error_val, epoch, 'loss')
        diary.save_figure(plt, filename='error', extension='svg')
        plot_accuracy(accuracy_tra, accuracy_val, epoch)
        diary.save_figure(plt, filename='accuracy', extension='svg')
        if nb_classes == 2:
            #prob_train = np.asarray([training_scores_model(i) for i
            #                 in range(n_train_batches)]).reshape(-1,nb_classes)
            #prob_val = np.asarray([validation_scores_model(i) for i
            #                 in range(n_valid_batches)]).reshape(-1,nb_classes)
            if output_activation == 'isotonic_regression':
                prob_lin = ir.predict(score_lin)
                plot_reliability_diagram(prob_train, Y_train,
                                     prob_val, Y_valid, epoch,
                                     prob_lin, score_lin)
            else:
                plot_reliability_diagram(prob_train, Y_train,
                                     prob_val, Y_valid, epoch)
            diary.save_figure(plt, filename='reliability_diagram', extension='svg')
            plot_histogram_scores(prob_train, prob_val, epoch=epoch)
            diary.save_figure(plt, filename='histogram_scores', extension='svg')
        #from IPython import embed
        #embed()
        plt.pause(0.0001)

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
def sgd_optimization_gauss(learning_rate=0.13, n_epochs=1000,
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    """
    #datasets = load_data(dataset)

    diary = Diary(name='experiment', path='results')
    diary.add_notebook('training')
    diary.add_notebook('validation')

    diary.add_notebook('data')
    samples=[4000,10000]
    diary.add_entry('data', ['samples', samples])
    diary.add_entry('data', ['num_classes', len(samples)])
    diary.add_entry('data', ['batch_size', batch_size])

    #means=[[0,0],[5,5]]
    #cov=[[[1,0],[0,1]],[[3,0],[0,3]]]
    #diary.add_entry('data', ['means', means])
    #diary.add_entry('data', ['covariance', cov])
    #datasets = generate_gaussian_data(means, cov, samples)
    datasets = generate_opposite_cs_data(samples)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    diary.add_entry('data', ['train_size', len(train_set_y.eval())])
    diary.add_entry('data', ['valid_size', len(valid_set_y.eval())])
    diary.add_entry('data', ['test_size', len(test_set_y.eval())])

    pt = PresentationTier()
    pt.plot_samples(train_set_x.eval(), train_set_y.eval())

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    delta = 20
    x_min = numpy.min(train_set_x.eval(),axis=0)
    x_max = numpy.max(train_set_x.eval(),axis=0)
    x1_lin = numpy.linspace(x_min[0], x_max[0], delta)
    x2_lin = numpy.linspace(x_min[1], x_max[1], delta)

    MX1, MX2 = numpy.meshgrid(x1_lin, x2_lin)
    x_grid = numpy.asarray([MX1.flatten(),MX2.flatten()]).T
    grid_set_x = theano.shared(numpy.asarray(x_grid,
                                             dtype=theano.config.floatX),
                               borrow=True)
    n_grid_batches = grid_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as a matrix of feature vectors
    y = T.ivector('y')  # the labels are presented as a 1D vector of
                        # [int] labels

    # construct the logistic regression classifier;
    # input and output dimensions are inferred from the data
    n_in = train_set_x.eval().shape[-1]
    n_out = max(train_set_y.eval()) + 1
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Scores
    grid_scores_model = theano.function(inputs=[],
            outputs=classifier.scores(),
            givens={
                x: grid_set_x})

    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    # compute the gradient of cost with respect to theta = (W,b)
    g_w = T.grad(cost=cost, wrt=classifier.w)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.w, classifier.w - learning_rate * g_w),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # Accuracy
    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Loss
    training_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    validation_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many
                                   # minibatches before checking the network
                                   # on the validation set; in this case we
                                   # check every epoch

    print('Creating error and accuracy vectors')
    error_train  = numpy.zeros(n_epochs+1)
    error_val = numpy.zeros(n_epochs+1)
    accuracy_train = numpy.zeros(n_epochs+1)
    accuracy_val = numpy.zeros(n_epochs+1)
    # Results for Isotonic Regression
    error_train_ir  = numpy.zeros(n_epochs+1)
    error_val_ir = numpy.zeros(n_epochs+1)
    accuracy_train_ir = numpy.zeros(n_epochs+1)
    accuracy_val_ir = numpy.zeros(n_epochs+1)

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                            y_min=0, y_max=1)
    done_looping = False
    epoch = 0
    CS = None
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

        scores_grid = grid_scores_model()
        fig = pt.update_contourline(grid_set_x.eval(), scores_grid, delta)
        diary.save_figure(fig, filename='contour_lines', extension='svg')
        scores_train = numpy.asarray([training_scores_model(i) for i
                                    in range(n_train_batches)]).flatten()
        scores_val = numpy.asarray([validation_scores_model(i) for i
                                  in range(n_valid_batches)]).flatten()

        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(scores_train, train_set_y.eval())
        scores_train_ir = ir.predict(scores_train)
        print('IR predict validation probabilities')
        scores_val_ir  = ir.predict(scores_val)

        scores_set = (scores_train, scores_val, scores_train_ir,
                      scores_val_ir)
        labels_set = (train_set_y.eval(), valid_set_y.eval(),
                      train_set_y.eval(), valid_set_y.eval())
        legend = ['train', 'valid', 'iso. train', 'iso. valid']
        fig = pt.plot_reliability_diagram(scores_set, labels_set, legend)
        diary.save_figure(fig, filename='reliability_diagram', extension='svg')

        # Reliability map of training scores against empirical probabilities
        scores_set = (scores_train,)
        prob_set = (train_set_y.eval(),)
        fig = pt.plot_reliability_map(scores_set, prob_set, legend[:1])
        diary.save_figure(fig, filename='reliability_map', extension='svg')

        fig = pt.plot_histogram_scores(scores_set)
        diary.save_figure(fig, filename='histogram_scores', extension='svg')

        # Performance
        accuracy_train[epoch] = numpy.asarray([training_accuracy_model(i) for i
                                in range(n_train_batches)]).flatten().mean()
        accuracy_val[epoch] = numpy.asarray([validation_accuracy_model(i) for i
                                in range(n_valid_batches)]).flatten().mean()
        error_train[epoch] = numpy.asarray([training_error_model(i) for i
                                in range(n_train_batches)]).flatten().mean()
        error_val[epoch] = numpy.asarray([validation_error_model(i) for i
                               in range(n_valid_batches)]).flatten().mean()

        accuracy_train_ir[epoch] = compute_accuracy(scores_train_ir, train_set_y.eval())
        accuracy_val_ir[epoch] = compute_accuracy(scores_val_ir, valid_set_y.eval())
        error_train_ir[epoch]  = compute_cross_entropy(scores_train_ir, train_set_y.eval())
        error_val_ir[epoch]  = compute_cross_entropy(scores_val_ir, valid_set_y.eval())

        diary.add_entry('training', [error_train[epoch], accuracy_train[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        accuracy_set = (accuracy_train[1:epoch], accuracy_val[1:epoch],
                        accuracy_train_ir[1:epoch], accuracy_val_ir[1:epoch])
        fig = pt.plot_accuracy(accuracy_set, legend)
        diary.save_figure(fig, filename='accuracy', extension='svg')

        error_set = (error_train[1:epoch], error_val[1:epoch],
                     error_train_ir[1:epoch], error_val_ir[1:epoch])
        fig = pt.plot_error(error_set, legend, 'cross-entropy')
        diary.save_figure(fig, filename='error', extension='svg')


    pt.update_contourline(grid_set_x.eval(), scores_grid, delta,
            clabel=True)
    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
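
The training loop above uses the classic patience-based early-stopping scheme: patience counts minibatch iterations, every new best validation loss that improves on the old best by at least the factor improvement_threshold stretches patience by patience_increase, and training stops once the iteration counter catches up with patience. A minimal sketch of that bookkeeping in isolation, assuming a precomputed sequence of validation losses (the function name and arguments are illustrative, not from the original):

def patience_early_stopping(val_losses, patience=5000, patience_increase=2,
                            improvement_threshold=0.995):
    """Return the iteration index at which training would stop, or None."""
    best = float('inf')
    for it, loss in enumerate(val_losses):
        if loss < best:
            # a sufficiently large improvement extends the patience window
            if loss < best * improvement_threshold:
                patience = max(patience, it * patience_increase)
            best = loss
        if patience <= it:
            return it
    return None
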
Example #44
0
            sum_x = 0.0
            sum_y = 0.0
            for j in range(len(learner.predictions[i])):
                sum_x += learner.predictions[i][j][0]
                sum_y += learner.predictions[i][j][1]

            if sum_x > 0:
                regression_x.append(sum_x/max(1, len(learner.predictions[i])))
                regression_y.append(sum_y/max(1, len(learner.predictions[i])))
        regression_x.append(1.0)
        regression_y.append(1.0)

        ir = IsotonicRegression(increasing=True)

        fit = ir.fit(regression_x, regression_y)
        y_ = ir.predict(regression_x)
        plt.plot(regression_x, regression_y, 'g.', markersize=12)
        plt.plot(regression_x, y_, 'r-', markersize=5)
        plt.show()

        #learner.validation_set=zip(learner.validation_set)
        predictions_calibrated = ir.predict(learner.validation_set[0]).tolist()
        # work on a copy so the blending below does not mutate the validation set
        predictions_combined = list(learner.validation_set[0])

        for i in range(len(predictions_combined)):
            if 0.2 < predictions_combined[i] < 0.8:
                predictions_combined[i] = predictions_calibrated[i]

        loss_combined = logloss_arr(predictions_combined, learner.validation_set[1]) / len(learner.validation_set[1])
        loss_calibrated = logloss_arr(predictions_calibrated, learner.validation_set[1]) / len(learner.validation_set[1])
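
Example #44 above swaps in the isotonic-calibrated value only when the raw score falls in the uncertain mid-range (between 0.2 and 0.8), keeping confident raw scores untouched. A compact sketch of that blending rule (the thresholds mirror the snippet; the function name is illustrative):

def blend_mid_range(raw_preds, calibrated_preds, lo=0.2, hi=0.8):
    """Use the calibrated value only where the raw score is uncertain."""
    return [cal if lo < raw < hi else raw
            for raw, cal in zip(raw_preds, calibrated_preds)]
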
Example #45
0
                , 'Class_5' : y_5
                , 'Class_6' : y_6
                , 'Class_7' : y_7
                , 'Class_8' : y_8
                , 'Class_9' : y_9
                
                })
                
cols = cv10fold.calibrated.columns.tolist()
cols = cols[-1:] + cols[:-1]
cv10fold.calibrated = cv10fold.calibrated[cols]

#for validation purposes
cv10fold.calibrated.to_csv('csvs\\cv10fold.calibrated.csv', index=False)

# .iloc replaces the long-deprecated .ix indexer
yt_1 = ir1.predict(test.iloc[:, 0])
yt_2 = ir2.predict(test.iloc[:, 1])
yt_3 = ir3.predict(test.iloc[:, 2])
yt_4 = ir4.predict(test.iloc[:, 3])
yt_5 = ir5.predict(test.iloc[:, 4])
yt_6 = ir6.predict(test.iloc[:, 5])
yt_7 = ir7.predict(test.iloc[:, 6])
yt_8 = ir8.predict(test.iloc[:, 7])
yt_9 = ir9.predict(test.iloc[:, 8])

test.calibrated = pd.DataFrame({'id' : testId
                    , 'Class_1' : yt_1
                    , 'Class_2' : yt_2
                    , 'Class_3' : yt_3
                    , 'Class_4' : yt_4
                    , 'Class_5' : yt_5
                    , 'Class_6' : yt_6
                    , 'Class_7' : yt_7
                    , 'Class_8' : yt_8
                    , 'Class_9' : yt_9
                    })
Example #46
0
                1,0,0,0,0,0,1,1,1,1,
                1,0,0,0,0,0,1,1,1,1,
                1,0,0,0,0,0,1,1,1,1,
                1,0,0,0,0,0,1,1,1,1,
                1,0,0,0,0,0,1,1,1,1])

print('Learning Isotonic Regression')
ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                        y_min=_EPSILON, y_max=(1-_EPSILON))
ir.fit(S, Y)
print('Learning Logistic Regression')
lr = LogisticRegression(C=1., solver='lbfgs')
lr.fit(S.reshape(-1,1), Y)



scores_set = [S, ir.predict(S), lr.predict_proba(S.reshape(-1,1))[:,1]]
labels_set = [Y, Y, Y]
legend = ['Y', 'IR', 'LR']

pt = PresentationTier()
fig = pt.plot_reliability_diagram(scores_set, labels_set, legend,
        original_first=True, alpha=alpha)

scores_lin = np.linspace(0,1,100)
scores_set = [S, scores_lin, scores_lin]
prob_set = [Y, ir.predict(scores_lin),
            lr.predict_proba(scores_lin.reshape(-1,1))[:,1]]
fig = pt.plot_reliability_map(scores_set, prob_set, legend,
        original_first=True, alpha=alpha)
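
The snippet above fits an isotonic and a logistic (Platt-style) calibrator on the same scores and compares their reliability maps. A self-contained sketch of computing the two calibration maps over a score grid, using synthetic data and omitting the plotting:

import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
S = rng.rand(200)                      # synthetic scores in [0, 1]
Y = (rng.rand(200) < S).astype(int)    # labels correlated with the scores

ir = IsotonicRegression(out_of_bounds='clip', y_min=0, y_max=1).fit(S, Y)
lr = LogisticRegression(C=1., solver='lbfgs').fit(S.reshape(-1, 1), Y)

grid = np.linspace(0, 1, 100)
iso_map = ir.predict(grid)                             # stepwise, monotone
log_map = lr.predict_proba(grid.reshape(-1, 1))[:, 1]  # smooth sigmoid

The isotonic map is piecewise constant and can fit any monotone shape, while the logistic map is constrained to a sigmoid; which behaves better depends mostly on how much data is available for calibration.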
Example #47
0
def IsotonicRegression_pred(y_train, predictions_train, test_preds, bin_step, y_test):
    # Sort the training predictions (X) and reorder the training targets (Y)
    # by the same indexes, so the isotonic fit sees matched (X, Y) pairs.
    # y_train_len=len(y_train)

    # if bin_step<1:
    #     step_count = 1/bin_step
    # else:
    #     step_count = int(math.floor(y_train_len/bin_step))

    # step_element_count = int(math.floor(y_train_len/step_count))

    # bin_start_indexes=np.array(range(0,step_count))*step_element_count

    predictions_np = np.array(predictions_train, float)
    predictions_sorted = np.sort(predictions_np)
    predictions_sorted_indexes = predictions_np.argsort()

    y_train_arranged = np.array(y_train, float)[predictions_sorted_indexes].ravel()
    # not_binned_y_train_arranged         =   y_train_arranged[:]

    # for index in range(len(bin_start_indexes)-1):
    #     pin  = bin_start_indexes[index]
    #     pend = bin_start_indexes[index+1]
    #     y_train_arranged[pin:pend] = np.average(y_train_arranged[pin:pend])
    # if bin_start_indexes[-1]<y_train_len:
    #     pin  = bin_start_indexes[-1]
    #     pend = y_train_len
    #     y_train_arranged[pin:pend] = np.average(y_train_arranged[pin:pend])

    ir = IsotonicRegression()

    y_ir = ir.fit_transform(predictions_sorted, y_train_arranged)
    y_ir_pred = ir.predict(predictions_sorted)

    # print "min(y_train_arranged)    :",    min(y_train_arranged)
    # print "max(y_train_arranged)    :",    max(y_train_arranged)
    # print "min(predictions_sorted)  :",    min(predictions_sorted)
    # print "max(predictions_sorted)  :",    max(predictions_sorted)
    # print "min(test_preds)          :",    min(test_preds)
    # print "max(test_preds)          :",    max(test_preds)
    # if max(test_preds)>=max(y_train_arranged):
    # np.arrya(test_preds>max(y_train_arranged))==True

    # cap test predictions at the maximum training target so the isotonic
    # model is not queried far above the range it was fitted on
    max_indexes = np.array((np.where(test_preds > max(y_train_arranged))), int).ravel()
    if len(max_indexes) != 0:
        for m_i in max_indexes:
            test_preds[m_i] = max(y_train_arranged)

    test_preds_sorted = np.sort(np.array(test_preds))  # note: computed but unused below

    predictions_ir = ir.predict(test_preds)

    # with the default out_of_bounds='nan', test inputs outside the training
    # range come back as NaN; patch them with the smallest finite prediction
    ind = np.where(np.isnan(predictions_ir))[0]
    preds_test_min = np.nanmin(predictions_ir)
    if len(ind) != 0:
        for i in ind:
            predictions_ir[i] = preds_test_min

    # ==============WRITING TO CSV================
    # d_train={'y_train'          :np.array(y_train,float)[predictions_sorted_indexes].ravel(),
    #          'y_train_bin'      :np.array(y_train_arranged).ravel(),
    #          'train_preds'      :np.array(predictions_sorted).ravel(),
    #          'train_preds_ir'   :y_ir}

    # df_train=pd.DataFrame(d_train)
    # df_train.to_csv("train_IR.csv")

    # d_test={'y_test'            :np.array(y_test).ravel(),
    #         'test_preds'        :np.array(test_preds).ravel(),
    #         'test_preds_ir'     :predictions_ir}
    # df_test=pd.DataFrame(d_test)
    # df_test.to_csv("test_IR.csv")

    # score_test_ir=ir.score(test_preds,y_test)
    score_test_ir = 0

    return predictions_ir, y_ir_pred, ir.get_params(deep=True), score_test_ir
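
The NaN patching and manual capping in Example #47 can usually be avoided by constructing the regressor with out_of_bounds='clip', which maps out-of-range inputs to the boundary fitted values instead of returning NaN. A minimal sketch of the difference on toy data:

import numpy as np
from sklearn.isotonic import IsotonicRegression

X = np.array([1., 2., 3., 4.])
y = np.array([1., 3., 2., 4.])

ir_nan = IsotonicRegression()                       # default out_of_bounds='nan'
ir_clip = IsotonicRegression(out_of_bounds='clip')  # clamp to boundary values

ir_nan.fit(X, y)
ir_clip.fit(X, y)

print(ir_nan.predict(np.array([0., 5.])))   # [nan nan]
print(ir_clip.predict(np.array([0., 5.])))  # boundary fitted values, no NaN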