Example #1
    def test_icp_classification_tree(self):
        # -----------------------------------------------------------------------------
        # Setup training, calibration and test indices
        # -----------------------------------------------------------------------------
        data = load_iris()

        idx = np.random.permutation(data.target.size)
        train = idx[:int(idx.size / 3)]
        calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
        test = idx[int(2 * idx.size / 3):]

        # -----------------------------------------------------------------------------
        # Train and calibrate
        # -----------------------------------------------------------------------------
        icp = IcpClassifier(
            ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                         MarginErrFunc()))
        icp.fit(data.data[train, :], data.target[train])
        icp.calibrate(data.data[calibrate, :], data.target[calibrate])

        # -----------------------------------------------------------------------------
        # Predict
        # -----------------------------------------------------------------------------
        prediction = icp.predict(data.data[test, :], significance=0.1)
        header = np.array(["c0", "c1", "c2", "Truth"])
        table = np.vstack([prediction.T, data.target[test]]).T
        df = pd.DataFrame(np.vstack([header, table]))
        print(df)
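The matrix returned by predict is a Boolean mask over the classes; for readability, each row can also be rendered as an explicit label set next to the true class. A small sketch, with the class names taken from the header above:

label_sets = [
    {c for c, flag in zip(("c0", "c1", "c2"), row) if flag}
    for row in prediction
]
for labels, truth in list(zip(label_sets, data.target[test]))[:5]:
    print(labels, "truth:", truth)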
Example #2

    def ccp_predict(self, data_lbld, data_unlbld, new_lbld):

        # Create SMOTE instance for class rebalancing
        smote = SMOTE(random_state=self.random_state)

        # Create instance of classifier
        classifier_y = self.classifiers['classifier_y']
        parameters_y = self.clf_parameters['classifier_y']

        clf = classifier_y.set_params(**parameters_y)

        X = data_lbld.iloc[:, :-2]
        y = data_lbld.iloc[:, -1]

        X_new = new_lbld.iloc[:, :-2]
        y_new = new_lbld.iloc[:, -1]

        # pandas>=2.0 removed DataFrame.append; use pd.concat instead
        X = pd.concat([X, X_new], sort=False)
        y = pd.concat([y, y_new])

        X_unlbld = data_unlbld.iloc[:, :-2]

        # shuffle is required when random_state is set
        sss = StratifiedKFold(n_splits=5, shuffle=True,
                              random_state=self.random_state)
        sss.get_n_splits(X, y)

        p_values = []

        for train_index, calib_index in sss.split(X, y):
            X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
            y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

            if self.rebalancing_parameters['SMOTE_y']:
                X_train, y_train = smote.fit_resample(X_train, y_train)
                # Recent imblearn returns DataFrames for DataFrame input;
                # convert so the positional slicing below works either way.
                X_train = np.asarray(X_train)
                clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
            else:
                clf.fit(X_train.iloc[:, :-1],
                        y_train,
                        sample_weight=X_train.iloc[:, -1])

            nc = NcFactory.create_nc(clf, MarginErrFunc())
            icp = IcpClassifier(nc)

            if self.rebalancing_parameters['SMOTE_y']:
                icp.fit(X_train[:, :-1], y_train)
            else:
                icp.fit(X_train.iloc[:, :-1].values, y_train)

            icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

            # Predict p-values for the unlabeled sample
            p_values.append(
                icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

        mean_p_values = np.array(p_values).mean(axis=0)
        ccp_predictions = pd.DataFrame(mean_p_values,
                                       columns=['mean_p_0', 'mean_p_1'])
        ccp_predictions["credibility"] = [
            row.max() for _, row in ccp_predictions.iterrows()
        ]
        ccp_predictions["confidence"] = [
            1 - row.min() for _, row in ccp_predictions.iterrows()
        ]

        ccp_predictions.index = X_unlbld.index

        return ccp_predictions
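Judging from the slicing above, ccp_predict expects frames laid out as [feature columns..., sample weight, one unused column, target], and data_unlbld shares that layout. A hedged sketch of matching inputs; the column names, sizes, and some_model are all hypothetical:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
cols = ["f1", "f2", "weight", "aux", "label"]

def make_frame(n):
    # [features..., per-row sample weight, unused column, binary target]
    return pd.DataFrame(
        np.column_stack([rng.randn(n, 2), np.ones(n),
                         np.zeros(n), rng.randint(0, 2, n)]),
        columns=cols)

data_lbld = make_frame(60)
new_lbld = make_frame(10)
data_unlbld = make_frame(20)    # its last two columns are simply ignored

# preds = some_model.ccp_predict(data_lbld, data_unlbld, new_lbld)
# preds columns: mean_p_0, mean_p_1, credibility, confidence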
Example #3
import numpy as np

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.acp import AggregatedCp, CrossConformalClassifier, CrossSampler
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, inverse_probability

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()
X, y = data.data, data.target

idx = np.random.permutation(y.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability))
icp.fit(X[train, :], y[train])
icp.calibrate(X[calibrate, :], y[calibrate])

ccp = CrossConformalClassifier(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)))
ccp.fit(X[train, :], y[train])

acp = AggregatedCp(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)),
    CrossSampler())
acp.fit(X[train, :], y[train])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
print('# Inductive')
prediction = icp.predict(X[test, :], significance=0.1)
for pred, actual in zip(prediction[:5], y[test]):
    print(pred, actual)

print('\n# Cross')
prediction = ccp.predict(X[test, :], significance=0.1)
for pred, actual in zip(prediction[:5], y[test]):
    print(pred, actual)

print('\n# Aggre')
prediction = acp.predict(X[test, :], significance=0.1)
for pred, actual in zip(prediction[:5], y[test]):
    print(pred, actual)
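All three predictors also accept significance=None, returning one p-value per class instead of a Boolean region; regions at any level can then be recovered by thresholding. A minimal sketch:

p_values = icp.predict(X[test, :], significance=None)
print(p_values[:5])                # one p-value per class and test row
regions = p_values > 0.05          # same as predict(..., significance=0.05)
print(regions[:5])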
Example #4
    data.loc[data['target'] < 0, 'target'] = 0
    labels = data['id']
    ll = len(labels)
    target = data['target'].values
    test = data.drop(['id'], axis=1)
    test = test.drop(['target'], axis=1).values

    f = open(modelfile, mode='rb')
    nmodels_built = cloudpickle.load(f)
    print("Models built:", nmodels_built)
    if nmodels > nmodels_built:
        print(
            "More models requested (", nmodels,
            ") than the file contains. Using the", nmodels_built,
            "models that were built."
        )
        nmodels = nmodels_built

    for xx in range(1, nmodels + 1):
        print("Predicting from model", xx)
        modelfile2 = infile + "_nonconf" + "_" + str(xx) + ".model"
        num = [xx] * ll
        icp_norm = cloudpickle.load(f)
        predicted = icp_norm.predict(test)
        predicted0 = [x[0] for x in predicted]
        predicted1 = [x[1] for x in predicted]
        writeOutListsApp(outfile,
                         [labels, predicted0, predicted1, target, num])
    f.close()

print(" - finished\n")
Example #5

import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[: int(idx.size / 3)]
calibrate = idx[int(idx.size / 3) : int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
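Beyond the region table, the same fitted predictor can report the usual confidence and credibility summaries from its raw p-values. A short sketch:

p = icp.predict(data.data[test, :], significance=None)
sorted_p = np.sort(p, axis=1)
credibility = sorted_p[:, -1]        # largest p-value
confidence = 1 - sorted_p[:, -2]     # one minus the second-largest p-value
print(pd.DataFrame({"confidence": confidence,
                    "credibility": credibility}).head())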
Example #6
import numpy as np
import Orange

from sklearn.linear_model import LogisticRegression

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin


def split_data(data, n_train, n_test):
    # n_train and n_test are interpreted as a ratio, not absolute counts.
    n_train = n_train * len(data) // (n_train + n_test)
    n_test = len(data) - n_train
    ind = np.random.permutation(len(data))
    return data[ind[:n_train]], data[ind[n_train:n_train + n_test]]


#data = Orange.data.Table("../data/usps.tab")
data = Orange.data.Table("iris")

for sig in np.linspace(0.0, 0.4, 11):
    errs, szs = [], []
    for rep in range(10):
        #train, test = split_data(data, 7200, 2098)
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        #icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        #icp = ICP()
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)

        # Orange stores class values as floats; cast before indexing.
        acc = sum(p[int(y)] for p, y in zip(pred, test.Y)) / len(pred)
        err = 1 - acc
        sz = sum(sum(p) for p in pred) / len(pred)
        errs.append(err)
        szs.append(sz)
    print(sig, np.mean(errs), np.mean(szs))
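For a valid conformal predictor, the printed error column should track the significance level. A sketch that turns the same sweep into a calibration plot; matplotlib is assumed to be available:

import matplotlib.pyplot as plt

sigs = np.linspace(0.0, 0.4, 11)
mean_errs = []
for sig in sigs:
    errs = []
    for rep in range(10):
        tr, te = split_data(data, 2, 1)
        tr, ca = split_data(tr, 2, 1)
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        icp.fit(tr.X, tr.Y)
        icp.calibrate(ca.X, ca.Y)
        pred = icp.predict(te.X, significance=sig)
        errs.append(1 - sum(p[int(y)] for p, y in zip(pred, te.Y)) / len(pred))
    mean_errs.append(np.mean(errs))

plt.plot(sigs, mean_errs, marker='o', label='empirical error')
plt.plot([0, 0.4], [0, 0.4], '--', label='target (significance)')
plt.xlabel('significance')
plt.ylabel('error rate')
plt.legend()
plt.show()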
Example #7

import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
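As a sanity check on the table, the regions can also be scored directly; at significance 0.1 roughly 90% of test rows should contain the true class. A minimal sketch:

truth = data.target[test]
contains_truth = prediction[np.arange(truth.size), truth]
print("empirical coverage:", contains_truth.mean())    # ~0.9 expected
print("mean region size:", prediction.sum(axis=1).mean())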