Ejemplo n.º 1
0
    def test_icp_classification_tree(self):
        """Smoke-test an inductive conformal classifier over a decision tree."""
        # ---------------------------------------------------------------------
        # Shuffle the iris instances and split the indices into thirds:
        # proper training set, calibration set, and test set.
        # ---------------------------------------------------------------------
        data = load_iris()

        shuffled = np.random.permutation(data.target.size)
        two_thirds = int(2 * shuffled.size / 3)
        train = shuffled[:int(shuffled.size / 3)]
        calibrate = shuffled[int(shuffled.size / 3):two_thirds]
        test = shuffled[two_thirds:]

        # ---------------------------------------------------------------------
        # Fit the underlying model on the training fold, then calibrate the
        # nonconformity scores on the held-out calibration fold.
        # ---------------------------------------------------------------------
        nc_func = ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                               MarginErrFunc())
        icp = IcpClassifier(nc_func)
        icp.fit(data.data[train, :], data.target[train])
        icp.calibrate(data.data[calibrate, :], data.target[calibrate])

        # ---------------------------------------------------------------------
        # Region predictions at significance 0.1, tabulated beside the truth.
        # ---------------------------------------------------------------------
        prediction = icp.predict(data.data[test, :], significance=0.1)
        header = np.array(["c0", "c1", "c2", "Truth"])
        table = np.vstack([prediction.T, data.target[test]]).T
        df = pd.DataFrame(np.vstack([header, table]))
        print(df)
Ejemplo n.º 2
0
    def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
        """Split the unlabeled pool using conformal confidence/credibility.

        Fits an inductive conformal classifier on the initial labeled data
        plus any newly labeled data, calibrates it on the stored calibration
        set, then partitions ``unlabeled_data_x`` into rows that clear both
        the ``config.confidence`` and ``config.credibility`` thresholds
        (returned with model-derived labels) and rows that do not.
        """
        # just append train data to labeled data
        labeled_x = np.concatenate((self.init_labeled_data_x, labeled_data_x)) \
            if len(labeled_data_x) > 0 else self.init_labeled_data_x
        # NOTE(review): this guard also tests len(labeled_data_x), not
        # len(labeled_data_y); safe only if x and y always have equal length
        # -- confirm with callers.
        labeled_y = np.concatenate((self.init_labeled_data_y, labeled_data_y)) \
            if len(labeled_data_x) > 0 else self.init_labeled_data_y
        #

        # create model to predict with confidence and credibility
        model = ClassifierAdapter(
            DecisionTreeClassifier(random_state=config.random_state,
                                   min_samples_leaf=config.min_samples_leaf))
        nc = ClassifierNc(model, MarginErrFunc())
        model_icp = IcpClassifier(nc, smoothing=True)
        model_icp.fit(labeled_x, labeled_y)
        model_icp.calibrate(self.calibration_data_x, self.calibration_data_y)
        # presumably each row of s is (label, confidence, credibility) --
        # the indexing below relies on that layout.
        s = model_icp.predict_conf(unlabeled_data_x)
        print(s)
        #

        # selection method
        # Accept rows that clear BOTH thresholds; reject rows that miss
        # either.  NOTE(review): rows exactly equal to a threshold land in
        # neither list -- confirm that is intended.
        labeled_ind = [
            i for i, a in enumerate(s)
            if a[1] > config.confidence and a[2] > config.credibility
        ]
        unlabeled_ind = [
            i for i, a in enumerate(s)
            if a[1] < config.confidence or a[2] < config.credibility
        ]

        # NOTE(review): np.take(s.T, labeled_ind) passes no axis, so it
        # indexes the *flattened* transpose; it happens to pick column-0
        # (label) entries only because of s.T's layout -- verify this is the
        # intended selection of predicted labels.
        labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x = \
            np.take(unlabeled_data_x, labeled_ind, axis=0), np.take(s.T, labeled_ind), np.take(unlabeled_data_x,
                                                                                               unlabeled_ind, axis=0)
        #

        return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
Ejemplo n.º 3
0
    def test_confidence_credibility(self):
        """Smoke-test predict_conf() of an ICP over a random forest."""
        data = load_iris()
        x, y = data.data, data.target

        # Re-encode class labels as consecutive integers 0..k-1.
        for class_idx, label in enumerate(np.unique(y)):
            y[y == label] = class_idx

        # Shuffle and split the instance indices into three equal folds.
        n_instances = y.size
        order = np.random.permutation(n_instances)

        third = int(n_instances / 3)
        train_idx = order[:third]
        cal_idx = order[third:2 * third]
        test_idx = order[2 * third:]

        icp = IcpClassifier(
            ClassifierNc(ClassifierAdapter(RandomForestClassifier())))

        icp.fit(x[train_idx, :], y[train_idx])
        icp.calibrate(x[cal_idx, :], y[cal_idx])

        # One (label, confidence, credibility) row per test instance.
        conf_table = pd.DataFrame(
            icp.predict_conf(x[test_idx, :]),
            columns=["Label", "Confidence", "Credibility"])
        print(conf_table)
    def ccp_predict(self, data_lbld, data_unlbld, new_lbld):
        """Cross-conformal prediction for the unlabeled sample.

        Fits one inductive conformal classifier per StratifiedKFold fold on
        the labeled data (original plus ``new_lbld``), averages the per-fold
        class p-values for ``data_unlbld``, and derives per-row credibility
        (largest mean p-value) and confidence (1 - smallest mean p-value).

        Assumes the last column of the labeled frames is the target and the
        second-to-last a per-row sample weight -- TODO confirm with callers.

        Returns a DataFrame indexed like ``data_unlbld`` with columns
        ``mean_p_0``, ``mean_p_1``, ``credibility``, ``confidence``.
        """

        # Create SMOTE instance for class rebalancing
        smote = SMOTE(random_state=self.random_state)

        # Create instance of classifier
        classifier_y = self.classifiers['classifier_y']
        parameters_y = self.clf_parameters['classifier_y']

        clf = classifier_y.set_params(**parameters_y)

        # Features exclude the last two columns; the target is the last one.
        X = data_lbld.iloc[:, :-2]
        y = data_lbld.iloc[:, -1]

        X_new = new_lbld.iloc[:, :-2]
        y_new = new_lbld.iloc[:, -1]

        # DataFrame/Series.append was removed in pandas 2.0; pd.concat is the
        # drop-in equivalent here (same index handling, sort=False).
        X = pd.concat([X, X_new], sort=False)
        y = pd.concat([y, y_new])

        X_unlbld = data_unlbld.iloc[:, :-2]

        # NOTE(review): random_state has no effect on StratifiedKFold unless
        # shuffle=True, and recent scikit-learn raises for this combination
        # -- confirm the intended fold behavior.
        sss = StratifiedKFold(n_splits=5, random_state=self.random_state)
        sss.get_n_splits(X, y)

        p_values = []

        for train_index, calib_index in sss.split(X, y):
            X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
            y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

            # The last feature column is the sample weight.  SMOTE returns
            # plain arrays, hence the positional (non-.iloc) indexing there.
            if self.rebalancing_parameters['SMOTE_y']:
                X_train, y_train = smote.fit_resample(X_train, y_train)
                clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
            else:
                clf.fit(X_train.iloc[:, :-1],
                        y_train,
                        sample_weight=X_train.iloc[:, -1])

            nc = NcFactory.create_nc(clf, MarginErrFunc())
            icp = IcpClassifier(nc)

            if self.rebalancing_parameters['SMOTE_y']:
                icp.fit(X_train[:, :-1], y_train)
            else:
                icp.fit(X_train.iloc[:, :-1].values, y_train)

            icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

            # Predict confidences for validation sample and unlabeled sample
            # (significance=None yields raw per-class p-values).
            p_values.append(
                icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

        # Cross-conformal aggregation: average the p-values over folds.
        mean_p_values = np.array(p_values).mean(axis=0)
        ccp_predictions = pd.DataFrame(mean_p_values,
                                       columns=['mean_p_0', 'mean_p_1'])
        # Vectorized equivalents of the previous per-row iterrows loops:
        # credibility = largest mean p-value, confidence = 1 - smallest.
        p_cols = ['mean_p_0', 'mean_p_1']
        ccp_predictions["credibility"] = ccp_predictions[p_cols].max(axis=1)
        ccp_predictions["confidence"] = 1 - ccp_predictions[p_cols].min(axis=1)

        ccp_predictions.index = X_unlbld.index

        return ccp_predictions
Ejemplo n.º 5
0
# -----------------------------------------------------------------------------
data = Orange.data.Table('iris')
X, y = data.X, data.Y

# Shuffle the instance indices and split them into three equal folds:
# proper training set, calibration set, and test set.
idx = np.random.permutation(y.size)
train = idx[:idx.size // 3]
calibrate = idx[idx.size // 3:2 * idx.size // 3]
test = idx[2 * idx.size // 3:]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Inductive conformal classifier: fit on the training fold, then calibrate
# nonconformity scores on the held-out calibration fold.
icp = IcpClassifier(
    ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability))
icp.fit(X[train, :], y[train])
icp.calibrate(X[calibrate, :], y[calibrate])

# Cross-conformal variant: folds internally, so no separate calibrate() call.
ccp = CrossConformalClassifier(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)))
ccp.fit(X[train, :], y[train])

# Aggregated conformal predictor over cross-validation-style samples.
acp = AggregatedCp(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)),
    CrossSampler())
acp.fit(X[train, :], y[train])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
Ejemplo n.º 6
0
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

# Load iris and re-encode the class labels as consecutive integers 0..k-1
# (a no-op for iris, kept for datasets with arbitrary label values).
data = load_iris()
x, y = data.data, data.target

for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

# Shuffle the instances and split into thirds: train / calibration / test.
n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

# Inductive conformal classifier over a random forest.
nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

# predict_conf yields one (label, confidence, credibility) row per instance.
print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=["Label", "Confidence", "Credibility"]))
Ejemplo n.º 7
0
    # Train `nmodels` independent ICP models, each on a fresh random
    # train/calibration split, and serialize each one to the open file `f`.
    # NOTE(review): `nmodels`, `infile`, `train`, `target`, `part1`, `margin`
    # and `f` are defined earlier in the enclosing scope (not visible here).
    for xx in range(1, nmodels + 1):
        # NOTE(review): modelfile2 is built but never used in this loop.
        modelfile2 = infile + "_nonconf" + "_" + str(xx) + ".model"
        print("Working on model", xx)

        # Random permutation of row indices: first `part1` rows form the
        # proper training set, the remainder the calibration set.
        idx = np.random.permutation(int(len(train)))
        print(idx)
        trainset = idx[:part1]
        calset = idx[part1:]

        # Random-forest-backed nonconformity with the margin error function;
        # the class (not an instance) is passed along with model_params.
        nc = ProbEstClassifierNc(RandomForestClassifier,
                                 margin,
                                 model_params={'n_estimators': 100})
        # Conditional (Mondrian) ICP -- calibration is partitioned by the
        # value of feature/column 1 of each instance.
        icp_norm = IcpClassifier(nc, condition=lambda instance: instance[1])

        icp_norm.fit(train[trainset], target[trainset])
        icp_norm.calibrate(train[calset], target[calset])
        # Append this fitted model to the pickle stream.
        cloudpickle.dump(icp_norm, f)
    f.close()

# Prediction mode (any mode other than train-only 't'): prepare the output
# CSV and load the evaluation table.
if mode != 't':
    # Write the tab-separated header line, then close; rows are presumably
    # appended later by code outside this view.
    outfile = predfile + "_nonconf_pred100sum.csv"
    f2 = open(outfile, 'w')
    f2.write('id\tp-value_low_class\tp-value_high_class\tclass\tmodel\n')
    f2.close()

    # Load the evaluation table; clamp negative targets to class 0.
    data = pd.read_csv(predfile, sep='\t', header=0, index_col=None)
    data.loc[data['target'] < 0, 'target'] = 0
    labels = data['id']
    ll = len(labels)
    target = data['target'].values
    # NOTE(review): only 'id' is dropped, so the 'target' column remains in
    # the feature table -- confirm downstream code expects that.
    test = data.drop(['id'], axis=1)
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle the instance indices and split into thirds.
idx = np.random.permutation(data.target.size)
train = idx[: int(idx.size / 3)]
calibrate = idx[int(idx.size / 3) : int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# ICP with the margin nonconformity function over a decision tree.
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Region predictions at significance 0.1, tabulated beside the true labels.
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
Ejemplo n.º 9
0
def split_data(data, n_train, n_test):
    """Randomly partition *data* into two disjoint row subsets.

    The arguments give a ratio: the first subset gets
    ``n_train / (n_train + n_test)`` of the rows (integer floor), the
    second gets the remainder.  Returns ``(first, second)``.
    """
    total = len(data)
    first_size = n_train * total // (n_train + n_test)
    order = np.random.permutation(total)
    return data[order[:first_size]], data[order[first_size:]]


#data = Orange.data.Table("../data/usps.tab")
data = Orange.data.Table("iris")

# Sweep the significance level; for each level report the mean empirical
# error rate and mean prediction-region size over 10 random resplits.
for sig in np.linspace(0.0, 0.4, 11):
    error_rates = []
    region_sizes = []
    for _ in range(10):
        #train, test = split_data(data, 7200, 2098)
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        #icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        #icp = ICP()
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)

        # Fraction of test rows whose true class is inside the region.
        hits = sum(p[y] for p, y in zip(pred, test.Y))
        error_rates.append(1 - hits / len(pred))
        # Mean number of labels per prediction region.
        region_sizes.append(sum(sum(p) for p in pred) / len(pred))
    print(sig, np.mean(error_rates), np.mean(region_sizes))
Ejemplo n.º 10
0
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Re-encode class labels as consecutive integers 0..k-1 (no-op for iris).
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

# Shuffle and split into thirds: train / calibration / test.
n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])


# One (label, confidence, credibility) row per test instance.
print(pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                   columns=['Label', 'Confidence', 'Credibility']))
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle the instance indices and split into thirds.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# ICP with the margin error function over a decision tree.
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Region predictions at significance 0.1, beside the ground-truth labels.
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)