def test_icp_classification_tree(self):
    # -----------------------------------------------------------------------------
    # Setup training, calibration and test indices
    # -----------------------------------------------------------------------------
    data = load_iris()

    idx = np.random.permutation(data.target.size)
    train = idx[:int(idx.size / 3)]
    calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
    test = idx[int(2 * idx.size / 3):]

    # -----------------------------------------------------------------------------
    # Train and calibrate
    # -----------------------------------------------------------------------------
    icp = IcpClassifier(
        ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                     MarginErrFunc()))
    icp.fit(data.data[train, :], data.target[train])
    icp.calibrate(data.data[calibrate, :], data.target[calibrate])

    # -----------------------------------------------------------------------------
    # Predict
    # -----------------------------------------------------------------------------
    prediction = icp.predict(data.data[test, :], significance=0.1)

    header = np.array(["c0", "c1", "c2", "Truth"])
    table = np.vstack([prediction.T, data.target[test]]).T
    df = pd.DataFrame(np.vstack([header, table]))
    print(df)
def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
    # Append the newly labeled batch to the initial labeled pool
    labeled_x = np.concatenate((self.init_labeled_data_x, labeled_data_x)) \
        if len(labeled_data_x) > 0 else self.init_labeled_data_x
    labeled_y = np.concatenate((self.init_labeled_data_y, labeled_data_y)) \
        if len(labeled_data_y) > 0 else self.init_labeled_data_y

    # Build an inductive conformal classifier that predicts with
    # confidence and credibility
    model = ClassifierAdapter(
        DecisionTreeClassifier(random_state=config.random_state,
                               min_samples_leaf=config.min_samples_leaf))
    nc = ClassifierNc(model, MarginErrFunc())
    model_icp = IcpClassifier(nc, smoothing=True)
    model_icp.fit(labeled_x, labeled_y)
    model_icp.calibrate(self.calibration_data_x, self.calibration_data_y)

    # predict_conf returns one row per instance: [label, confidence, credibility]
    s = model_icp.predict_conf(unlabeled_data_x)
    print(s)

    # Selection rule: pseudo-label instances whose confidence and credibility
    # both exceed the configured thresholds; the rest remain unlabeled
    labeled_ind = [
        i for i, a in enumerate(s)
        if a[1] > config.confidence and a[2] > config.credibility
    ]
    unlabeled_ind = [
        i for i, a in enumerate(s)
        if a[1] < config.confidence or a[2] < config.credibility
    ]

    labeled_unlabeled_x = np.take(unlabeled_data_x, labeled_ind, axis=0)
    # np.take without an axis flattens s.T, whose first len(s) entries are the
    # predicted labels, so this picks the labels of the selected instances
    labeled_unlabeled_y = np.take(s.T, labeled_ind)
    unlabeled_data_x = np.take(unlabeled_data_x, unlabeled_ind, axis=0)

    return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
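# A minimal, self-contained sketch of the selection rule used in SelectLabeled
# above. It assumes a hypothetical predict_conf output `s` with one row per
# instance laid out as [label, confidence, credibility], and uses illustrative
# thresholds in place of config.confidence / config.credibility.
import numpy as np

s = np.array([
    [0, 0.99, 0.80],   # confident and credible  -> gets pseudo-labeled
    [1, 0.95, 0.10],   # credibility too low     -> stays unlabeled
    [2, 0.60, 0.70],   # confidence too low      -> stays unlabeled
])
confidence, credibility = 0.9, 0.5   # illustrative thresholds, not from config

labeled_ind = [i for i, a in enumerate(s)
               if a[1] > confidence and a[2] > credibility]
unlabeled_ind = [i for i, a in enumerate(s)
                 if a[1] < confidence or a[2] < credibility]
print(labeled_ind, unlabeled_ind)    # -> [0] [1, 2]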
def test_confidence_credibility(self):
    data = load_iris()
    x, y = data.data, data.target

    for i, y_ in enumerate(np.unique(y)):
        y[y == y_] = i

    n_instances = y.size
    idx = np.random.permutation(n_instances)

    train_idx = idx[:int(n_instances / 3)]
    cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
    test_idx = idx[2 * int(n_instances / 3):]

    nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
    icp = IcpClassifier(nc)
    icp.fit(x[train_idx, :], y[train_idx])
    icp.calibrate(x[cal_idx, :], y[cal_idx])

    print(
        pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                     columns=["Label", "Confidence", "Credibility"]))
def ccp_predict(self, data_lbld, data_unlbld, new_lbld):
    # Create SMOTE instance for class rebalancing
    smote = SMOTE(random_state=self.random_state)

    # Create instance of classifier
    classifier_y = self.classifiers['classifier_y']
    parameters_y = self.clf_parameters['classifier_y']
    clf = classifier_y.set_params(**parameters_y)

    X = data_lbld.iloc[:, :-2]
    y = data_lbld.iloc[:, -1]

    X_new = new_lbld.iloc[:, :-2]
    y_new = new_lbld.iloc[:, -1]

    # DataFrame.append is deprecated; concatenate the new labeled batch instead
    X = pd.concat([X, X_new], sort=False)
    y = pd.concat([y, y_new])

    X_unlbld = data_unlbld.iloc[:, :-2]

    # shuffle=True is required for random_state to take effect in StratifiedKFold
    sss = StratifiedKFold(n_splits=5, shuffle=True,
                          random_state=self.random_state)
    sss.get_n_splits(X, y)

    p_values = []
    for train_index, calib_index in sss.split(X, y):
        X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
        y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

        # The last column of X carries per-instance sample weights
        if self.rebalancing_parameters['SMOTE_y']:
            # fit_resample is assumed to return plain arrays here (older
            # imbalanced-learn), hence the positional slicing below
            X_train, y_train = smote.fit_resample(X_train, y_train)
            clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
        else:
            clf.fit(X_train.iloc[:, :-1], y_train,
                    sample_weight=X_train.iloc[:, -1])

        nc = NcFactory.create_nc(clf, MarginErrFunc())
        icp = IcpClassifier(nc)

        if self.rebalancing_parameters['SMOTE_y']:
            icp.fit(X_train[:, :-1], y_train)
        else:
            icp.fit(X_train.iloc[:, :-1].values, y_train)
        icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

        # Predict p-values for the unlabeled sample
        p_values.append(
            icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

    # Average the per-fold p-values and derive credibility / confidence
    mean_p_values = np.array(p_values).mean(axis=0)
    ccp_predictions = pd.DataFrame(mean_p_values,
                                   columns=['mean_p_0', 'mean_p_1'])
    ccp_predictions["credibility"] = mean_p_values.max(axis=1)
    ccp_predictions["confidence"] = 1 - mean_p_values.min(axis=1)
    ccp_predictions.index = X_unlbld.index

    return ccp_predictions
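# A small, runnable sketch (toy numbers, not from the source) of how the
# confidence and credibility columns above follow from the averaged two-class
# p-values: credibility is the larger p-value, confidence is one minus the
# smaller.
import numpy as np
import pandas as pd

mean_p = np.array([[0.02, 0.80],    # clearly class 1: high confidence and credibility
                   [0.40, 0.35]])   # ambiguous: low confidence
df = pd.DataFrame(mean_p, columns=['mean_p_0', 'mean_p_1'])
df["credibility"] = mean_p.max(axis=1)
df["confidence"] = 1 - mean_p.min(axis=1)
print(df)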
# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = Orange.data.Table('iris')
X, y = data.X, data.Y

idx = np.random.permutation(y.size)
train = idx[:idx.size // 3]
calibrate = idx[idx.size // 3:2 * idx.size // 3]
test = idx[2 * idx.size // 3:]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability))
icp.fit(X[train, :], y[train])
icp.calibrate(X[calibrate, :], y[calibrate])

ccp = CrossConformalClassifier(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)))
ccp.fit(X[train, :], y[train])

acp = AggregatedCp(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)),
    CrossSampler())
acp.fit(X[train, :], y[train])

# -----------------------------------------------------------------------------
# Predict
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=["Label", "Confidence", "Credibility"]))
part1 = int(0.7 * len(train))

for xx in range(1, nmodels + 1):
    modelfile2 = infile + "_nonconf" + "_" + str(xx) + ".model"
    print("Working on model", xx)

    # Random proper-training / calibration split for this model
    idx = np.random.permutation(int(len(train)))
    print(idx)
    trainset = idx[:part1]
    calset = idx[part1:]

    nc = ProbEstClassifierNc(RandomForestClassifier, margin,
                             model_params={'n_estimators': 100})
    # Label-conditional (Mondrian) ICP: calibrate per class
    icp_norm = IcpClassifier(nc, condition=lambda instance: instance[1])
    icp_norm.fit(train[trainset], target[trainset])
    icp_norm.calibrate(train[calset], target[calset])

    # Serialize the calibrated model to its per-model file
    f = open(modelfile2, 'wb')
    cloudpickle.dump(icp_norm, f)
    f.close()

if mode != 't':
    outfile = predfile + "_nonconf_pred100sum.csv"
    f2 = open(outfile, 'w')
    f2.write('id\tp-value_low_class\tp-value_high_class\tclass\tmodel\n')
    f2.close()

    data = pd.read_csv(predfile, sep='\t', header=0, index_col=None)
    data.loc[data['target'] < 0, 'target'] = 0
    labels = data['id']
    ll = len(labels)
    target = data['target'].values
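# Hedged sketch (not part of the excerpt above): reloading one of the
# serialized conformal models and producing per-class p-values for new data.
# It reuses the modelfile2 naming from the training loop; `new_features` is a
# placeholder for a feature matrix with the same columns used when fitting.
import cloudpickle

with open(modelfile2, 'rb') as fh:
    icp_loaded = cloudpickle.load(fh)

# significance=None returns raw p-values, one column per class
p_vals = icp_loaded.predict(new_features, significance=None)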
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)

header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
def split_data(data, n_train, n_test):
    n_train = n_train * len(data) // (n_train + n_test)
    n_test = len(data) - n_train
    ind = np.random.permutation(len(data))
    return data[ind[:n_train]], data[ind[n_train:n_train + n_test]]


#data = Orange.data.Table("../data/usps.tab")
data = Orange.data.Table("iris")

for sig in np.linspace(0.0, 0.4, 11):
    errs, szs = [], []
    for rep in range(10):
        #train, test = split_data(data, 7200, 2098)
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        #icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        #icp = ICP()
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)

        # Orange stores class values as floats, so cast before indexing the
        # boolean prediction regions
        acc = sum(p[int(y)] for p, y in zip(pred, test.Y)) / len(pred)
        err = 1 - acc
        sz = sum(sum(p) for p in pred) / len(pred)
        errs.append(err)
        szs.append(sz)
    print(sig, np.mean(errs), np.mean(szs))
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

print(pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                   columns=['Label', 'Confidence', 'Credibility']))
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)

header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
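# Follow-up note as code (assumed, not from the source): with significance=0.1,
# `prediction` from the snippet above is a boolean matrix with one column per
# class; True means the class is kept in the 90% prediction region for that
# test instance, so the row sums give the region sizes.
region_sizes = prediction.sum(axis=1)          # number of classes per region
print("average region size:", region_sizes.mean())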