def test_icp_classification_tree(self):
    """Smoke-test an inductive conformal classifier (decision tree + margin
    nonconformity) on iris: fit, calibrate, then print region predictions
    next to the true labels."""
    # ----------------------------------------------------------------------
    # Shuffle the sample indices and cut them into thirds:
    # training / calibration / test.
    # ----------------------------------------------------------------------
    iris = load_iris()
    order = np.random.permutation(iris.target.size)
    train_idx = order[:int(order.size / 3)]
    calib_idx = order[int(order.size / 3):int(2 * order.size / 3)]
    test_idx = order[int(2 * order.size / 3):]

    # ----------------------------------------------------------------------
    # Fit the underlying model on the training third, then calibrate the
    # conformal predictor on the held-out calibration third.
    # ----------------------------------------------------------------------
    icp = IcpClassifier(
        ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                     MarginErrFunc()))
    icp.fit(iris.data[train_idx, :], iris.target[train_idx])
    icp.calibrate(iris.data[calib_idx, :], iris.target[calib_idx])

    # ----------------------------------------------------------------------
    # Predict label regions at 10% significance and display a table with
    # one indicator column per class plus the ground truth.
    # ----------------------------------------------------------------------
    prediction = icp.predict(iris.data[test_idx, :], significance=0.1)
    header = np.array(["c0", "c1", "c2", "Truth"])
    table = np.vstack([prediction.T, iris.target[test_idx]]).T
    df = pd.DataFrame(np.vstack([header, table]))
    print(df)
def ccp_predict(self, data_lbld, data_unlbld, new_lbld):
    """Cross-conformal prediction over the unlabeled pool.

    Pools `data_lbld` with the newly labeled `new_lbld`, runs a 5-fold
    stratified cross-conformal loop (fit on 4 folds, calibrate on 1),
    and averages the per-fold p-values for every unlabeled sample.

    Parameters
    ----------
    data_lbld : pd.DataFrame
        Labeled data; last two columns are assumed to be (weight, label)
        — TODO confirm column layout against the caller.
    data_unlbld : pd.DataFrame
        Unlabeled data with the same column layout.
    new_lbld : pd.DataFrame
        Freshly labeled samples to add to the training pool.

    Returns
    -------
    pd.DataFrame
        Indexed like `data_unlbld`, with columns 'mean_p_0', 'mean_p_1',
        'credibility' (largest mean p-value) and 'confidence'
        (1 - smallest mean p-value).
    """
    # SMOTE instance for optional class rebalancing of each training fold.
    smote = SMOTE(random_state=self.random_state)

    # Underlying classifier, configured from the stored parameter set.
    classifier_y = self.classifiers['classifier_y']
    parameters_y = self.clf_parameters['classifier_y']
    clf = classifier_y.set_params(**parameters_y)

    # Split off features / labels; the per-sample weight travels along as
    # the last feature column and is peeled off right before fitting.
    X = data_lbld.iloc[:, :-2]
    y = data_lbld.iloc[:, -1]

    X_new = new_lbld.iloc[:, :-2]
    y_new = new_lbld.iloc[:, -1]

    # FIX: DataFrame.append / Series.append were removed in pandas 2.0 —
    # use pd.concat instead (same result, ignore nothing, keep order).
    X = pd.concat([X, X_new], sort=False)
    y = pd.concat([y, y_new])

    X_unlbld = data_unlbld.iloc[:, :-2]

    # FIX: recent scikit-learn requires shuffle=True for random_state to be
    # honored (passing random_state with shuffle=False raises ValueError).
    sss = StratifiedKFold(n_splits=5, shuffle=True,
                          random_state=self.random_state)
    sss.get_n_splits(X, y)

    p_values = []
    for train_index, calib_index in sss.split(X, y):
        X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
        y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

        if self.rebalancing_parameters['SMOTE_y']:
            # After SMOTE, X_train is a plain array, hence positional slicing.
            X_train, y_train = smote.fit_resample(X_train, y_train)
            clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
        else:
            clf.fit(X_train.iloc[:, :-1], y_train,
                    sample_weight=X_train.iloc[:, -1])

        # Wrap the fitted model in a margin-based conformal predictor and
        # calibrate it on the held-out fold.
        nc = NcFactory.create_nc(clf, MarginErrFunc())
        icp = IcpClassifier(nc)

        if self.rebalancing_parameters['SMOTE_y']:
            icp.fit(X_train[:, :-1], y_train)
        else:
            icp.fit(X_train.iloc[:, :-1].values, y_train)

        icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

        # Collect raw p-values (significance=None) for the unlabeled pool.
        p_values.append(
            icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

    mean_p_values = np.array(p_values).mean(axis=0)

    ccp_predictions = pd.DataFrame(mean_p_values,
                                   columns=['mean_p_0', 'mean_p_1'])
    # Vectorized credibility/confidence (replaces the per-row iterrows loop;
    # equivalent because the added credibility column is a row max and thus
    # never changes the row minimum).
    ccp_predictions["credibility"] = mean_p_values.max(axis=1)
    ccp_predictions["confidence"] = 1 - mean_p_values.min(axis=1)

    ccp_predictions.index = X_unlbld.index

    return ccp_predictions
# Fit and calibrate the inductive conformal predictor built earlier.
icp.fit(X[train, :], y[train])
icp.calibrate(X[calibrate, :], y[calibrate])

# Cross-conformal variant over the same base model.
ccp = CrossConformalClassifier(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)))
ccp.fit(X[train, :], y[train])

# Aggregated conformal predictor using a cross-sampling scheme.
acp = AggregatedCp(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)),
    CrossSampler())
acp.fit(X[train, :], y[train])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# For each conformal variant, print a banner followed by the first five
# region predictions next to the true labels.
for banner, model in (('# Inductive', icp),
                      ('\n# Cross', ccp),
                      ('\n# Aggre', acp)):
    print(banner)
    prediction = model.predict(X[test, :], significance=0.1)
    for pred, actual in zip(prediction[:5], y[test]):
        print(pred, actual)
# Clamp negative target values to zero before extracting labels/features.
data.loc[data['target'] < 0, 'target'] = 0
labels = data['id']
ll = len(labels)
target = data['target'].values
test = data.drop(['id'], axis=1)
test = test.drop(['target'], axis=1).values

# FIX: manage the model file with a context manager so the handle is closed
# even if loading or prediction fails mid-loop (the original opened the file
# manually and only closed it on the success path). Also dropped the unused
# local `modelfile2`, which was computed every iteration but never read.
with open(modelfile, mode='rb') as f:
    # The pickle stream starts with the number of models it contains,
    # followed by that many serialized predictors.
    nmodels_built = cloudpickle.load(f)
    print("Models built:", nmodels_built)

    if nmodels > nmodels_built:
        print(
            "More models ordered (", nmodels,
            ") than the file contains. Setting the number of models to built models."
        )
        nmodels = nmodels_built

    for xx in range(1, nmodels + 1):
        print("Predicting from model", xx)
        num = [xx] * ll  # model index column, repeated per sample

        icp_norm = cloudpickle.load(f)
        predicted = icp_norm.predict(test)
        # Split the two-column prediction into separate output columns.
        predicted0 = [x[0] for x in predicted]
        predicted1 = [x[1] for x in predicted]
        writeOutListsApp(outfile,
                         [labels, predicted0, predicted1, target, num])

print(" - finished\n")
# FIX: the script uses np, pd and DecisionTreeClassifier but never imported
# them — add the missing imports (stdlib-style grouping: third-party first,
# then the nonconformist package the example demonstrates).
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle sample indices and split into thirds: train / calibration / test.
idx = np.random.permutation(data.target.size)
train = idx[: int(idx.size / 3)]
calibrate = idx[int(idx.size / 3) : int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Inductive conformal classifier: decision tree with a margin-based
# nonconformity score.
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Region predictions at 10% significance, printed next to the true labels.
prediction = icp.predict(data.data[test, :], significance=0.1)

header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
def split_data(data, n_train, n_test):
    """Randomly split *data* into two disjoint parts.

    `n_train` / `n_test` act as ratio weights: the first part gets
    n_train/(n_train+n_test) of the samples (integer-floored), the
    second part gets the remainder.
    """
    n_train = n_train * len(data) // (n_train + n_test)
    n_test = len(data) - n_train
    order = np.random.permutation(len(data))
    return data[order[:n_train]], data[order[n_train:n_train + n_test]]


data = Orange.data.Table("iris")

# Sweep significance levels; for each, average the empirical error rate and
# the mean prediction-region size over ten random 2:1 train/test splits
# (with a further 2:1 train/calibration split).
for sig in np.linspace(0.0, 0.4, 11):
    errs, szs = [], []
    for rep in range(10):
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)

        pred = icp.predict(test.X, significance=sig)
        # A prediction "hits" when the region includes the true class.
        hits = sum(p[y] for p, y in zip(pred, test.Y))
        errs.append(1 - hits / len(pred))
        # Region size = number of classes included in the prediction set.
        szs.append(sum(sum(p) for p in pred) / len(pred))
    print(sig, np.mean(errs), np.mean(szs))
# FIX: the script uses load_iris, np, pd and DecisionTreeClassifier but never
# imported them — add the missing imports alongside the existing
# nonconformist ones.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle sample indices and split into thirds: train / calibration / test.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Inductive conformal classifier: adapted decision tree with a margin
# nonconformity function.
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                 MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Region predictions at 10% significance, printed next to the true labels.
prediction = icp.predict(data.data[test, :], significance=0.1)

header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)