def preprocess(percentage, basicNN=False):
    """Read ``diabetes.csv``, fit the classifier ensemble and return the splits.

    Args:
        percentage: Forwarded to ``read.read`` as ``unlabel_percentage``.
        basicNN: When truthy, ``read.read`` is additionally called with
            ``ytrain=True`` and the extra ``x_test`` / ``y_test`` values it
            then returns are appended to the result.

    Returns:
        ``(unlabel, clfs, true, x, y, test)`` by default, or
        ``(unlabel, clfs, true, x, y, test, y_test, x_test)`` when ``basicNN``
        is truthy (note ``y_test`` comes before ``x_test``).
    """
    # Printing is blocked while the helpers run (presumably they are chatty on
    # stdout -- confirm against printOn).  The try/finally guarantees printing
    # is re-enabled even if reading the file or fitting the ensemble raises;
    # otherwise a single failure would leave output blocked afterwards.
    printOn.blockPrint()
    try:
        if basicNN:
            (test, unlabel, _label, true, x, y,
             _x_true, _y_true, x_test, y_test) = read.read(
                file='diabetes.csv', drop=None, retNum=1, chopNum=1,
                unlabel_percentage=percentage, ytrain=True)
        else:
            (test, unlabel, _label, true, x, y,
             _x_true, _y_true) = read.read(
                file='diabetes.csv', drop=None, retNum=1, chopNum=1,
                unlabel_percentage=percentage)
        clfs = classifiers.ensemble(x, y)
    finally:
        printOn.enablePrint()

    # Rotate every test row in place so its last element becomes the first
    # (presumably moving the label column to the front -- TODO confirm).
    for point in test:
        point.insert(0, point.pop())

    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    return unlabel, clfs, true, x, y, test
def mleadaboost(unlabel, clfs, true, x, y, test):
    """Train AdaBoost on noisily-labelled data and return its test error.

    The steps are:

    1. ``wrapperDS.run(unlabel, clfs, true)`` produces noisy labels for the
       unlabelled points (only the first of its four return values is used).
    2. ``shuffle.run`` combines the unlabelled points and their noisy labels
       with ``x`` / ``y`` into a training set.
    3. An ``AdaBoostClassifier`` of 20 depth-1 decision trees (``SAMME``
       algorithm) is fitted on that training set.
    4. ``errorTest.test(bdt, test, 2)`` evaluates the fitted model.

    Args:
        unlabel: Unlabelled points passed to ``wrapperDS.run`` and
            ``shuffle.run``.
        clfs: Classifiers passed to ``wrapperDS.run``.
        true: Passed to ``wrapperDS.run`` (presumably the ground-truth labels
            of ``unlabel`` -- confirm against wrapperDS).
        x: Labelled features passed to ``shuffle.run``.
        y: Labelled targets passed to ``shuffle.run``.
        test: Test data passed to ``errorTest.test``.

    Returns:
        Whatever ``errorTest.test`` returns for the fitted model.
    """
    # Printing is blocked only around wrapperDS.run.  try/finally makes sure
    # it is re-enabled even when that call raises, so a failure here cannot
    # leave output blocked for the caller.
    printOn.blockPrint()
    try:
        noisy_labels, _confusion_matrixs, _count_vi, _answer = wrapperDS.run(
            unlabel, clfs, true)
    finally:
        printOn.enablePrint()

    # The third value returned by shuffle.run is not needed here.
    df_noise_x, df_noise_y, _noise_label = shuffle.run(
        unlabel, noisy_labels, x, y)

    bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                             algorithm="SAMME",
                             n_estimators=20)
    bdt.fit(df_noise_x, df_noise_y)

    # NOTE(review): the meaning of the literal 2 is defined by errorTest.test
    # (possibly the number of classes) -- confirm there.
    err1 = errorTest.test(bdt, test, 2)
    return err1
def preprocess(percentage, basicNN=False):
    """Read ``data.csv``, fit the classifier ensemble and return the splits.

    The ``id`` column is dropped on load (``drop=['id']``).

    Args:
        percentage: Forwarded to ``read.read`` as ``unlabel_percentage``.
        basicNN: When truthy, ``read.read`` is additionally called with
            ``ytrain=True`` and the extra ``x_test`` / ``y_test`` values it
            then returns are appended to the result.

    Returns:
        ``(unlabel, clfs, true, x, y, test)`` by default, or
        ``(unlabel, clfs, true, x, y, test, y_test, x_test)`` when ``basicNN``
        is truthy (note ``y_test`` comes before ``x_test``).
    """
    # Printing is blocked while the helpers run (presumably they are chatty on
    # stdout -- confirm against printOn).  The try/finally guarantees printing
    # is re-enabled even if reading the file or fitting the ensemble raises;
    # otherwise a single failure would leave output blocked afterwards.
    printOn.blockPrint()
    try:
        if basicNN:
            (test, unlabel, _label, true, x, y,
             _x_true, _y_true, x_test, y_test) = read.read(
                file='data.csv', drop=['id'], retNum=1, chopNum=0,
                unlabel_percentage=percentage, ytrain=True)
        else:
            (test, unlabel, _label, true, x, y,
             _x_true, _y_true) = read.read(
                file='data.csv', drop=['id'], retNum=1, chopNum=0,
                unlabel_percentage=percentage)
        clfs = classifiers.ensemble(x, y)
    finally:
        printOn.enablePrint()

    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    return unlabel, clfs, true, x, y, test