def mlecsvm(unlabel, clfs, true, x, y, test):
    """Label the unlabeled pool with the Dawid-Skene wrapper, train the
    custom C-SVM on the resulting noisy dataset, and return its error rate.

    Args:
        unlabel: unlabeled feature rows to be annotated by the ensemble.
        clfs: ensemble of classifiers acting as noisy annotators.
        true: ground-truth labels passed through to the wrapper.
        x, y: the original clean labeled features/labels.
        test: rows whose first element is the true label, remainder features.

    Returns:
        float: fraction of test points the SVM misclassifies.
    """
    # Infer labels for the unlabeled pool; wrapper is chatty, so mute stdout.
    printOn.blockPrint()
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()

    # Merge the noisy-labeled pool with the clean labeled data.
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(
        unlabel, noisy_labels, x, y)

    # Split test rows: first element is the ground-truth label.
    ground = [int(point[0]) for point in test]
    X_test = [point[1:] for point in test]

    clf = CSVM.SVM(C=1000.1)
    X_train, y_train = np.asarray(df_noise_x), np.asarray(df_noise_y)
    printOn.blockPrint()
    clf.fit(X_train, y_train)
    predict = clf.predict(np.asarray(X_test)).tolist()
    printOn.enablePrint()

    # Error rate = share of predictions disagreeing with ground truth.
    count = sum(1 for est, truth in zip(predict, ground) if est != truth)
    return count / len(ground)
def mlenoiseboost_viz(percentage):
    """Run the noise-aware boosting pipeline on the cancer dataset and
    return the training data plus fitted boosted classifiers for plotting.

    Args:
        percentage: fraction of the dataset to treat as unlabeled
            (forwarded to ``cancer.preprocess``).

    Returns:
        tuple: (df_noise_x, df_noise_y, clfs1) — training features/labels
        as numpy arrays and the classifiers from ``boosting.adaboost_clf``.
    """
    unlabel, clfs, true, x, y, test = cancer.preprocess(percentage)
    printOn.blockPrint()

    # Class priors from the labeled set (label "1" vs everything else).
    one = sum(1 for label in y if int(label) == 1)
    zero = len(y) - one
    one /= len(y)
    zero /= len(y)

    # Infer labels for the unlabeled pool via Dawid-Skene.
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(
        unlabel, noisy_labels, x, y)

    # Get boosting ready: agreement stats and per-class error estimates.
    ones, agree = boosting.probab(answer, noisy_labels, 0)
    err_pos, err_neg = boosting.calcError(confusion_matrixs, ones, agree,
                                          one, zero)
    df_noise_x = np.asarray(df_noise_x)
    df_noise_y = np.asarray(df_noise_y)

    # Run adaboosting with 20 rounds.
    clfs1 = boosting.adaboost_clf(df_noise_y, df_noise_x, 20, err_pos,
                                  err_neg, noiseLabel)
    return df_noise_x, df_noise_y, clfs1
def mlenoiseboost(unlabel, clfs, true, x, y, test):
    """Label the unlabeled pool with Dawid-Skene, run the custom
    noise-aware AdaBoost, and return its error rate on *test*.

    Args:
        unlabel: unlabeled feature rows to be annotated by the ensemble.
        clfs: ensemble of classifiers acting as noisy annotators.
        true: ground-truth labels passed through to the wrapper.
        x, y: the original clean labeled features/labels.
        test: held-out rows consumed by ``errorTest.test``.

    Returns:
        The error rate reported by ``errorTest.test`` (mode 1).
    """
    printOn.blockPrint()

    # Class priors from the labeled set (label "1" vs everything else).
    one = sum(1 for label in y if int(label) == 1)
    zero = len(y) - one
    one /= len(y)
    zero /= len(y)

    # Infer labels for the unlabeled pool via Dawid-Skene.
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(
        unlabel, noisy_labels, x, y)

    # Get boosting ready: agreement stats and per-class error estimates.
    ones, agree = boosting.probab(answer, noisy_labels, 0)
    err_pos, err_neg = boosting.calcError(confusion_matrixs, ones, agree,
                                          one, zero)
    # NOTE: the original also computed lenU/errt/val percentages here but
    # never used them; that dead code has been removed.

    # Run adaboosting with 20 rounds.
    clfs1 = boosting.adaboost_clf(df_noise_y, df_noise_x, 20, err_pos,
                                  err_neg, noiseLabel)

    # Calculate the error rate on the held-out test set.
    err1 = errorTest.test(clfs1, test, 1)
    return err1
def preprocess(percentage, basicNN=False):
    """Load and split the diabetes dataset, build the classifier ensemble,
    and rotate each test row so its label comes first.

    Args:
        percentage: fraction of the data to treat as unlabeled
            (forwarded to ``read.read``).
        basicNN: when truthy, also request the ``x_test``/``y_test`` split
            needed by the basic neural-network path.

    Returns:
        tuple: ``(unlabel, clfs, true, x, y, test)``; with ``basicNN``
        set, ``(unlabel, clfs, true, x, y, test, y_test, x_test)``.
    """
    printOn.blockPrint()
    if basicNN:  # idiomatic truthiness instead of "== True"
        test, unlabel, label, true, x, y, x_true, y_true, x_test, y_test = read.read(
            file='diabetes.csv', drop=None, retNum=1, chopNum=1,
            unlabel_percentage=percentage, ytrain=True)
    else:
        test, unlabel, label, true, x, y, x_true, y_true = read.read(
            file='diabetes.csv', drop=None, retNum=1, chopNum=1,
            unlabel_percentage=percentage)
    clfs = classifiers.ensemble(x, y)
    printOn.enablePrint()

    # Move each test row's trailing label to the front, in place.
    for point in test:
        point.insert(0, point.pop())

    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    return unlabel, clfs, true, x, y, test
def mleadaboost(unlabel, clfs, true, x, y, test):
    """Label the unlabeled pool with Dawid-Skene, fit a stock scikit-learn
    AdaBoost (depth-1 trees, SAMME, 20 estimators) on the noisy dataset,
    and return its error rate on *test* via ``errorTest.test`` (mode 2).
    """
    # Annotate the unlabeled pool; silence the wrapper's console output.
    printOn.blockPrint()
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()

    # Combine noisy-labeled pool with the clean labeled data.
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(
        unlabel, noisy_labels, x, y)

    # Boost decision stumps over the mixed training set.
    stump = DecisionTreeClassifier(max_depth=1)
    booster = AdaBoostClassifier(stump, algorithm="SAMME", n_estimators=20)
    booster.fit(df_noise_x, df_noise_y)

    return errorTest.test(booster, test, 2)
def preprocess(percentage, basicNN=False):
    """Load and split the cancer dataset (``data.csv``, dropping the ``id``
    column) and build the classifier ensemble.

    Args:
        percentage: fraction of the data to treat as unlabeled
            (forwarded to ``read.read``).
        basicNN: when truthy, also request the ``x_test``/``y_test`` split
            needed by the basic neural-network path.

    Returns:
        tuple: ``(unlabel, clfs, true, x, y, test)``; with ``basicNN``
        set, ``(unlabel, clfs, true, x, y, test, y_test, x_test)``.
    """
    printOn.blockPrint()
    if basicNN:  # idiomatic truthiness instead of "== True"
        test, unlabel, label, true, x, y, x_true, y_true, x_test, y_test = read.read(
            file='data.csv', drop=['id'], retNum=1, chopNum=0,
            unlabel_percentage=percentage, ytrain=True)
    else:
        test, unlabel, label, true, x, y, x_true, y_true = read.read(
            file='data.csv', drop=['id'], retNum=1, chopNum=0,
            unlabel_percentage=percentage)
    clfs = classifiers.ensemble(x, y)
    printOn.enablePrint()

    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    return unlabel, clfs, true, x, y, test