Example #1
import numpy as np


def mlecsvm(unlabel, clfs, true, x, y, test):
    printOn.blockPrint()
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(unlabel, noisy_labels, x,
                                                     y)

    # split each test row into its label (first entry) and its feature vector
    ground = []
    X_test = []
    for point in test:
        ground.append(int(point[0]))
        X_test.append(point[1:])

    clf = CSVM.SVM(C=1000.1)
    X_train, y_train = np.asarray(df_noise_x), np.asarray(df_noise_y)
    printOn.blockPrint()
    clf.fit(X_train, y_train)
    X_test = np.asarray(X_test)
    y_predict = clf.predict(X_test)
    predict = y_predict.tolist()
    printOn.enablePrint()
    # count the misclassified test points
    count = 0
    for index in range(len(ground)):
        est = predict[index]
        truth = ground[index]
        if est != truth:
            count += 1
    return count / len(ground)
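The function returns the misclassification rate of the C-SVM trained on the noisy labels. A minimal usage sketch, assuming it is paired with the preprocess helper shown in Example #4; the 0.5 unlabeled fraction and the top-level wiring are illustrative, not taken from the project:

unlabel, clfs, true, x, y, test = preprocess(0.5)
error_rate = mlecsvm(unlabel, clfs, true, x, y, test)
print("C-SVM error rate on noisy labels:", error_rate)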
Example #2
import numpy as np


def mlenoiseboost_viz(percentage):
    unlabel, clfs, true, x, y, test = cancer.preprocess(percentage)
    printOn.blockPrint()
    # empirical class frequencies of the labeled training data
    one = 0
    zero = 0
    for label in y:
        if int(label) == 1:
            one += 1
        else:
            zero += 1
    one /= len(y)
    zero /= len(y)
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(unlabel, noisy_labels, x,
                                                     y)
    # get boosting ready
    ones, agree = boosting.probab(answer, noisy_labels, 0)
    err_pos, err_neg = boosting.calcError(confusion_matrixs, ones, agree, one,
                                          zero)

    df_noise_x = np.asarray(df_noise_x)
    df_noise_y = np.asarray(df_noise_y)

    # run AdaBoost
    clfs1 = boosting.adaboost_clf(df_noise_y, df_noise_x, 20, err_pos, err_neg,
                                  noiseLabel)

    return df_noise_x, df_noise_y, clfs1
Example #3
def mlenoiseboost(unlabel, clfs, true, x, y, test):
    printOn.blockPrint()
    # empirical class frequencies of the labeled training data
    one = 0
    zero = 0
    for label in y:
        if int(label) == 1:
            one += 1
        else:
            zero += 1
    one /= len(y)
    zero /= len(y)
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(unlabel, noisy_labels, x,
                                                     y)
    # get boosting ready
    ones, agree = boosting.probab(answer, noisy_labels, 0)
    err_pos, err_neg = boosting.calcError(confusion_matrixs, ones, agree, one,
                                          zero)
    lenU = len(unlabel)
    errt = (err_neg + err_pos) * 100
    val = (count_vi / lenU) * 100
    # run AdaBoost
    clfs1 = boosting.adaboost_clf(df_noise_y, df_noise_x, 20, err_pos, err_neg,
                                  noiseLabel)

    # calculate the error rate
    err1 = errorTest.test(clfs1, test, 1)

    return err1
Example #4
def preprocess(percentage, basicNN=False):
    printOn.blockPrint()
    if basicNN:
        test, unlabel, label, true, x, y, x_true, y_true, x_test, y_test = read.read(
            file='diabetes.csv',
            drop=None,
            retNum=1,
            chopNum=1,
            unlabel_percentage=percentage,
            ytrain=True)
    else:
        test, unlabel, label, true, x, y, x_true, y_true = read.read(
            file='diabetes.csv',
            drop=None,
            retNum=1,
            chopNum=1,
            unlabel_percentage=percentage)
    clfs = classifiers.ensemble(x, y)
    printOn.enablePrint()
    # move the label from the last position of each test row to the front
    for point in test:
        point.insert(0, point.pop())
    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    else:
        return unlabel, clfs, true, x, y, test
Example #5
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier


def mleadaboost(unlabel, clfs, true, x, y, test):
    printOn.blockPrint()
    noisy_labels, confusion_matrixs, count_vi, answer = wrapperDS.run(
        unlabel, clfs, true)
    printOn.enablePrint()
    df_noise_x, df_noise_y, noiseLabel = shuffle.run(unlabel, noisy_labels, x,
                                                     y)
    bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                             algorithm="SAMME",
                             n_estimators=20)
    bdt.fit(df_noise_x, df_noise_y)

    err1 = errorTest.test(bdt, test, 2)
    return err1
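For reference, the same AdaBoost configuration can be exercised outside the project on synthetic data. This is only an illustration of the scikit-learn calls used above; the dataset, split, and error computation are made up for the example and do not reproduce errorTest.test:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# synthetic stand-in for the project's noisy training data
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# same configuration as mleadaboost: 20 depth-1 stumps, SAMME algorithm
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME",
                         n_estimators=20)
bdt.fit(X_train, y_train)

# misclassification rate on the synthetic test split
err = np.mean(bdt.predict(X_test) != y_test)
print("AdaBoost error rate:", err)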
Example #6
def preprocess(percentage, basicNN=False):
    printOn.blockPrint()
    if basicNN:
        test, unlabel, label, true, x, y, x_true, y_true, x_test, y_test = read.read(
            file='data.csv',
            drop=['id'],
            retNum=1,
            chopNum=0,
            unlabel_percentage=percentage,
            ytrain=True)
    else:
        test, unlabel, label, true, x, y, x_true, y_true = read.read(
            file='data.csv',
            drop=['id'],
            retNum=1,
            chopNum=0,
            unlabel_percentage=percentage)
    clfs = classifiers.ensemble(x, y)
    printOn.enablePrint()
    if basicNN:
        return unlabel, clfs, true, x, y, test, y_test, x_test
    else:
        return unlabel, clfs, true, x, y, test
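The basicNN flag only changes the shape of the return value. A short sketch of the two calling patterns, assuming preprocess is in scope; the 0.5 unlabeled percentage is illustrative:

# default call: six values, matching the signatures of the mle* functions above
unlabel, clfs, true, x, y, test = preprocess(0.5)

# basicNN=True: additionally returns the test split as separate arrays
unlabel, clfs, true, x, y, test, y_test, x_test = preprocess(0.5, basicNN=True)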