Ejemplo n.º 1
0
def make_summary(X_Fit, X, accuracy, accuracy_no, matrix, matrix_no):
    """Print a colour-coded report of results without and with selection.

    The "NO SELECTION" section is printed first (yellow) from ``X``,
    ``accuracy_no`` and ``matrix_no``; the "WITH SELECTION" section follows
    (green) from ``X_Fit``, ``accuracy`` and ``matrix``.  Each section lists
    the number of columns of the feature array, the accuracy, and the values
    returned by ``calculatePrecision``, ``calculateFPR_TPR_TNR`` and
    ``calculateF1`` for the matrix, followed by the matrix itself.

    Nothing is returned; the function only writes to stdout.
    """

    def _report(color, title, features, acc, conf_matrix):
        # One section of the report; every line is wrapped in the same colour.
        def _line(label, value=''):
            print(color + label + str(value) + bcolors.ENDC)

        _line(title)
        _line('NB_OF_FEATURES: ', features.shape[1])
        _line('ACCURACY: ', acc)
        _line('PRECISION: ', calculatePrecision(conf_matrix))
        _line('FPT_TPR: ', calculateFPR_TPR_TNR(conf_matrix))
        _line('F1 SCORE: ', calculateF1(conf_matrix))
        _line('MATRIX: \n', conf_matrix)

    _report(bcolors.YELLOW, 'NO SELECTION: \n', X, accuracy_no, matrix_no)
    _report(bcolors.GREEN, '\nWITH SELECTION: \n', X_Fit, accuracy, matrix)
def make_experiment(file, set, elements):
    """Run the baseline on several subsets, then every selection method.

    ``elements`` is a two-item sequence ``[X, y]``.  The rows are sorted by
    ``y``, eight subsets of the sorted data are built, and an F1 value is
    computed for each subset with all features.  The baseline line is written
    to ``file`` and ``make_closest`` is then called once per selection method
    with the subsets and the baseline F1 values.

    Parameters:
        file: object with a ``write`` method that receives summary lines.
        set: name of the data set, forwarded to the summary helpers.
        elements: ``[X, y]`` pair of features and labels.
    """
    X, y = elements

    # Sort the samples by label so the strided slices below are taken from
    # label-ordered data.
    table = pd.DataFrame(X)
    table['y'] = y
    table = table.sort_values('y')
    print(table)
    X = table.drop('y', axis=1).values
    y = table['y'].values

    n_rows = len(X)
    subsets = [
        # Full data set.
        [X, y],
        # Every second row (odd, then even positions).
        [X[1::2], y[1::2]],
        [X[0::2], y[0::2]],
        # Every third row, for each of the three offsets.
        [X[0::3], y[0::3]],
        [X[1::3], y[1::3]],
        [X[2::3], y[2::3]],
        # Tail slices; the start index comes from get_part_of_set (defined
        # elsewhere -- presumably a fraction-based offset, TODO confirm).
        [X[get_part_of_set(X, 0.4):n_rows],
         y[get_part_of_set(y, 0.4):n_rows]],
        [X[get_part_of_set(X, 0.2):(n_rows - 2)],
         y[get_part_of_set(y, 0.2):(n_rows - 2)]],
    ]

    def _baseline_f1(features, labels):
        # F1 of the reversed matrix for one subset, using all features.
        _, matrix = get_average_score(features, labels)
        return calculateF1(reverseMatrix(matrix))

    results = [_baseline_f1(subset[0], subset[1]) for subset in subsets]

    n_features = X.shape[1]
    file.write(
        wicloxon_string_summary('NO SELECTION', set, n_features, 'null',
                                results))

    for method in ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE',
                   'CORRELATION COEF'):
        make_closest(file, method, set, subsets, results, X.shape[1])
def make_closest(file, method, set, subsets, results, set_len):
    """Find the smallest feature count not significantly different from baseline.

    For each candidate count from 1 to ``set_len - 1`` the selection
    ``method`` is applied to every subset and an F1 value is collected per
    subset.  These values are compared with the baseline ``results`` through
    ``calculateWilcoxon``; the first count whose p-value exceeds ``ALPHA`` is
    written to ``file`` and the search stops.  If no count qualifies, a
    rollback line with the original ``set_len`` and ``results`` is written.

    Parameters:
        file: object with a ``write`` method that receives the summary line.
        method: name of the selection method, forwarded to ``get_method``.
        set: name of the data set, forwarded to the summary helpers.
        subsets: sequence of ``[X, y]`` pairs.
        results: baseline F1 values, one per subset.
        set_len: number of features in the unreduced data.
    """
    for feats in range(1, set_len):
        f1_scores = []

        for subset in subsets:
            X_Fit, scores = get_method(method, subset, feats)
            accuracy, matrix = get_average_score(X_Fit, subset[1])
            f1_scores.append(calculateF1(reverseMatrix(matrix)))

        # A ValueError from the test is treated as "no difference" (p = 1).
        try:
            _, p_value = calculateWilcoxon(results, f1_scores)
        except ValueError:
            p_value = 1

        # X_Fit here is the selection produced for the last subset.
        n_selected = X_Fit.shape[1]
        make_wilcoxon_summary(method, set, n_selected, p_value, results,
                              f1_scores)

        if p_value > ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(set_len) + ' - NEW NB: ' +
                  str(feats) + '\n')
            file.write(
                wicloxon_string_summary(method, set, X_Fit.shape[1], p_value,
                                        f1_scores))
            break
    else:
        # No candidate passed the threshold: report the baseline instead.
        print('NOT FOUND - ROLLBACK\n')
        file.write(wicloxon_string_summary(method, set, set_len, 0, results))
Ejemplo n.º 4
0
def get_string_summary(method, set, X, accuracy, matrix, scores):
    """Build one '; '-separated, newline-terminated result line.

    Fields, in order: set name, method name, number of columns of ``X``,
    number of rows of ``X``, accuracy, then the values returned by
    ``calculateBalancedAcc``, ``calculatePrecision``, ``calculateRecall``,
    ``calculateF1`` and ``calculateFPR_TPR_TNR`` for ``matrix``, the matrix
    itself rendered on one line, and ``scores``.  Spaces are stripped from
    the last three fields.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]

    fields = [
        set,
        method,
        str(n_cols),
        str(n_rows),
        str(accuracy),
        str(calculateBalancedAcc(matrix)),
        str(calculatePrecision(matrix)),
        str(calculateRecall(matrix)),
        str(calculateF1(matrix)),
        str(calculateFPR_TPR_TNR(matrix)).replace(' ', ''),
        # Flatten the matrix text to a single line without spaces.
        str(array2string(matrix, separator=',')
            .replace('\n', '')
            .replace(' ', '')),
        str(scores).replace(' ', ''),
    ]
    return '; '.join(fields) + '\n'
Ejemplo n.º 5
0
def make_experiment(file, set, elements):
    """Evaluate the full feature set, then run every selection method.

    The baseline (all features) is scored, printed and written to ``file``.
    Depending on the module-level ``MODE``, each selection method is then
    handed to ``make_closest`` (``'closest'``) or ``make_best`` (``'best'``);
    any other ``MODE`` value runs no selection method.

    Parameters:
        file: object with a ``write`` method that receives summary lines.
        set: name of the data set, forwarded to the summary helpers.
        elements: ``[X, y]`` pair of features and labels.
    """
    X, y = elements

    accuracy, matrix = get_average_score(X, y)
    matrix_rev = reverseMatrix(matrix)

    make_simple_summary('NO SELECTION', set, X, accuracy, matrix_rev, ['all'])
    file.write(
        get_string_summary('NO SELECTION', set, X, accuracy, matrix_rev,
                           ['all']))
    basic_f1 = calculateF1(matrix_rev)

    # Pick the per-method runner once, based on the configured mode.
    if MODE == 'closest':
        def _run(method):
            make_closest(file, method, set, elements, basic_f1)
    elif MODE == 'best':
        def _run(method):
            make_best(file, method, set, elements)
    else:
        return

    for method in ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE',
                   'CORRELATION COEF'):
        _run(method)
Ejemplo n.º 6
0
def make_simple_summary(method, set, X, accuracy, matrix, scores):
    """Print a colour-coded block of metrics for one method on one data set.

    The set and method names are printed in yellow, the F1 score in bold and
    every other line in green.  Metric values come from the ``calculate*``
    helpers applied to ``matrix``; the matrix itself is printed on a single
    line.  A blank separator is printed at the end.  Nothing is returned.
    """

    def _show(color, label, value):
        print(color + label + str(value) + bcolors.ENDC)

    _show(bcolors.YELLOW, 'SET: ', set)
    _show(bcolors.YELLOW, 'METHOD: ', method)
    _show(bcolors.GREEN, 'NB_OF_FEATURES: ', X.shape[1])
    _show(bcolors.GREEN, 'NB_OF_ELEMENTS: ', X.shape[0])
    _show(bcolors.GREEN, 'ACCURACY: ', accuracy)
    _show(bcolors.GREEN, 'BALANCED ACC: ', calculateBalancedAcc(matrix))
    _show(bcolors.GREEN, 'PRECISION: ', calculatePrecision(matrix))
    _show(bcolors.GREEN, 'RECALL: ', calculateRecall(matrix))
    _show(bcolors.BOLD, 'F1 SCORE: ', calculateF1(matrix))
    _show(bcolors.GREEN, 'TPR FPR TNR: ', calculateFPR_TPR_TNR(matrix))
    # Collapse the multi-line matrix text onto one line.
    _show(bcolors.GREEN, 'MATRIX: ', str(matrix).replace('\n', ''))
    _show(bcolors.GREEN, 'SCORES: ', scores)
    print('\n')
Ejemplo n.º 7
0
def make_best(file, method, set, elements):
    """Search every feature count and record the one with the highest F1.

    For each count from 1 to the number of columns of ``X`` the selection
    ``method`` is applied, scored and printed.  The count with the highest F1
    is kept; the comparison is strict, so on ties the smallest count wins.
    The winning configuration is then re-evaluated, printed, and written to
    ``file``.

    Parameters:
        file: object with a ``write`` method that receives the summary line.
        method: name of the selection method, forwarded to ``get_method``.
        set: name of the data set, forwarded to the summary helpers.
        elements: ``[X, y]`` pair of features and labels.
    """
    best_num_of_feats = 1
    X, y = elements
    best_f1 = 0

    for feats in range(1, X.shape[1] + 1):
        X_Fit, scores = get_method(method, elements, feats)
        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        new_f1 = calculateF1(matrix_rev)
        make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)

        if new_f1 > best_f1:
            best_f1 = new_f1
            best_num_of_feats = feats

    # Report the winning count, not the last loop value: the original printed
    # ``feats``, which is always the final candidate (and is undefined when
    # the loop body never runs).
    print('\nBEST FOR: ' + str(best_num_of_feats) + '\n')

    # Re-run the winning configuration to produce the line that is persisted.
    X_Fit, scores = get_method(method, elements, best_num_of_feats)
    accuracy, matrix = get_average_score(X_Fit, y)
    matrix_rev = reverseMatrix(matrix)
    make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)
    file.write(
        get_string_summary(method, set, X_Fit, accuracy, matrix_rev, scores))
Ejemplo n.º 8
0
def make_closest(file, method, set, elements, basic_f1):
    """Find the smallest feature count whose F1 is within ALPHA of baseline.

    Feature counts from 1 to one less than the number of columns of ``X`` are
    tried in order.  Each candidate is scored and printed; the first one whose
    absolute F1 difference from ``basic_f1`` is below ``ALPHA`` is written to
    ``file`` and the search stops.  If none qualifies, the method is run with
    all features and that result is written instead.

    Parameters:
        file: object with a ``write`` method that receives the summary line.
        method: name of the selection method, forwarded to ``get_method``.
        set: name of the data set, forwarded to the summary helpers.
        elements: ``[X, y]`` pair of features and labels.
        basic_f1: F1 value obtained without feature selection.
    """
    X, y = elements
    total_feats = X.shape[1]

    for feats in range(1, total_feats):
        X_Fit, scores = get_method(method, elements, feats)
        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        f1_gap = abs(calculateF1(matrix_rev) - basic_f1)
        make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)

        if f1_gap < ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(X.shape[1]) + ' - NEW NB: ' +
                  str(X_Fit.shape[1]) + '\n')
            file.write(
                get_string_summary(method, set, X_Fit, accuracy, matrix_rev,
                                   scores))
            break
    else:
        # No reduced set came close enough: fall back to all features.
        print('NOT FOUND - ROLLBACK\n')
        X_Fit, scores = get_method(method, elements, X.shape[1])
        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        file.write(
            get_string_summary(method, set, X_Fit, accuracy, matrix_rev,
                               scores))