コード例 #1
0
def make_experiment(file, set, elements):
    """Score the unreduced data on eight row subsets, then run every method.

    The rows of ``elements`` are sorted by label, sliced into eight subsets
    (the full data, two stride-2 slices, three stride-3 slices and two
    ranges whose start index comes from ``get_part_of_set``), and an F1
    value is computed for each subset.  The resulting baseline is written
    to ``file`` and handed to ``make_closest`` once per selection method.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    features, labels = elements

    # Sort the rows by label before slicing.
    frame = pd.DataFrame(features)
    frame['y'] = labels
    frame = frame.sort_values('y')
    print(frame)
    rows = frame.drop('y', axis=1).values
    targets = frame['y'].values

    row_count = len(rows)

    # Each subset stays a two-item list because make_closest forwards it on.
    subsets = [[rows, targets]]
    subsets += [[rows[offset::2], targets[offset::2]] for offset in (1, 0)]
    subsets += [[rows[offset::3], targets[offset::3]] for offset in (0, 1, 2)]
    subsets.append([
        rows[get_part_of_set(rows, 0.4):row_count],
        targets[get_part_of_set(targets, 0.4):row_count],
    ])
    subsets.append([
        rows[get_part_of_set(rows, 0.2):(row_count - 2)],
        targets[get_part_of_set(targets, 0.2):(row_count - 2)],
    ])

    # Baseline: one F1 value per subset, computed on all features.
    results = []
    for subset_rows, subset_targets in subsets:
        _accuracy, confusion = get_average_score(subset_rows, subset_targets)
        results.append(calculateF1(reverseMatrix(confusion)))

    feature_count = rows.shape[1]
    file.write(wicloxon_string_summary('NO SELECTION', set, feature_count, 'null', results))

    for method in ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE', 'CORRELATION COEF'):
        make_closest(file, method, set, subsets, results, feature_count)
コード例 #2
0
def make_closest(file, method, set, subsets, results, set_len):
    """Search for the first feature count accepted by the Wilcoxon check.

    Feature counts from 1 up to ``set_len - 1`` are tried in increasing
    order.  For each count, every subset is reduced with ``get_method`` and
    scored, and the per-subset F1 values are compared against ``results``
    with ``calculateWilcoxon``.  The first count whose p-value exceeds
    ``ALPHA`` is written to ``file``.  If no count qualifies, the baseline
    ``results`` are written instead with ``set_len`` features and p = 0.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        method: Name of the selection method, forwarded to ``get_method``.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        subsets: Sequence of ``[X, y]`` pairs to evaluate.
        results: Baseline F1 values, one per subset.
        set_len: Number of features in the unreduced data.
    """
    for feats in range(1, set_len):
        custom_res = []

        for subset in subsets:
            reduced, _scores = get_method(method, subset, feats)
            _accuracy, confusion = get_average_score(reduced, subset[1])
            custom_res.append(calculateF1(reverseMatrix(confusion)))

        # A ValueError from the test is treated as p = 1.
        try:
            _, p = calculateWilcoxon(results, custom_res)
        except ValueError:
            p = 1

        make_wilcoxon_summary(method, set, reduced.shape[1], p, results, custom_res)

        if p > ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(set_len) + ' - NEW NB: ' + str(feats) + '\n')
            file.write(wicloxon_string_summary(method, set, reduced.shape[1], p, custom_res))
            return

    # No reduced feature count qualified: report the baseline unchanged.
    print('NOT FOUND - ROLLBACK\n')
    file.write(wicloxon_string_summary(method, set, set_len, 0, results))
コード例 #3
0
def make_best(file, method, set, elements):
    """Find the feature count with the highest F1 and write its summary.

    Every feature count from 1 to the full width of ``X`` is evaluated with
    ``get_method`` and ``get_average_score``.  The count with the strictly
    highest F1 wins, so ties keep the smaller count.  The winner is then
    re-evaluated, and that final run is what gets printed and written to
    ``file``.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        method: Name of the selection method, forwarded to ``get_method``.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    best_num_of_feats = 1
    [X, y] = elements
    best_f1 = 0

    for feats in range(1, X.shape[1] + 1):
        X_Fit, scores = get_method(method, elements, feats)
        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        new_f1 = calculateF1(matrix_rev)
        make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)

        if new_f1 > best_f1:
            best_f1 = new_f1
            best_num_of_feats = feats

    # Report the winning count; the loop variable only holds the last count
    # tried, and is unbound when X has no columns.
    print('\nBEST FOR: ' + str(best_num_of_feats) + '\n')

    # Re-run the winner so the written summary reflects that configuration.
    X_Fit, scores = get_method(method, elements, best_num_of_feats)
    accuracy, matrix = get_average_score(X_Fit, y)
    matrix_rev = reverseMatrix(matrix)
    make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)
    file.write(get_string_summary(method, set, X_Fit, accuracy, matrix_rev, scores))
コード例 #4
0
def make_closest(file, method, set, elements, basic_f1):
    """Search for the first feature count whose F1 is within ALPHA of the baseline.

    Feature counts from 1 up to one less than the width of ``X`` are tried
    in increasing order.  The first count whose F1 differs from ``basic_f1``
    by less than ``ALPHA`` (absolute difference) is written to ``file``.
    If none qualifies, the method is run once more with all features and
    that result is written instead.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        method: Name of the selection method, forwarded to ``get_method``.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
        basic_f1: F1 value obtained without feature selection.
    """
    X, y = elements
    total_feats = X.shape[1]

    for feats in range(1, total_feats):
        reduced, scores = get_method(method, elements, feats)
        accuracy, confusion = get_average_score(reduced, y)
        confusion_rev = reverseMatrix(confusion)
        p = abs(calculateF1(confusion_rev) - basic_f1)
        make_simple_summary(method, set, reduced, accuracy, confusion_rev, scores)

        if p < ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(total_feats) + ' - NEW NB: ' + str(reduced.shape[1]) + '\n')
            file.write(get_string_summary(method, set, reduced, accuracy, confusion_rev, scores))
            return

    # Nothing was close enough: fall back to the method run on every feature.
    print('NOT FOUND - ROLLBACK\n')
    reduced, scores = get_method(method, elements, total_feats)
    accuracy, confusion = get_average_score(reduced, y)
    confusion_rev = reverseMatrix(confusion)
    file.write(get_string_summary(method, set, reduced, accuracy, confusion_rev, scores))
コード例 #5
0
def make_experiment(file, set, elements):
    """Record the no-selection baseline, then run every method in the current MODE.

    The unreduced data is scored once and its summary is printed and written
    to ``file``.  Depending on the module-level ``MODE``, each selection
    method is then passed to ``make_closest`` (``'closest'``) or
    ``make_best`` (``'best'``).  Any other mode runs no selection method.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    X, y = elements

    accuracy, confusion = get_average_score(X, y)
    confusion_rev = reverseMatrix(confusion)

    make_simple_summary('NO SELECTION', set, X, accuracy, confusion_rev, ['all'])
    file.write(get_string_summary('NO SELECTION', set, X, accuracy, confusion_rev, ['all']))
    basic_f1 = calculateF1(confusion_rev)

    mode = MODE
    if mode != 'closest' and mode != 'best':
        return

    for method in ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE', 'CORRELATION COEF'):
        if mode == 'closest':
            make_closest(file, method, set, elements, basic_f1)
        else:
            make_best(file, method, set, elements)
コード例 #6
0
def make_experiment(file, set, elements):
    """Record the no-selection baseline, then time and score each selection method.

    The unreduced data is scored first and reported with a duration of 0.
    Each selector (ANOVA, RELIEF, information gain, chi-square, correlation
    coefficient) is then called with the same requested feature count.
    Only the selector call is timed; the reduced data is scored afterwards,
    and the summary is printed and written to ``file``.

    Args:
        file: Object with a ``write`` method that receives the summary text.
        set: Data-set identifier forwarded unchanged to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    [X, y] = elements

    accuracy, matrix = get_average_score(X, y)
    matrix_rev = reverseMatrix(matrix)
    make_simple_summary('NO SELECTION', set, X, accuracy, matrix_rev, ['all'], 0)
    file.write(get_string_summary('NO SELECTION', set, X, accuracy, matrix_rev, ['all'], 0))

    # NOTE(review): n / 5 * 5 is numerically the full feature count; the
    # trailing multiplier presumably selects k/5 of the features -- confirm.
    feat_num = math.floor(X.shape[1] / 5 * 5)

    # The labels are runtime strings used in the reports; keep them verbatim.
    selectors = (
        ('ANOVA', anova),
        ('RELIEF', relief),
        ('INFORATION GAIN', information_gain),
        ('CHI SQUARE', chi_square),
        ('CORRELATION COEF', correlation_coef),
    )

    for label, select in selectors:
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        X_Fit, scores = select(X, y, feat_num)
        elapsed = time.perf_counter() - start

        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        make_simple_summary(label, set, X_Fit, accuracy, matrix_rev, scores, elapsed)
        file.write(get_string_summary(label, set, X_Fit, accuracy, matrix_rev, scores, elapsed))