Example #1
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from Orange.evaluation import compute_CD, graph_ranks


def calculate_ranks(figure_path, data_frame, algorithm_analyzed):

    # Drop the first and last columns, keeping only the per-algorithm scores.
    data_frame = data_frame.iloc[:, 1:-1]
    names = data_frame.columns
    flat_values = data_frame[names].values

    # Negate the scores so that higher values receive better (lower) ranks.
    ranks = np.array([stats.rankdata(array) for array in -flat_values])
    # Calculating the average ranks.
    average_ranks = np.mean(ranks, axis=0)
    print('\n'.join('{} average rank: {}'.format(a, r)
                    for a, r in zip(names, average_ranks)))

    cd = compute_CD(average_ranks,
                    n=len(flat_values),
                    alpha='0.05',
                    test='nemenyi')
    print(f'CD: {cd}')

    graph_ranks(
        average_ranks,
        names=[og_name + '_' + algorithm_analyzed for og_name in names],
        cd=cd,
        textspace=1.5,
        width=6)

    cd_data_frame = pd.DataFrame(columns=['algorithm', 'rank'])
    cd_data_frame['algorithm'] = names.values
    cd_data_frame['rank'] = average_ranks

    cd_data_frame.to_csv(os.path.join(figure_path,
                                      'avg_ranks_' + algorithm_analyzed + '.csv'),
                         sep=';')
    plt.savefig(os.path.join(figure_path,
                             'cd_plot_' + algorithm_analyzed + '.pdf'),
                bbox_inches='tight')
    plt.show()
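
A minimal usage sketch with hypothetical data. The column layout is an assumption: a leading dataset-identifier column and a trailing non-score column, both of which iloc[:, 1:-1] discards; the path 'figures' and label 'accuracy' are placeholders.

os.makedirs('figures', exist_ok=True)
scores = pd.DataFrame({
    'dataset': ['d1', 'd2', 'd3', 'd4', 'd5'],
    'rf':  [0.91, 0.85, 0.78, 0.88, 0.90],
    'svm': [0.89, 0.80, 0.81, 0.84, 0.87],
    'knn': [0.85, 0.82, 0.75, 0.80, 0.83],
    'fold': [0, 1, 2, 3, 4],  # trailing column, dropped by iloc[:, 1:-1]
})
calculate_ranks('figures', scores, 'accuracy')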
Example #2
import matplotlib.pyplot as plt
from Orange.evaluation import compute_CD, graph_ranks

import util  # project-local helper; see the stand-in sketch below


def cd_diagram(df, filename_to_save):
    
    df_pivoted = df.pivot(index='dataset', columns='index', values='acc_mean')

    # Ranks per (classifier type, parameter) combination.
    ranks = util.rank_accuracy(df_pivoted)
    ranks = ranks.mean(axis=0).sort_values()  # mean rank
    ranks = ranks.rename('rank')

    avgranks = ranks.tolist()
    names = ranks.index.tolist()
    cd = compute_CD(avgranks, 10)  # n = 10 datasets
    graph_ranks(avgranks, names, cd=cd, filename=filename_to_save)
    plt.show()
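
util.rank_accuracy is a project-local helper that is not shown on this page. A minimal stand-in consistent with how it is used here (one rank per classifier per dataset, where the highest accuracy gets the better, i.e. lower, rank) could be:

def rank_accuracy(pivoted):
    # Rank within each row (dataset); the highest accuracy receives rank 1.
    return pivoted.rank(axis=1, ascending=False)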
Example #3
    def plot_comparisons(self, fried_result, names, cd, cd1, average_ranks):

        # Plot the Nemenyi critical-difference diagram.
        graph_ranks(average_ranks, names=names,
                    cd=cd, width=8, textspace=1.5)
        plt.title(f'Friedman-Nemenyi={round(fried_result.pvalue, 4)}\nCD={round(cd, 3)}')
        plt.show()

        # Plot the Bonferroni-Dunn diagram; cdmethod=0 marks the first
        # algorithm as the control method.
        graph_ranks(average_ranks, names=names,
                    cd=cd1, cdmethod=0, width=8, textspace=1.5)
        plt.title(f'Bonferroni-Dunn\nCD={round(cd1, 3)}')
        plt.show()
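
Note on cdmethod: in Orange's graph_ranks, passing cdmethod draws the critical-difference interval around that single method (here index 0, treated as the control) instead of the pairwise CD bar, which matches the one-vs-all comparison the Bonferroni-Dunn test performs.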
Example #4
    def gerar_plot(self, nome, caminho):

        # Pairing the accuracies of each model.
        #print(friedmanchisquare(self._data[0], self._data[1], self._data[2], self._data[3], self._data[4], self._data[5]))

        # Getting the model rankings and the p-values.
        meanRanks, pValues = self.do()
        #print("Mean rankings: ")
        #print(meanRanks)

        #print("P-values: ")
        #print(pValues)

        # Computing the statistics from the model ranks.
        cd = ora.compute_CD(meanRanks, len(self._data[0]), alpha="0.05", test="nemenyi")

        # Creating the plot with the rankings, labels and critical distance.
        ora.graph_ranks(meanRanks, self.labels, cd=cd, width=10, textspace=2)

        # Saving the figure.
        plt.savefig(caminho + nome)

        print("Test generated!")
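
Here self.do() is a project-specific helper (not shown) that is expected to return the mean rank of each model together with the associated p-values, and ora is presumably an alias such as import Orange.evaluation as ora.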
Example #5
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import rankdata
from Orange.evaluation import graph_ranks, compute_CD

# For 4 classifiers, the Nemenyi critical value at alpha = 0.05.
q = 2.569
classifiers = 4
datasets = 10

cd = q * np.sqrt((classifiers * (classifiers + 1)) / (6 * datasets))
print(cd)

df = pd.read_csv('decisiontree.csv', header=0, index_col=0)
df1 = pd.read_csv('multiclass.csv', header=0, index_col=0)

data = df.to_numpy()
data = np.append(data, df1.to_numpy(), axis=1)

ranking = np.zeros(shape=data.shape)

# Rank the classifiers within each row/dataset; rankdata gives rank 1 to the
# smallest value, so this assumes lower scores are better.
for i in range(ranking.shape[0]):
    ranking[i, :] = rankdata(data[i, :])

final = np.copy(ranking.mean(axis=0))

names = ['DecisionTree', 'OVO', 'OVR', 'ECOC']

graph_ranks(list(final), names, cd)

plt.show()
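
As a sanity check, the hand-computed CD above should match what Orange's compute_CD returns for the same number of classifiers and datasets:

cd_orange = compute_CD(list(final), datasets, alpha='0.05', test='nemenyi')
print(cd, cd_orange)  # the two values should agree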
Example #6
# Friedman test and CD diagram on F1-scores.
#performances = pd.DataFrame({'dataset':['df1', 'df2', 'df3', 'df4', 'df5', 'df6', 'df7', 'df8', 'df9', 'df10', 'df11'],'ORANGE': f1_orange, 'XGB': f1_xgb, 'RF': f1_rf, 'SVM': f1_svm, 'LR': f1_lr})
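
For this fragment to run on its own, the imports below and a concrete performances frame need to be in scope; the commented-out line above sketches its shape, and the f1_* lists here are placeholder values:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import friedmanchisquare, rankdata
from Orange.evaluation import compute_CD, graph_ranks

# Placeholder per-dataset F1-scores, one list per classifier.
f1_orange = [0.81, 0.78, 0.84, 0.75, 0.79, 0.82, 0.80, 0.77, 0.83, 0.76, 0.80]
f1_xgb = [0.83, 0.80, 0.86, 0.78, 0.81, 0.84, 0.82, 0.79, 0.85, 0.78, 0.82]
f1_rf = [0.82, 0.79, 0.85, 0.77, 0.80, 0.83, 0.81, 0.78, 0.84, 0.77, 0.81]
f1_svm = [0.79, 0.76, 0.82, 0.74, 0.77, 0.80, 0.78, 0.75, 0.81, 0.74, 0.78]
f1_lr = [0.78, 0.75, 0.81, 0.73, 0.76, 0.79, 0.77, 0.74, 0.80, 0.73, 0.77]
performances = pd.DataFrame({
    'dataset': ['df1', 'df2', 'df3', 'df4', 'df5', 'df6',
                'df7', 'df8', 'df9', 'df10', 'df11'],
    'ORANGE': f1_orange, 'XGB': f1_xgb, 'RF': f1_rf,
    'SVM': f1_svm, 'LR': f1_lr})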

# First, we extract the algorithms names.
algorithms_names = performances.drop('dataset', axis=1).columns

print(algorithms_names)
# Then, we extract the performances as a numpy.ndarray.
performances_array = performances[algorithms_names].values
print(performances_array)
# Finally, we apply the Friedman test.
print(friedmanchisquare(*performances_array))
ranks = np.array([rankdata(-p) for p in performances_array])

# Calculating the average ranks.
average_ranks = np.mean(ranks, axis=0)
print('\n'.join('{} average rank: {}'.format(a, r)
                for a, r in zip(algorithms_names, average_ranks)))

cd = compute_CD(average_ranks,
                n=len(performances),
                alpha='0.05',
                test='nemenyi')
# This method generates the plot.
graph_ranks(average_ranks,
            names=algorithms_names,
            cd=cd,
            width=10,
            textspace=3,
            reverse=True)
plt.show()
Example #7
        for result_key in classifier_results.keys():
            if 'RFE' in result_key:
                continue

            if classifier_name in result_key:
                results.append(
                    np.array(classifier_results[result_key][SCORING_METRIC]).mean())

        algorithm_averages[classifier_name] = np.array(results).mean()

    log(algorithm_averages)

    # Compute Friedman's test and CD for all except RFE, since we can't apply
    # it to all of the algorithms. Note: compute_CD and graph_ranks expect
    # average ranks as their first argument; this code passes mean scores.
    cd = compute_CD(list(algorithm_averages.values()),
                    len(FILES) * (len(feature_selectors) - 1))

    log("Critical differences: {}".format(cd))
    graph_ranks(list(algorithm_averages.values()),
                list(algorithm_averages.keys()), cd=cd, width=6, textspace=1.5)
    plt.savefig(OUTPUT_DIR + os.path.sep + 'nemenyi.png')

    log("Dataset results")
    log(algorithm_dataset_results)
    log('\n')

    friedman = stats.friedmanchisquare(*algorithm_dataset_results.values())
    log("Friedman's: {}, pvalue: {}".format(
        friedman.statistic, friedman.pvalue))
    log('\nDONE')
Example #8
print('\n'.join('{} average rank: {}'.format(a, r)
                for a, r in zip(algorithms, average_ranks)))

names = [
    algorithms[i] + ' - ' + str(round(average_ranks[i], 3))
    for i in range(len(average_ranks))
]

# This method computes the critical difference for the Nemenyi test with alpha=0.05.
# For some reason, this method only accepts alpha='0.05' or alpha='0.1'.
cd = compute_CD(average_ranks,
                n=len(results_array),
                alpha='0.05',
                test='nemenyi')
# This method generates the plot.
graph_ranks(average_ranks, names=names, cd=cd, width=6, textspace=1.5)
plt.title(
    f'Friedman-Nemenyi={round(friedmanchisquare(*results_array).pvalue, 4)}\nCD={round(cd, 3)}'
)
plt.show()

# This method computes the critical difference for Bonferroni-Dunn test with alpha=0.05.
# For some reason, this method only accepts alpha='0.05' or alpha='0.1'.
cd = compute_CD(average_ranks,
                n=len(results_array),
                alpha='0.05',
                test='bonferroni-dunn')
# This method generates the plot.
graph_ranks(average_ranks,
            names=names,
            cd=cd,
            width=6,
            textspace=1.5)
plt.title(f'Bonferroni-Dunn\nCD={round(cd, 3)}')
plt.show()
Example #9
def plot__nemenyi(gen, ranks, techniques, noise):
    cd = compute_CD(ranks, 30, alpha="0.05")  # n = 30 datasets
    graph_ranks(ranks, techniques, cd=cd, width=len(techniques), textspace=1.5)
    save_pdf(plt, root + '/Graphs/Nemenyi/' + gen + '/', 'Nemenyi_' + noise)
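
save_pdf and root come from the surrounding project. A minimal stand-in for the helper, assuming it only has to create the directory and write the current figure, could be:

import os


def save_pdf(plt_module, directory, name):
    # Create the target directory if needed, then save the current figure as PDF.
    os.makedirs(directory, exist_ok=True)
    plt_module.savefig(os.path.join(directory, name + '.pdf'), bbox_inches='tight')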