# Plot styling and run-wide settings for this script.
plt.rc('font', family='sans-serif')
rcParams['figure.figsize'] = 15, 2
all_metrics = []
# The baseline file is read with no header row and no index column.
# pd.read_csv is the documented replacement for the deprecated
# DataFrame.from_csv (removed in pandas 1.0).
bdf = pd.read_csv('baseline.csv', index_col=False, header=None)
results_dir = 'final_results_wl_2_to_20'
idx = 0
a = 10
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(a)
for folder in get_folders('NewlyAddedDatasets/'):
    metrics = {}
    # Highest accuracy seen for this folder across the scanned `a` values.
    # NOTE(review): `best`, `metrics`, `all_metrics`, `idx` and `bdf` are not
    # consumed in the visible part of the script -- presumably used further
    # down; confirm before removing any of them.
    best = 0
    # for a in range(a, a + 1):
    for a in range(3, 21):
        try:
            # Each value of `a` has its own `alphabet_<a>/classified.csv`.
            clsssifiedf = join(folder, results_dir, 'alphabet_%s' % a, 'classified.csv')
            df = pd.read_csv(clsssifiedf, index_col=False)
            y_true, y_pred = df['label'].values.tolist(), df['predicted'].values.tolist()
            accuracy = accuracy_score(y_true, y_pred)
            if accuracy > best:
                best = accuracy
        except Exception as e:
            # Best-effort scan: report the failure for this `a` and carry on
            # with the next one instead of aborting the whole run.
            print(e)
def build_all_corpora(path):
    """Build a corpus for every folder yielded by ``get_folders(path)``.

    Each folder is passed, in iteration order, to
    ``build_corpus_for_folder``. The function returns ``None``.
    """
    for subfolder in get_folders(path):
        build_corpus_for_folder(subfolder)
from build_corpus import build_all_corpora
from dsco_classification import process_results
from os.path import join
import pandas as pd

if __name__ == '__main__':
    # Pipeline driver: convert/symbolize the datasets, extract n-grams, then
    # for each dataset folder collect the results, save them as CSV and print
    # the row with the highest accuracy.
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Converting .mat to .csv and Symbolizing real-valued data to strings')
    saxify_all('NewlyAddedDatasets')
    print('Done converting datasets')
    print('-' * 80)

    print('Extracting unigrams and bigrams')
    build_all_corpora('NewlyAddedDatasets')
    print('Done extracting unigrams and bigrams')
    print('-' * 80)

    for folder in get_folders('NewlyAddedDatasets/'):
        print('Processing %s' % folder)
        results = process_results(folder, resultsdir='results')
        df = pd.DataFrame(results, columns=['WL', 'A', 'Acc'])
        retf = join(folder, 'results.csv')
        df.to_csv(retf, index=False)
        print('-' * 80)
        if df.empty:
            # Taking the max of an empty column raises; report it and keep
            # going so one empty folder does not abort the remaining ones.
            print('No results available for %s' % folder)
        else:
            print("DSCo-NG's suggestion:")
            # idxmax() returns an index label, so pair it with label-based
            # .loc and column names; this avoids relying on Series.argmax()
            # returning a position, which differs between pandas versions.
            i = df['Acc'].idxmax()
            print('w: %d\ta: %d\taccuracy: %f' % (df.loc[i, 'WL'], df.loc[i, 'A'], df.loc[i, 'Acc']))
        print('Results are saved to %s' % retf)
        print('-' * 80)