def selectBestModel(project_file, results_model_file):
    """Pick the most accurate classifier result for a project and build its model.

    Reads the project YAML file, scans its results directory for the best
    (accuracy, result-file, params) entry, writes an HTML report to
    ``<results_model_file>.results.html``, trains the SVM history for the
    winning parameters and copies the winning .param file next to the model.

    Args:
        project_file: path to the project definition in YAML format.
        results_model_file: base path used for the generated .results.html,
            .history and .param files.
    """
    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as pf:
            # NOTE(review): yaml.load on a project file; fine for trusted
            # input, but yaml.safe_load would be preferable.
            project = yaml.load(pf)
        className = project['className']
        results_dir = project['resultsDirectory']
        if os.path.exists(results_dir):
            classifierType = None  # all types
            cr = ClassificationResults()
            print('Loading all results...')
            cr.readResults(results_dir)
            # best(1, ...) returns the single highest-accuracy entry
            accuracy, filename, params = cr.best(1, classifierType)[0]
            print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)
            f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' % (className, project_file, accuracy))
            cm = ConfusionMatrix()
            cm.load(filename)
            f.write(cm.toHtml())
            # the .param file sits next to the .result file with the same stem
            filename = filename.replace('.result', '.param')
            trainSVMHistory(project_file, filename, results_model_file, className)
            shutil.copyfile(filename, results_model_file + '.param')
        else:
            print("RESULT " + "No results found for ", project_file, ": cannot build a model")
            # BUG FIX: original referenced undefined name 'collection' here,
            # raising NameError instead of reporting the missing results.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' % (className, project_file))
def selectBestModel(project_file, results_model_file):
    """Pick the most accurate classifier result for a project and build its model.

    Reads the project YAML file, scans its results directory for the best
    (accuracy, result-file, params) entry, writes an HTML report to
    ``<results_model_file>.results.html``, trains the SVM history for the
    winning parameters and copies the winning .param file next to the model.

    Args:
        project_file: path to the project definition in YAML format.
        results_model_file: base path used for the generated .results.html,
            .history and .param files.
    """
    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as pf:
            # NOTE(review): yaml.load on a project file; fine for trusted
            # input, but yaml.safe_load would be preferable.
            project = yaml.load(pf)
        className = project['className']
        results_dir = project['resultsDirectory']
        if os.path.exists(results_dir):
            classifierType = None  # all types
            cr = ClassificationResults()
            # print() calls produce identical output to the original
            # Python 2 print statements here.
            print('Loading all results...')
            cr.readResults(results_dir)
            # best(1, ...) returns the single highest-accuracy entry
            accuracy, filename, params = cr.best(1, classifierType)[0]
            print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)
            f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' % (className, project_file, accuracy))
            cm = ConfusionMatrix()
            cm.load(filename)
            f.write(cm.toHtml())
            # the .param file sits next to the .result file with the same stem
            filename = filename.replace('.result', '.param')
            trainSVMHistory(project_file, filename, results_model_file, className)
            shutil.copyfile(filename, results_model_file + '.param')
        else:
            print("RESULT " + "No results found for ", project_file, ": cannot build a model")
            # BUG FIX: original referenced undefined name 'collection' here,
            # raising NameError instead of reporting the missing results.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' % (className, project_file))
def testAccuraciesNFold(self):
    """Accuracy across folds should be the average of the per-fold accuracies."""
    matrix = ConfusionMatrix()
    # One perfectly classified fold (100% accuracy)...
    matrix = self.populateFold(matrix, 1, 0, 1, 0, fold=0)
    # ...and one completely misclassified fold (0% accuracy).
    matrix = self.populateFold(matrix, 0, 1, 0, 1, fold=1)
    # The combined accuracy averages the folds: (100 + 0) / 2 = 50.
    self.assertEqual(matrix.accuracy(), 50.0)
def testNormalizedAccuracy(self):
    """Normalized accuracy weights each class by its own population."""
    matrix = ConfusionMatrix()
    # Single fold with the following confusion matrix:
    #        A   B
    #   A  | 3 | 1 |
    #   B  | 6 | 2 |
    # Raw accuracy counts every hit over every instance: (3 + 2) / 12.
    # Normalized accuracy averages the per-class accuracies instead:
    # A: 3/4, B: 2/8, giving (0.75 + 0.25) / 2 = 50%.
    matrix = self.populateFold(matrix, 3, 1, 2, 6, fold=0)
    self.assertEqual(matrix.accuracy(), 100 * 5. / 12)
    self.assertEqual(matrix.normalizedAccuracy(), 50.0)
def selectBestModel():
    """Command-line entry point: select the best model for a project.

    Usage: %prog [options] project_file results_model_file

    Writes an HTML report, trains the SVM history for the winning parameters
    and copies the winning .param file next to the model.
    """
    parser = OptionParser(
        usage='%prog [options] project_file results_model_file')
    options, args = parser.parse_args()
    try:
        project_file = args[0]
        results_model_file = args[1]
    except IndexError:
        # fewer than two positional arguments were given
        parser.print_help()
        sys.exit(1)

    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as pf:
            # NOTE(review): yaml.load on a project file; fine for trusted
            # input, but yaml.safe_load would be preferable.
            project = yaml.load(pf)
        className = project['className']
        results_dir = project['resultsDirectory']
        if os.path.exists(results_dir):
            classifierType = None  # all types
            cr = ClassificationResults()
            # print() calls produce identical output to the original
            # Python 2 print statements here.
            print('Loading all results...')
            cr.readResults(results_dir)
            # best(1, ...) returns the single highest-accuracy entry
            accuracy, filename, params = cr.best(1, classifierType)[0]
            print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)
            f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' % (className, project_file, accuracy))
            cm = ConfusionMatrix()
            cm.load(filename)
            f.write(cm.toHtml())
            # the .param file sits next to the .result file with the same stem
            filename = filename.replace('.result', '.param')
            trainSVMHistory(project_file, filename, results_model_file, className)
            shutil.copyfile(filename, results_model_file + '.param')
        else:
            print("RESULT " + "No results found for ", project_file, ": cannot build a model")
            # BUG FIX: original referenced undefined name 'collection' here,
            # raising NameError instead of reporting the missing results.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' % (className, project_file))
def readResults(self, dir):
    """Reads all the results files contained in the given directory, and
    generates the associated ConfusionMatrix for each one.

    For every ``*.result`` file found, loads its confusion matrix and the
    sibling ``*.param`` file (same stem), and appends a
    (filename, ConfusionMatrix, params) tuple to ``self.results``.
    """
    resultFiles = glob.glob(join(dir, '*.result'))
    progress = TextProgress(len(resultFiles))
    for i, filename in enumerate(resultFiles):
        cm = ConfusionMatrix()
        cm.load(filename)
        paramFile = splitext(filename)[0] + '.param'
        # FIX: close the param file deterministically (original leaked
        # the handle returned by open()).
        with open(paramFile) as pf:
            params = yaml.load(pf.read())
        self.results.append((filename, cm, params))
        progress.update(i + 1)
def testStdNfold(self):
    """stdNfold should be the population std-dev of the per-fold accuracies."""
    matrix = ConfusionMatrix()
    # Three folds with accuracies of 0%, 10% and 20% respectively.
    matrix = self.populateFold(matrix, 0, 1, 0, 1, fold=0)
    matrix = self.populateFold(matrix, 1, 9, 1, 9, fold=1)
    matrix = self.populateFold(matrix, 2, 8, 2, 8, fold=2)
    # The mean accuracy is 10%, so the deviations are -10, 0 and +10;
    # squaring, summing and dividing by the number of folds gives the
    # analytic (population) variance.
    nfolds = 3.
    squared_deviations = [(-10.) * (-10.), 0., 10. * 10.]
    analytic_std = sqrt(sum(squared_deviations) / nfolds)
    self.assertEqual(matrix.stdNfold(), analytic_std)
def select_best_model(project_dir):
    """Selects most accurate classifier parameters for the specified project.

    Args:
        project_dir: Path to the project directory containing the project
            file (PROJECT_FILE_NAME) in YAML format.

    Returns:
        Dictionary that contains information about best model for the dataset:
            - parameters: classifier parameters for selected model;
            - accuracy: accuracy of selected model (rounded to 2 decimals);
            - confusion_matrix: simplified version of confusion matrix for
              selected model (per-class counts instead of instance lists);
            - history_path: path to the history file generated using returned
              set of parameters for the best model.
    """
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as project_file:
        # NOTE(review): yaml.load without an explicit Loader; acceptable for a
        # trusted project file, but yaml.safe_load would be preferable.
        project = yaml.load(project_file)
    classifierName = project["className"]
    results = ClassificationResults()
    results.readResults(project["resultsDirectory"])
    # best(1, None) -> single highest-accuracy (accuracy, file, params) entry
    best_accuracy, best_result_file, best_params = results.best(1, None)[0]
    cm = ConfusionMatrix()
    cm.load(best_result_file)
    # Collapse the confusion matrix: replace each cell's list of instances
    # with just its count, keeping only {actual: {predicted: count}}.
    simplified_cm = {}
    for key, val in cm.matrix.items():
        simplified_cm[key] = {}
        for predicted_key, predicted_val in val.items():
            simplified_cm[key][predicted_key] = len(predicted_val)
    history_file_path = os.path.join(project_dir, "%s.history" % classifierName)
    train_svm_history(project, best_params, history_file_path)
    return {
        "parameters": best_params,
        "accuracy": round(best_accuracy, 2),
        "confusion_matrix": simplified_cm,
        "history_path": history_file_path,
    }
def testStdNfoldNormalizedAccuracies(self):
    """Same std-dev test, but over the fold-wise *normalized* accuracies."""
    matrix = ConfusionMatrix()
    # Three folds with normalized accuracies of 0%, 10% and 20%.
    matrix = self.populateFold(matrix, 0, 1, 0, 1, fold=0)
    matrix = self.populateFold(matrix, 4, 16, 0, 10, fold=1)
    matrix = self.populateFold(matrix, 2, 8, 2, 8, fold=2)
    # Mean is 10%, deviations are -10, 0 and +10, so the analytic
    # (population) std is sqrt((100 + 0 + 100) / 3).
    nfolds = 3.
    squared_deviations = [(-10.) * (-10.), 0., 10. * 10.]
    analytic_std = sqrt(sum(squared_deviations) / nfolds)
    self.assertEqual(matrix.stdNfold(normalizedAccuracies=True), analytic_std)
    # Sanity check: without normalization the fold accuracies differ,
    # so the resulting std must differ too.
    self.assertNotEqual(matrix.stdNfold(normalizedAccuracies=False), analytic_std)
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from __future__ import print_function

import sys

from gaia2.classification import ConfusionMatrix

# Render a saved classification result file as an HTML confusion matrix.
try:
    results = sys.argv[1]
    output_html = sys.argv[2]
except IndexError:
    # FIX: catch only the missing-argument case instead of a bare except,
    # which also swallowed KeyboardInterrupt/SystemExit.
    print('Usage: %s <results_file> <confusion_matrix_html_file>' % sys.argv[0])
    exit(1)

cm = ConfusionMatrix()
cm.load(results)
# FIX: context manager guarantees the output file is flushed and closed.
with open(output_html, 'w') as out:
    out.write(cm.toHtml())
# Gaia is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import sys

from gaia2.classification import ConfusionMatrix

# Render a saved classification result file as an HTML confusion matrix.
try:
    results = sys.argv[1]
    output_html = sys.argv[2]
except IndexError:
    # FIX: catch only the missing-argument case instead of a bare except.
    # print() with a single argument behaves identically under Python 2
    # (replacing the original print statement).
    print('Usage: %s <results_file> <confusion_matrix_html_file>' % sys.argv[0])
    exit(1)

cm = ConfusionMatrix()
cm.load(results)
# FIX: context manager guarantees the output file is flushed and closed.
with open(output_html, 'w') as out:
    out.write(cm.toHtml())