import csv
import os
import shutil
import sys
from optparse import OptionParser

import yaml

# The project-specific helpers used below (ClassificationResults,
# ConfusionMatrix, trainSVMHistory, train_svm_history, PROJECT_FILE_NAME) are
# assumed to be importable from the surrounding package.


def selectBestModel(project_file, results_model_file):
    f = open(results_model_file + '.results.html', 'w')
    with open(project_file, 'r') as pfile:
        project = yaml.safe_load(pfile)
    className = project['className']
    results_dir = project['resultsDirectory']

    if os.path.exists(results_dir):
        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        # Pick the single best result across all classifier types.
        accuracy, filename, params = cr.best(1, classifierType)[0]

        print('RESULT ' + project_file + '\t' + str(accuracy) + '\t' + filename)
        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' % (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # The parameters file sits next to the results file.
        filename = filename.replace('.result', '.param')

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
    else:
        print('RESULT No results found for %s: cannot build a model' % project_file)
        f.write('<h1>%s (%s)</h1>\nResults not found\n' % (className, project_file))

    f.close()
def main():
    parser = OptionParser(usage='%prog [options] project_file results_model_file')
    options, args = parser.parse_args()

    try:
        project_file = args[0]
        results_model_file = args[1]
    except IndexError:
        parser.print_help()
        sys.exit(1)

    selectBestModel(project_file, results_model_file)
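# A minimal sketch of the command-line entry point; the script name in the
# example invocation is an assumption, not part of the original code.
if __name__ == '__main__':
    main()  # e.g.: python select_best_model.py project.yaml model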
def update_parameters(project_file, c_values, gamma_values, preprocessing_values):
    """Update the project file with user-provided preferences.

    Args:
        project_file: path to the project file to be updated.
        c_values: C values to set on every SVM classifier entry.
        gamma_values: gamma values to set on every SVM classifier entry.
        preprocessing_values: preprocessing values to set on every SVM
            classifier entry.
    """
    with open(project_file, "r") as pfile:
        project = yaml.safe_load(pfile)

    for pref in project['classifiers']['svm']:
        if c_values:
            pref['C'] = c_values
        if gamma_values:
            pref['gamma'] = gamma_values
        if preprocessing_values:
            pref['preprocessing'] = preprocessing_values

    with open(project_file, "w") as pfile:
        yaml.dump(project, pfile)
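# A hypothetical call, assuming the project file keeps its SVM grid under
# `classifiers: svm:`; the grids shown are illustrative, not defaults.
# update_parameters('project.yaml',
#                   c_values=[-2, 0, 2],
#                   gamma_values=[-8, -6, -4],
#                   preprocessing_values=['basic', 'normalized'])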
def select_best_model(project_dir):
    """Selects the most accurate classifier parameters for the specified project.

    Args:
        project_dir: path to the directory that contains the project file
            (PROJECT_FILE_NAME) in YAML format.

    Returns:
        Dictionary that contains information about the best model for the
        dataset:
        - parameters: classifier parameters for the selected model;
        - accuracy: accuracy of the selected model;
        - confusion_matrix: simplified version of the confusion matrix for
          the selected model;
        - history_path: path to the history file generated using the returned
          set of parameters for the best model.
    """
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as project_file:
        project = yaml.safe_load(project_file)
    classifier_name = project["className"]

    results = ClassificationResults()
    results.readResults(project["resultsDirectory"])
    best_accuracy, best_result_file, best_params = results.best(1, None)[0]

    cm = ConfusionMatrix()
    cm.load(best_result_file)

    # Collapse each cell of the confusion matrix from a list of instances to
    # an instance count.
    simplified_cm = {}
    for key, val in cm.matrix.items():
        simplified_cm[key] = {}
        for predicted_key, predicted_val in val.items():
            simplified_cm[key][predicted_key] = len(predicted_val)

    history_file_path = os.path.join(project_dir, "%s.history" % classifier_name)
    train_svm_history(project, best_params, history_file_path)

    return {
        "parameters": best_params,
        "accuracy": round(best_accuracy, 2),
        "confusion_matrix": simplified_cm,
        "history_path": history_file_path,
    }
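# A hypothetical call; the directory is a placeholder and must contain the
# PROJECT_FILE_NAME file.
# best = select_best_model('/path/to/project')
# print(best['accuracy'], best['history_path'])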
def retrainModel(project_file, params_file, output_file):
    """Retrain a model from a previously selected parameters file."""
    with open(project_file, 'r') as pfile:
        project = yaml.safe_load(pfile)
    class_name = project['className']
    trainSVMHistory(project_file, params_file, output_file, class_name)
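# A hypothetical retraining call; all three paths are placeholders.
# retrainModel('project.yaml', 'best.param', 'model')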
def generate_params_report(project_file, result_file):
    with open(project_file, 'r') as pfile:
        project = yaml.safe_load(pfile)
    project_file_dir = os.path.dirname(project_file)
    results_dir = project['resultsDirectory']
    tsv_file = os.path.join(project_file_dir, result_file)

    if os.path.exists(results_dir):
        classifier_type = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.read_results(results_dir)

        n_results = len(cr.results)
        results = cr.best(n_results, classifier_type)

        # `results` is a list of tuples sorted by accuracy with these fields:
        # (accuracy, std, norm_accuracy, norm_std, results_file, results_param).
        # Rank the results by normalized accuracy, keeping each tuple's 1-based
        # position in the accuracy-sorted list as an index.
        r = [(v[2], i) for i, v in enumerate(results, 1)]
        r = sorted(r, reverse=True)
        _, idx = zip(*r)

        fieldnames = [
            'accuracy_rank', 'accuracy', 'accuracy_std',
            'normalized_accuracy_rank', 'normalized_accuracy',
            'normalized_accuracy_std', 'results_file', 'param_file', 'kernel',
            'C', 'preprocessing', 'balance_classes', 'type', 'classifier',
            'gamma', 'evaluation'
        ]

        results = [{
            'accuracy_rank': k,
            'accuracy': accuracy,
            'accuracy_std': acc_std,
            'normalized_accuracy_rank': idx.index(k) + 1,
            'normalized_accuracy': norm_accuracy,
            'normalized_accuracy_std': norm_std,
            'results_file': results_file,
            # The parameters file shares the results file's path, with a
            # 'params' suffix in place of 'results'.
            'param_file': results_file[:-len('results')] + 'params',
            'kernel': results_param['model']['kernel'],
            'C': results_param['model']['C'],
            'preprocessing': results_param['model']['preprocessing'],
            'balance_classes': results_param['model']['balanceClasses'],
            'type': results_param['model']['type'],
            'classifier': results_param['model']['classifier'],
            'gamma': results_param['model']['gamma'],
            'evaluation': results_param['evaluation']
        } for k, (accuracy, acc_std, norm_accuracy, norm_std, results_file,
                  results_param) in enumerate(results, 1)]

        with open(tsv_file, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
            writer.writeheader()
            writer.writerows(results)
    else:
        print('No results found for {} in its "resultsDirectory" ({}).\n'
              'If the project was moved you may want to manually update '
              'this field on the project file.'.format(project_file, results_dir))
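# A hypothetical report run; writes a tab-separated summary of every result
# next to the project file. Both file names are placeholders.
# generate_params_report('project.yaml', 'params_report.tsv')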