def get_child_report_auc(child_dir, write_dir, dna_max_layers=8, dna_bits_per_layer=12, write=True, eval_type='auc'):
    """Build an evaluation report for one child and optionally save it as JSON.

    Args:
        child_dir: Directory of the child. It is passed to ``tools.read_dna``
            and ``tools.read_loss``, and its ``.name`` becomes the report name.
        write_dir: Directory that receives ``report.json`` when ``write`` is
            true; must support the ``/`` path operator.
        dna_max_layers: Forwarded to the ``Genetics`` constructor.
        dna_bits_per_layer: Forwarded to the ``Genetics`` constructor.
        write: When true, dump the report to ``write_dir / 'report.json'``.
        eval_type: ``'auc'`` (area under the loss curve) or ``'half_auc'``
            (area under the half curve). Also used as the report key that
            holds the score.

    Returns:
        dict with the keys ``'dna'``, ``'perplexity'``, ``'shape'``,
        ``'name'`` and ``eval_type``.

    Raises:
        ValueError: If ``eval_type`` is not one of the supported values.
    """
    # Fail fast, before any file is read: an unknown eval_type previously only
    # printed a message and then crashed on an unbound local.
    if eval_type not in ('auc', 'half_auc'):
        raise ValueError(
            "unsupported eval_type {!r}; expected 'auc' or 'half_auc'".format(eval_type)
        )

    g = Genetics(dna_max_layers, dna_bits_per_layer)
    dna = tools.read_dna(child_dir)
    perplexity, shape = g.decode_dna(dna)
    report = {
        'dna': dna,
        'perplexity': perplexity,
        'shape': shape,
        'name': child_dir.name,
    }

    loss = tools.read_loss(child_dir)
    # Compare by value (==): identity checks on strings only work by accident
    # of interning and miss equal strings built at runtime.
    if eval_type == 'auc':
        score = tools.get_area_under_curve(loss)
    else:
        score = tools.get_area_under_half_curve(loss)
    report[eval_type] = score

    if write:
        with open(str(write_dir / 'report.json'), 'w') as outfile:
            json.dump(report, outfile)
    return report
def _eval_half_curve(resident_directory):
    """Pick the two children with the smallest area under the half curve.

    Every sub-directory of ``resident_directory`` is treated as a child: its
    losses and DNA are read, and the area under the half curve is computed
    from the losses.

    Returns:
        A 2-tuple ``(best_dna, second_best_dna)`` ordered by ascending area.

    Raises:
        IndexError: If fewer than two child directories are present.
    """
    def _score(child):
        # Read order (loss first, then DNA) mirrors the on-disk access order.
        losses = read_loss(child)
        dna = read_dna(child)
        return get_area_under_half_curve(losses), dna

    # Snapshot the child folders before any of them is read.
    children = [entry for entry in resident_directory.iterdir() if entry.is_dir()]
    ranked = sorted(map(_score, children), key=lambda pair: pair[0])
    return ranked[0][1], ranked[1][1]
def half_auc_error_reports(test_dir):
    """Write ``half_auc_error_report.json`` into each generation folder.

    Each sub-directory of ``test_dir`` is a generation, and each sub-directory
    of a generation is a child. The report maps the child folder name to
    ``{'loss': <area under half curve>, 'dna': <child dna>}``.

    Args:
        test_dir: Path-like root that supports ``iterdir()``.

    Returns:
        None. The only output is the JSON file written per generation.
    """
    generations = [entry for entry in test_dir.iterdir() if entry.is_dir()]
    for generation in generations:
        # One report per generation, keyed by child folder name.
        report = {}
        members = [entry for entry in generation.iterdir() if entry.is_dir()]
        for member in members:
            member_dna = read_dna(member)
            # NOTE(review): the transform is loaded but its value is never
            # used; the call is kept because it may raise on missing data.
            read_tform(member)
            member_loss = read_loss(member)
            report[member.name] = {
                'loss': get_area_under_half_curve(member_loss),
                'dna': member_dna,
            }

        report_path = generation / "half_auc_error_report.json"
        with open(str(report_path), 'w') as handle:
            json.dump(report, handle)
def get_individual_model_report(name, dna_string, losses_list, eval_type='auc', legacy_dna=False):
    """Build an evaluation report for a single model from in-memory data.

    Args:
        name: Value stored under the report's ``'name'`` key.
        dna_string: DNA of the model; decoded with ``Genetics.decode_dna`` and
            stored under ``'dna'``.
        losses_list: Losses handed to the selected area-under-curve helper.
        eval_type: ``'auc'`` (area under the loss curve) or ``'half_auc'``
            (area under the half curve). Also used as the report key that
            holds the score.
        legacy_dna: Forwarded to ``Genetics.decode_dna``.

    Returns:
        dict with the keys ``'dna'``, ``'perplexity'``, ``'shape'``,
        ``'name'`` and ``eval_type``.

    Raises:
        ValueError: If ``eval_type`` is not one of the supported values.
    """
    # Reject unknown modes up front: previously this only printed a message
    # and then crashed on an unbound local.
    if eval_type not in ('auc', 'half_auc'):
        raise ValueError(
            "unsupported eval_type {!r}; expected 'auc' or 'half_auc'".format(eval_type)
        )

    # decode_dna is invoked on the class itself, not on an instance.
    perplexity, shape = Genetics.decode_dna(dna_string, legacy_dna=legacy_dna)
    report = {
        'dna': dna_string,
        'perplexity': perplexity,
        'shape': shape,
        'name': name,
    }

    # Compare by value (==), not identity, and avoid shadowing builtin eval().
    if eval_type == 'auc':
        score = tools.get_area_under_curve(losses_list)
    else:
        score = tools.get_area_under_half_curve(losses_list)
    report[eval_type] = score
    return report
def write_reports(test_folder, labels_path, labels_identifying_col=1):
    """Write the loss and k-NN error summaries for one test folder.

    Two JSON files are created inside ``test_folder``:

    * ``avg_loss.json``  -> ``{"vdm_standard": {"loss": <half-curve area>}}``
      computed from ``loss.csv``.
    * ``knn_error.json`` -> ``{"vdm_standard": {"knn_error": <error>}}``
      computed from ``tform.csv`` and the labels file.

    Args:
        test_folder: Path-like folder holding ``loss.csv`` and ``tform.csv``.
        labels_path: Path of the header-less, comma-separated labels file.
        labels_identifying_col: Forwarded to ``avg_knn_error``.
    """
    def _load_csv(path):
        # All inputs are header-less, comma-separated files.
        return pd.read_csv(str(path), sep=',', header=None)

    def _dump_json(payload, filename):
        with open(str(test_folder / filename), 'w') as handle:
            json.dump(payload, handle)

    run_name = "vdm_standard"

    # Loss summary: the frame itself (not ``.values``) goes to the helper.
    losses = _load_csv(test_folder / 'loss.csv')
    _dump_json({run_name: {'loss': get_area_under_half_curve(losses)}}, "avg_loss.json")

    # k-NN summary: labels and transform are passed on as raw arrays.
    labels = _load_csv(labels_path).values
    tform = _load_csv(test_folder / "tform.csv").values
    knn_error = avg_knn_error(tform, labels, labels_identifying_col)
    _dump_json({run_name: {'knn_error': knn_error}}, 'knn_error.json')