def get_model_report_from_folder(folder_path):
    """Build a model report from the data stored in a model folder.

    The model name, DNA string and loss list are obtained from
    ``folder_path`` through the ``tools`` helpers and handed to
    ``get_individual_model_report``. The evaluation type is fixed to
    ``'half_auc'`` and the DNA is always treated as legacy DNA.

    Args:
        folder_path: Location of the model folder; passed unchanged to
            ``tools.get_name_from_dir_path``, ``tools.read_dna`` and
            ``tools.read_loss``.

    Returns:
        Whatever ``get_individual_model_report`` returns for this model.
    """
    model_name = tools.get_name_from_dir_path(folder_path)
    model_dna = tools.read_dna(folder_path)
    model_losses = tools.read_loss(folder_path)
    return get_individual_model_report(
        model_name,
        model_dna,
        model_losses,
        'half_auc',  # evaluation type
        True,        # legacy_dna
    )
def get_child_report_auc(child_dir, write_dir, dna_max_layers=8,
                         dna_bits_per_layer=12, write=True, eval_type='auc'):
    """Build (and optionally write) a report for one child model.

    The report holds the child's DNA, the perplexity and shape decoded from
    it, the child directory name, and a loss-curve score stored under the
    key given by ``eval_type``.

    Args:
        child_dir: Child directory; must expose ``.name`` and be readable by
            ``tools.read_dna`` / ``tools.read_loss``.
        write_dir: Directory that receives ``report.json`` when ``write`` is
            true; must support the ``/`` path operator.
        dna_max_layers: Forwarded to ``Genetics``.
        dna_bits_per_layer: Forwarded to ``Genetics``.
        write: When true, dump the report as JSON to
            ``write_dir / 'report.json'``.
        eval_type: ``'auc'`` (score from ``tools.get_area_under_curve``) or
            ``'half_auc'`` (score from ``tools.get_area_under_half_curve``).

    Returns:
        The report dictionary.

    Raises:
        ValueError: If ``eval_type`` is not one of the supported values.
    """
    # Reject unknown evaluation types before doing any work. Previously this
    # case only printed a message and then crashed on an unbound variable.
    if eval_type not in ('auc', 'half_auc'):
        raise ValueError(
            "eval_type must be 'auc' or 'half_auc', got {!r}".format(eval_type)
        )

    g = Genetics(dna_max_layers, dna_bits_per_layer)
    dna = tools.read_dna(child_dir)
    perplexity, shape = g.decode_dna(dna)

    report = {
        'dna': dna,
        'perplexity': perplexity,
        'shape': shape,
        'name': child_dir.name,
    }

    loss = tools.read_loss(child_dir)
    # Compare strings by value (==); identity comparison (is) against a
    # literal only works by accident of string interning.
    if eval_type == 'auc':
        score = tools.get_area_under_curve(loss)
    else:
        score = tools.get_area_under_half_curve(loss)
    report[eval_type] = score

    if write:
        with open(str(write_dir / 'report.json'), 'w') as outfile:
            json.dump(report, outfile)
    return report
def _eval_half_curve(resident_directory):
    """Return the DNA of the two children with the smallest half-curve area.

    Every subdirectory of ``resident_directory`` is treated as a child
    folder. For each one the losses and DNA are read, and the area under the
    half curve of the losses is computed. Children are then ranked by
    ascending area.

    Args:
        resident_directory: Directory object supporting ``iterdir()`` whose
            subdirectories are the child folders.

    Returns:
        A ``(best_dna, second_best_dna)`` tuple.

    Raises:
        IndexError: If fewer than two child folders are found.
    """
    child_folders = [
        entry for entry in resident_directory.iterdir() if entry.is_dir()
    ]

    scored = []
    for child_folder in child_folders:
        child_losses = read_loss(child_folder)
        child_dna = read_dna(child_folder)
        scored.append((get_area_under_half_curve(child_losses), child_dna))

    # Rank by area only; ties keep their directory-iteration order.
    ranked = sorted(scored, key=lambda pair: pair[0])
    best, runner_up = ranked[0], ranked[1]
    return best[1], runner_up[1]
def _eval_knn_error(resident_directory):
    """Return the DNA of the two children with the lowest kNN error.

    Every subdirectory of ``resident_directory`` is treated as a child
    folder. For each one the transform and DNA are read, and the kNN error
    of the transform is computed against the module-level ``LABELS``.
    Children are then ranked by ascending error.

    Args:
        resident_directory: Directory object supporting ``iterdir()`` whose
            subdirectories are the child folders.

    Returns:
        A ``(best_dna, second_best_dna)`` tuple.

    Raises:
        IndexError: If fewer than two child folders are found.
    """
    child_folders = [
        entry for entry in resident_directory.iterdir() if entry.is_dir()
    ]

    scored = []
    for child_folder in child_folders:
        child_tform = read_tform(child_folder)
        child_dna = read_dna(child_folder)
        scored.append((get_knn_error(child_tform, LABELS), child_dna))

    # Rank by error only; ties keep their directory-iteration order.
    ranked = sorted(scored, key=lambda pair: pair[0])
    best, runner_up = ranked[0], ranked[1]
    return best[1], runner_up[1]
def knn_error_reports(test_dir):
    """Write a ``knn_error_report.json`` into every generation folder.

    Each subdirectory of ``test_dir`` is treated as a generation, and each
    subdirectory of a generation as a child. The report maps the child
    folder name to ``{'knn_error': ..., 'dna': ...}``. The error is computed
    from the child's transform against the module-level ``LABELS``.

    Args:
        test_dir: Directory object supporting ``iterdir()`` that holds the
            generation folders.

    Returns:
        None. The reports are written to disk as a side effect.
    """
    generation_dirs = [entry for entry in test_dir.iterdir() if entry.is_dir()]
    for generation in generation_dirs:
        # One report per generation, keyed by child folder name.
        generation_report = {}
        child_dirs = [entry for entry in generation.iterdir() if entry.is_dir()]
        for child in child_dirs:
            child_dna = read_dna(child)
            child_error = get_knn_error(read_tform(child), LABELS)
            generation_report[child.name] = {
                'knn_error': child_error,
                'dna': child_dna,
            }

        report_path = str(generation / "knn_error_report.json")
        with open(report_path, 'w') as json_file:
            json.dump(generation_report, json_file)
def organize_structures(gen_dir):
    """Print the children of a generation ordered by their decoded structure.

    Each subdirectory of ``gen_dir`` is treated as a child. Its DNA is read
    and unpacked into a perplexity and a structure using a default-constructed
    ``Genetics`` helper. One line per child is printed, sorted by structure.

    Args:
        gen_dir: Directory object supporting ``iterdir()`` that holds the
            child folders.

    Returns:
        None. Output goes to stdout.
    """
    helper = Genetics()
    child_dirs = [entry for entry in gen_dir.iterdir() if entry.is_dir()]

    rows = []
    for child_dir in child_dirs:
        child_name = child_dir.name
        child_dna = read_dna(child_dir)
        child_perplexity, child_structure = unpack_dna(child_dna, helper)
        rows.append((child_name, child_perplexity, child_structure, child_dna))

    # Order by the structure field only (index 2).
    for row in sorted(rows, key=lambda record: record[2]):
        print(row[0], ':', row[1], ': ', row[2])
def half_auc_error_reports(test_dir):
    """Write a ``half_auc_error_report.json`` into every generation folder.

    Each subdirectory of ``test_dir`` is treated as a generation, and each
    subdirectory of a generation as a child. The report maps the child
    folder name to ``{'loss': ..., 'dna': ...}``, where ``'loss'`` is the
    area under the half curve of the child's recorded losses.

    Only the DNA and the losses of a child are read. The child's transform
    plays no part in this report, so it is not loaded.

    Args:
        test_dir: Directory object supporting ``iterdir()`` that holds the
            generation folders.

    Returns:
        None. The reports are written to disk as a side effect.
    """
    # iterate through every generation subfolder
    for gen_dir in [x for x in test_dir.iterdir() if x.is_dir()]:
        # create a dict to turn into a generational stat report
        data = {}
        for child_dir in [y for y in gen_dir.iterdir() if y.is_dir()]:
            dna = read_dna(child_dir)
            loss = read_loss(child_dir)
            half_auc_error = get_area_under_half_curve(loss)
            data[child_dir.name] = {'loss': half_auc_error, 'dna': dna}

        with open(str(gen_dir / "half_auc_error_report.json"), 'w') as json_file:
            json.dump(data, json_file)
def get_child_report_auc(child_dir, write_dir, dna_max_layers=8,
                         dna_bits_per_layer=12, write=True):
    """Build (and optionally write) an area-under-curve report for a child.

    The report holds the child's DNA, the perplexity and shape decoded from
    it, the child directory name, and the area under the loss curve stored
    under ``'area_under_curve'``.

    Args:
        child_dir: Child directory; must expose ``.name`` and be readable by
            ``tools.read_dna`` / ``tools.read_loss``.
        write_dir: Directory that receives ``report.json`` when ``write`` is
            true; must support the ``/`` path operator.
        dna_max_layers: Forwarded to ``Genetics``.
        dna_bits_per_layer: Forwarded to ``Genetics``.
        write: When true, dump the report as JSON to
            ``write_dir / 'report.json'``.

    Returns:
        The report dictionary.
    """
    decoder = Genetics(dna_max_layers, dna_bits_per_layer)
    child_dna = tools.read_dna(child_dir)
    decoded_perplexity, decoded_shape = decoder.decode_dna(child_dna)

    report = {
        'dna': child_dna,
        'perplexity': decoded_perplexity,
        'shape': decoded_shape,
        'name': child_dir.name,
    }
    child_loss = tools.read_loss(child_dir)
    report['area_under_curve'] = tools.get_area_under_curve(child_loss)

    if not write:
        return report

    report_path = str(write_dir / 'report.json')
    with open(report_path, 'w') as outfile:
        json.dump(report, outfile)
    return report
# Go through the children stored in TARGET_FOLDER.
# For each child, use its DNA to retrain a model with the decoded perplexity
# and layer structure, then compute a transform of the test data and save it.
TARGET_FOLDER = Path.cwd().joinpath(
    "TestData", "half_curve_layer_swap_40", "generation_30"
)
SAVE_FOLDER = Path.cwd().joinpath("Bred40_Tforms")
TRAINING_DATA = Path.cwd().joinpath("RBMTrainingDataset", "training_set.csv")
TEST_DATA = Path.cwd().joinpath("RBMTrainingDataset", "2018_data_eos.csv")

test_data = tools.get_ndarray(TEST_DATA)
# The input width is taken from the first row of the test data.
input_dims = len(test_data[0])
train_data = tools.get_ndarray(TRAINING_DATA)
output_dims = 2

gh = G.Genetics()

child_dirs = [entry for entry in TARGET_FOLDER.iterdir() if entry.is_dir()]
for child in child_dirs:
    # Decode the perplexity and layer structure for this child (legacy DNA).
    perp, layers = gh.decode_dna(tools.read_dna(child), legacy_dna=True)

    # Train a model of that shape, then transform the test data with it.
    ptsne = Parametric_tSNE(input_dims, output_dims, perp, all_layers=layers)
    ptsne.fit(train_data, verbose=False)
    tools.write_csv(
        ptsne.transform(test_data),
        SAVE_FOLDER / (child.name + "_tform.csv"),
    )

    ptsne.clear_session()
import model_evaluation
from pathlib import Path
from tools import read_dna
from Genetics import Genetics

# Evaluate one stored child model against a data file and save the result
# as '40test.csv' in the current working directory.
working_dir = Path.cwd()
path_to_model_dir = working_dir.joinpath(
    "TestData", "LongShallowGenTestCurve", "generation_40", "child_2"
)
path_to_data_file = working_dir.joinpath("RBMTrainingDataset", "2018_data.csv")

# The DNA is decoded here, but the resulting perplexity is not passed on to
# the evaluation call below.
dna = read_dna(path_to_model_dir)
perplexity, _ = Genetics().decode_dna(dna)

model_evaluation.model_evaluation(
    path_to_model_dir,
    path_to_data_file,
    working_dir,
    '40test.csv',
    output_dims=2,
)
def package_child_data(childpath, genetic_helper):
    """Collect the name, decoded DNA fields and raw DNA of one child.

    Args:
        childpath: Child directory; must expose ``.name`` and be readable by
            ``read_dna``.
        genetic_helper: Helper object forwarded to ``unpack_dna``.

    Returns:
        A list ``[name, perplexity, structure, dna]``.
    """
    child_name = childpath.name
    child_dna = read_dna(childpath)
    child_perplexity, child_structure = unpack_dna(child_dna, genetic_helper)
    return [child_name, child_perplexity, child_structure, child_dna]