def get_model_report_from_folder(folder_path):
    """Build an individual model report from the contents of a model folder.

    The model name, DNA string and loss list are read from ``folder_path``
    through the ``tools`` helpers and handed to
    ``get_individual_model_report``.

    Args:
        folder_path: Folder passed to ``tools.get_name_from_dir_path``,
            ``tools.read_dna`` and ``tools.read_loss``.

    Returns:
        Whatever ``get_individual_model_report`` returns for this model.
    """
    model_name = tools.get_name_from_dir_path(folder_path)
    dna = tools.read_dna(folder_path)
    losses = tools.read_loss(folder_path)
    # Folder-based reports always use the 'half_auc' evaluation type and
    # pass True for the legacy-DNA flag.
    return get_individual_model_report(model_name, dna, losses, 'half_auc', True)
예제 #2
0
def get_child_report_auc(child_dir,
                         write_dir,
                         dna_max_layers=8,
                         dna_bits_per_layer=12,
                         write=True,
                         eval_type='auc'):
    """Build a report dict for one child model and optionally save it as JSON.

    The report contains the child's DNA, the perplexity and shape decoded
    from that DNA, the child's directory name, and an area score computed
    from its loss data. The score is stored under the key ``eval_type``.

    Args:
        child_dir: Child directory; must expose ``.name`` and is passed to
            ``tools.read_dna`` and ``tools.read_loss``.
        write_dir: Directory (supporting the ``/`` operator) in which
            ``report.json`` is written when ``write`` is true.
        dna_max_layers: Forwarded to the ``Genetics`` constructor.
        dna_bits_per_layer: Forwarded to the ``Genetics`` constructor.
        write: When true, dump the report to ``write_dir / 'report.json'``.
        eval_type: ``'auc'`` scores with ``tools.get_area_under_curve``;
            ``'half_auc'`` scores with ``tools.get_area_under_half_curve``.

    Returns:
        The report dict.

    Raises:
        ValueError: If ``eval_type`` is neither ``'auc'`` nor ``'half_auc'``.
    """
    # Validate first so a bad eval_type fails before any file is read.
    # Strings are compared with ==; `is` tests object identity and only
    # works by accident for interned literals.
    if eval_type == 'auc':
        use_half_curve = False
    elif eval_type == 'half_auc':
        use_half_curve = True
    else:
        raise ValueError(
            "eval_type must be 'auc' or 'half_auc', got {!r}".format(eval_type))

    g = Genetics(dna_max_layers, dna_bits_per_layer)
    dna = tools.read_dna(child_dir)
    perplexity, shape = g.decode_dna(dna)
    report = {
        'dna': dna,
        'perplexity': perplexity,
        'shape': shape,
        'name': child_dir.name,
    }

    loss = tools.read_loss(child_dir)
    if use_half_curve:
        auc = tools.get_area_under_half_curve(loss)
    else:
        auc = tools.get_area_under_curve(loss)
    report[eval_type] = auc

    if write:
        with open(str(write_dir / 'report.json'), 'w') as outfile:
            json.dump(report, outfile)

    return report
def _eval_half_curve(resident_directory):
    """Return the DNA of the two children with the smallest half-curve area.

    Every subdirectory of ``resident_directory`` is treated as a child: its
    losses and DNA are read, and the area under the half curve of the losses
    is used as its score.

    Returns:
        A 2-tuple ``(best_dna, second_best_dna)``, ordered by ascending area.

    Raises:
        IndexError: If there are fewer than two child directories.
    """
    child_dirs = [entry for entry in resident_directory.iterdir()
                  if entry.is_dir()]

    scored = []
    for child in child_dirs:
        losses = read_loss(child)
        dna = read_dna(child)
        scored.append((get_area_under_half_curve(losses), dna))

    # Rank on the area only; ties keep directory-iteration order.
    ranked = sorted(scored, key=lambda pair: pair[0])
    best, runner_up = ranked[0], ranked[1]
    return best[1], runner_up[1]
def _eval_knn_error(resident_directory):
    """Return the DNA of the two children with the smallest kNN error.

    Every subdirectory of ``resident_directory`` is treated as a child: its
    transform and DNA are read, and the transform is scored against the
    module-level ``LABELS`` with ``get_knn_error``.

    Returns:
        A 2-tuple ``(best_dna, second_best_dna)``, ordered by ascending error.

    Raises:
        IndexError: If there are fewer than two child directories.
    """
    child_dirs = [entry for entry in resident_directory.iterdir()
                  if entry.is_dir()]

    scored = []
    for child in child_dirs:
        tform = read_tform(child)
        dna = read_dna(child)
        scored.append((get_knn_error(tform, LABELS), dna))

    # Rank on the error only; ties keep directory-iteration order.
    ranked = sorted(scored, key=lambda pair: pair[0])
    best, runner_up = ranked[0], ranked[1]
    return best[1], runner_up[1]
예제 #5
0
def knn_error_reports(test_dir):
    """Write a ``knn_error_report.json`` into every generation folder.

    Each subdirectory of ``test_dir`` is a generation, and each subdirectory
    of a generation is a child. The report maps the child's directory name to
    ``{'knn_error': ..., 'dna': ...}``, where the error is computed from the
    child's transform and the module-level ``LABELS``.

    Args:
        test_dir: Directory whose subdirectories are generation folders.
    """
    generations = [entry for entry in test_dir.iterdir() if entry.is_dir()]
    for generation in generations:
        # One report per generation, keyed by child directory name.
        report = {}
        child_dirs = [entry for entry in generation.iterdir()
                      if entry.is_dir()]
        for child in child_dirs:
            dna = read_dna(child)
            tform = read_tform(child)
            report[child.name] = {
                'knn_error': get_knn_error(tform, LABELS),
                'dna': dna,
            }

        report_path = generation / "knn_error_report.json"
        with open(str(report_path), 'w') as handle:
            json.dump(report, handle)
def organize_structures(gen_dir):
    """Print the children of a generation ordered by their decoded structure.

    For every subdirectory of ``gen_dir`` the DNA is read and unpacked into a
    perplexity and a structure. One line per child is then printed, in
    ascending order of structure.

    Args:
        gen_dir: Generation directory whose subdirectories are children.
    """
    helper = Genetics()

    rows = []
    for child_dir in [entry for entry in gen_dir.iterdir() if entry.is_dir()]:
        child_name = child_dir.name
        dna = read_dna(child_dir)
        perplexity, structure = unpack_dna(dna, helper)
        rows.append((child_name, perplexity, structure, dna))

    # Order by the structure field (index 2) before printing.
    for child_name, perplexity, structure, _ in sorted(rows,
                                                       key=lambda row: row[2]):
        print(child_name, ':', perplexity, ':     ', structure)
예제 #7
0
def half_auc_error_reports(test_dir):
    """Write a ``half_auc_error_report.json`` into every generation folder.

    Each subdirectory of ``test_dir`` is a generation, and each subdirectory
    of a generation is a child. The report maps the child's directory name to
    ``{'loss': ..., 'dna': ...}``, where ``'loss'`` is the area under the half
    curve of the child's losses.

    Args:
        test_dir: Directory whose subdirectories are generation folders.
    """
    # iterate through every generation subfolder
    for gen_dir in [x for x in test_dir.iterdir() if x.is_dir()]:
        # create a dict to turn into a generational stat report
        data = {}
        for child_dir in [y for y in gen_dir.iterdir() if y.is_dir()]:
            dna = read_dna(child_dir)
            # Only the DNA and the loss curve feed this report, so the
            # child's transform file is deliberately not read here.
            loss = read_loss(child_dir)
            half_auc_error = get_area_under_half_curve(loss)
            data[child_dir.name] = {'loss': half_auc_error, 'dna': dna}
        with open(str(gen_dir / "half_auc_error_report.json"),
                  'w') as json_file:
            json.dump(data, json_file)
def get_child_report_auc(child_dir,
                         write_dir,
                         dna_max_layers=8,
                         dna_bits_per_layer=12,
                         write=True):
    """Build an area-under-curve report for one child and optionally save it.

    The report contains the child's DNA, the perplexity and shape decoded
    from that DNA, the child's directory name, and the area under the curve
    of its loss data (key ``'area_under_curve'``).

    Args:
        child_dir: Child directory; must expose ``.name`` and is passed to
            ``tools.read_dna`` and ``tools.read_loss``.
        write_dir: Directory (supporting the ``/`` operator) in which
            ``report.json`` is written when ``write`` is true.
        dna_max_layers: Forwarded to the ``Genetics`` constructor.
        dna_bits_per_layer: Forwarded to the ``Genetics`` constructor.
        write: When true, dump the report to ``write_dir / 'report.json'``.

    Returns:
        The report dict.
    """
    decoder = Genetics(dna_max_layers, dna_bits_per_layer)
    child_dna = tools.read_dna(child_dir)
    perplexity, shape = decoder.decode_dna(child_dna)
    report = {
        'dna': child_dna,
        'perplexity': perplexity,
        'shape': shape,
        'name': child_dir.name,
    }

    losses = tools.read_loss(child_dir)
    report['area_under_curve'] = tools.get_area_under_curve(losses)

    if write:
        report_path = write_dir / 'report.json'
        with open(str(report_path), 'w') as handle:
            json.dump(report, handle)

    return report
예제 #9
0
# Go through the children of generation 30 of the
# "half_curve_layer_swap_40" run. For each child, use its stored DNA to
# retrain a model with that perplexity and layer structure, then compute a
# transform of the test data and save it.

TARGET_FOLDER = Path.cwd().joinpath(
    "TestData", "half_curve_layer_swap_40", "generation_30")
SAVE_FOLDER = Path.cwd().joinpath("Bred40_Tforms")
TRAINING_DATA = Path.cwd().joinpath("RBMTrainingDataset", "training_set.csv")
TEST_DATA = Path.cwd().joinpath("RBMTrainingDataset", "2018_data_eos.csv")

# The model's input width is the length of the first test-data entry.
test_data = tools.get_ndarray(TEST_DATA)
input_dims = len(test_data[0])

train_data = tools.get_ndarray(TRAINING_DATA)

output_dims = 2

gh = G.Genetics()

for child in [entry for entry in TARGET_FOLDER.iterdir() if entry.is_dir()]:
    # Decode the perplexity and layer structure for this child.
    dna_string = tools.read_dna(child)
    perp, layers = gh.decode_dna(dna_string, legacy_dna=True)
    # Train a model with that configuration and transform the test data.
    ptsne = Parametric_tSNE(input_dims, output_dims, perp, all_layers=layers)
    _ = ptsne.fit(train_data, verbose=False)
    transform = ptsne.transform(test_data)
    # Save the transform under the child's name, then clear the session
    # before the next child is trained.
    save_path = SAVE_FOLDER / "{}_tform.csv".format(child.name)
    tools.write_csv(transform, save_path)
    ptsne.clear_session()
import model_evaluation
from pathlib import Path
from tools import read_dna
from Genetics import Genetics

# Run model_evaluation for the saved "child_2" model of generation 40 of the
# "LongShallowGenTestCurve" run, using the 2018 data file. The output
# location and file name are passed as the current working directory and
# '40test.csv'.
path_to_model_dir = Path.cwd().joinpath(
    "TestData", "LongShallowGenTestCurve", "generation_40", "child_2")
path_to_data_file = Path.cwd().joinpath("RBMTrainingDataset", "2018_data.csv")

# Decode the child's DNA; only the perplexity is kept by name.
dna = read_dna(path_to_model_dir)
gh = Genetics()
perplexity, _ = gh.decode_dna(dna)

save_path = Path.cwd()
save_name = '40test.csv'

model_evaluation.model_evaluation(
    path_to_model_dir,
    path_to_data_file,
    save_path,
    save_name,
    output_dims=2,
)
def package_child_data(childpath, genetic_helper):
    """Bundle a child's name, decoded DNA fields and raw DNA into one list.

    Args:
        childpath: Child directory; must expose ``.name`` and is passed to
            ``read_dna``.
        genetic_helper: Helper object forwarded to ``unpack_dna``.

    Returns:
        ``[name, perplexity, structure, dna]``.
    """
    child_name = childpath.name
    raw_dna = read_dna(childpath)
    perplexity, structure = unpack_dna(raw_dna, genetic_helper)
    return [child_name, perplexity, structure, raw_dna]