Exemple #1
0
def load_train_model_from_dna(model_save_name, output_dims=2):
    """Train a Parametric t-SNE model described by a DNA file and save it.

    Opens two dialogs (DNA file, destination folder), loads the training set
    from ``RBMTrainingDataset/training_set.csv`` under the current working
    directory, decodes the DNA into a perplexity and a layer layout, fits a
    model with that configuration and saves it.

    Args:
        model_save_name: Name of the saved model inside the chosen folder.
        output_dims: Output dimensionality handed to ``Parametric_tSNE``.

    Returns:
        None. Nothing is trained or saved when the DNA dialog is cancelled.
    """
    current_dir = Path.cwd()

    dna_file = filedialog.askopenfilename(initialdir=str(current_dir),
                                          title="Select DNA file")
    if not dna_file:
        # A cancelled dialog returns an empty value; without a DNA file there
        # is nothing to decode, so stop instead of failing on Path("").
        return None

    save_folder = filedialog.askdirectory(initialdir=str(current_dir),
                                          title="Select folder to save model")
    # A cancelled folder dialog falls back to the working directory, which is
    # where the model ended up before when the dialog returned "".
    save = Path(save_folder) if save_folder else current_dir
    save_path = save / model_save_name

    data_file = current_dir / "RBMTrainingDataset" / "training_set.csv"
    training_data = pd.read_csv(str(data_file), sep=',', header=None).values
    # Width of the first row is used as the model's input size
    input_dims = len(training_data[0])

    dna = Path(str(dna_file)).read_text()
    genetic_helper = Genetics()
    perplexity, layers = genetic_helper.decode_dna(dna)

    ptsne = Parametric_tSNE(input_dims,
                            output_dims,
                            perplexity,
                            all_layers=layers)
    # The value returned by fit was never used here, so it is not kept
    ptsne.fit(training_data, verbose=False)
    ptsne.save_model(str(save_path))
Exemple #2
0
def train_child(dna,
                genetic_helper,
                test_data,
                input_dims,
                output_dims,
                log=False):
    """Build a Parametric t-SNE network from ``dna`` and fit it.

    Args:
        dna: Encoded network description, decoded with
            ``genetic_helper.decode_dna`` into ``(perplexity, layers)``.
        genetic_helper: Object providing ``decode_dna``.
        test_data: Data passed to ``fit``.
        input_dims: Input dimensionality for the network.
        output_dims: Output dimensionality for the network.
        log: When truthy, print the decoded layer layout before training.

    Returns:
        Tuple ``(loss, ptsne)``: the value returned by ``fit`` and the fitted
        model instance.
    """
    child_perplexity, child_layers = genetic_helper.decode_dna(dna)

    if log:
        print("Training child with dim: ", str(child_layers))

    model = Parametric_tSNE(input_dims,
                            output_dims,
                            child_perplexity,
                            all_layers=child_layers)

    # fit runs first, then the (already fitted) model is packed alongside it
    return model.fit(test_data, verbose=False), model
def load_and_transform(path_to_model, test_data, num_perplexities,
                       output_dims):
    """Restore a saved Parametric t-SNE model and transform ``test_data``.

    Args:
        path_to_model: pathlib path of the model to load.
        test_data: Already loaded and formatted data (e.g. ``df.values``).
        num_perplexities: Number of perplexities originally used to train
            the model.
        output_dims: Original output dimensionality of the model.

    Returns:
        The result of ``transform`` on ``test_data`` (documented by the
        original author as a ``numpy.ndarray``).
    """
    # Width of the first row determines the network's input size
    n_features = len(test_data[0])

    # Rebuild a network of matching shape, then load the stored model into it
    model = Parametric_tSNE(n_features, output_dims, num_perplexities)
    model.restore_model(str(path_to_model),
                        num_perplexities=num_perplexities)

    return model.transform(test_data)
Exemple #4
0
def train_child(child_dir_path, dna, test_data, input_dims, output_dims, gh):
    """Train one child network, save its model and write its report.

    Args:
        child_dir_path: pathlib path of the child's output directory.
        dna: Encoded network description, decoded with ``gh.decode_dna``.
        test_data: Data the network is fitted on.
        input_dims: Input dimensionality for the network.
        output_dims: Output dimensionality for the network.
        gh: Helper object providing ``decode_dna``.
    """
    # Decode the DNA into the network hyper-parameters
    child_perplexity, child_layers = gh.decode_dna(dna)
    print("training child of dim: ", child_layers)

    # Build and fit a network with those hyper-parameters
    network = Parametric_tSNE(input_dims,
                              output_dims,
                              child_perplexity,
                              all_layers=child_layers)
    fit_loss = network.fit(test_data, verbose=False)

    # Persist the model inside the child's directory
    network.save_model(str(child_dir_path / 'model'))

    # Record loss, DNA and hyper-parameters via the reporting helper
    tools.write_gen_report_curves(child_dir_path, dna, fit_loss,
                                  child_perplexity, child_layers)

    network.clear_session()
Exemple #5
0
def train_child(gen_folder,
                child_name,
                input_dim,
                output_dim,
                dna,
                training_data,
                test_data,
                labels_data,
                eval_type,
                verbose=False):
    """Train one child network and write its transform, losses and report.

    Creates a sub-folder for the child, decodes ``dna`` into a perplexity and
    a layer layout, fits a ``Parametric_tSNE`` on ``training_data``,
    transforms ``test_data``, and writes ``tform.csv``, ``loss.csv`` and
    ``report.json`` into the child's folder.

    Args:
        gen_folder: Folder of the current generation.
        child_name: Name of the child; used for the sub-folder and report.
        input_dim: Input dimensionality for the network.
        output_dim: Output dimensionality for the network.
        dna: Encoded network description for ``Genetics.decode_dna``.
        training_data: Data the network is fitted on.
        test_data: Data that is transformed after training.
        labels_data: Labels passed to ``stats.get_knn_error`` with the
            transformed test data.
        eval_type: Name of the evaluation to report. ``"knn_error"`` reports
            the kNN error; any other value is forwarded to
            ``eval.evaluate`` together with the training losses.
        verbose: Forwarded to ``fit``.
    """
    # setup folder
    child_folder = io.create_subfolder(gen_folder, child_name)

    # train ptsne
    perplexity, layers = Genetics.decode_dna(dna)
    print("#################### Training child of shape:", str(layers),
          "#########################################")
    ptsne = Parametric_tSNE(input_dim,
                            output_dim,
                            perplexity,
                            all_layers=layers)
    losses = ptsne.fit(training_data, verbose=verbose)

    # tform test data
    tform = ptsne.transform(test_data)

    # TODO ------ save if necessary the model ------------

    # free memory
    ptsne.clear_session()

    # save data
    knn_error = stats.get_knn_error(tform, labels_data)
    # Compare by value: `is` tests object identity, which is not guaranteed
    # for equal strings, so a caller-built "knn_error" could be missed.
    if eval_type == "knn_error":
        eval_value = knn_error
    else:
        eval_value = eval.evaluate(eval_type, losses)
    report_name = "report.json"
    io.write_csv(child_folder, tform, "tform.csv")
    io.write_csv(child_folder, losses, "loss.csv")
    reporting.write_json(child_folder,
                         report_name,
                         child_name=child_name,
                         input_dim=input_dim,
                         output_dim=output_dim,
                         perplexity=perplexity,
                         layers=layers,
                         knn_error=knn_error,
                         DNA=dna,
                         eval_value=eval_value,
                         eval_type=eval_type)
def run_standard_ptsne(test_name_path,
                       train_data_path,
                       test_data_path,
                       output_dims=2,
                       perplexity=30):
    """Train a default-shaped Parametric t-SNE and write loss and transform.

    Args:
        test_name_path: pathlib path of the folder to create for this test.
            ``os.mkdir`` raises if it already exists or its parent is missing.
        train_data_path: Location of the training data for ``get_ndarray``.
        test_data_path: Location of the data to transform after training.
        output_dims: Output dimensionality for the network.
        perplexity: Perplexity handed to ``Parametric_tSNE``. Defaults to 30,
            the value that was previously hard-coded.
    """
    # create a folder with the test_name
    os.mkdir(str(test_name_path))

    # get relevant data
    raw_data = get_ndarray(train_data_path)
    test_data = get_ndarray(test_data_path)
    # Width of the first training row is used as the model's input size
    input_dims = len(raw_data[0])

    ptsne = Parametric_tSNE(input_dims, output_dims, perplexity)
    loss = ptsne.fit(raw_data, verbose=False)
    tform = ptsne.transform(test_data)

    # write out data
    write_loss(test_name_path, loss)
    write_csv(tform, (test_name_path / "tform.csv"))

    ptsne.clear_session()
Exemple #7
0
# Go through our flat 40 data: for every child folder, use the stored DNA to
# retrain a model of that shape and structure, then compute a transform of
# the test data and save it.

TARGET_FOLDER = Path.cwd().joinpath("TestData", "half_curve_layer_swap_40",
                                    "generation_30")
SAVE_FOLDER = Path.cwd().joinpath("Bred40_Tforms")
TRAINING_DATA = Path.cwd().joinpath("RBMTrainingDataset", "training_set.csv")
TEST_DATA = Path.cwd().joinpath("RBMTrainingDataset", "2018_data_eos.csv")

output_dims = 2

test_data = tools.get_ndarray(TEST_DATA)
# Width of the first test row is used as the model's input size
input_dims = len(test_data[0])

train_data = tools.get_ndarray(TRAINING_DATA)

gh = G.Genetics()

# Collect the child directories up front, then process them one by one
child_folders = [entry for entry in TARGET_FOLDER.iterdir() if entry.is_dir()]
for child in child_folders:
    # Get the perplexity and layer structure for this child
    dna_string = tools.read_dna(child)
    perp, layers = gh.decode_dna(dna_string, legacy_dna=True)

    # Train a fresh model with that configuration (fit's result is unused)
    ptsne = Parametric_tSNE(input_dims, output_dims, perp, all_layers=layers)
    ptsne.fit(train_data, verbose=False)

    # Transform the test data and store it under the child's name
    transform = ptsne.transform(test_data)
    save_path = SAVE_FOLDER / f"{child.name}_tform.csv"
    tools.write_csv(transform, save_path)

    ptsne.clear_session()
Exemple #8
0
from core import Parametric_tSNE
import pandas as pd
from pathlib import Path
import tools

# Train a Parametric t-SNE network with hidden layers [100, 50] on the RBM
# training set.
datapath = Path.cwd().joinpath('RBMTrainingDataset', 'training_set.csv')

# The CSV is read without a header row
data = pd.read_csv(str(datapath), header=None, sep=',')
train_data = data.values

high_dims = data.shape[1]  # number of CSV columns = network input size
num_outputs = 2
perplexity = 30

ptSNE = Parametric_tSNE(high_dims, num_outputs, perplexity,
                        all_layers=[100, 50])
ptSNE.fit(train_data, verbose=1)

# Disabled follow-up steps: transform a test set, write it out, save the model
#test_data = pd.read_csv(str(Path.cwd() / 'RBMTrainingDataset' / '2018_data.csv') , sep=',' , header=None)
#test_data = test_data.values
#x = ptSNE.transform(test_data)
#tools.write_csv(x , 'transformed_data.csv')
#ptSNE.save_model(str(Path.cwd() / 'Models' / 'testmodel'))

print('done')
Exemple #9
0
from core import Parametric_tSNE
from pathlib import Path
import pandas as pd

# Restore the saved 'testmodel' and print its transform of the 2018 data.
datapath = Path.cwd().joinpath('RBMTrainingDataset', 'training_set.csv')

# The training CSV is only read to learn the input dimensionality
data = pd.read_csv(str(datapath), header=None, sep=',')
high_dims = data.shape[1]
num_outputs = 2
perplexity = 30

target = str(Path.cwd().joinpath('Models', 'testmodel'))

ptsne = Parametric_tSNE(high_dims, num_outputs, perplexity)
# NOTE(review): the perplexity value (30) is passed as num_perplexities;
# confirm that restore_model expects this value rather than a count.
ptsne.restore_model(target, num_perplexities=perplexity)

# Load the data to project; like the training set it has no header row
test_data_path = Path.cwd().joinpath('RBMTrainingDataset', '2018_data.csv')
test_data = pd.read_csv(str(test_data_path), header=None, sep=',')
test_data = test_data.values

x = ptsne.transform(test_data)

print(x)

print('fin')
Exemple #10
0
from core import Parametric_tSNE
from report_writing import log_basic_test_params
import pandas as pd
import training_evaluation as eval
import Genetics

# Path builds the data/save locations and tools writes the results; neither
# name was imported by this script, so it failed with NameError.
from pathlib import Path

import tools

# Load the pre-formatted MNIST train and test CSVs (no header row) as arrays
train_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' /
                             'formatted_mnist_train.csv'),
                         sep=',',
                         header=None).values
test_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' /
                            'formatted_mnist_test.csv'),
                        sep=',',
                        header=None).values

# Instance a ptsne network (784 inputs, 2 outputs, perplexity 30), train it
# on the training data, then transform the test data
ptsne = Parametric_tSNE(784, 2, 30)
print("starting to train...")
loss = ptsne.fit(train_data, verbose=True)
print('done training....')
tform = ptsne.transform(test_data)

save_path = Path.cwd() / 'MNIST_testing'

# Write the training loss and the transformed test data into save_path
tools.write_loss(save_path, loss)
tools.write_csv(tform, (save_path / "tform.csv"))

# graph and save