def load_train_model_from_dna(model_save_name, output_dims=2):
    """Interactively pick a DNA file and a save folder, then train and save a model.

    model_save_name: file name for the saved model inside the chosen folder.
    output_dims: output dimensionality of the trained network (default 2).

    Returns the loss history from Parametric_tSNE.fit, or None if the user
    cancels either dialog.
    """
    current_dir = Path.cwd()
    dna_file = filedialog.askopenfilename(initialdir=str(current_dir),
                                          title="Select DNA file")
    save_folder = filedialog.askdirectory(initialdir=str(current_dir),
                                          title="Select folder to save model")
    # BUG FIX: both dialogs return '' on cancel, which previously crashed on
    # Path('').read_text() / Path('') below — bail out instead.
    if not dna_file or not save_folder:
        return None

    # training data is fixed to the RBM training set in the working directory
    data_file = Path.cwd() / "RBMTrainingDataset" / "training_set.csv"
    test_data = pd.read_csv(str(data_file), sep=',', header=None).values
    input_dims = len(test_data[0])

    # decode the DNA string into a perplexity and a layer structure
    dna = Path(str(dna_file)).read_text()
    genetic_helper = Genetics()
    perplexity, layers = genetic_helper.decode_dna(dna)

    save_path = Path(save_folder) / model_save_name
    ptsne = Parametric_tSNE(input_dims, output_dims, perplexity, all_layers=layers)
    loss = ptsne.fit(test_data, verbose=False)
    ptsne.save_model(str(save_path))
    return loss  # previously computed and silently discarded
def train_child(dna, genetic_helper, test_data, input_dims, output_dims, log=False):
    """Decode a DNA string, build a Parametric_tSNE of that shape, and fit it.

    Returns a (loss, model) pair so callers can inspect training quality and
    reuse the fitted network.
    """
    perplexity, layers = genetic_helper.decode_dna(dna)
    if log:
        print("Training child with dim: ", str(layers))
    network = Parametric_tSNE(input_dims, output_dims, perplexity, all_layers=layers)
    # fit on the provided data set
    fit_loss = network.fit(test_data, verbose=False)
    return fit_loss, network
def load_and_transform(path_to_model, test_data, num_perplexities, output_dims):
    """Restore a saved Parametric_tSNE model and embed test_data with it.

    path_to_model: pathlib path to the stored model.
    test_data: np.ndarray of samples (e.g. df.values), one row per sample.
    num_perplexities: number of perplexities the model was trained with.
    output_dims: the model's original output dimensionality.

    Returns the transformed data as a numpy.ndarray.
    """
    # input dimensionality comes straight from the data's row width
    feature_count = len(test_data[0])

    # rebuild a network of matching shape, then load the stored weights
    restored = Parametric_tSNE(feature_count, output_dims, num_perplexities)
    restored.restore_model(str(path_to_model), num_perplexities=num_perplexities)

    return restored.transform(test_data)
def train_child(child_dir_path, dna, test_data, input_dims, output_dims, gh):
    """Train one child network from its DNA, persist the model, and write a report."""
    # translate DNA into network hyper-parameters
    perplexity, layers = gh.decode_dna(dna)
    print("training child of dim: ", layers)

    # build a network of that shape and fit it on test_data
    network = Parametric_tSNE(input_dims, output_dims, perplexity, all_layers=layers)
    fit_loss = network.fit(test_data, verbose=False)

    # persist the trained model inside the child's directory
    network.save_model(str(child_dir_path / 'model'))

    # record loss, DNA, and a performance graph, then release session memory
    tools.write_gen_report_curves(child_dir_path, dna, fit_loss, perplexity, layers)
    network.clear_session()
def train_child(gen_folder, child_name, input_dim, output_dim, dna, training_data,
                test_data, labels_data, eval_type, verbose=False):
    """Train one child network, evaluate it, and write its artifacts.

    Creates a subfolder for the child under gen_folder, trains a
    Parametric_tSNE decoded from dna, transforms test_data, computes a knn
    error against labels_data, and writes the transform, loss curve, and a
    JSON report.
    """
    # setup folder
    child_folder = io.create_subfolder(gen_folder, child_name)

    # train ptsne
    perplexity, layers = Genetics.decode_dna(dna)
    print("#################### Training child of shape:", str(layers),
          "#########################################")
    ptsne = Parametric_tSNE(input_dim, output_dim, perplexity, all_layers=layers)
    losses = ptsne.fit(training_data, verbose=verbose)

    # tform test data
    tform = ptsne.transform(test_data)
    # TODO ------ save if necessary the model ------------

    # free memory
    ptsne.clear_session()

    # evaluate. BUG FIX: compare strings with ==, not `is` — identity
    # comparison against a literal only worked by accident of interning.
    knn_error = stats.get_knn_error(tform, labels_data)
    if eval_type == "knn_error":
        eval_value = knn_error
    else:
        eval_value = eval.evaluate(eval_type, losses)

    # save data
    report_name = "report.json"
    io.write_csv(child_folder, tform, "tform.csv")
    io.write_csv(child_folder, losses, "loss.csv")
    reporting.write_json(child_folder, report_name, child_name=child_name,
                         input_dim=input_dim, output_dim=output_dim,
                         perplexity=perplexity, layers=layers,
                         knn_error=knn_error, DNA=dna,
                         eval_value=eval_value, eval_type=eval_type)
def run_standard_ptsne(test_name_path, train_data_path, test_data_path, output_dims=2):
    """Train a default (perplexity 30) Parametric_tSNE and save its outputs.

    A directory named test_name_path is created; the loss history and the
    transformed test data are written inside it.
    """
    # create a folder with the test name
    os.mkdir(str(test_name_path))

    # load the training and test sets as ndarrays
    raw_data = get_ndarray(train_data_path)
    held_out = get_ndarray(test_data_path)
    feature_count = len(raw_data[0])

    # fit a standard network sized to the data, then embed the test set
    perplexity = 30
    ptsne = Parametric_tSNE(feature_count, output_dims, perplexity)
    history = ptsne.fit(raw_data, verbose=False)
    embedded = ptsne.transform(held_out)

    # write out data and release session memory
    write_loss(test_name_path, history)
    write_csv(embedded, (test_name_path / "tform.csv"))
    ptsne.clear_session()
# Walk the flat-40 generation folder: for each child, rebuild a model of the
# recorded shape and structure, retrain it, and save its transform of the
# 2018 test data.
TARGET_FOLDER = Path.cwd() / "TestData" / "half_curve_layer_swap_40" / "generation_30"
SAVE_FOLDER = Path.cwd() / "Bred40_Tforms"
TRAINING_DATA = Path.cwd() / "RBMTrainingDataset" / "training_set.csv"
TEST_DATA = Path.cwd() / "RBMTrainingDataset" / "2018_data_eos.csv"

test_data = tools.get_ndarray(TEST_DATA)
input_dims = len(test_data[0])
train_data = tools.get_ndarray(TRAINING_DATA)
output_dims = 2
gh = G.Genetics()

for child in [c for c in TARGET_FOLDER.iterdir() if c.is_dir()]:
    # recover the perplexity and layer structure recorded for this child
    dna_string = tools.read_dna(child)
    perp, layers = gh.decode_dna(dna_string, legacy_dna=True)

    # retrain a model of that shape and embed the test data with it
    ptsne = Parametric_tSNE(input_dims, output_dims, perp, all_layers=layers)
    _ = ptsne.fit(train_data, verbose=False)
    transform = ptsne.transform(test_data)

    # persist the transform, then free session memory before the next child
    save_path = SAVE_FOLDER / (child.name + "_tform.csv")
    tools.write_csv(transform, save_path)
    ptsne.clear_session()
from core import Parametric_tSNE
import pandas as pd
from pathlib import Path
import tools

# Smoke test: fit a two-layer parametric t-SNE on the RBM training set.
datapath = Path.cwd() / 'RBMTrainingDataset' / 'training_set.csv'
data = pd.read_csv(str(datapath), sep=',', header=None)
high_dims = data.shape[1]
num_outputs = 2
perplexity = 30
train_data = data.values
# test_data = pd.read_csv(str(Path.cwd() / 'RBMTrainingDataset' / '2018_data.csv'), sep=',', header=None)
# test_data = test_data.values

ptSNE = Parametric_tSNE(high_dims, num_outputs, perplexity, all_layers=[100, 50])
ptSNE.fit(train_data, verbose=1)
# x = ptSNE.transform(test_data)
# tools.write_csv(x, 'transformed_data.csv')
# ptSNE.save_model(str(Path.cwd() / 'Models' / 'testmodel'))
print('done')
from core import Parametric_tSNE
from pathlib import Path
import pandas as pd

# Restore a previously saved model and embed the 2018 data set with it.
datapath = Path.cwd() / 'RBMTrainingDataset' / 'training_set.csv'
data = pd.read_csv(str(datapath), sep=',', header=None)
high_dims = data.shape[1]
num_outputs = 2
perplexity = 30

# rebuild a network of matching shape, then load the stored weights
target = str(Path.cwd() / 'Models' / 'testmodel')
ptsne = Parametric_tSNE(high_dims, num_outputs, perplexity)
ptsne.restore_model(target, num_perplexities=perplexity)

# embed the held-out data and show the result
test_data = pd.read_csv(str(Path.cwd() / 'RBMTrainingDataset' / '2018_data.csv'),
                        sep=',', header=None)
test_data = test_data.values
x = ptsne.transform(test_data)
print(x)
print('fin')
from core import Parametric_tSNE
from report_writing import log_basic_test_params
from pathlib import Path  # BUG FIX: Path is used below but was never imported
import pandas as pd
import training_evaluation as eval
import Genetics
import tools  # BUG FIX: tools.write_loss / tools.write_csv are called but tools was never imported

# Load the formatted MNIST data (already flattened into CSV rows).
train_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' / 'formatted_mnist_train.csv'),
                         sep=',', header=None).values
test_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' / 'formatted_mnist_test.csv'),
                        sep=',', header=None).values

# Train a parametric t-SNE on MNIST (784 inputs -> 2D, perplexity 30).
ptsne = Parametric_tSNE(784, 2, 30)
print("starting to train...")
loss = ptsne.fit(train_data, verbose=True)
print('done training....')
tform = ptsne.transform(test_data)

# Save the loss history and the embedded test set.
save_path = Path.cwd() / 'MNIST_testing'
tools.write_loss(save_path, loss)
tools.write_csv(tform, (save_path / "tform.csv"))
# graph and save