def generate_image_from_single_point(single_point):
    """
    Generates the aae output from a given point on the latent space.

    The request is queued on the autoencoder; if training is currently stopped, the autoencoder is restarted
    with ``train(False)``. Afterwards the result is polled every 200 ms for at most 30 s.

    :param single_point: point on the latent space; its length has to match the "z_dim" parameter
    :return: tuple (payload, status code). On success the payload is a 2 or 3D (depending on colorscale) list
        holding the image pixels (for the "mass_spec" dataset the result is returned unconverted) and the
        status code is 200. Otherwise the payload is an error message with status code 400, 404, 408 or 412.
    """
    aae_parameters = Storage.get_aae_parameters()
    if not aae_parameters:
        return "Error: autoencoder not found", 404

    z_dim = aae_parameters["z_dim"]

    if Storage.get_selected_autoencoder() != "Unsupervised":
        return "Error: This function is supposed to work for unsupervised autoencoders only!", 412

    if len(single_point) != z_dim:
        return "Error: Invalid dimension! Dimension should be %s." % z_dim, 400

    # get the autoencoder and check if we have one
    aae = Storage.get_aae()
    if not aae:
        return "Error: autoencoder not found", 404

    # create the operation for the aae and add it
    operation = {"generate_image_from_single_point": single_point}
    aae.add_to_requested_operations_by_swagger(operation)

    # training has already stopped, so we restart the aae
    if aae.get_train_status() == "stop":
        aae.train(False)

    max_waiting_time = 30    # seconds
    polling_interval = 0.2   # seconds
    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    result = aae.get_requested_operations_by_swagger_results()
    while result is None and total_waiting_time <= max_waiting_time:
        time.sleep(polling_interval)
        total_waiting_time += polling_interval
        result = aae.get_requested_operations_by_swagger_results()

    # Decide on the result itself rather than on the elapsed-time counter: a result that arrives on the last
    # poll is returned instead of being reported as a timeout and left behind for the next request.
    if result is None:
        return "Request timed out", 408

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    if aae.selected_dataset != "mass_spec":
        result = result.astype("float64").tolist()

    return result, 200
def generate_image_grid():
    """
    Generates the image grid by sampling the latent space and returns it.

    The request is queued on the autoencoder; if training is currently stopped, the autoencoder is restarted
    with ``train(False)``. Afterwards the result is polled every 200 ms for at most 30 s.

    :return: tuple (payload, status code). On success the payload is a list with one nested list per element
        of the autoencoder's result (each converted to float64) and the status code is 200. Otherwise the
        payload is an error message with status code 404 (no autoencoder) or 408 (timeout).
    """
    # get the autoencoder and check if we have one
    aae = Storage.get_aae()
    if not aae:
        return "Error: autoencoder not found", 404

    # we don't need a parameter for the image grid
    operation = {"generate_image_grid": ""}
    aae.add_to_requested_operations_by_swagger(operation)

    print(aae.get_train_status())

    # training has already stopped, so we restart the aae
    if aae.get_train_status() == "stop":
        aae.train(False)

    max_waiting_time = 30    # seconds
    polling_interval = 0.2   # seconds
    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    result = aae.get_requested_operations_by_swagger_results()
    while result is None and total_waiting_time <= max_waiting_time:
        time.sleep(polling_interval)
        total_waiting_time += polling_interval
        result = aae.get_requested_operations_by_swagger_results()

    # Decide on the result itself rather than on the elapsed-time counter: a result that arrives on the last
    # poll is returned instead of being reported as a timeout and left behind for the next request.
    if result is None:
        return "Request timed out", 408

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    result = [a.astype("float64").tolist() for a in result]

    return result, 200
def get_tuning_results():
    """
    Returns the tuning results held by the storage class.

    The expected layout is a list of dictionaries:
    [{"parameter_combination": {...}, "performance": {"loss_x": x, "loss_y": y, ..},
    "folder_name": "some path"}, ...]

    :return: tuple (tuning results, 200) if results are available, ("No tuning results found", 404) otherwise
    """
    # nothing stored (yet)
    if not Storage.get_tuning_results():
        return "No tuning results found", 404

    # hand the stored results over to the swagger server
    return Storage.get_tuning_results(), 200
def stop_tuning():
    """
    Stops the tuning and the training of the autoencoder that is currently stored.

    The tuning status is set to "stop" first, even if no autoencoder is available.

    :return: tuple ("Tuning stopped", 200) on success, ("No autoencoder found", 404) if there is no autoencoder
    """
    # signal the tuning loop to stop
    set_tuning_status("stop")

    # without an adv. autoencoder there is no training to stop
    if not Storage.get_aae():
        return "No autoencoder found", 404

    # stop the training of the current autoencoder
    Storage.get_aae().set_train_status("stop")

    return "Tuning stopped", 200
def generate_image_from_single_point_as_byte_string(single_point):
    """
    Generates the aae output from a given point on the latent space and returns it as byte string.

    :param single_point: point on the latent space
    :return: tuple (byte string of the image, 200) on success; if generate_image_from_single_point reports
        an error, its (error message, status code) tuple is returned unchanged
    """
    img, response_code = generate_image_from_single_point(single_point)

    # on failure img holds an error message, which must not be fed into the image conversion
    if response_code != 200:
        return img, response_code

    byte_string = convert_image_array_to_byte_string(img, channels=Storage.get_n_channels())

    return byte_string, response_code
def get_single_image_as_byte_string(image_id, data_subset_name="train"):
    """
    Returns a single image with the respective id as byte string.

    :param image_id: id of the image to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return: tuple (byte string of the image, 200) on success; if get_single_image reports an error, its
        (error message, status code) tuple is returned unchanged
    """
    img, response_code = get_single_image(image_id, data_subset_name)

    # on failure img holds an error message, which must not be fed into the image conversion
    if response_code != 200:
        return img, response_code

    byte_string = convert_image_array_to_byte_string(img, channels=Storage.get_n_channels())

    return byte_string, response_code
def control_training(train_status):
    """
    Starts and stops the training.

    Nothing happens (and None is returned implicitly) if the request is not JSON or if train_status is neither
    "start" nor "stop".

    :param train_status: one of ["start", "stop"]; whether to start or stop the training
    :return: tuple ("Training started", 200) or ("Training stopped", 200) on success,
        ("No autoencoder found", 404) if no autoencoder is stored
    """
    if connexion.request.is_json:

        if train_status == "start":
            # get the adv. autoencoder
            aae = Storage.get_aae()

            # check for the autoencoder before touching it; otherwise calling a method on None raises an
            # uncaught AttributeError and the 404 response is never reached
            if aae is None:
                return "No autoencoder found", 404

            # set the train status
            aae.set_train_status("start")

            # define background thread:
            aae_thread = threading.Thread(target=aae.train, args=(True, ))

            # the adv autoencoder is currently training, so we need to stop it first
            # NOTE(review): the status is set on the same autoencoder object the new thread trains and is
            # not set back to "start" here - confirm that train(True) handles this as intended
            if Storage.get_aae_thread():
                # stop training
                aae.set_train_status("stop")

            # set the new training thread
            Storage.set_aae_thread(aae_thread)

            # start training:
            aae_thread.start()

            return "Training started", 200

        if train_status == "stop":
            # get aae
            aae = Storage.get_aae()

            if aae is None:
                return "No autoencoder found", 404

            # stop training
            aae.set_train_status("stop")

            return "Training stopped", 200
def load_data_set(dataset_name, mass_spec_data_properties=None):
    """
    Loads a dataset via get_input_data (from "../data") and registers it in the storage class.

    :param dataset_name: one of ["MNIST", "SVHN", "cifar10", "mass_spec", "custom"]
    :param mass_spec_data_properties: dictionary holding the properties for the mass spec data; required
        when dataset_name is "mass_spec"
    :return: tuple ("data successfully loaded", 200) on success, otherwise an error message with status 404
    """
    known_datasets = ("MNIST", "SVHN", "cifar10", "mass_spec", "custom")
    if dataset_name not in known_datasets:
        return "dataset name not found", 404

    properties_missing = mass_spec_data_properties is None
    if dataset_name == "mass_spec" and properties_missing:
        return "Bad request! mass_spec_data_properties needs to be provided when using mass spec data!", 404

    loaded_data = get_input_data(dataset_name, filepath="../data",
                                 mass_spec_data_properties=mass_spec_data_properties)

    # remember both the data and the name of the dataset in the storage class
    Storage.set_input_data(loaded_data)
    Storage.set_selected_dataset(dataset_name)

    return "data successfully loaded", 200
def generate_image_grid_as_byte_string():
    """
    Generates the image grid by sampling the latent space and returns it as list of byte strings.

    :return: tuple (list of byte strings, 200) on success; if generate_image_grid reports an error, its
        (error message, status code) tuple is returned unchanged
    """
    imgs, response_code = generate_image_grid()

    # on failure imgs holds an error message; iterating over it would convert single characters
    if response_code != 200:
        return imgs, response_code

    n_channels = Storage.get_n_channels()
    list_of_byte_strings = [convert_image_array_to_byte_string(img, channels=n_channels) for img in imgs]

    return list_of_byte_strings, response_code
def get_data_batch(batch_size=100, data_subset_name="train"):
    """
    Fetches the next batch (images and labels) of the requested data subset.

    The raw batch is also stored in the storage class as the current batch.

    :param batch_size: size of the batch
    :param data_subset_name: one of ["train", "test", "validation"]
    :return: tuple ({"images": [...], "labels": [...]}, 200) on success, ("No data found", 404) if the
        storage class raises a KeyError for the requested subset
    """
    try:
        subset = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    # images and labels of the current batch
    images, labels = subset.next_batch(batch_size)

    # keep the unconverted batch in the storage class
    Storage.set_current_batch_data({"images": images, "labels": labels})

    # swagger doesn't allow multiple return values, so both parts are packed into one dictionary
    payload = {}
    payload["images"] = images.astype("float64").tolist()
    payload["labels"] = labels.tolist()

    return payload, 200
def reset_tensorflow_graph():
    """
    Resets the graph of the stored autoencoder to enable training another autoencoder.

    :return: tuple ("Graph successfully reset", 200) on success, ("Error: no autoencoder found", 404) if no
        autoencoder is stored
    """
    adv_autoencoder = Storage.get_aae()

    if adv_autoencoder:
        adv_autoencoder.reset_graph()
        return "Graph successfully reset", 200

    # there is no autoencoder whose graph could be reset
    return "Error: no autoencoder found", 404
def get_data_batch_as_byte_string(batch_size=100, data_subset_name="train"):
    """
    Returns the data (images and labels) for the current batch with the images as byte strings.

    :param batch_size: size of the batch
    :param data_subset_name: one of ["train", "test", "validation"]
    :return: tuple ({"images": [byte strings], "labels": [...]}, 200) on success; if get_data_batch reports
        an error, its (error message, status code) tuple is returned unchanged
    """
    batch_dict, response_code = get_data_batch(batch_size, data_subset_name)

    # on failure batch_dict holds an error message, so it can neither be indexed nor reported as 200
    if response_code != 200:
        return batch_dict, response_code

    channels = Storage.get_n_channels()
    batch_dict["images"] = [convert_image_array_to_byte_string(image, channels)
                            for image in batch_dict["images"]]

    return batch_dict, response_code
def run_randomsearch(aae_parameters, selected_autoencoder, n_randomized_parameter_combinations):
    """
    Starts a random search in a background thread using n_randomized_parameter_combinations different
    parameter combinations.

    Parameters contained in the request body, e.g. batch_size=100, are shared by all combinations; every
    default parameter that is missing in the request body is passed on as a parameter to randomize.

    :param aae_parameters: parameter values shared by all runs (replaced by the JSON body of the request)
    :param selected_autoencoder: what autoencoder to use
    :param n_randomized_parameter_combinations: how many combinations should be evaluated
    :return: tuple (message, status code): 200 once the search has been started, 404 if no dataset is
        selected or a KeyError occurs while starting the thread, 415 if the request is not JSON
    """
    if not connexion.request.is_json:
        return 'Error: parameters not in .json format', 415

    # the parameters for the adv autoencoder are taken from the request body
    aae_parameters = connexion.request.get_json()

    # a dataset has to be selected first
    if not Storage.get_selected_dataset():
        return "Error: data set not found", 404

    # the selected dataset, e.g. ["MNIST", "SVHN", "cifar10", "custom"]
    selected_dataset = Storage.get_selected_dataset()
    aae_parameters["selected_dataset"] = selected_dataset

    # the results_path depends on the selected autoencoder
    aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)
    aae_parameters["selected_autoencoder"] = selected_autoencoder

    # every default parameter the request did not provide gets randomized
    default_params = get_default_parameters(selected_autoencoder, selected_dataset)
    params_selected_for_random_search = [name for name in default_params if name not in aae_parameters]

    print("params selected as args for random search: \n", params_selected_for_random_search)

    # the thread expects its positional arguments as tuple
    thread_args = (n_randomized_parameter_combinations, *params_selected_for_random_search)

    try:
        tuning_thread = threading.Thread(target=do_randomsearch, args=thread_args, kwargs=aae_parameters)
        tuning_thread.start()
    except KeyError as missing_parameter:
        return 'Error: Parameter %s not found' % missing_parameter, 404

    # remember the parameters and the selected autoencoder in the storage class
    Storage.set_aae_parameters(aae_parameters)
    Storage.set_selected_autoencoder(selected_autoencoder)

    return "Success: random search has started", 200
def get_single_label(label_id, data_subset_name="train"):
    """
    Looks up a single label of the requested data subset.

    :param label_id: id (index) of the label to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return: tuple (label as list, 200) on success, ("No data found", 404) if the storage class raises a
        KeyError for the subset, ("Index out of bounds", 415) if the lookup raises an IndexError
    """
    try:
        subset = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    try:
        requested_label = subset.labels[label_id]
    except IndexError:
        return "Index out of bounds", 415
    else:
        return list(requested_label), 200
def get_single_image(image_id, data_subset_name="train"):
    """
    Looks up a single image of the requested data subset.

    :param image_id: id (index) of the image to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return: tuple (image converted to float64 as list, 200) on success, ("No data found", 404) if the
        storage class raises a KeyError for the subset, ("Index out of bounds", 415) if the lookup raises
        an IndexError
    """
    try:
        subset = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    try:
        requested_image = subset.images[image_id]
    except IndexError:
        return "Index out of bounds", 415
    else:
        return list(requested_image.astype("float64")), 200
def get_spectra_original_and_reconstruction():
    """
    Returns the original and reconstructed spectra of the stored autoencoder in a json serializable form.

    :return: tuple (dictionary, 200) on success, where every value that is not None has been converted with
        ``tolist()``; ("Error: no autoencoder found", 404) if no autoencoder is stored
    """
    # get the autoencoder and check if we have one
    aae = Storage.get_aae()
    if not aae:
        return "Error: no autoencoder found", 404

    spectra_original_and_reconstruction = aae.get_spectra_original_and_reconstruction()

    # Convert the numpy arrays to lists (np array is not json serializable..). A new dictionary is built
    # instead of overwriting the entries in place: if the autoencoder hands out its own dictionary, in-place
    # conversion would replace its arrays by lists and break the next call (lists have no tolist()).
    serializable_spectra = {}
    for key in spectra_original_and_reconstruction:
        value = spectra_original_and_reconstruction[key]
        serializable_spectra[key] = value.tolist() if value is not None else None

    return serializable_spectra, 200
def run_gridsearch(aae_parameters, selected_autoencoder):
    """
    Starts a grid search in a background thread.

    The JSON body of the request, extended by the selected dataset, the results path and the selected
    autoencoder, is passed on to do_gridsearch as keyword arguments.

    :param aae_parameters: parameters for the grid search (replaced by the JSON body of the request)
    :param selected_autoencoder: what autoencoder to use
    :return: tuple (message, status code): 200 once the search has been started, 404 if no dataset is
        selected or a KeyError occurs while starting the thread, 415 if the request is not JSON
    """
    if not connexion.request.is_json:
        return 'Error: parameters not in .json format', 415

    # the parameters for the adv autoencoder are taken from the request body
    aae_parameters = connexion.request.get_json()

    # a dataset has to be selected first
    if not Storage.get_selected_dataset():
        return "Error: data set not found", 404

    # the selected dataset, e.g. ["MNIST", "SVHN", "cifar10", "custom"]
    selected_dataset = Storage.get_selected_dataset()
    aae_parameters["selected_dataset"] = selected_dataset

    # the results_path depends on the selected autoencoder
    aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)
    aae_parameters["selected_autoencoder"] = selected_autoencoder

    print(aae_parameters)

    try:
        tuning_thread = threading.Thread(target=do_gridsearch, kwargs=aae_parameters)
        tuning_thread.start()
    except KeyError as missing_parameter:
        return 'Error: Parameter %s not found' % missing_parameter, 404

    # remember the parameters and the selected autoencoder in the storage class
    Storage.set_aae_parameters(aae_parameters)
    Storage.set_selected_autoencoder(selected_autoencoder)

    return "Success: grid search has started", 200
def classify_single_image(single_image):
    """
    Classifies a single image and returns the predicted class label as integer label.

    The request is queued on the autoencoder; if training is currently stopped, the autoencoder is restarted
    with ``train(False)``. Afterwards the result is polled every 200 ms for at most 30 s.

    :param single_image: the image to classify; its length has to be input_dim_x * input_dim_y for
        "gray_scale" and input_dim_x * input_dim_y * 3 for "rgb_scale"
    :return: tuple (integer label of the predicted class, 200) on success, otherwise an error message with
        status code 400, 404, 408 or 412
    """
    aae_parameters = Storage.get_aae_parameters()
    if not aae_parameters:
        return "Error: autoencoder not found", 404

    input_dim_x = aae_parameters["input_dim_x"]
    input_dim_y = aae_parameters["input_dim_y"]
    color_scale = aae_parameters["color_scale"]

    # the expected length stays 0 for any other color scale
    input_dim = 0
    if color_scale == "gray_scale":
        input_dim = input_dim_x * input_dim_y
    elif color_scale == "rgb_scale":
        input_dim = input_dim_x * input_dim_y * 3

    if Storage.get_selected_autoencoder() != "SemiSupervised":
        return "Error: This function is supposed to work for semi-supervised autoencoders only!", 412

    if len(single_image) != input_dim:
        return "Error: Invalid dimension! Dimension should be %s." % input_dim, 400

    # get the autoencoder and check if we have one
    aae = Storage.get_aae()
    if not aae:
        return "Error: autoencoder not found", 404

    operation = {"classify_single_image": single_image}
    aae.add_to_requested_operations_by_swagger(operation)

    # training has already stopped, so we restart the aae
    if aae.get_train_status() == "stop":
        aae.train(False)

    max_waiting_time = 30    # seconds
    polling_interval = 0.2   # seconds
    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    result = aae.get_requested_operations_by_swagger_results()
    while result is None and total_waiting_time <= max_waiting_time:
        time.sleep(polling_interval)
        total_waiting_time += polling_interval
        result = aae.get_requested_operations_by_swagger_results()

    # Decide on the result itself rather than on the elapsed-time counter: a result that arrives on the last
    # poll is returned instead of being reported as a timeout and left behind for the next request.
    if result is None:
        return "Request timed out", 408

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np ints are not json serializable
    return int(result), 200
def get_biases_or_weights_for_layer(bias_or_weights, layer_name):
    """
    Requests the biases or the weights of a single layer from the stored autoencoder.

    The subnetwork is taken from the part of layer_name in front of the first "_" and has to be a key of
    the autoencoder's layer names; layer_name itself has to be listed for that subnetwork. The request is
    queued on the autoencoder; if training is currently stopped, the autoencoder is restarted with
    ``train(False)``. Afterwards the result is polled every 200 ms for at most 30 s.

    :param bias_or_weights: one of ["bias", "weights"]
    :param layer_name: name of the layer, prefixed with its subnetwork (encoder, decoder, etc)
    :return: tuple (list of nested lists converted to float64, 200) on success, otherwise an error message
        with status code 400, 404 or 408
    """
    # TODO: layer name as enum
    if bias_or_weights not in ["bias", "weights"]:
        return "invalid input", 400

    if not Storage.get_aae_parameters():
        return "Error: autoencoder not found", 404

    # get the autoencoder and check if we have one
    aae = Storage.get_aae()
    if not aae:
        return "Error: autoencoder not found", 404

    # check if the layer_name is valid
    subnetwork = layer_name.split("_")[0]     # encoder, decoder, etc
    all_layer_names = aae.get_all_layer_names()
    try:
        layer_names_of_subnetwork = all_layer_names[subnetwork]
    except KeyError:
        return "Error: layer_name is invalid!", 400

    if layer_name not in layer_names_of_subnetwork:
        return "Error: layer_name is invalid!", 400

    # request the operation
    operation = {"get_biases_or_weights_for_layer": (bias_or_weights, layer_name)}
    aae.add_to_requested_operations_by_swagger(operation)

    # training has stopped, so we restart the aae to get the weights/biases
    if aae.get_train_status() == "stop":
        aae.train(False)

    max_waiting_time = 30    # seconds
    polling_interval = 0.2   # seconds
    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    result = aae.get_requested_operations_by_swagger_results()
    while result is None and total_waiting_time <= max_waiting_time:
        time.sleep(polling_interval)
        total_waiting_time += polling_interval
        result = aae.get_requested_operations_by_swagger_results()

    # Decide on the result itself rather than on the elapsed-time counter: a result that arrives on the last
    # poll is returned instead of being reported as a timeout and left behind for the next request.
    if result is None:
        return "Request timed out! Maybe you need to start training first", 408

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    result = [a.astype("float64").tolist() for a in result]

    return result, 200
def do_gridsearch(*args, selected_autoencoder="Unsupervised", selected_dataset="MNIST", **kwargs):
    """
    Performs a grid search using all possible combinations of the parameters provided. In case there are no
    parameters provided it uses all the possible parameter combinations from the hard coded parameters.

    Every combination is trained unless the module level tuning_status is "stop"; the results are sorted by
    their "summed_loss_final", stored in the storage class and written to a log file.

    Example calls:
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", learning_rate_autoencoder=[0.1, 0.01, 0.001],
                        MomentumOptimizer_momentum_autoencoder=[1.0, 0.9, 0.8])
        - do_gridsearch(n_neurons_of_hidden_layer_x_autoencoder=[[500, 250, 125], [1000, 750, 25]],
                        n_neurons_of_hidden_layer_x_discriminator=[[500, 250, 125], [1000, 750, 25]])
        - do_gridsearch(n_neurons_of_hidden_layer_x_discriminator=[[500, 250, 125], [1000, 750, 25]])
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", "learning_rate_autoencoder")
        - do_gridsearch()
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", learning_rate_autoencoder=[0.5],
                        MomentumOptimizer_momentum_autoencoder=[1.0])

    :param args: strings of the variable defined in the Parameters class to do the grid search for. In this
        case it uses the possible parameter values in the Parameters class: "variable_name"
    :param selected_dataset: ["MNIST", "SVHN", "cifar10", "custom"]
    :param selected_autoencoder: ["Unsupervised", "Supervised", "SemiSupervised"]
    :param kwargs: arbitrary number of: variable_name=[variable_value1, variable_value2, variable_value3]
    :return: the best parameter combination as a dictionary; None if no combination has been trained
    :raises ValueError: if selected_autoencoder is none of the supported autoencoders
    """
    autoencoder_classes = {
        "Unsupervised": UnsupervisedAdversarialAutoencoder,
        "Supervised": SupervisedAdversarialAutoencoder,
        "SemiSupervised": SemiSupervisedAdversarialAutoencoder,
    }
    # fail with a clear message instead of an unbound local variable inside the training loop
    if selected_autoencoder not in autoencoder_classes:
        raise ValueError("unknown autoencoder: {}".format(selected_autoencoder))
    autoencoder_class = autoencoder_classes[selected_autoencoder]

    print("Doing grid search..")

    log_result_path = "../results/Logs/GridSearch"
    date = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    log_file_name = log_result_path + "/{0}_{1}_log.txt".format(date, selected_dataset)

    print("Log will be saved at location " + log_file_name)

    # get the parameter combinations to iterate over
    gridsearch_parameter_combinations = \
        aae_params.get_gridsearch_parameters(*args, selected_autoencoder=selected_autoencoder,
                                             selected_dataset=selected_dataset, **kwargs)

    # stores the performance for the parameter combination
    performance_for_parameter_combination = []

    print("There are", len(gridsearch_parameter_combinations), "combinations:")

    for a in gridsearch_parameter_combinations:
        print(a)
    print()

    # iterate over each parameter combination
    for gridsearch_parameter_combination in gridsearch_parameter_combinations:

        # for controlling the tuning via swagger; the status is checked again for every combination
        if tuning_status == "stop":
            continue

        print("Training .. ", gridsearch_parameter_combination)

        # create the AAE and train it with the current parameters
        adv_autoencoder = autoencoder_class(gridsearch_parameter_combination)

        # we want to include the results from our previous runs on the minibatch summary images
        adv_autoencoder.set_include_tuning_performance(True)

        # set the autoencoder for the swagger server
        Storage.set_aae(adv_autoencoder)

        # start the training
        adv_autoencoder.train(True)

        # get the performance
        performance = adv_autoencoder.get_final_performance()
        print(performance)

        # convert performance to float64 (for swagger server)
        performance = {key: np.float64(value) for key, value in performance.items()}

        folder_name = adv_autoencoder.get_result_folder_name()

        # store the param_comb and the performance in the list
        performance_for_parameter_combination.append({
            "parameter_combination": gridsearch_parameter_combination,
            "performance": performance,
            "folder_name": folder_name,
        })

        # store the performance over time of the current autoencoder
        Storage.get_tuning_results_performance_over_time()[folder_name] = \
            adv_autoencoder.get_performance_over_time()

        # store the learning rates over time of the current autoencoder
        Storage.get_tuning_results_learning_rates_over_time()[folder_name] = \
            adv_autoencoder.get_learning_rates()

        # reset the tensorflow graph
        adv_autoencoder.reset_graph()

    # sort combinations by their performance
    sorted_list = sorted(performance_for_parameter_combination,
                         key=lambda x: x["performance"]["summed_loss_final"])

    # save the tuning results for the swagger server
    Storage.set_tuning_results(sorted_list)

    print("#" * 20)

    # the log is collected first and written in one go instead of re-opening the file for every entry
    log_lines = []

    for comb in sorted_list:
        print("performance:", comb["performance"])
        print("folder name:", comb["folder_name"])
        print()
        log_lines.append("performance: {}\n".format(comb["performance"]))
        log_lines.append("folder name: {}\n".format(comb["folder_name"]))

    print(sorted_list)

    # there is no best combination if the tuning was stopped right away or no combination was generated
    best = sorted_list[0] if sorted_list else None

    if best is not None:
        print("best param combination:", best["parameter_combination"])
        print("best performance:", best["performance"])
        print("folder name:", best["folder_name"])
        log_lines.append("best param combination: {}\n".format(best["parameter_combination"]))
        log_lines.append("best performance: {}\n".format(best["performance"]))
        log_lines.append("folder name: {}\n".format(best["folder_name"]))

    # create a new log file
    with open(log_file_name, 'w') as log:
        log.writelines(log_lines)

    if best is None:
        return None

    return best["parameter_combination"]
def do_randomsearch(n_parameter_combinations=5, *args, selected_autoencoder="Unsupervised",
                    selected_dataset="MNIST", **kwargs):
    """
    Performs a random search using n_parameter_combinations different parameter combinations. The parameter
    combination is obtained by randomly assigning values for the parameters provided (args and kwargs).

    Every combination is trained unless the module level tuning_status is "stop". If the training of a
    combination fails, all of its losses are recorded as infinity. The results are stored in the storage
    class and written to a log file.

    Example calls:
        - do_randomsearch()
        - do_randomsearch(2, "batch_size", learning_rate_autoencoder=random.uniform(0.2, 0.001))
        - do_randomsearch(10, "batch_size", learning_rate_autoencoder=random.uniform(0.2, 0.001))
        - do_randomsearch(5, "batch_size", "learning_rate_autoencoder")
        - do_randomsearch(5, learning_rate_autoencoder=random.uniform(0.2, 0.001),
                          learning_rate_discriminator=random.uniform(0.2, 0.001))

    :param n_parameter_combinations: number of parameter combinations to try
    :param selected_dataset: ["MNIST", "SVHN", "cifar10", "custom"]
    :param selected_autoencoder: ["Unsupervised", "Supervised", "SemiSupervised"]
    :param args: strings of the variable defined in the Parameters class to randomize
    :param kwargs: manually assigned values for the specified variable
    :return: the best parameter combination (lowest "autoencoder_loss_final") as a dictionary; None if no
        combination has been trained
    :raises ValueError: if selected_autoencoder is none of the supported autoencoders
    """
    autoencoder_classes = {
        "Unsupervised": UnsupervisedAdversarialAutoencoder,
        "Supervised": SupervisedAdversarialAutoencoder,
        "SemiSupervised": SemiSupervisedAdversarialAutoencoder,
    }
    # fail with a clear message instead of an unbound local variable inside the training loop
    if selected_autoencoder not in autoencoder_classes:
        raise ValueError("unknown autoencoder: {}".format(selected_autoencoder))
    autoencoder_class = autoencoder_classes[selected_autoencoder]

    print("Doing random search..")

    log_result_path = "../results/Logs/RandomSearch"
    date = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    log_file_name = log_result_path + "/{0}_{1}_log.txt".format(date, selected_dataset)

    print("Log will be saved at location " + log_file_name)

    # get some random parameter combinations
    random_param_combinations = \
        [aae_params.get_randomized_parameters(*args, selected_autoencoder=selected_autoencoder,
                                              selected_dataset=selected_dataset, **kwargs)
         for _ in range(n_parameter_combinations)]

    # TODO: think about whether the default parameter combination (based on the selected dataset) should
    # always be added to the list

    # stores the performance for the parameter combination
    performance_for_parameter_combination = []

    print("There are", len(random_param_combinations), "combinations:")

    for a in random_param_combinations:
        print(a)
    print()

    # iterate over each parameter combination
    for random_param_combination in random_param_combinations:

        # for controlling the tuning via swagger; the status is checked again for every combination
        if tuning_status == "stop":
            continue

        print(random_param_combination)

        # create the AAE and train it with the current parameters
        adv_autoencoder = autoencoder_class(random_param_combination)

        # we want to include the results from our previous runs on the minibatch summary images
        adv_autoencoder.set_include_tuning_performance(True)

        try:
            # set the autoencoder for the swagger server
            Storage.set_aae(adv_autoencoder)

            # start the training
            adv_autoencoder.train(True)

            # get the performance
            performance = adv_autoencoder.get_final_performance()
        except Exception as err:
            # best effort: a failed run must not abort the whole search, so it gets the worst possible
            # performance; the error is printed so that the failure does not go unnoticed
            print("whoops", repr(err))
            performance = {
                "autoencoder_loss_final": float('inf'),
                "discriminator_loss_final": float('inf'),
                "generator_loss_final": float('inf'),
                "summed_loss_final": float('inf')
            }

        print(performance)

        # convert performance to float64 (for swagger server)
        performance = {key: np.float64(value) for key, value in performance.items()}

        folder_name = adv_autoencoder.get_result_folder_name()

        # store the parameter combination and the performance in the list
        performance_for_parameter_combination.append({
            "parameter_combination": random_param_combination,
            "performance": performance,
            "folder_name": folder_name,
        })

        # store the performance over time of the current autoencoder
        Storage.get_tuning_results_performance_over_time()[folder_name] = \
            adv_autoencoder.get_performance_over_time()

        # store the learning rates over time of the current autoencoder
        Storage.get_tuning_results_learning_rates_over_time()[folder_name] = \
            adv_autoencoder.get_learning_rates()

        # reset the tensorflow graph
        adv_autoencoder.reset_graph()

    # sort combinations by their performance
    # TODO: change back to summed loss ("summed_loss_final")
    sorted_list = sorted(performance_for_parameter_combination,
                         key=lambda x: x["performance"]["autoencoder_loss_final"])

    # store the tuning results for the swagger server
    # NOTE(review): the unsorted list is stored here, whereas do_gridsearch stores the sorted one - confirm
    # which one is intended
    Storage.set_tuning_results(performance_for_parameter_combination)

    print("#" * 20)

    print(Storage.get_tuning_results_performance_over_time())

    # the log is collected first and written in one go instead of re-opening the file for every entry
    log_lines = []

    for comb in sorted_list:
        print("performance:", comb["performance"])
        print("folder name:", comb["folder_name"])
        print()
        log_lines.append("performance: {}\n".format(comb["performance"]))
        log_lines.append("folder name: {}\n".format(comb["folder_name"]))

    print(sorted_list)

    # there is no best combination if the tuning was stopped right away or no combination was requested
    best = sorted_list[0] if sorted_list else None

    if best is not None:
        print("best param combination:", best["parameter_combination"])
        print("best performance:", best["performance"])
        print("folder name:", best["folder_name"])
        log_lines.append("best param combination: {}\n".format(best["parameter_combination"]))
        log_lines.append("best performance: {}\n".format(best["performance"]))
        log_lines.append("folder name: {}\n".format(best["folder_name"]))

    # create a new log file
    with open(log_file_name, 'w') as log:
        log.writelines(log_lines)

    if best is None:
        return None

    return best["parameter_combination"]
def load_aae(selected_autoencoder, filepath):
    """
    Loads a trained autoencoder from the parameter file at filepath and stores it in the storage class.

    The graph of a previously stored autoencoder is reset first. The result folder name of the loaded
    autoencoder is derived from filepath: the part between selected_autoencoder and "log", cut off after the
    name of the selected dataset.

    :param selected_autoencoder: autoencoder to load; one of ["Unsupervised", "Supervised", "SemiSupervised",
        "IncorporatingLabelInformation", "UnsupervisedClustering", "DimensionalityReduction"]
    :param filepath: path of the parameter file of the trained autoencoder
    :return: tuple ("AAE successfully loaded", 200) on success, otherwise an error message with status code
        400 or 404
    """
    autoencoder_classes = {
        "Unsupervised": UnsupervisedAdversarialAutoencoder,
        "Supervised": SupervisedAdversarialAutoencoder,
        "SemiSupervised": SemiSupervisedAdversarialAutoencoder,
        "IncorporatingLabelInformation": IncorporatingLabelInformationAdversarialAutoencoder,
        "UnsupervisedClustering": UnsupervisedClusteringAdversarialAutoencoder,
        "DimensionalityReduction": DimensionalityReductionAdversarialAutoencoder,
    }
    # reject unknown names up front; otherwise no autoencoder gets built and the calls below fail on None
    if selected_autoencoder not in autoencoder_classes:
        return "Error: unknown autoencoder '%s'" % selected_autoencoder, 400

    # reset previous autoencoders (if they exist)
    aae = Storage.get_aae()
    if aae:
        aae.reset_graph()

    # check if we have a dataset selected
    selected_dataset = Storage.get_selected_dataset()
    if not selected_dataset:
        return "Error: data set not found", 404

    try:
        params = get_params_from_params_file(filepath)
    except FileNotFoundError:
        return "Error: No such file or directory: '" + filepath + "'", 404

    try:
        adv_autoencoder = autoencoder_classes[selected_autoencoder](params)
    except KeyError as err:
        return 'Error: Parameter %s not found' % err, 404
    except IndexError:
        return 'Error: The parameters seems to be invalid. Make sure you selected the correct autoencoder', 400

    # building the autoencoder sets the train status to start, so we need to manually set it to stop, since the
    # autoencoder is already trained
    adv_autoencoder.set_train_status("stop")

    try:
        # get the last part: e.g. "\2018-08-02_17_48_33_MNIST\log\params.txt"
        result_folder_name = filepath.split(selected_autoencoder)[1]
        # get the first part: "\2018-08-02_17_48_33_MNIST\"
        result_folder_name = result_folder_name.split("log")[0]
        # remove the trailing separator: "\2018-08-02_17_48_33_MNIST"
        result_folder_name = result_folder_name.split(selected_dataset)[0] + selected_dataset
    except IndexError:
        return 'Error: The parameters seems to be invalid. Make sure you selected the correct autoencoder', 400

    adv_autoencoder.set_result_folder_name(result_folder_name)

    # store the parameters and the adv. autoencoder in the storage class
    Storage.set_aae(adv_autoencoder)
    Storage.set_aae_parameters(params)
    Storage.set_selected_autoencoder(selected_autoencoder)

    return "AAE successfully loaded", 200
def build_aae(selected_autoencoder, aae_parameters):
    """
    Builds the adversarial autoencoder with the parameters provided and registers it in the Storage class.

    :param selected_autoencoder: type of the autoencoder to build; one of "Unsupervised", "Supervised",
        "SemiSupervised", "IncorporatingLabelInformation", "UnsupervisedClustering" or
        "DimensionalityReduction"
    :param aae_parameters: parameters for the adv. autoencoder; replaced by the json body of the current
        request
    :return: tuple (message, http status code): 200 on success; 415 if the request body is not json; 404 if
        no data set is selected, the "custom" data set is selected (not implemented) or a parameter is
        missing; 400 if the autoencoder type is unknown
    """

    if not connexion.request.is_json:
        return 'Error: parameters not in .json format', 415

    # get the parameters for the adv autoencoder
    aae_parameters = connexion.request.get_json()

    # check if we have a dataset selected; e.g. one of ["MNIST", "SVHN", "cifar10", "custom"]
    selected_dataset = Storage.get_selected_dataset()
    if not selected_dataset:
        return "Error: data set not found", 404

    autoencoder_classes = {
        "Unsupervised": UnsupervisedAdversarialAutoencoder,
        "Supervised": SupervisedAdversarialAutoencoder,
        "SemiSupervised": SemiSupervisedAdversarialAutoencoder,
        "IncorporatingLabelInformation": IncorporatingLabelInformationAdversarialAutoencoder,
        "UnsupervisedClustering": UnsupervisedClusteringAdversarialAutoencoder,
        "DimensionalityReduction": DimensionalityReductionAdversarialAutoencoder,
    }

    # an unknown autoencoder type used to store "None" as autoencoder and report success; reject it before
    # anything (like the graph of the current autoencoder) gets touched
    autoencoder_class = autoencoder_classes.get(selected_autoencoder)
    if autoencoder_class is None:
        return "Error: unknown autoencoder '%s'" % (selected_autoencoder,), 400

    aae_parameters["selected_dataset"] = selected_dataset

    # get the results_path based on the selected autoencoder
    aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)

    # input dim and color scale for the built-in datasets; for any other dataset the respective parameters
    # are left untouched
    dataset_presets = {
        "MNIST": {"input_dim_x": 28, "input_dim_y": 28, "color_scale": "gray_scale"},
        "SVHN": {"input_dim_x": 32, "input_dim_y": 32, "color_scale": "rgb_scale"},
        "cifar10": {"input_dim_x": 32, "input_dim_y": 32, "color_scale": "rgb_scale"},
    }
    if selected_dataset == "custom":
        return "Error: not implemented", 404
    aae_parameters.update(dataset_presets.get(selected_dataset, {}))

    # reset the tensorflow graph of the previous autoencoder (if there is one)
    previous_aae = Storage.get_aae()
    if previous_aae is not None:
        previous_aae.reset_graph()

    # create the AAE with the current parameters
    try:
        adv_autoencoder = autoencoder_class(aae_parameters)
    except KeyError as err:
        # the request lacks a parameter the selected autoencoder needs
        return 'Error: Parameter %s not found' % err, 404

    # store the parameters and the adv. autoencoder in the storage class
    Storage.set_aae(adv_autoencoder)
    Storage.set_aae_parameters(aae_parameters)
    Storage.set_selected_autoencoder(selected_autoencoder)

    return "Success: AAE successfully built", 200
def get_performance_over_time():
    """
    Collects the performance over time (losses (and accuracy for the semi-supervised aae)) of the current
    autoencoder and converts every series to a list of python floats.

    :return: tuple (dictionary holding all series, 200); series which don't apply to the selected
        autoencoder are empty lists. ("Error: no autoencoder found", 404) if there is no autoencoder.
    """

    current_aae = Storage.get_aae()

    # without an autoencoder there is nothing to report
    if not current_aae:
        return "Error: no autoencoder found", 404

    history = current_aae.get_performance_over_time()

    def as_floats(series_name):
        # looks the series up and converts each of its entries to a python float
        return list(map(float, history[series_name]))

    # since swagger doesn't allow different return values for the same function, every series is returned;
    # the ones below stay empty unless the selected autoencoder provides them
    series = {
        "discriminator_losses": [],  # only (un)-supervised
        "discriminator_gaussian_losses": [],  # only semi-supervised
        "discriminator_categorical_losses": [],  # only semi-supervised
        "supervised_encoder_loss": [],  # only semi-supervised
        "accuracy": [],  # only semi-supervised
        "accuracy_epochs": [],  # only semi-supervised
    }

    # series every autoencoder has
    for series_name in ("autoencoder_losses", "list_of_epochs", "mz_values_losses", "intensities_losses"):
        series[series_name] = as_floats(series_name)

    # series depending on the kind of autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":
        specific_series = ("discriminator_gaussian_losses", "discriminator_categorical_losses",
                           "generator_losses", "supervised_encoder_loss", "accuracy", "accuracy_epochs")
    else:
        # unsupervised or supervised autoencoder
        specific_series = ("discriminator_losses", "generator_losses")
    for series_name in specific_series:
        series[series_name] = as_floats(series_name)

    # swagger doesn't allow multiple return values either, so everything is packed in one dictionary
    # NOTE: the first four keys end with a colon; they are kept exactly as they are
    performance_dict = {
        "autoencoder_losses:": series["autoencoder_losses"],
        "discriminator_losses:": series["discriminator_losses"],
        "generator_losses:": series["generator_losses"],
        "list_of_epochs:": series["list_of_epochs"],
        "discriminator_gaussian_losses": series["discriminator_gaussian_losses"],
        "discriminator_categorical_losses": series["discriminator_categorical_losses"],
        "supervised_encoder_loss": series["supervised_encoder_loss"],
        "accuracy": series["accuracy"],
        "accuracy_epochs": series["accuracy_epochs"],
        "mz_values_losses": series["mz_values_losses"],
        "intensities_losses": series["intensities_losses"],
    }

    return performance_dict, 200
def get_epoch_summary_vars():
    """
    Returns the variables of the epoch summary of the current autoencoder in a json serializable form.

    :return: tuple (dictionary holding all variables, 200); variables which don't apply to the selected
        autoencoder are empty lists. ("Error: no autoencoder found", 404) if there is no autoencoder.
    """

    current_aae = Storage.get_aae()

    # without an autoencoder there is nothing to report
    if not current_aae:
        return "Error: no autoencoder found", 404

    summary_vars = current_aae.get_epoch_summary_vars()

    # since swagger doesn't allow different return values for the same function, all variables are returned;
    # each one starts as an empty list and only the ones of the selected autoencoder get filled below
    response_keys = (
        "real_dist", "latent_representation", "discriminator_neg", "discriminator_pos", "batch_x",
        "reconstructed_images", "epoch", "batch_labels", "batch_X_unlabeled", "real_cat_dist",
        "encoder_cat_dist", "discriminator_gaussian_neg", "discriminator_gaussian_pos",
        "discriminator_cat_neg", "discriminator_cat_pos",
    )
    minibatch_summary_vars_dict = {key: [] for key in response_keys}

    # TODO: fix this
    # distinguish between semi-supervised or (un-)supervised autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":
        array_keys = (
            "real_dist", "latent_representation", "batch_X_unlabeled", "reconstructed_images",
            "real_cat_dist", "encoder_cat_dist", "batch_labels", "discriminator_gaussian_neg",
            "discriminator_gaussian_pos", "discriminator_cat_neg", "discriminator_cat_pos",
        )
    else:
        # unsupervised or supervised autoencoder
        array_keys = (
            "real_dist", "latent_representation", "discriminator_neg", "discriminator_pos", "batch_x",
            "reconstructed_images", "batch_labels",
        )

    # np arrays are not json serializable, so each one is converted to a (nested) list of floats
    for key in array_keys:
        as_array = np.array(summary_vars[key])
        minibatch_summary_vars_dict[key] = as_array.astype("float64").tolist()

    # the epoch is passed on as it is
    minibatch_summary_vars_dict["epoch"] = summary_vars["epoch"]

    return minibatch_summary_vars_dict, 200
def get_learning_rates():
    """
    Collects the learning rates over time of the current autoencoder and converts every series to a list of
    python floats.

    :return: tuple (dictionary holding all series, 200); series which don't apply to the selected
        autoencoder are empty lists. ("Error: no autoencoder found", 404) if there is no autoencoder.
    """

    current_aae = Storage.get_aae()

    # without an autoencoder there is nothing to report
    if not current_aae:
        return "Error: no autoencoder found", 404

    learning_rates = current_aae.get_learning_rates()

    # since swagger doesn't allow different return values for the same function, every series is returned;
    # the ones below stay empty unless the selected autoencoder provides them
    series = {
        "discriminator_lr": [],  # only (un)-supervised
        "discriminator_g_lr": [],  # only semi-supervised
        "discriminator_c_lr": [],  # only semi-supervised
        "supervised_encoder_lr": [],  # only semi-supervised
    }

    # distinguish between semi-supervised or (un-)supervised autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":
        available_series = ("autoencoder_lr", "discriminator_g_lr", "discriminator_c_lr", "generator_lr",
                            "supervised_encoder_lr", "list_of_epochs")
    else:
        available_series = ("autoencoder_lr", "discriminator_lr", "generator_lr", "list_of_epochs")

    for series_name in available_series:
        series[series_name] = list(map(float, learning_rates[series_name]))

    # swagger doesn't allow multiple return values either, so everything is packed in one dictionary
    # NOTE: the first four keys end with a colon; they are kept exactly as they are
    lr_dict = {
        "autoencoder_lr:": series["autoencoder_lr"],
        "discriminator_lr:": series["discriminator_lr"],
        "generator_lr:": series["generator_lr"],
        "list_of_epochs:": series["list_of_epochs"],
        "discriminator_g_lr": series["discriminator_g_lr"],
        "discriminator_c_lr": series["discriminator_c_lr"],
        "supervised_encoder_lr": series["supervised_encoder_lr"],
    }

    return lr_dict, 200