Example #1
def generate_image_from_single_point(single_point):
    """
    generates the aae output from a given point on the latent space
    :param single_point: point on the latent space
    :return: 2 or 3D (depending on the color scale) list holding the image pixels
    """

    if not Storage.get_aae_parameters():
        return "Error: autoencoder not found", 404
    z_dim = Storage.get_aae_parameters()["z_dim"]

    if Storage.get_selected_autoencoder() != "Unsupervised":
        return "Error: This function is supposed to work for unsupervised autoencoders only!", 412

    if len(single_point) != z_dim:
        return "Error: Invalid dimension! Dimension should be %s." % z_dim, 400

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: autoencoder not found", 404

    # create the operation for the aae and add it
    operation = {"generate_image_from_single_point": single_point}
    aae.add_to_requested_operations_by_swagger(operation)

    # training has already stopped ..
    if aae.get_train_status() == "stop":
        # .. so we restart the aae
        aae.train(False)

    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    while aae.get_requested_operations_by_swagger_results() is None \
            and total_waiting_time <= 30:
        # wait for 200 ms, then check again
        time.sleep(0.2)
        total_waiting_time += 0.2

    # response took too long..
    if total_waiting_time > 30:
        return "Request timed out", 408

    # aae has responded
    result = aae.get_requested_operations_by_swagger_results()

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    if not aae.selected_dataset == "mass_spec":
        result = result.astype("float64").tolist()

    return result, 200
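Note: the 30-second polling loop above reappears unchanged in several of the examples below (generate_image_grid, classify_single_image, get_biases_or_weights_for_layer). A minimal sketch of a shared helper, assuming only the aae accessors already used above; the helper name is hypothetical:

import time


def wait_for_swagger_result(aae, timeout=30, poll_interval=0.2):
    """polls the aae until a swagger result is available or the timeout is reached; returns the result or None"""
    total_waiting_time = 0
    while aae.get_requested_operations_by_swagger_results() is None and total_waiting_time <= timeout:
        # wait for poll_interval seconds, then check again
        time.sleep(poll_interval)
        total_waiting_time += poll_interval
    return aae.get_requested_operations_by_swagger_results()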
Example #2
def generate_image_grid():
    """
    generates the image grid by sampling the latent space and returning the generated images
    :return: list of the generated images, each as a (nested) list of pixel values
    """

    # check if we have an autoencoder
    if not Storage.get_aae():
        return "Error: autoencoder not found", 404

    # get the autoencoder
    aae = Storage.get_aae()

    # we don't need a parameter for the image grid
    operation = {"generate_image_grid": ""}
    aae.add_to_requested_operations_by_swagger(operation)

    print(aae.get_train_status())

    # training has already stopped ..
    if aae.get_train_status() == "stop":
        # .. so we restart the aae
        aae.train(False)

    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    while aae.get_requested_operations_by_swagger_results() is None \
            and total_waiting_time <= 30:
        # wait for 200 ms, then check again
        time.sleep(0.2)
        total_waiting_time += 0.2

    # response took too long..
    if total_waiting_time > 30:
        return "Request timed out", 408

    # aae has responded
    result = aae.get_requested_operations_by_swagger_results()

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    result = [a.astype("float64").tolist() for a in result]

    return result, 200
Example #3
def get_tuning_results():
    """
    returns the tuning results as list of dictionaries ordered by their total loss:
    [{"parameter_combination": {...}, "performance": {"loss_x": x, "loss_y": y, ..}, "folder_name": "some path"}, ...]
    :return:
    """

    # check if we have tuning results
    if Storage.get_tuning_results():

        # save the tuning results for the swagger server
        tuning_results = Storage.get_tuning_results()

        return tuning_results, 200

    else:
        return "No tuning results found", 404
Example #4
def stop_tuning():
    """
    stops the tuning
    :return:
    """

    # stop the tuning
    set_tuning_status("stop")

    # get the adv. autoencoder
    if Storage.get_aae():
        aae = Storage.get_aae()
    else:
        return "No autoencoder found", 404

    # stop the training of the current autoencoder
    aae.set_train_status("stop")

    return "Tuning stopped", 200
Example #5
def generate_image_from_single_point_as_byte_string(single_point):
    """
    generates the aae output from a given point on the latent space and returns it as byte string
    :param single_point: point on the latent space
    :return: byte string of the image
    """

    img, response_code = generate_image_from_single_point(single_point)

    # pass errors (e.g. missing autoencoder or timeout) through unchanged
    if response_code != 200:
        return img, response_code

    byte_string = convert_image_array_to_byte_string(
        img, channels=Storage.get_n_channels())
    return byte_string, response_code
Example #6
def get_single_image_as_byte_string(image_id, data_subset_name="train"):
    """
    returns a single image with the respective id as byte string
    :param image_id: id of the image to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return:
    """

    img, response_code = get_single_image(image_id, data_subset_name)

    # pass errors (e.g. missing data or invalid id) through unchanged
    if response_code != 200:
        return img, response_code

    byte_string = convert_image_array_to_byte_string(img, channels=Storage.get_n_channels())
    return byte_string, response_code
Example #7
def control_training(train_status):
    """
    starts and stops the training
    :param train_status: one of ["start", "stop"]; whether to start or stop the training
    :return:
    """
    if connexion.request.is_json:
        if train_status == "start":
            # get the adv. autoencoder
            aae = Storage.get_aae()

            # set the train status
            aae.set_train_status("start")

            try:
                # define background thread:
                aae_thread = threading.Thread(target=aae.train, args=(True, ))
            except AttributeError:
                return "No autoencoder found", 404

            # the adv autoencoder is currently training, so we need to stop it first
            if Storage.get_aae_thread():
                # stop training
                aae.set_train_status("stop")

            # set the new training thread
            Storage.set_aae_thread(aae_thread)

            # start training:
            aae_thread.start()

            return "Training started", 200

        if train_status == "stop":
            # get aae
            aae = Storage.get_aae()

            # stop training
            aae.set_train_status("stop")

            return "Training stopped", 200
Example #8
def load_data_set(dataset_name, mass_spec_data_properties=None):
    """
    loads a dataset into the storage class
    :param dataset_name: one of ["MNIST", "SVHN", "cifar10", "mass_spec", "custom"]
    :param mass_spec_data_properties: dictionary holding the properties for the mass spec data
    :return:
    """

    if dataset_name not in ["MNIST", "SVHN", "cifar10", "mass_spec", "custom"]:
        return "dataset name not found", 404

    if dataset_name == "mass_spec" and mass_spec_data_properties is None:
        return "Bad request! mass_spec_data_properties needs to be provided when using mass spec data!", 404

    dataset = get_input_data(dataset_name, filepath="../data", mass_spec_data_properties=mass_spec_data_properties)

    # store the data in the storage class
    Storage.set_input_data(dataset)
    Storage.set_selected_dataset(dataset_name)

    return "data successfully loaded", 200
Example #9
def generate_image_grid_as_byte_string():
    """
    generates the image grid by sampling the latent space and returning it as array of byte strings
    :return:
    """

    imgs, response_code = generate_image_grid()

    # pass errors (e.g. missing autoencoder or timeout) through unchanged
    if response_code != 200:
        return imgs, response_code

    list_of_byte_strings = [
        convert_image_array_to_byte_string(img,
                                           channels=Storage.get_n_channels())
        for img in imgs
    ]
    return list_of_byte_strings, response_code
Example #10
def get_data_batch(batch_size=100, data_subset_name="train"):
    """
    returns the data (images and labels) for the current batch
    :param batch_size: size of the batch
    :param data_subset_name: one of ["train", "test", "validation"]
    :return:
    """

    try:
        data = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    # get the images and the labels for the current batch
    batch_images, batch_labels = data.next_batch(batch_size)

    # store current batch_images and labels in the storage class
    Storage.set_current_batch_data({"images": batch_images, "labels": batch_labels})

    # since swagger doesn't allow multiple return values, we have to pack them in a dictionary and return it
    batch_dict = {"images": batch_images.astype("float64").tolist(), "labels": batch_labels.tolist()}

    return batch_dict, 200
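A short usage sketch, assuming a dataset was loaded via load_data_set beforehand; the dictionary keys are the ones packed above:

batch_dict, status_code = get_data_batch(batch_size=10, data_subset_name="train")
if status_code == 200:
    # both entries are plain, JSON-serializable lists
    print(len(batch_dict["images"]), len(batch_dict["labels"]))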
Example #11
def reset_tensorflow_graph():
    """
    resets the tensorflow graph to enable training another autoencoder
    :return:
    """

    # get the adversarial autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: no autoencoder found", 404

    aae.reset_graph()

    return "Graph successfully reset", 200
Example #12
def get_data_batch_as_byte_string(batch_size=100, data_subset_name="train"):
    """
    returns the data (images and labels) for the current batch as byte string
    :param batch_size: size of the batch
    :param data_subset_name: one of ["train", "test", "validation"]
    :return:
    """

    batch_dict, response_code = get_data_batch(batch_size, data_subset_name)

    # pass errors (e.g. missing data) through unchanged
    if response_code != 200:
        return batch_dict, response_code

    channels = Storage.get_n_channels()
    images = batch_dict["images"]
    images = [convert_image_array_to_byte_string(image, channels) for image in images]

    batch_dict["images"] = images

    return batch_dict, response_code
Example #13
def run_randomsearch(aae_parameters, selected_autoencoder, n_randomized_parameter_combinations):
    """
    runs a random search using n_randomized_parameter_combinations different parameter combinations. Provided parameter
    values, e.g. batch_size=100, will be used throughout all of the parameter combinations, whereas "missing" parameters
    will be assigned randomized values.
    :param aae_parameters: parameter values shared by all runs
    :param selected_autoencoder: what autoencoder to use
    :param n_randomized_parameter_combinations: how many combinations should be evaluated
    :return:
    """

    if connexion.request.is_json:

        # get the parameters for the adv autoencoder
        aae_parameters = connexion.request.get_json()

        # check if we have a dataset selected
        if not Storage.get_selected_dataset():
            return "Error: data set not found", 404

        # get the selected dataset ["MNIST", "SVHN", "cifar10", "custom"]
        selected_dataset = Storage.get_selected_dataset()
        aae_parameters["selected_dataset"] = selected_dataset

        # set the results_path based on the selected autoencoder and store the selected autoencoder itself
        aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)
        aae_parameters["selected_autoencoder"] = selected_autoencoder

        # check which params are missing; those will then be used for randomizing the parameters
        default_params = get_default_parameters(selected_autoencoder, selected_dataset)
        params_selected_for_random_search = [key for key in default_params if key not in aae_parameters]
        print("params selected as args for random search: \n", params_selected_for_random_search)

        # we need a tuple as input for args
        args = tuple([n_randomized_parameter_combinations] + params_selected_for_random_search)

        try:
            tuning_thread = threading.Thread(target=do_randomsearch, args=args,
                                             kwargs=aae_parameters)
            tuning_thread.start()

        except KeyError:
            return 'Error: Parameter %s not found' % sys.exc_info()[1], 404

        # store the parameters and the selected autoencoder in the storage class
        Storage.set_aae_parameters(aae_parameters)
        Storage.set_selected_autoencoder(selected_autoencoder)

        return "Success: random search has started", 200

    return 'Error: parameters not in .json format', 415
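For reference, the args tuple built above fills n_parameter_combinations and the names of the parameters to randomize in do_randomsearch (defined further below), while aae_parameters is passed as fixed keyword arguments. A hedged, non-threaded equivalent with illustrative values:

# direct call equivalent to the threaded one above (parameter names/values are illustrative)
do_randomsearch(10, "batch_size",
                selected_autoencoder="Unsupervised", selected_dataset="MNIST",
                learning_rate_autoencoder=0.001)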
Example #14
def get_single_label(label_id, data_subset_name="train"):
    """
    returns a single label with the respective id
    :param label_id: id of the label to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return:
    """

    try:
        data = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    try:
        label = data.labels[label_id]
    except IndexError:
        return "Index out of bounds", 415

    return list(label), 200
Example #15
def get_single_image(image_id, data_subset_name="train"):
    """
    returns a single image with the respective id
    :param image_id: id of the image to return
    :param data_subset_name: one of ["train", "test", "validation"]
    :return:
    """

    try:
        data = Storage.get_input_data(data_subset_name)
    except KeyError:
        return "No data found", 404

    try:
        image = data.images[image_id]
    except IndexError:
        return "Index out of bounds", 415

    return list(image.astype("float64")), 200
Example #16
def get_spectra_original_and_reconstruction():
    """
    returns the original spectra and their reconstruction by the current autoencoder
    :return:
    """

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: no autoencoder found", 404

    # get the vars for the minibatch summary
    spectra_original_and_reconstruction = aae.get_spectra_original_and_reconstruction(
    )

    # convert the numpy arrays in the dictionary to lists (np array is not json serializable..)
    for key in spectra_original_and_reconstruction:
        if spectra_original_and_reconstruction[key] is not None:
            print(spectra_original_and_reconstruction[key])
            print(key)
            spectra_original_and_reconstruction[
                key] = spectra_original_and_reconstruction[key].tolist()

    return spectra_original_and_reconstruction, 200
Example #17
def run_gridsearch(aae_parameters, selected_autoencoder):
    """
    runs a grid search using all possible combinations of the provided parameter values. Provided single parameter
    values, e.g. batch_size=100, are used throughout all of the parameter combinations (see do_gridsearch below).
    :param aae_parameters: parameter values shared by all runs
    :param selected_autoencoder: what autoencoder to use
    :return:
    """

    if connexion.request.is_json:

        # get the parameters for the adv autoencoder
        aae_parameters = connexion.request.get_json()

        # check if we have a dataset selected
        if not Storage.get_selected_dataset():
            return "Error: data set not found", 404

        # get the selected dataset ["MNIST", "SVHN", "cifar10", "custom"]
        selected_dataset = Storage.get_selected_dataset()
        aae_parameters["selected_dataset"] = selected_dataset

        # set the results_path based on the selected autoencoder and store the selected autoencoder itself
        aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)
        aae_parameters["selected_autoencoder"] = selected_autoencoder

        print(aae_parameters)

        try:
            tuning_thread = threading.Thread(target=do_gridsearch, kwargs=aae_parameters)
            tuning_thread.start()

        except KeyError:
            return 'Error: Parameter %s not found' % sys.exc_info()[1], 404

        # store the parameters and the selected autoencoder in the storage class
        Storage.set_aae_parameters(aae_parameters)
        Storage.set_selected_autoencoder(selected_autoencoder)

        return "Success: grid search has started", 200

    return 'Error: parameters not in .json format', 415
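For reference, the threaded call above amounts to do_gridsearch(**aae_parameters) (see the example further below); a hedged, non-threaded equivalent with illustrative values taken from the do_gridsearch docstring:

# direct call equivalent to the threaded one above (parameter values are illustrative)
do_gridsearch(selected_autoencoder="Unsupervised", selected_dataset="MNIST",
              learning_rate_autoencoder=[0.1, 0.01, 0.001])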
Example #18
def classify_single_image(single_image):
    """
    classifies a single image and returns the predicted class label as integer label
    :param single_image: numpy array of the image to classify
    :return: integer label of the predicted class
    """

    if not Storage.get_aae_parameters():
        return "Error: autoencoder not found", 404
    input_dim_x = Storage.get_aae_parameters()["input_dim_x"]
    input_dim_y = Storage.get_aae_parameters()["input_dim_y"]
    color_scale = Storage.get_aae_parameters()["color_scale"]
    input_dim = 0

    if color_scale == "gray_scale":
        input_dim = input_dim_x * input_dim_y
    elif color_scale == "rgb_scale":
        input_dim = input_dim_x * input_dim_y * 3

    if not Storage.get_selected_autoencoder() == "SemiSupervised":
        return "Error: This function is supposed to work for semi-supervised autoencoders only!", 412

    if len(single_image) != input_dim:
        return "Error: Invalid dimension! Dimension should be %s." % input_dim, 400

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: autoencoder not found", 404

    operation = {"classify_single_image": single_image}
    aae.add_to_requested_operations_by_swagger(operation)

    # training has already stopped ..
    if aae.get_train_status() == "stop":
        # .. so we restart the aae
        aae.train(False)

    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    while aae.get_requested_operations_by_swagger_results() is None \
            and total_waiting_time <= 30:
        # wait for 200 ms, then check again
        time.sleep(0.2)
        total_waiting_time += 0.2

    # response took too long..
    if total_waiting_time > 30:
        return "Request timed out", 408

    # aae has responded
    result = aae.get_requested_operations_by_swagger_results()

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np ints are not json serializable
    result = int(result)

    return result, 200
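A worked example of the dimension check above, using the per-dataset settings that build_aae (Example #23) assigns; these numbers only hold for those defaults:

# MNIST, gray_scale:  input_dim = 28 * 28     = 784  -> single_image must hold 784 values
# SVHN,  rgb_scale:   input_dim = 32 * 32 * 3 = 3072 -> single_image must hold 3072 values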
Example #19
def get_biases_or_weights_for_layer(bias_or_weights, layer_name):
    """
    returns the biases or the weights of the requested layer of the current autoencoder
    :param bias_or_weights: one of ["bias", "weights"]
    :param layer_name: name of the layer, prefixed with its subnetwork (encoder, decoder, etc.)
    :return:
    """

    # TODO: layer name as enum

    if bias_or_weights not in ["bias", "weights"]:
        return "invalid input", 400

    if not Storage.get_aae_parameters():
        return "Error: autoencoder not found", 404

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: autoencoder not found", 404

    # check if the layer_name is valid
    subnetwork = layer_name.split("_")[0]  # encoder, decoder, etc.
    all_layer_names = aae.get_all_layer_names()
    if subnetwork not in all_layer_names or layer_name not in all_layer_names[subnetwork]:
        return "Error: layer_name is invalid!", 400

    # request the operation
    operation = {
        "get_biases_or_weights_for_layer": (bias_or_weights, layer_name)
    }
    aae.add_to_requested_operations_by_swagger(operation)

    # training has stopped
    if aae.get_train_status() == "stop":
        # restart aae and get the weights/biases
        aae.train(False)

    total_waiting_time = 0

    # wait for the response from the aae (at maximum 30s)
    while aae.get_requested_operations_by_swagger_results() is None \
            and total_waiting_time <= 30:
        # wait for 200 ms, then check again
        time.sleep(0.2)
        total_waiting_time += 0.2

    # response took too long..
    if total_waiting_time > 30:
        return "Request timed out! Maybe you need to start training first", 408

    # aae has responded
    result = aae.get_requested_operations_by_swagger_results()

    # reset the variable holding the results
    aae.set_requested_operations_by_swagger_results(None)

    # we need to convert it, since np arrays are not json serializable
    result = [a.astype("float64").tolist() for a in result]

    return result, 200
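A hedged usage sketch; the layer name below is purely hypothetical and has to match one of the names returned by aae.get_all_layer_names() for the currently built autoencoder:

result, status_code = get_biases_or_weights_for_layer("weights", "encoder_dense_layer_1")
if status_code == 200:
    # result is a list of weight arrays converted to plain lists
    print(len(result))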
Example #20
def do_gridsearch(*args,
                  selected_autoencoder="Unsupervised",
                  selected_dataset="MNIST",
                  **kwargs):
    """
    Performs a grid search using all possible combinations of the parameters provided. In case there are no parameters
    provided it uses all the possible parameter combinations from the hard coded parameters.
    Example calls:
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", learning_rate_autoencoder=[0.1, 0.01, 0.001],
                    MomentumOptimizer_momentum_autoencoder=[1.0, 0.9, 0.8])
        - do_gridsearch(n_neurons_of_hidden_layer_x_autoencoder=[[500, 250, 125], [1000, 750, 25]],
                        n_neurons_of_hidden_layer_x_discriminator=[[500, 250, 125], [1000, 750, 25]])
        - do_gridsearch(n_neurons_of_hidden_layer_x_discriminator=[[500, 250, 125], [1000, 750, 25]])
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", "learning_rate_autoencoder")
        - do_gridsearch()
        - do_gridsearch("n_neurons_of_hidden_layer_x_autoencoder", learning_rate_autoencoder=[0.5],
                        MomentumOptimizer_momentum_autoencoder=[1.0])
    :param args: strings of the variable defined in the Parameters class to do the grid search for. In this case
    it uses the possible parameter values in the Parameters class: "variable_name"
    :param selected_dataset: ["MNIST", "SVHN", "cifar10", "custom"]
    :param selected_autoencoder: ["Unsupervised", "Supervised", "SemiSupervised"]
    :param kwargs: arbitrary number of: variable_name=[variable_value1, variable_value2, variable_value3]
    :return: the best parameter combination as a dictionary
    """

    print("Doing grid search..")

    log_result_path = "../results/Logs/GridSearch"
    date = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    log_file_name = log_result_path + "/{0}_{1}_log.txt".format(
        date, selected_dataset)

    print("Log will be saved at location " + log_file_name)

    # iterate over the parameter combinations
    gridsearch_parameter_combinations = \
        aae_params.get_gridsearch_parameters(*args, selected_autoencoder=selected_autoencoder,
                                             selected_dataset=selected_dataset, **kwargs)

    # stores the performance for the parameter combination
    performance_for_parameter_combination = []

    print("There are", len(gridsearch_parameter_combinations), "combinations:")

    for a in gridsearch_parameter_combinations:
        print(a)
    print()

    # iterate over each parameter combination
    for gridsearch_parameter_combination in gridsearch_parameter_combinations:

        # for controlling the tuning via swagger
        if not tuning_status == "stop":

            print("Training .. ", gridsearch_parameter_combination)

            # create the AAE and train it with the current parameters
            if selected_autoencoder == "Unsupervised":
                adv_autoencoder = UnsupervisedAdversarialAutoencoder(
                    gridsearch_parameter_combination)
            elif selected_autoencoder == "Supervised":
                adv_autoencoder = SupervisedAdversarialAutoencoder(
                    gridsearch_parameter_combination)
            elif selected_autoencoder == "SemiSupervised":
                adv_autoencoder = SemiSupervisedAdversarialAutoencoder(
                    gridsearch_parameter_combination)

            # we want to include the results from our previous runs on the minibatch summary images
            adv_autoencoder.set_include_tuning_performance(True)

            # set the autoencoder for the swagger server
            Storage.set_aae(adv_autoencoder)

            # start the training
            adv_autoencoder.train(True)
            # adv_autoencoder.train(False)

            # get the performance
            performance = adv_autoencoder.get_final_performance()
            print(performance)

            # convert performance to float64 (for swagger server)
            for key, value in performance.items():
                performance[key] = np.float64(value)

            folder_name = adv_autoencoder.get_result_folder_name()

            # store the param_comb and the performance in the list
            current_performance = {
                "parameter_combination": gridsearch_parameter_combination,
                "performance": performance,
                "folder_name": folder_name
            }
            performance_for_parameter_combination.append(current_performance)

            # store the performance over time of the current autoencoder
            Storage.get_tuning_results_performance_over_time()[folder_name] = \
                adv_autoencoder.get_performance_over_time()

            # store the learning rates over time of the current autoencoder
            Storage.get_tuning_results_learning_rates_over_time()[folder_name] = \
                adv_autoencoder.get_learning_rates()

            # reset the tensorflow graph
            adv_autoencoder.reset_graph()

    # sort combinations by their performance
    sorted_list = sorted(performance_for_parameter_combination,
                         key=lambda x: x["performance"]["summed_loss_final"])

    # save the tuning results for the swagger server
    Storage.set_tuning_results(sorted_list)

    print("#" * 20)

    # create a new log file
    with open(log_file_name, 'w') as log:
        log.write("")

    for comb in sorted_list:
        print("performance:", comb["performance"])
        print("folder name:", comb["folder_name"])
        print()
        with open(log_file_name, 'a') as log:
            log.write("performance: {}\n".format(comb["performance"]))
            log.write("folder name: {}\n".format(comb["folder_name"]))

    print(sorted_list)
    print("best param combination:", sorted_list[0]["parameter_combination"])
    print("best performance:", sorted_list[0]["performance"])
    print("folder name:", sorted_list[0]["folder_name"])

    with open(log_file_name, 'a') as log:
        log.write("best param combination: {}\n".format(
            sorted_list[0]["parameter_combination"]))
        log.write("best performance: {}\n".format(
            sorted_list[0]["performance"]))
        log.write("folder name: {}\n".format(sorted_list[0]["folder_name"]))

    return sorted_list[0]["parameter_combination"]
Example #21
def do_randomsearch(n_parameter_combinations=5,
                    *args,
                    selected_autoencoder="Unsupervised",
                    selected_dataset="MNIST",
                    **kwargs):
    """
    Performs a random search using n_parameter_combinations different parameter combinations. The parameter combination
    is obtained by randomly assigning values for the parameters provided (args and kwargs).
    Example calls:
        - do_randomsearch()
        - do_randomsearch(2, "batch_size", learning_rate_autoencoder=random.uniform(0.2, 0.001))
        - do_randomsearch(10, "batch_size", learning_rate_autoencoder=random.uniform(0.2, 0.001))
        - do_randomsearch(5, "batch_size", "learning_rate_autoencoder")
        - do_randomsearch(5, learning_rate_autoencoder=random.uniform(0.2, 0.001),
                          learning_rate_discriminator=random.uniform(0.2, 0.001))
    :param n_parameter_combinations: number of parameter combinations to try
    :param selected_dataset: ["MNIST", "SVHN", "cifar10", "custom"]
    :param selected_autoencoder: ["Unsupervised", "Supervised", "SemiSupervised"]
    :param args: strings of the variable defined in the Parameters class to randomize
    :param kwargs: manually assigned values for the specified variable
    :return: the best parameter combination as a dictionary
    """

    print("Doing random search..")

    log_result_path = "../results/Logs/RandomSearch"
    date = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    log_file_name = log_result_path + "/{0}_{1}_log.txt".format(
        date, selected_dataset)

    print("Log will be saved at location " + log_file_name)

    # get some random parameter combinations
    random_param_combinations = \
        [aae_params.get_randomized_parameters(*args, selected_autoencoder=selected_autoencoder,
                                              selected_dataset=selected_dataset, **kwargs)
         for i in range(n_parameter_combinations)]

    # TODO: think about this, whether it should be included all the time
    # add the default parameter combination to the list based on the selected dataset
    # random_param_combinations.append(aae_parameter_class.get_default_parameters(selected_autoencoder, selected_dataset))

    # stores the performance for the parameter combination
    performance_for_parameter_combination = []

    print("There are", len(random_param_combinations), "combinations:")
    for a in random_param_combinations:
        print(a)
    print()

    # iterate over each parameter combination
    for random_param_combination in random_param_combinations:

        # for controlling the tuning via swagger
        if not tuning_status == "stop":

            print(random_param_combination)

            # create the AAE and train it with the current parameters
            if selected_autoencoder == "Unsupervised":
                adv_autoencoder = UnsupervisedAdversarialAutoencoder(
                    random_param_combination)
            elif selected_autoencoder == "Supervised":
                adv_autoencoder = SupervisedAdversarialAutoencoder(
                    random_param_combination)
            elif selected_autoencoder == "SemiSupervised":
                adv_autoencoder = SemiSupervisedAdversarialAutoencoder(
                    random_param_combination)

            # we want to include the results from our previous runs on the minibatch summary images
            adv_autoencoder.set_include_tuning_performance(True)
            try:

                # set the autoencoder for the swagger server
                Storage.set_aae(adv_autoencoder)

                # start the training
                adv_autoencoder.train(True)

                # get the performance
                performance = adv_autoencoder.get_final_performance()
            except Exception:
                print("Training failed for the current parameter combination")
                performance = {
                    "autoencoder_loss_final": float('inf'),
                    "discriminator_loss_final": float('inf'),
                    "generator_loss_final": float('inf'),
                    "summed_loss_final": float('inf')
                }

            print(performance)

            # convert performance to float64 (for swagger server)
            for key, value in performance.items():
                performance[key] = np.float64(value)

            folder_name = adv_autoencoder.get_result_folder_name()

            # store the parameter combination and the performance in the list
            current_performance = {
                "parameter_combination": random_param_combination,
                "performance": performance,
                "folder_name": folder_name
            }
            performance_for_parameter_combination.append(current_performance)

            # store the performance over time of the current autoencoder
            Storage.get_tuning_results_performance_over_time()[folder_name] \
                = adv_autoencoder.get_performance_over_time()

            # store the learning rates over time of the current autoencoder
            Storage.get_tuning_results_learning_rates_over_time()[folder_name] \
                = adv_autoencoder.get_learning_rates()

            # reset the tensorflow graph
            adv_autoencoder.reset_graph()

    # sort combinations by their performance
    # TODO: change back to summed loss
    # sorted_list = sorted(performance_for_parameter_combination, key=lambda x: x["performance"]["summed_loss_final"])
    sorted_list = sorted(
        performance_for_parameter_combination,
        key=lambda x: x["performance"]["autoencoder_loss_final"])

    # store the tuning results for the swagger server (sorted by their loss)
    Storage.set_tuning_results(sorted_list)

    print("#" * 20)

    print(Storage.get_tuning_results_performance_over_time())

    # create a new log file
    with open(log_file_name, 'w') as log:
        log.write("")

    for comb in sorted_list:
        print("performance:", comb["performance"])
        print("folder name:", comb["folder_name"])
        print()
        with open(log_file_name, 'a') as log:
            log.write("performance: {}\n".format(comb["performance"]))
            log.write("folder name: {}\n".format(comb["folder_name"]))

    print(sorted_list)
    print("best param combination:", sorted_list[0]["parameter_combination"])
    print("best performance:", sorted_list[0]["performance"])
    print("folder name:", sorted_list[0]["folder_name"])

    with open(log_file_name, 'a') as log:
        log.write("best param combination: {}\n".format(
            sorted_list[0]["parameter_combination"]))
        log.write("best performance: {}\n".format(
            sorted_list[0]["performance"]))
        log.write("folder name: {}\n".format(sorted_list[0]["folder_name"]))

    return sorted_list[0]["parameter_combination"]
Example #22
def load_aae(selected_autoencoder, filepath):
    """
    loads a trained autoencoder
    :param selected_autoencoder: autoencoder to load, e.g. Unsupervised, Supervised, etc.
    :param filepath: path to the params file of the trained autoencoder
    :return:
    """

    # reset previous autoencoders (if they exist)
    aae = Storage.get_aae()
    if aae:
        aae.reset_graph()

    selected_dataset = Storage.get_selected_dataset()

    # check if we have a dataset selected
    if not selected_dataset:
        return "Error: data set not found", 404

    adv_autoencoder = None

    try:
        params = get_params_from_params_file(filepath)
    except FileNotFoundError:
        return "Error: No such file or directory: '" + filepath + "'", 404

    try:
        if selected_autoencoder == "Unsupervised":
            adv_autoencoder = UnsupervisedAdversarialAutoencoder(params)
        elif selected_autoencoder == "Supervised":
            adv_autoencoder = SupervisedAdversarialAutoencoder(params)
        elif selected_autoencoder == "SemiSupervised":
            adv_autoencoder = SemiSupervisedAdversarialAutoencoder(params)
        elif selected_autoencoder == "IncorporatingLabelInformation":
            adv_autoencoder = IncorporatingLabelInformationAdversarialAutoencoder(
                params)
        elif selected_autoencoder == "UnsupervisedClustering":
            adv_autoencoder = UnsupervisedClusteringAdversarialAutoencoder(
                params)
        elif selected_autoencoder == "DimensionalityReduction":
            adv_autoencoder = DimensionalityReductionAdversarialAutoencoder(
                params)
    except KeyError:
        return 'Error: Parameter %s not found' % sys.exc_info()[1], 404
    except IndexError:
        return 'Error: The parameters seem to be invalid. Make sure you selected the correct autoencoder', 400

    # building the autoencoder sets the train status to start, so we need to manually set it to stop, since the
    # autoencoder is already trained
    adv_autoencoder.set_train_status("stop")

    try:
        # get the last part: e.g. "\2018-08-02_17_48_33_MNIST\log\params.txt"
        result_folder_name = filepath.split(selected_autoencoder)[1]
        # get the first part: "\2018-08-02_17_48_33_MNIST\"
        result_folder_name = result_folder_name.split("log")[0]
        # remove the trailing separator: "\2018-08-02_17_48_33_MNIST"
        result_folder_name = result_folder_name.split(
            selected_dataset)[0] + selected_dataset
    except IndexError:
        return 'Error: The parameters seem to be invalid. Make sure you selected the correct autoencoder', 400

    adv_autoencoder.set_result_folder_name(result_folder_name)

    # store the parameters and the adv. autoencoder in the storage class
    Storage.set_aae(adv_autoencoder)
    Storage.set_aae_parameters(params)
    Storage.set_selected_autoencoder(selected_autoencoder)

    return "AAE successfully loaded", 200
Example #23
def build_aae(selected_autoencoder, aae_parameters):
    """
    builds the adversarial autoencoder with the parameters provided
    :param selected_autoencoder: one of ["Unsupervised", "Supervised", "SemiSupervised"]
    :param aae_parameters: parameters for the adv. autoencoder
    :return:
    """

    if connexion.request.is_json:

        # get the parameters for the adv autoencoder
        aae_parameters = connexion.request.get_json()

        # check if we have a dataset selected
        if not Storage.get_selected_dataset():
            return "Error: data set not found", 404

        # get the selected dataset ["MNIST", "SVHN", "cifar10", "custom"]
        selected_dataset = Storage.get_selected_dataset()
        aae_parameters["selected_dataset"] = selected_dataset

        # get the results_path based on the selected autoencoder
        aae_parameters["results_path"] = get_result_path_for_selected_autoencoder(selected_autoencoder)

        # set the input dim and the color scale according to the selected dataset
        if selected_dataset == "MNIST":
            aae_parameters["input_dim_x"] = 28
            aae_parameters["input_dim_y"] = 28
            aae_parameters["color_scale"] = "gray_scale"
        elif selected_dataset == "SVHN":
            aae_parameters["input_dim_x"] = 32
            aae_parameters["input_dim_y"] = 32
            aae_parameters["color_scale"] = "rgb_scale"
        elif selected_dataset == "cifar10":
            aae_parameters["input_dim_x"] = 32
            aae_parameters["input_dim_y"] = 32
            aae_parameters["color_scale"] = "rgb_scale"
        elif selected_dataset == "custom":
            return "Error: not implemented", 404

        if Storage.get_aae() is not None:
            # reset the tensorflow graph
            Storage.get_aae().reset_graph()

        # create the AAE with the current parameters
        adv_autoencoder = None
        try:
            if selected_autoencoder == "Unsupervised":
                adv_autoencoder = UnsupervisedAdversarialAutoencoder(
                    aae_parameters)
            elif selected_autoencoder == "Supervised":
                adv_autoencoder = SupervisedAdversarialAutoencoder(
                    aae_parameters)
            elif selected_autoencoder == "SemiSupervised":
                adv_autoencoder = SemiSupervisedAdversarialAutoencoder(
                    aae_parameters)
            elif selected_autoencoder == "IncorporatingLabelInformation":
                adv_autoencoder = IncorporatingLabelInformationAdversarialAutoencoder(
                    aae_parameters)
            elif selected_autoencoder == "UnsupervisedClustering":
                adv_autoencoder = UnsupervisedClusteringAdversarialAutoencoder(
                    aae_parameters)
            elif selected_autoencoder == "DimensionalityReduction":
                adv_autoencoder = DimensionalityReductionAdversarialAutoencoder(
                    aae_parameters)
        except KeyError:
            return 'Error: Parameter %s not found' % sys.exc_info()[1], 404

        # store the parameters and the adv. autoencoder in the storage class
        Storage.set_aae(adv_autoencoder)
        Storage.set_aae_parameters(aae_parameters)
        Storage.set_selected_autoencoder(selected_autoencoder)

        return "Success: AAE successfully built", 200
    return 'Error: parameters not in .json format', 415
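The per-dataset input dimensions above could equally be kept in a small lookup table; a minimal sketch of that alternative, using the same values as the if/elif chain (the table name is hypothetical):

# dataset -> (input_dim_x, input_dim_y, color_scale); values identical to the if/elif chain above
DATASET_DIMENSIONS = {
    "MNIST": (28, 28, "gray_scale"),
    "SVHN": (32, 32, "rgb_scale"),
    "cifar10": (32, 32, "rgb_scale"),
}

input_dim_x, input_dim_y, color_scale = DATASET_DIMENSIONS["MNIST"]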
Example #24
def get_performance_over_time():
    """
    returns the performance over time (the losses, and the accuracy for the semi-supervised aae) for the current autoencoder
    :return:
    """

    # get the adversarial autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: no autoencoder found", 404

    # get the performance over time
    performance_over_time = aae.get_performance_over_time()

    # since swagger doesn't allow different return values for the same function, we return all of them
    discriminator_losses = []  # only (un)-supervised
    discriminator_gaussian_losses = []  # only semi-supervised
    discriminator_categorical_losses = []  # only semi-supervised
    supervised_encoder_loss = []  # only semi-supervised
    accuracy = []  # only semi-supervised
    accuracy_epochs = []  # only semi-supervised

    autoencoder_losses = performance_over_time["autoencoder_losses"]
    autoencoder_losses = [float(number) for number in autoencoder_losses]

    list_of_epochs = performance_over_time["list_of_epochs"]
    list_of_epochs = [float(number) for number in list_of_epochs]

    mz_values_losses = performance_over_time["mz_values_losses"]
    mz_values_losses = [float(number) for number in mz_values_losses]

    intensities_losses = performance_over_time["intensities_losses"]
    intensities_losses = [float(number) for number in intensities_losses]

    # distinguish between semi-supervised or (un-)supervised autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":

        discriminator_gaussian_losses = performance_over_time[
            "discriminator_gaussian_losses"]
        discriminator_gaussian_losses = [
            float(number) for number in discriminator_gaussian_losses
        ]

        discriminator_categorical_losses = performance_over_time[
            "discriminator_categorical_losses"]
        discriminator_categorical_losses = [
            float(number) for number in discriminator_categorical_losses
        ]

        generator_losses = performance_over_time["generator_losses"]
        generator_losses = [float(number) for number in generator_losses]

        supervised_encoder_loss = performance_over_time[
            "supervised_encoder_loss"]
        supervised_encoder_loss = [
            float(number) for number in supervised_encoder_loss
        ]

        accuracy = performance_over_time["accuracy"]
        accuracy = [float(number) for number in accuracy]

        accuracy_epochs = performance_over_time["accuracy_epochs"]
        accuracy_epochs = [float(number) for number in accuracy_epochs]

    # we have an unsupervised or a supervised autoencoder
    else:

        discriminator_losses = performance_over_time["discriminator_losses"]
        discriminator_losses = [
            float(number) for number in discriminator_losses
        ]

        generator_losses = performance_over_time["generator_losses"]
        generator_losses = [float(number) for number in generator_losses]

    # since swagger doesn't allow multiple return values, we have to pack them in a dictionary and return it
    performance_dict = {
        "autoencoder_losses:": autoencoder_losses,
        "discriminator_losses:": discriminator_losses,
        "generator_losses:": generator_losses,
        "list_of_epochs:": list_of_epochs,
        "discriminator_gaussian_losses": discriminator_gaussian_losses,
        "discriminator_categorical_losses": discriminator_categorical_losses,
        "supervised_encoder_loss": supervised_encoder_loss,
        "accuracy": accuracy,
        "accuracy_epochs": accuracy_epochs,
        "mz_values_losses": mz_values_losses,
        "intensities_losses": intensities_losses
    }

    return performance_dict, 200
Example #25
def get_epoch_summary_vars():
    """
    returns the summary variables of the current epoch (e.g. the real distribution, the latent representation and the
    reconstructed images) for the current autoencoder
    :return:
    """

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: no autoencoder found", 404

    # get the vars for the minibatch summary
    minibatch_summary_vars = aae.get_epoch_summary_vars()

    # since swagger doesn't allow different return values for the same function, we return all of them
    discriminator_neg = []  # only (un)-supervised
    discriminator_pos = []  # only (un)-supervised
    batch_x = []  # only (un)-supervised
    decoder_output = []  # only (un)-supervised
    batch_labels = []  # only (un)-supervised

    batch_X_unlabeled = []  # only semi-supervised
    reconstructed_images = []  # only semi-supervised
    real_cat_dist = []  # only semi-supervised
    encoder_cat_dist = []  # only semi-supervised
    batch_labels = []  # only semi-supervised
    discriminator_gaussian_neg = []  # only semi-supervised
    discriminator_gaussian_pos = []  # only semi-supervised
    discriminator_cat_neg = []  # only semi-supervised
    discriminator_cat_pos = []  # only semi-supervised

    # TODO: fix this

    # distinguish between semi-supervised or (un-)supervised autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":

        real_dist = np.array(minibatch_summary_vars["real_dist"]
                             )  # (batch_size, z_dim) array of floats
        real_dist = real_dist.astype("float64").tolist()

        latent_representation = np.array(
            minibatch_summary_vars["latent_representation"]
        )  # (batch_size, z_dim) array of floats
        latent_representation = latent_representation.astype(
            "float64").tolist()

        batch_X_unlabeled = np.array(
            minibatch_summary_vars["batch_X_unlabeled"]
        )  # (batch_size, z_dim) array of floats
        batch_X_unlabeled = batch_X_unlabeled.astype("float64").tolist()

        reconstructed_images = np.array(
            minibatch_summary_vars["reconstructed_images"]
        )  # (batch_size, z_dim) array of floats
        reconstructed_images = reconstructed_images.astype("float64").tolist()

        real_cat_dist = np.array(minibatch_summary_vars["real_cat_dist"]
                                 )  # (batch_size, z_dim) array of floats
        real_cat_dist = real_cat_dist.astype("float64").tolist()

        encoder_cat_dist = np.array(minibatch_summary_vars["encoder_cat_dist"]
                                    )  # (batch_size, z_dim) array of floats
        encoder_cat_dist = encoder_cat_dist.astype("float64").tolist()

        batch_labels = np.array(minibatch_summary_vars["batch_labels"]
                                )  # (batch_size, z_dim) array of floats
        batch_labels = batch_labels.astype("float64").tolist()

        discriminator_gaussian_neg = np.array(
            minibatch_summary_vars["discriminator_gaussian_neg"]
        )  # (batch_size) array of floats
        discriminator_gaussian_neg = discriminator_gaussian_neg.astype(
            "float64").tolist()

        discriminator_gaussian_pos = np.array(
            minibatch_summary_vars["discriminator_gaussian_pos"]
        )  # (batch_size) array of floats
        discriminator_gaussian_pos = discriminator_gaussian_pos.astype(
            "float64").tolist()

        discriminator_cat_neg = np.array(
            minibatch_summary_vars["discriminator_cat_neg"]
        )  # (batch_size) array of floats
        discriminator_cat_neg = discriminator_cat_neg.astype(
            "float64").tolist()

        discriminator_cat_pos = np.array(
            minibatch_summary_vars["discriminator_cat_pos"]
        )  # (batch_size, z_dim) array of floats
        discriminator_cat_pos = discriminator_cat_pos.astype(
            "float64").tolist()

        epoch = minibatch_summary_vars["epoch"]  # single integer

    # we have an unsupervised or a supervised autoencoder
    else:
        real_dist = np.array(minibatch_summary_vars["real_dist"]
                             )  # (batch_size, z_dim) array of floats
        real_dist = real_dist.astype("float64").tolist()

        latent_representation = np.array(
            minibatch_summary_vars["latent_representation"]
        )  # (batch_size, z_dim) array of floats
        latent_representation = latent_representation.astype(
            "float64").tolist()

        discriminator_neg = np.array(
            minibatch_summary_vars["discriminator_neg"]
        )  # (batch_size) array of floats
        discriminator_neg = discriminator_neg.astype("float64").tolist()

        discriminator_pos = np.array(
            minibatch_summary_vars["discriminator_pos"]
        )  # (batch_size, z_dim) array of floats
        discriminator_pos = discriminator_pos.astype("float64").tolist()

        batch_x = np.array(
            minibatch_summary_vars["batch_x"]
        )  # (batch_size, input_dim_x*input_dim_x*color_scale) array of floats
        batch_x = batch_x.astype("float64").tolist()

        reconstructed_images = np.array(
            minibatch_summary_vars["reconstructed_images"]
        )  # (batch_size, input_dim_x*input_dim_x*color_scale)
        reconstructed_images = reconstructed_images.astype(
            "float64").tolist()  # array of floats

        batch_labels = np.array(minibatch_summary_vars["batch_labels"]
                                )  # (batch_size, n_classes) array of ints
        batch_labels = batch_labels.astype("float64").tolist()

        epoch = minibatch_summary_vars["epoch"]  # single integer

    minibatch_summary_vars_dict = {
        "real_dist": real_dist,
        "latent_representation": latent_representation,
        "discriminator_neg": discriminator_neg,
        "discriminator_pos": discriminator_pos,
        "batch_x": batch_x,
        "reconstructed_images": reconstructed_images,
        "epoch": epoch,
        "batch_labels": batch_labels,
        "batch_X_unlabeled": batch_X_unlabeled,
        "real_cat_dist": real_cat_dist,
        "encoder_cat_dist": encoder_cat_dist,
        "discriminator_gaussian_neg": discriminator_gaussian_neg,
        "discriminator_gaussian_pos": discriminator_gaussian_pos,
        "discriminator_cat_neg": discriminator_cat_neg,
        "discriminator_cat_pos": discriminator_cat_pos
    }

    return minibatch_summary_vars_dict, 200
Example #26
def get_learning_rates():
    """
    returns the learning rates over time for the current autoencoder
    :return:
    """

    # get the autoencoder
    aae = Storage.get_aae()

    # check if we have an autoencoder
    if not aae:
        return "Error: no autoencoder found", 404

    # get the learning rates
    learning_rates = aae.get_learning_rates()

    # since swagger doesn't allow different return values for the same function, we return all of them
    discriminator_lr = []  # only (un)-supervised
    discriminator_g_lr = []  # only semi-supervised
    discriminator_c_lr = []  # only semi-supervised
    supervised_encoder_lr = []  # only semi-supervised

    # distinguish between semi-supervised or (un-)supervised autoencoder
    if Storage.get_selected_autoencoder() == "SemiSupervised":
        autoencoder_lr = learning_rates["autoencoder_lr"]
        autoencoder_lr = [float(number) for number in autoencoder_lr]

        discriminator_g_lr = learning_rates["discriminator_g_lr"]
        discriminator_g_lr = [float(number) for number in discriminator_g_lr]

        discriminator_c_lr = learning_rates["discriminator_c_lr"]
        discriminator_c_lr = [float(number) for number in discriminator_c_lr]

        generator_lr = learning_rates["generator_lr"]
        generator_lr = [float(number) for number in generator_lr]

        supervised_encoder_lr = learning_rates["supervised_encoder_lr"]
        supervised_encoder_lr = [
            float(number) for number in supervised_encoder_lr
        ]

        list_of_epochs = learning_rates["list_of_epochs"]
        list_of_epochs = [float(number) for number in list_of_epochs]

    # we have an unsupervised or a supervised autoencoder
    else:

        autoencoder_lr = learning_rates["autoencoder_lr"]
        autoencoder_lr = [float(number) for number in autoencoder_lr]

        discriminator_lr = learning_rates["discriminator_lr"]
        discriminator_lr = [float(number) for number in discriminator_lr]

        generator_lr = learning_rates["generator_lr"]
        generator_lr = [float(number) for number in generator_lr]

        list_of_epochs = learning_rates["list_of_epochs"]
        list_of_epochs = [float(number) for number in list_of_epochs]

    # since swagger doesn't allow multiple return values, we have to pack them in a dictionary and return it
    lr_dict = {
        "autoencoder_lr:": autoencoder_lr,
        "discriminator_lr:": discriminator_lr,
        "generator_lr:": generator_lr,
        "list_of_epochs:": list_of_epochs,
        "discriminator_g_lr": discriminator_g_lr,
        "discriminator_c_lr": discriminator_c_lr,
        "supervised_encoder_lr": supervised_encoder_lr
    }

    return lr_dict, 200