def setup_generator_testfile(class_type, is_autoencoder, dataset_info_dict, yield_mc_info=False, zero_center=True):
    train_file=dataset_info_dict["train_file"]
    test_file=dataset_info_dict["test_file"]
    n_bins=dataset_info_dict["n_bins"]
    #broken_simulations_mode=dataset_info_dict["broken_simulations_mode"] #def 0
    filesize_factor=dataset_info_dict["filesize_factor"]
    #filesize_factor_test=dataset_info_dict["filesize_factor_test"]
    batchsize=dataset_info_dict["batchsize"] #def 32
    
    train_tuple=[[train_file, int(h5_get_number_of_rows(train_file)*filesize_factor)]]
    #test_tuple=[[test_file, int(h5_get_number_of_rows(test_file)*filesize_factor_test)]]
    
    if zero_center==True:
        xs_mean = load_zero_center_data(train_files=train_tuple, batchsize=batchsize, n_bins=n_bins, n_gpu=1)
    else:
        xs_mean = None
    generator = generate_batches_from_hdf5_file(test_file, batchsize, n_bins, class_type, 
                                    is_autoencoder, dataset_info_dict, broken_simulations_mode=0,
                                    f_size=None, zero_center_image=xs_mean, yield_mc_info=yield_mc_info,
                                    swap_col=None, is_in_test_mode = False)
    return generator
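# A minimal usage sketch (assumption: the generator yields (xs, ys) batches, plus the mc info
# when yield_mc_info=True; the class_type value below is only illustrative):
#   dataset_info_dict = get_dataset_info("xyzc_flat")
#   generator = setup_generator_testfile((2, "up_down"), False, dataset_info_dict)
#   xs, ys = next(generator)   # one zero-centered batch of size dataset_info_dict["batchsize"]
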
# For plot mode: how many batches (of size 32) of channel_id arrays should be read for the plot
how_many_dom_batches = 1000
bins=100

model=load_model(model_name)
dataset_info_dict=get_dataset_info("xyzc_flat")

if mode == "simple":
    # Print some channel_id arrays (31 entries each) and the autoencoder's prediction for them
    how_many_doms=10 #to read from file
    minimum_counts = 5
    
    test_file = dataset_info_dict["test_file"]
    
    if zero_center==True:
        xs_mean=load_zero_center_data(((dataset_info_dict["train_file"],),), batchsize=32, n_bins=dataset_info_dict["n_bins"], n_gpu=1)
    else:
        xs_mean = 0
    
    f = h5py.File(test_file, "r")
    
    #look for some doms that are not mostly 0
    batch=[]
    i=0
    n_events_in_file = f["x"].shape[0]
    while len(batch) < how_many_doms and i < n_events_in_file:
        dom = f["x"][i:i+1]
        if dom.sum() >= minimum_counts:
            batch.extend(dom)
        i += 1
        
    batch=np.array(batch) 
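
    # Hedged sketch of what would typically follow in this "simple" mode (not part of the
    # original fragment; model.predict is standard Keras, the rest is assumed):
    #   centered = batch - xs_mean   # xs_mean is 0 when zero centering is disabled
    #   prediction = model.predict(centered, batch_size=32)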
filesize_factor_test = dataset_info_dict["filesize_factor_test"]
batchsize = dataset_info_dict["batchsize"]  #def 32

train_tuple = [[
    train_file,
    int(h5_get_number_of_rows(train_file) * filesize_factor)
]]
test_files = [[
    test_file,
    int(h5_get_number_of_rows(test_file) * filesize_factor_test)
]]

n_gpu = (1, 'avolkov')
if zero_center == True:
    xs_mean = load_zero_center_data(train_files=train_tuple,
                                    batchsize=batchsize,
                                    n_bins=n_bins,
                                    n_gpu=n_gpu[0])
else:
    xs_mean = None

swap_4d_channels = None
evaluation = evaluate_model(model, test_files, batchsize, n_bins, class_type,
                            xs_mean, swap_4d_channels, is_autoencoder,
                            broken_simulations_mode, dataset_info_dict)

metrics = model.metrics_names
print("\n\n")
if "acc" in metrics:
    #loss and accuracy
    print('\n{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6:.4g}'.format(
        "--epoch--",
Example 4
def execute_training(modeltag, runs, autoencoder_stage, epoch, encoder_epoch, class_type, zero_center, verbose, dataset,
                     learning_rate, learning_rate_decay, epsilon, lambda_comp, use_opti, encoder_version, options,
                     ae_loss_name, supervised_loss, init_model_path):
    # Get info like path of trainfile etc.
    dataset_info_dict = get_dataset_info(dataset)
    # home_path=dataset_info_dict["home_path"]
    train_file = dataset_info_dict["train_file"]
    test_file = dataset_info_dict["test_file"]
    n_bins = dataset_info_dict["n_bins"]
    broken_simulations_mode = dataset_info_dict["broken_simulations_mode"]  # def 0
    filesize_factor = dataset_info_dict["filesize_factor"]
    filesize_factor_test = dataset_info_dict["filesize_factor_test"]
    batchsize = dataset_info_dict["batchsize"]  # def 32

    # All models are now saved in their own folder   models/"modeltag"/
    model_folder = "/home/woody/capn/mppi013h/Km3-Autoencoder/models/" + modeltag + "/"

    # Only for the encoder-types. Autoencoders ignore this:
    number_of_output_neurons = class_type[0]

    # If autoencoder stage 4 is selected (unfreeze C layers), set up everything like AE stage 1
    if autoencoder_stage == 4:
        autoencoder_stage = 1
        unfreeze_layer_training = True
        print("Autoencoder stage 4: Unfreeze Training. Setting up network like in AE stage 1...")
    else:
        unfreeze_layer_training = False

    custom_objects = None
    # define loss function to use for new AEs
    print("Using AE loss:", ae_loss_name)
    if ae_loss_name == "mse":
        ae_loss = "mse"
    elif ae_loss_name == "mae":
        ae_loss = "mae"
    else:
        # custom loss functions have to be handed to load_model, or it won't work
        custom_objects = get_custom_objects()
        ae_loss = custom_objects[ae_loss_name]

    # define loss function and metrics to use for new supervised networks
    if supervised_loss == "auto":
        # automatically choose the supervised loss based on the number of output neurons;
        # otherwise use the user defined one (mse or mae)
        if number_of_output_neurons >= 2:
            # e.g. up-down, PID, ...
            supervised_loss = 'categorical_crossentropy'
            supervised_metrics = ['accuracy']
        else:
            # for energy regression
            supervised_loss = 'mae'
            supervised_metrics = None
    else:
        if supervised_loss == 'categorical_crossentropy':
            supervised_metrics = ['accuracy']
        elif supervised_loss == 'mae':
            supervised_metrics = None
        elif supervised_loss == 'mse':
            supervised_metrics = None
        else:
            raise NameError("supervised_loss: " + supervised_loss + " unknown.")
    print("Using supervised loss:", supervised_loss)

    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
        print("Created model folder", model_folder)

    # Optimizer used in all the networks:
    lr = learning_rate  # 0.01 default for SGD, 0.001 for Adam
    # lr_decay can either be a float, e.g. 0.05 for 5% decay of lr per epoch,
    # or it can be a string like s1 for lr schedule 1.
    # The original learning rate is still passed to the lr schedule function.
    try:
        lr_decay = float(learning_rate_decay)  # % decay for each epoch, e.g. if 0.05 -> lr_new = lr*(1-0.05)=0.95*lr
        lr_schedule_number = None  # no schedule
    except ValueError:
        # then it is a schedule like s1 or some other string
        lr_schedule_number = learning_rate_decay
        lr_decay = 0
        lr = 0.001
        print("Using learning rate schedule", lr_schedule_number)

    # automatically look for latest epoch if -1 was given:
    if autoencoder_stage == 0 and epoch == -1:
        epoch = 0
        while True:
            if os.path.isfile(
                    model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(epoch + 1) + '.h5') == True:
                epoch += 1
            else:
                break
    elif autoencoder_stage == 1 and encoder_epoch == -1:
        encoder_epoch = 0
        while True:
            if os.path.isfile(
                    model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(epoch) + "_supervised_" +
                    class_type[1] + encoder_version + '_epoch' + str(encoder_epoch + 1) + '.h5') == True:
                encoder_epoch += 1
            else:
                break
    elif autoencoder_stage == 2 and encoder_epoch == -1:
        encoder_epoch = 0
        while True:
            if os.path.isfile(model_folder + "trained_" + modeltag + "_supervised_" + class_type[
                1] + encoder_version + '_epoch' + str(encoder_epoch + 1) + '.h5') == True:
                encoder_epoch += 1
            else:
                break
    elif autoencoder_stage == 3 and encoder_epoch == -1:
        encoder_epoch = 0
        while True:
            if os.path.isfile(model_folder + "trained_" + modeltag + "_autoencoder_supervised_parallel_" + class_type[
                1] + encoder_version + '_epoch' + str(encoder_epoch + 1) + '.h5') == True:
                encoder_epoch += 1
            else:
                break

    # if lr is negative, take its absolute value as the starting lr and apply the decays that happened during the
    # previous epochs; the lr gets decayed once when train_and_test_model is called (hence epoch-1 here)
    if lr < 0:
        if autoencoder_stage == 0 and epoch > 0:
            lr = abs(lr * (1 - float(lr_decay)) ** (epoch - 1))

        elif (autoencoder_stage == 1 or autoencoder_stage == 2 or autoencoder_stage == 3) and encoder_epoch > 0:
            lr = abs(lr * (1 - float(lr_decay)) ** (encoder_epoch - 1))
        else:
            lr = abs(lr)
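    # Worked example: lr=-0.1, lr_decay=0.05, autoencoder_stage=0, epoch=4
    # -> lr = 0.1 * 0.95**3 ≈ 0.0857, reproducing the decay already applied in earlier epochs.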

    # Optimizer to be used. If an epsilon is specified, adam is used with epsilon=10**(given epsilon).
    # only used when compiling model, so use lambda_comp if optimizer should be changed
    # Default:
    # adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    if use_opti == "adam" or use_opti == "ADAM":
        adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=10 ** epsilon, decay=0.0)
    elif use_opti == "SGD" or use_opti == "sgd":
        adam = optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    else:
        print("Optimizer ", use_opti, " unknown!")
        raise NameError(use_opti)

    # fit_model and evaluate_model take lists of tuples, so that you can give many single files (doesn't work?)
    train_tuple = [[train_file, int(h5_get_number_of_rows(train_file) * filesize_factor)]]
    test_tuple = [[test_file, int(h5_get_number_of_rows(test_file) * filesize_factor_test)]]
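    # e.g. train_tuple = [["/path/to/train.h5", 4000000]] (path and row count purely illustrative)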

    # Check whether a file with this name exists already
    def check_for_file(proposed_filename):
        if os.path.isfile(proposed_filename):
            sys.exit("Warning: " + proposed_filename + " exists already! Exiting...")

    # Zero-Center with precalculated mean image
    n_gpu = (1, 'avolkov')
    if zero_center == True:
        xs_mean = load_zero_center_data(train_files=train_tuple, batchsize=batchsize, n_bins=n_bins, n_gpu=n_gpu[0])
    else:
        xs_mean = None
        print("Not using zero centering. Are you sure?")

    autoencoder_model = None
    # Setup network:
    # If the corresponding epoch is 0, a new model is created. Otherwise, the existing model
    # of the given epoch is loaded unchanged.
    # Autoencoder self-supervised training. Epoch is the autoencoder epoch, enc_epoch not relevant for this stage
    if autoencoder_stage == 0:
        is_autoencoder = True
        modelname = modeltag + "_autoencoder"
        print("\n\nAutoencoder stage 0")
        if epoch == 0:
            # Create a new autoencoder network
            print("Creating new autoencoder network:", modeltag)
            model = setup_model(model_tag=modeltag, autoencoder_stage=0, modelpath_and_name=None,
                                additional_options=options)
            model.compile(optimizer=adam, loss=ae_loss)
            # Create header for new test log file
            with open(model_folder + "trained_" + modelname + '_test.txt', 'w') as test_log_file:
                metrics = model.metrics_names  # ["loss"]
                test_log_file.write('{0}\tTest {1}\tTrain {2}\tTime\tLR'.format("Epoch", metrics[0], metrics[0]))
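                # resulting header line, e.g. for loss "mse": "Epoch\tTest loss\tTrain loss\tTime\tLR"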

        else:
            # Load an existing trained autoencoder network and train that
            autoencoder_model_to_load = model_folder + "trained_" + modelname + '_epoch' + str(epoch) + '.h5'
            print("Loading existing autoencoder to continue training:", autoencoder_model_to_load)
            if lambda_comp == False:
                model = load_model(autoencoder_model_to_load, custom_objects=custom_objects)
            elif lambda_comp == True:
                # in case of lambda layers: load the model structure and insert the weights manually, because load_model is buggy for lambda layers
                print("Lambda mode enabled")
                model = setup_model(model_tag=modeltag, autoencoder_stage=0, modelpath_and_name=None,
                                    additional_options=options)
                model.load_weights(autoencoder_model_to_load)
                model.compile(optimizer=adam, loss=ae_loss)

                opti_weights = load_model(autoencoder_model_to_load,
                                          custom_objects=custom_objects).optimizer.get_weights()
                model.optimizer.set_weights(opti_weights)

    # Encoder supervised training:
    # Load the encoder part of an autoencoder, import weights from trained model, freeze it and add dense layers
    elif autoencoder_stage == 1:
        print("\n\nAutoencoder stage 1")
        is_autoencoder = False
        # name of the autoencoder model file that the encoder part is taken from:
        autoencoder_model = model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(epoch) + '.h5'
        # name of the supervised model:
        modelname = modeltag + "_autoencoder_epoch" + str(epoch) + "_supervised_" + class_type[1] + encoder_version

        if encoder_epoch == 0:
            # Create a new encoder network:
            print("Creating new encoder network", modeltag, "from autoencoder", autoencoder_model)
            model = setup_model(model_tag=modeltag, autoencoder_stage=1, modelpath_and_name=autoencoder_model,
                                additional_options=options, number_of_output_neurons=number_of_output_neurons)
            model.compile(loss=supervised_loss, optimizer=adam, metrics=supervised_metrics)
            # Create header for new test log file
            with open(model_folder + "trained_" + modelname + '_test.txt', 'w') as test_log_file:
                metrics = model.metrics_names  # ['loss', 'acc']
                if len(metrics) == 2:
                    line = '{0}\tTest {1}\tTrain {2}\tTest {3}\tTrain {4}\tTime\tLR'.format("Epoch", metrics[0],
                                                                                            metrics[0], metrics[1],
                                                                                            metrics[1])
                elif len(metrics) == 1:
                    line = '{0}\tTest {1}\tTrain {2}\tTime\tLR'.format("Epoch", metrics[0], metrics[0])
                else:
                    sys.exit("Warning: Only 1 or 2 metrics are supported for logfile headers. Given was", metrics,
                             "Exiting...")
                test_log_file.write(line)

        else:
            # Load an existing trained encoder network and train that
            encoder_network_to_load = model_folder + "trained_" + modelname + '_epoch' + str(encoder_epoch) + '.h5'
            print("Loading existing encoder network", encoder_network_to_load)
            model = load_model(encoder_network_to_load, custom_objects=custom_objects)


    # Unfrozen Encoder supervised training with completely unfrozen model:
    elif autoencoder_stage == 2:
        print("\n\nAutoencoder stage 2")
        is_autoencoder = False
        # name of the supervised model:
        modelname = modeltag + "_supervised_" + class_type[1] + encoder_version

        if encoder_epoch == 0:
            # Create a new encoder network:
            print("Creating new unfrozen encoder network:", modelname)
            model = setup_model(model_tag=modeltag, autoencoder_stage=2, additional_options=options,
                                number_of_output_neurons=number_of_output_neurons)
            model.compile(loss=supervised_loss, optimizer=adam, metrics=supervised_metrics)
            # Create header for new test log file
            with open(model_folder + "trained_" + modelname + '_test.txt', 'w') as test_log_file:
                metrics = model.metrics_names  # ['loss', 'acc']
                if len(metrics) == 2:
                    line = '{0}\tTest {1}\tTrain {2}\tTest {3}\tTrain {4}\tTime\tLR'.format("Epoch", metrics[0],
                                                                                            metrics[0], metrics[1],
                                                                                            metrics[1])
                elif len(metrics) == 1:
                    line = '{0}\tTest {1}\tTrain {2}\tTime\tLR'.format("Epoch", metrics[0], metrics[0])
                else:
                    sys.exit("Warning: Only 1 or 2 metrics are supported for logfile headers. Given was", metrics,
                             "Exiting...")
                test_log_file.write(line)
        else:
            # Load an existing trained encoder network and train that
            load_this = model_folder + "trained_" + modelname + '_epoch' + str(encoder_epoch) + '.h5'
            print("Loading existing unfrozen encoder network", load_this)
            model = load_model(load_this, custom_objects=custom_objects)

    # Training of the supervised network on several different autoencoder epochs
    # epoch is calculated automatically and not used from user input
    # encoder epoch as usual
    # This does not use the same call for executing the training as stage 0,1 and 2

    # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    elif autoencoder_stage == 3:
        # how many encoder epochs to train on each autoencoder epoch, starting from AE epoch 1;
        # if the first entry of the schedule is 0, a pretrained supervised network is loaded for initialization
        if modeltag[:7] == "channel":
            # channel id autoencoders need less epochs per AE epoch, their modeltag starts with channel
            how_many_epochs_each_to_train = [1, ] * 100
            # Dataset is switched when moving to encoder training, so stateful has to be active
            make_stateful = True
        else:
            how_many_epochs_each_to_train = [10, ] * 1 + [2, ] * 5 + [1, ] * 194
            make_stateful = False

        # model to initialize from if first epoch is 0
        # this one is only used for vgg_3_eps modeltag
        init_model_eps = model_folder + "trained_vgg_3_eps_autoencoder_epoch1_supervised_up_down_accdeg2_epoch26.h5"

        print("\n\nAutoencoder stage 3:\nParallel training with epoch schedule:",
              how_many_epochs_each_to_train[:20], ",...")

        def switch_encoder_weights(encoder_model, autoencoder_model, last_encoder_layer_index_override=None):
            # Change the weights of the frozen layers (up to the flatten layer)
            # of the frozen encoder to that of another autoencoder model
            changed_layers = 0
            # look for the last encoder layer: the last flatten layer in the network, or the layer named "encoded" if present
            if last_encoder_layer_index_override == None:
                last_encoder_layer_index = get_index_of_bottleneck(encoder_model)
            else:
                last_encoder_layer_index = last_encoder_layer_index_override

            for i, layer in enumerate(encoder_model.layers):
                if i <= last_encoder_layer_index:
                    layer.set_weights(autoencoder_model.layers[i].get_weights())
                    changed_layers += 1
                else:
                    break
            print("Weights of layers changed:", changed_layers, "(up to layer",
                  encoder_model.layers[last_encoder_layer_index].name, ")")

        # for the vgg_5_200_dense model, set the encoded layer index manually (originally thought necessary; get_index_of_bottleneck actually works, too)
        if modeltag == "vgg_5_200_dense":
            last_encoder_layer_index_override = 35
            print("Last encoder layer set to 35")
        else:
            last_encoder_layer_index_override = None

        # Encoder epochs after which to switch the autoencoder model
        switch_autoencoder_model = np.cumsum(how_many_epochs_each_to_train)
        # calculate the current autoencoder epoch automatically based on the encoder epoch
        # e.g. switch_at = [10,12,14], encoder_epoch = 11
        # --> AE epoch=2
        for ae_epoch, switch in enumerate(switch_autoencoder_model):
            if encoder_epoch - switch <= 0:
                autoencoder_epoch = ae_epoch + 1
                break
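        # Worked example with the default non-channel schedule [10, 2, 2, 2, 2, 2, 1, ...]:
        # switch_autoencoder_model starts as [10, 12, 14, 16, 18, 20, 21, ...], so
        # encoder_epoch=11 first satisfies the condition at index 1 and gives autoencoder_epoch=2.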

        is_autoencoder = False
        # name of the autoencoder model file that the encoder part is taken from:
        autoencoder_model = model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(autoencoder_epoch) + '.h5'
        # name of the supervised model:
        modelname = modeltag + "_autoencoder_supervised_parallel_" + class_type[1] + encoder_version

        if encoder_epoch == 0:
            # Create a new encoder network:
            model = setup_model(model_tag=modeltag, autoencoder_stage=1, modelpath_and_name=autoencoder_model,
                                additional_options=options, number_of_output_neurons=number_of_output_neurons)
            model.compile(loss=supervised_loss, optimizer=adam, metrics=supervised_metrics)

            # Custom model is loaded as initialization
            if switch_autoencoder_model[0] == 0:
                if modeltag == "vgg_3_eps":
                    init_model = init_model_eps
                else:
                    raise ("Cannot load initial model " + init_model + " Modeltags are different " + modeltag)
                print("Initializing model to", init_model)
                autoencoder_epoch = 2
                autoencoder_model = model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(
                    autoencoder_epoch) + '.h5'
                model_for_init = load_model(init_model, custom_objects=custom_objects)
                for i, layer in enumerate(model.layers):
                    layer.set_weights(model_for_init.layers[i].get_weights())

            # Create header for new test log file
            with open(model_folder + "trained_" + modelname + '_test.txt', 'w') as test_log_file:
                metrics = model.metrics_names  # ['loss', 'acc']
                if len(metrics) == 2:
                    line = '{0}\tTest {1}\tTrain {2}\tTest {3}\tTrain {4}\tTime\tLR'.format("Epoch", metrics[0],
                                                                                            metrics[0], metrics[1],
                                                                                            metrics[1])
                elif len(metrics) == 1:
                    line = '{0}\tTest {1}\tTrain {2}\tTime\tLR'.format("Epoch", metrics[0], metrics[0])
                else:
                    sys.exit("Warning: Only 1 or 2 metrics are supported for logfile headers. Given was", metrics,
                             "Exiting...")
                test_log_file.write(line)
        else:
            # Load an existing trained encoder network and train that
            model = load_model(model_folder + "trained_" + modelname + '_epoch' + str(encoder_epoch) + '.h5',
                               custom_objects=custom_objects)
            if make_stateful == True:
                model = make_encoder_stateful(model)

        # Own execution of training
        # Set LR of loaded model to new lr
        K.set_value(model.optimizer.lr, learning_rate)

        # Which epochs are the ones relevant for current stage
        running_epoch = encoder_epoch

        model.summary()
        print("\n\nModel: ", modelname)
        print("Current State of optimizer: \n", model.optimizer.get_config())
        filesize_hint = "Filesize factor=" + str(filesize_factor) if filesize_factor != 1 else ""
        filesize_hint_test = "Filesize factor test=" + str(filesize_factor_test) if filesize_factor_test != 1 else ""
        print("Train files:", train_tuple, filesize_hint)
        print("Test files:", test_tuple, filesize_hint_test)
        print("Using autoencoder model:", autoencoder_model)

        # Execute Training:
        for current_epoch in range(running_epoch, running_epoch + runs):
            # Does the model we are about to save exist already?
            print("\n")
            check_for_file(model_folder + "trained_" + modelname + '_epoch' + str(current_epoch + 1) + '.h5')
            # custom lr schedule; lr_decay was set to 0 already
            if lr_schedule_number != None:
                lr = lr_schedule(current_epoch + 1, lr_schedule_number, learning_rate)
                K.set_value(model.optimizer.lr, lr)

            if current_epoch in switch_autoencoder_model:
                autoencoder_epoch += 1
                autoencoder_model = model_folder + "trained_" + modeltag + "_autoencoder_epoch" + str(
                    autoencoder_epoch) + '.h5'
                print("Changing weights before epoch ", current_epoch + 1, " to ", autoencoder_model)
                switch_encoder_weights(model, load_model(autoencoder_model, custom_objects=custom_objects),
                                       last_encoder_layer_index_override)

            # Train network, write logfile, save network, evaluate network, save evaluation to file
            lr = train_and_test_model(model=model, modelname=modelname, train_files=train_tuple, test_files=test_tuple,
                                      batchsize=batchsize, n_bins=n_bins, class_type=class_type, xs_mean=xs_mean,
                                      epoch=current_epoch,
                                      shuffle=False, lr=lr, lr_decay=lr_decay, tb_logger=False, swap_4d_channels=None,
                                      save_path=model_folder, is_autoencoder=is_autoencoder, verbose=verbose,
                                      broken_simulations_mode=broken_simulations_mode,
                                      dataset_info_dict=dataset_info_dict)

        sys.exit()

    # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

    if init_model_path is not None:
        print("Initializing model weights to", init_model_path)
        init_model = load_model(init_model_path, custom_objects=custom_objects)
        for i, layer in enumerate(model.layers):
            layer.set_weights(init_model.layers[i].get_weights())

    # Which epochs are the ones relevant for current stage
    if is_autoencoder == True:
        running_epoch = epoch  # Stage 0
    else:
        running_epoch = encoder_epoch  # Stage 1 and 2

    # Set LR of loaded model to new lr
    if lr_schedule_number != None:
        lr = lr_schedule(running_epoch + 1, lr_schedule_number, learning_rate)
    K.set_value(model.optimizer.lr, lr)

    model.summary()
    print("\n\nModel: ", modelname)
    print("Current State of optimizer: \n", model.optimizer.get_config())
    filesize_hint = "Filesize factor=" + str(filesize_factor) if filesize_factor != 1 else ""
    filesize_hint_test = "Filesize factor test=" + str(filesize_factor_test) if filesize_factor_test != 1 else ""
    print("Train files:", train_tuple, filesize_hint)
    print("Test files:", test_tuple, filesize_hint_test)
    if autoencoder_model is not None: print("Using autoencoder model:", autoencoder_model)

    # Execute Training:
    for current_epoch in range(running_epoch, running_epoch + runs):
        # This is before epoch current_epoch+1
        # Does the model we are about to save exist already?
        print("\n")
        check_for_file(model_folder + "trained_" + modelname + '_epoch' + str(current_epoch + 1) + '.h5')

        if lr_schedule_number != None:
            lr = lr_schedule(current_epoch + 1, lr_schedule_number, learning_rate)
            K.set_value(model.optimizer.lr, lr)

        if unfreeze_layer_training == True:
            # Unfreeze C layers of the model according to schedule
            # An additional C block is set trainable before these epochs
            unfreeze_a_c_block_at = np.array([5, 10, 15, 20, 25, 30, 35, 40, 45])

            how_many = np.where(unfreeze_a_c_block_at == current_epoch)[0]
            if len(how_many) > 0:
                how_many = how_many[0] + 1
                model = unfreeze_conv_layers(model, how_many)
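        # Example: current_epoch=15 matches index 2 of unfreeze_a_c_block_at,
        # so how_many becomes 3 and three C blocks are made trainable before this epoch.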

        # Train network, write logfile, save network, evaluate network, save evaluation to file
        lr = train_and_test_model(model=model, modelname=modelname, train_files=train_tuple, test_files=test_tuple,
                                  batchsize=batchsize, n_bins=n_bins, class_type=class_type, xs_mean=xs_mean,
                                  epoch=current_epoch,
                                  shuffle=False, lr=lr, lr_decay=lr_decay, tb_logger=False, swap_4d_channels=None,
                                  save_path=model_folder, is_autoencoder=is_autoencoder, verbose=verbose,
                                  broken_simulations_mode=broken_simulations_mode, dataset_info_dict=dataset_info_dict)
Example 5
def train_model(model, dataset, zero_center, modelname, autoencoder_model,
                lr_schedule_number, runs, learning_rate, model_folder,
                last_encoder_layer_index_override, switch_autoencoder_model,
                autoencoder_stage, succ_autoencoder_epoch, modeltag,
                unfreeze_layer_training, custom_objects, class_type, lr,
                lr_decay, verbose, is_AE_adevers_training, is_autoencoder,
                epoch, encoder_epoch):
    """ Train, test and save the model and logfiles. """

    #Get infos about the dataset
    dataset_info_dict = get_dataset_info(dataset)
    train_file = dataset_info_dict["train_file"]
    test_file = dataset_info_dict["test_file"]
    n_bins = dataset_info_dict["n_bins"]
    broken_simulations_mode = dataset_info_dict[
        "broken_simulations_mode"]  #def 0
    filesize_factor = dataset_info_dict["filesize_factor"]
    filesize_factor_test = dataset_info_dict["filesize_factor_test"]
    batchsize = dataset_info_dict["batchsize"]  #def 32
    #The files to train and test on, together with the number of events in them
    train_tuple = [[
        train_file,
        int(h5_get_number_of_rows(train_file) * filesize_factor)
    ]]
    test_tuple = [[
        test_file,
        int(h5_get_number_of_rows(test_file) * filesize_factor_test)
    ]]
    #Zero-Center for the data. if zero center image does not exist, a new one
    #is calculated and saved
    if zero_center == True:
        xs_mean = load_zero_center_data(train_files=train_tuple,
                                        batchsize=batchsize,
                                        n_bins=n_bins,
                                        n_gpu=1)
        print("Using zero centering.")
    else:
        xs_mean = None
        print("Not using zero centering.")
    #Which epochs are the ones relevant for current stage
    if is_autoencoder == True:
        running_epoch = epoch  #Stage 0
    else:
        running_epoch = encoder_epoch  #Stage 1,2,3
    #Set LR of loaded model to new lr
    if lr_schedule_number != None:
        lr = lr_schedule(running_epoch + 1, lr_schedule_number, learning_rate)
    K.set_value(model.optimizer.lr, lr)

    #Print info about the model and training
    model.summary()
    print("\n\nModel: ", modelname)
    print("Current State of optimizer: \n", model.optimizer.get_config())
    filesize_hint = "Filesize factor=" + str(
        filesize_factor) if filesize_factor != 1 else ""
    filesize_hint_test = "Filesize factor test=" + str(
        filesize_factor_test) if filesize_factor_test != 1 else ""
    print("Train files:", train_tuple, filesize_hint)
    print("Test files:", test_tuple, filesize_hint_test)
    print("Using metrics:", model.metrics_names)
    if autoencoder_model is not None:
        print("Using autoencoder model:", autoencoder_model)

    #Main loop: Execute Training
    for current_epoch in range(running_epoch, running_epoch + runs):
        #This is before epoch current_epoch+1
        print("\n")
        #Does the model we are about to save exist already?
        proposed_filename = model_folder + "trained_" + modelname + '_epoch' + str(
            current_epoch + 1) + '.h5'
        if os.path.isfile(proposed_filename):
            raise NameError("Warning: " + proposed_filename + " exists already!")
        if lr_schedule_number != None:
            lr = lr_schedule(current_epoch + 1, lr_schedule_number,
                             learning_rate)
            K.set_value(model.optimizer.lr, lr)

        #For autoencoder stage 3 (Successive training):
        #Load in weights of new encoders periodically
        #succ_autoencoder_epoch is the epoch of the autoencoder from which
        #the weights are loaded in
        if autoencoder_stage == 3:
            if current_epoch in switch_autoencoder_model:
                succ_autoencoder_epoch += 1
                autoencoder_model = model_folder + "trained_" + modeltag \
                    + "_autoencoder_epoch" + str(succ_autoencoder_epoch) + '.h5'
                print("Changing weights before epoch ", current_epoch + 1,
                      " to ", autoencoder_model)
                switch_encoder_weights(
                    model,
                    load_model(autoencoder_model,
                               custom_objects=custom_objects),
                    last_encoder_layer_index_override)
        #For autoencoder stage 4 (Layer unfreeze training):
        if unfreeze_layer_training == True:
            #Unfreeze C layers of the model according to schedule
            #An additional C block is set trainable before these epochs
            unfreeze_a_c_block_at = np.array([
                5,
                10,
                15,
                20,
                25,
                30,
                35,
                40,
            ])
            how_many = np.where(unfreeze_a_c_block_at == current_epoch)[0]
            if len(how_many) > 0:
                how_many = how_many[0] + 1
                model = unfreeze_conv_layers(model, how_many)

        #Train network, write logfile, save network, evaluate network, save evaluation to file
        lr = train_and_test_model(
            model=model,
            modelname=modelname,
            train_files=train_tuple,
            test_files=test_tuple,
            batchsize=batchsize,
            n_bins=n_bins,
            class_type=class_type,
            xs_mean=xs_mean,
            epoch=current_epoch,
            shuffle=False,
            lr=lr,
            lr_decay=lr_decay,
            tb_logger=False,
            swap_4d_channels=None,
            save_path=model_folder,
            is_autoencoder=is_autoencoder,
            verbose=verbose,
            broken_simulations_mode=broken_simulations_mode,
            dataset_info_dict=dataset_info_dict,
            is_AE_adevers_training=is_AE_adevers_training)
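
# Minimal call sketch (argument values are illustrative assumptions, not taken from the
# original script; the model is expected to be compiled and, for stages 1-3, set up already):
#   train_model(model=model, dataset="xyzc_flat", zero_center=True, modelname=modelname,
#               autoencoder_model=None, lr_schedule_number=None, runs=1, learning_rate=0.001,
#               model_folder=model_folder, last_encoder_layer_index_override=None,
#               switch_autoencoder_model=[], autoencoder_stage=0, succ_autoencoder_epoch=0,
#               modeltag=modeltag, unfreeze_layer_training=False, custom_objects=None,
#               class_type=(2, "up_down"), lr=0.001, lr_decay=0.0, verbose=2,
#               is_AE_adevers_training=False, is_autoencoder=True, epoch=0, encoder_epoch=0)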