Ejemplo n.º 1
0
def get_single_multi_stacked_model_simulation(
        source_model_1, source_model_2, target_model, n_stacked_simulations,
        target_x, target_y, target_x_val, target_y_val, target_x_test,
        target_y_test, n_stacked_epochs, batch_size, reg_lam):
    """ Trains the multi-stacked model several times and averages the results.
    args:
        source_model_1,source_model_2: the two trained source DA models
        target_model: the trained target model
        n_stacked_simulations: number of times the stacked model is compiled and trained from scratch
        n_stacked_epochs: number of epochs passed to each stacked-model training run
        reg_lam: regularization value forwarded to compile_multi_stacked_model
    returns:
        tuple of four means (entries 0..3 of each run's best performance), each rounded to 4 decimals;
        named val acc, val bal acc, test acc, test bal acc by the callers
    """
    collected = []  # best performance of every repetition
    repetition = 0
    while repetition < n_stacked_simulations:  # repetitions help account for random initialization
        stacked = compile_multi_stacked_model(
            source_model_1,
            source_model_2,
            target_model,
            input_dim=target_x.shape[-1],
            optimizer=Adam(lr=0.003),
            reg_lam=reg_lam,
        )
        _, best, _ = get_single_target_model_simulation(
            stacked, target_x, target_y, target_x_val, target_y_val,
            target_x_test, target_y_test, n_stacked_epochs, batch_size,
            use_cutoff=False)
        collected.append(best)
        repetition += 1

    # average each of the four metrics over all repetitions, in metric order
    averages = []
    for metric_index in range(4):
        metric_values = [run[metric_index] for run in collected]
        averages.append(round(get_mean(metric_values), 4))
    return tuple(averages)
def get_domain_adaptation_model_simulation(all_data,
                                           target_data_name,
                                           source_data_name,
                                           model_name,
                                           n_simulations=5,
                                           n_epochs=10,
                                           batch_size=50,
                                           model_type="semi_supervised",
                                           output_dir="../model_output/",
                                           save_runs=False):
    """ Simulation of the standard domain adaptation model.

    Builds and trains a fresh DA model n_simulations times and averages the
    best performance reported by each run.

    args:
        all_data: list which contains the target train,val,test sets and the source train set
        target_data_name: name of target domain data (e.g. amazon_toys)
        source_data_name: name of source domain data (e.g. amazon_toys)
        model_name: name of the domain adaptation model (dann,mmd,coral)
        n_simulations: number of times to run the model from scratch (average results over)
        n_epochs: number of epochs passed to each training run
        batch_size: batch size passed to each training run
        model_type: (e.g. unsupervised or semi_supervised)
        output_dir: directory in which the run metrics are stored (under a model_type sub-directory)
        save_runs(bool): whether to write the run metrics to a file, also controls printing info to console
    returns:
        (mean_val_acc, mean_test_acc), each rounded to 4 decimals
    raises:
        ValueError: if model_name is not one of "dann", "mmd", "coral"
    """
    # Fail fast: an unknown name would otherwise only surface inside the loop
    # as an unbound `best_performance`.
    if model_name not in ("dann", "mmd", "coral"):
        raise ValueError(
            "unknown model_name {!r}; expected 'dann', 'mmd' or 'coral'".format(model_name))

    target_x, target_y, target_x_val, target_y_val, target_x_test, target_y_test, source_data = all_data
    source_x, source_y = source_data

    source_to_target_text = source_data_name + " -> " + target_data_name + "; " + model_name
    out_file = None
    if save_runs:
        out_file = open(output_dir + model_type + "/" + source_to_target_text + ".txt", "w+")  # file for outputting model runs
        print(source_to_target_text + "; " + model_type)

    try:
        all_model_runs = []  # stores the best performance metrics for each run for later averaging
        for _ in range(n_simulations):
            start = time.time()
            if model_name == "dann":
                model = dann_model(input_dim=target_x.shape[-1])
                run_simulation = get_single_dann_model_simulation
            elif model_name == "mmd":
                model = mmd_model(input_dim=target_x.shape[-1])
                run_simulation = get_single_mmd_model_simulation
            else:  # "coral" (validated above)
                model = coral_model(input_dim=target_x.shape[-1])
                run_simulation = get_single_coral_model_simulation
            _, best_performance, _ = run_simulation(
                model, source_x, source_y, target_x, target_y,
                target_x_val, target_y_val, target_x_test, target_y_test,
                n_epochs, batch_size, model_type,
                optimizer=Adam(lr=0.003), use_cutoff=False)

            all_model_runs.append(best_performance)
            val_acc, val_bal_acc, test_acc, test_bal_acc = best_performance
            model_run_text = "Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}\n".format(val_acc, val_bal_acc, test_acc, test_bal_acc)
            took = str(round((time.time() - start) / 60, 2))  # minutes
            if save_runs:
                out_file.write(model_run_text)
                print("-" + took, end='', flush=True)

        # getting the average performance
        mean_val_acc = round(get_mean([dp[0] for dp in all_model_runs]), 4)
        mean_val_bal_acc = round(get_mean([dp[1] for dp in all_model_runs]), 4)
        mean_test_acc = round(get_mean([dp[2] for dp in all_model_runs]), 4)
        mean_test_bal_acc = round(get_mean([dp[3] for dp in all_model_runs]), 4)
        model_final_text = "AVERAGE: Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}\n".format(mean_val_acc, mean_val_bal_acc, mean_test_acc, mean_test_bal_acc)
        if save_runs:
            out_file.write(model_final_text)
            print()
    finally:
        # close the output file even when a training run raises
        if out_file is not None:
            out_file.close()

    return mean_val_acc, mean_test_acc
def get_target_model_simulation(all_data,
                                target_data_name,
                                n_simulations=5,
                                n_epochs=20,
                                batch_size=50,
                                output_dir="../model_output/",
                                save_runs=False):
    """ Simulation based on training a model with only target data
    args:
        all_data: list which contains the target train,val,test sets (a 7th entry is ignored)
        target_data_name: name of training data (e.g. amazon_toys)
        n_simulations: number of times to run the model from scratch (average results over)
        n_epochs: number of epochs to train the model for each instantiation
        batch_size: batch size passed to each training run
        output_dir: directory in which to store the model runs metrics (under "target_only/")
        save_runs(bool): whether to write the run metrics to a file, also controls printing info to console
    returns:
        (mean_val_acc, mean_test_acc), each rounded to 4 decimals
    """
    x, y, x_val, y_val, x_test, y_test, _ = all_data

    out_file = None
    if save_runs:
        out_file = open(output_dir + "target_only/" + target_data_name + ".txt", "w+")  # file for outputting model runs
        print(target_data_name + "; target-only")

    try:
        all_model_runs = []  # stores the best performance metrics for each run for later averaging
        for _ in range(n_simulations):
            start = time.time()
            model = compile_target_model(input_dim=x.shape[-1], optimizer=Adam(lr=0.003))
            _, best_performance, _ = get_single_target_model_simulation(
                model, x, y, x_val, y_val, x_test, y_test,
                n_epochs, batch_size, use_cutoff=False)
            all_model_runs.append(best_performance)
            val_acc, val_bal_acc, test_acc, test_bal_acc = best_performance
            model_run_text = "Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}\n".format(val_acc, val_bal_acc, test_acc, test_bal_acc)
            took = str(round((time.time() - start) / 60, 2))  # minutes
            if save_runs:
                out_file.write(model_run_text)
                print("-" + took, end='', flush=True)

        # getting the average performance
        mean_val_acc = round(get_mean([dp[0] for dp in all_model_runs]), 4)
        mean_val_bal_acc = round(get_mean([dp[1] for dp in all_model_runs]), 4)
        mean_test_acc = round(get_mean([dp[2] for dp in all_model_runs]), 4)
        mean_test_bal_acc = round(get_mean([dp[3] for dp in all_model_runs]), 4)
        model_final_text = "AVERAGE: Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}\n".format(mean_val_acc, mean_val_bal_acc, mean_test_acc, mean_test_bal_acc)
        if save_runs:
            out_file.write(model_final_text)
            print()
    finally:
        # close the output file even when a training run raises
        if out_file is not None:
            out_file.close()

    return mean_val_acc, mean_test_acc
Ejemplo n.º 4
0
def get_single_da_model(source_x, source_y, source_num, source_cutoff,
                        model_name, model_type, target_x, target_y,
                        target_x_val, target_y_val, target_x_test,
                        target_y_test, n_epochs, batch_size):
    """ returns the trained source-domain model

    Re-trains a fresh DA model until a training run reports that the cutoff
    was used. After every 10 runs that did not use the cutoff, the cutoff is
    replaced by the mean validation accuracy of those 10 runs.

    args:
        source_num: 1 or 2 (int or str), only used in the debugging output
        source_cutoff: initial cutoff passed to the training run
        model_name: name of the domain adaptation model (dann,mmd,coral)
    returns:
        (source_model, source_cutoff): the frozen feature-extraction part of the
        trained model and the (possibly adjusted) cutoff
    raises:
        ValueError: if model_name is not one of "dann", "mmd", "coral"
    """
    # Fail fast: an unknown name would otherwise leave `model` and
    # `best_performance` unbound inside the loop.
    if model_name not in ("dann", "mmd", "coral"):
        raise ValueError(
            "unknown model_name {!r}; expected 'dann', 'mmd' or 'coral'".format(model_name))

    used_cutoff = False
    prev_best_cutoffs = []
    while not used_cutoff:  # keep running the model until the training was properly cutoff
        if model_name == "dann":
            model = dann_model(input_dim=target_x.shape[-1])
            run_simulation = get_single_dann_model_simulation
        elif model_name == "mmd":
            model = mmd_model(input_dim=target_x.shape[-1])
            run_simulation = get_single_mmd_model_simulation
        else:  # "coral" (validated above)
            model = coral_model(input_dim=target_x.shape[-1])
            run_simulation = get_single_coral_model_simulation

        model, best_performance, used_cutoff = run_simulation(
            model,
            source_x,
            source_y,
            target_x,
            target_y,
            target_x_val,
            target_y_val,
            target_x_test,
            target_y_test,
            n_epochs,
            batch_size,
            model_type,
            optimizer=Adam(lr=0.003),
            use_cutoff=True,
            cutoff=source_cutoff)

        if not used_cutoff:  # debugging
            # str() so that an integer source_num (as documented) also works
            print("s" + str(source_num), end='', flush=True)
            prev_best_cutoffs.append(best_performance[0])  # val acc
            if len(prev_best_cutoffs) % 10 == 0:
                # 10 runs without using the cutoff: reset it to their mean val acc
                new_source_cutoff = round(get_mean(prev_best_cutoffs), 4)
                print("[{},{}]".format(source_cutoff, new_source_cutoff),
                      end='',
                      flush=True)
                source_cutoff = new_source_cutoff
                prev_best_cutoffs = []

    source_model = Model(
        inputs=model.layers[0].input,
        outputs=model.layers[1].output)  # extracting feature extraction layers
    source_model.trainable = False
    return source_model, source_cutoff
Ejemplo n.º 5
0
def get_multi_stacked_model_simulation(partition_n,
                                       target_train_size,
                                       target_data_name,
                                       target_cutoff_dict,
                                       source_data_name_1,
                                       source_data_name_2,
                                       source_cutoff_dict,
                                       model_name,
                                       n_simulations=5,
                                       n_epochs=10,
                                       batch_size=50,
                                       n_stacked_simulations=2,
                                       n_stacked_epochs=20,
                                       model_type="unsupervised",
                                       data_dir="../data/",
                                       output_dir="../model_output/",
                                       l2_lam_values=[0.0, 0.01, 0.1, 1.0],
                                       source_cutoff=20000,
                                       use_source_cutoff=False):
    """ Simulation for the stacked model - for the case in which there are two source-domain datasets
    args:
        source_data_name_1,source_data_name_2: names of the two source domain datasets
        target_cutoff_dict: holds cutoffs for target-only model (key: [target_data_name])
        source_cutoff_dict: holds cutoffs for DA models (key: [source-domain-used]_[target-domain]_[model_name])
        l2_lam_values: the lam. values for l2 regularization (must not be empty; never mutated here)
        n_epochs: number of epochs to train the source DA models
        n_stacked_epochs: number of epochs to train the stacked model and the target model
        source_cutoff: the number of source-domain datapoints to use from each domain
    returns:
        (final_mean_test_acc_per_lam, final_mean_s1_s2_corr): a list of
        (lam, mean test acc) pairs in the order of l2_lam_values, and the mean
        correlation value between the two source models' features
    raises:
        ValueError: if l2_lam_values is empty
    """
    if len(l2_lam_values) == 0:
        # nothing to train; fail before the expensive data loading and training
        raise ValueError("l2_lam_values must contain at least one value")

    all_data = load_data(target_dir=target_data_name + "_reviews",
                         source_dirs=[
                             source_data_name_1 + "_reviews",
                             source_data_name_2 + "_reviews"
                         ],
                         partition_n=partition_n,
                         target_train_size=target_train_size,
                         data_dir=data_dir,
                         source_cutoff=source_cutoff,
                         use_source_cutoff=use_source_cutoff)
    target_x, target_y, target_x_val, target_y_val, target_x_test, target_y_test, source_data = all_data
    source_x_1, source_y_1, source_x_2, source_y_2 = source_data

    source_to_target_text = source_data_name_1 + ";" + source_data_name_2 + " -> " + target_data_name
    run_dir = output_dir + "multi_stacked_" + model_type + "/"

    out_files = []  # files for outputting model runs, one per lam. value
    try:
        for lam in l2_lam_values:
            out_files.append(
                open(
                    run_dir + source_to_target_text + "; lam=" + str(lam) +
                    "; " + model_name + ".txt", "w+"))
        print(source_to_target_text + "; " + model_name + "; " + model_type +
              "; multi-stacked")

        # getting the cutoffs for all models:
        target_cutoff = target_cutoff_dict[target_data_name]
        source_cutoff_1 = source_cutoff_dict[source_data_name_1 + "_" +
                                             target_data_name + "_" +
                                             model_name]
        source_cutoff_2 = source_cutoff_dict[source_data_name_2 + "_" +
                                             target_data_name + "_" +
                                             model_name]

        # stores the best performance metrics for each run for later averaging; includes corr. metrics
        all_model_runs = [[] for _ in range(len(l2_lam_values))]
        for _ in range(n_simulations):
            start = time.time()
            print("-", end='', flush=True)

            # training two source-domain DA models (the cutoffs may be adjusted and are carried over):
            source_model_1, source_cutoff_1 = get_single_da_model(
                source_x_1, source_y_1, "1", source_cutoff_1, model_name,
                model_type, target_x, target_y, target_x_val, target_y_val,
                target_x_test, target_y_test, n_epochs, batch_size)
            source_model_2, source_cutoff_2 = get_single_da_model(
                source_x_2, source_y_2, "2", source_cutoff_2, model_name,
                model_type, target_x, target_y, target_x_val, target_y_val,
                target_x_test, target_y_test, n_epochs, batch_size)

            # secondly training the target-domain model:
            used_cutoff = False
            while not used_cutoff:  # keep running the model until the training was properly cutoff
                model = compile_target_model(input_dim=target_x.shape[-1],
                                             optimizer=Adam(lr=0.003))
                model, _, used_cutoff = get_single_target_model_simulation(
                    model,
                    target_x,
                    target_y,
                    target_x_val,
                    target_y_val,
                    target_x_test,
                    target_y_test,
                    n_stacked_epochs,
                    batch_size,
                    use_cutoff=True,
                    cutoff=target_cutoff)
                if not used_cutoff:  # debugging
                    print("t", end='', flush=True)

            target_model = Model(inputs=model.layers[0].input,
                                 outputs=model.layers[1].output
                                 )  # extracting feature extraction layers
            target_model.trainable = False

            # getting correlation metrics between the feature representations of the sources and target models for the target test set:
            target_model_h = target_model(target_x_test).numpy()
            source_model_1_h = source_model_1(target_x_test).numpy()
            source_model_2_h = source_model_2(target_x_test).numpy()
            s1_t_corr = get_mean_corr_values(source_model_1_h, target_model_h)
            s2_t_corr = get_mean_corr_values(source_model_2_h, target_model_h)
            s1_s2_corr = get_mean_corr_values(source_model_1_h,
                                              source_model_2_h)

            # lastly training the stacked model with the various l2_lam_values:
            for i, reg_lam in enumerate(l2_lam_values):
                run_val_acc, run_val_bal_acc, run_test_acc, run_test_bal_acc = get_single_multi_stacked_model_simulation(
                    source_model_1, source_model_2, target_model,
                    n_stacked_simulations, target_x, target_y, target_x_val,
                    target_y_val, target_x_test, target_y_test,
                    n_stacked_epochs, batch_size, reg_lam)

                all_model_runs[i].append(
                    (run_val_acc, run_val_bal_acc, run_test_acc,
                     run_test_bal_acc, s1_t_corr, s2_t_corr, s1_s2_corr))

                if reg_lam == 0.0:  # the corr. values remain the same regardless of the reg. lam.
                    model_run_text = "Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}; CORR mean: s1,t:{} s2,t:{} s1,s2:{}\n".format(
                        run_val_acc, run_val_bal_acc, run_test_acc,
                        run_test_bal_acc, s1_t_corr, s2_t_corr, s1_s2_corr)
                else:
                    model_run_text = "Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{};\n".format(
                        run_val_acc, run_val_bal_acc, run_test_acc,
                        run_test_bal_acc)
                out_files[i].write(model_run_text)

            took = str(round((time.time() - start) / 60, 2))  # minutes
            print(took, end='', flush=True)
        print()

        all_test_string = "AVERAGE: Target TEST acc"  # store the average test acc for each lam. value
        final_mean_test_acc_per_lam = []  # to return
        final_mean_s1_s2_corr = None  # set by the lam=0.0 entry, or by the fallback below
        for i, reg_lam in enumerate(l2_lam_values):
            lam_runs = all_model_runs[i]
            mean_val_acc = round(get_mean([dp[0] for dp in lam_runs]), 4)
            mean_val_bal_acc = round(get_mean([dp[1] for dp in lam_runs]), 4)
            mean_test_acc = round(get_mean([dp[2] for dp in lam_runs]), 4)
            mean_test_bal_acc = round(get_mean([dp[3] for dp in lam_runs]), 4)
            all_test_string += ("; lam={},acc:{}".format(
                reg_lam, mean_test_acc))
            final_mean_test_acc_per_lam.append((reg_lam, mean_test_acc))

            if reg_lam == 0.0:  # the corr. values remain the same regardless of the reg. lam.
                mean_s1_t_corr = round(
                    get_mean(
                        [dp[4] for dp in lam_runs if not np.isnan(dp[4])]), 4)
                mean_s2_t_corr = round(
                    get_mean(
                        [dp[5] for dp in lam_runs if not np.isnan(dp[5])]), 4)
                mean_s1_s2_corr = round(
                    get_mean(
                        [dp[6] for dp in lam_runs if not np.isnan(dp[6])]), 4)
                model_final_text = "AVERAGE: Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}; CORR mean: s1,t:{} s2,t:{} s1,s2:{}\n".format(
                    mean_val_acc, mean_val_bal_acc, mean_test_acc,
                    mean_test_bal_acc, mean_s1_t_corr, mean_s2_t_corr,
                    mean_s1_s2_corr)
                final_mean_s1_s2_corr = mean_s1_s2_corr
            else:
                model_final_text = "AVERAGE: Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{};\n".format(
                    mean_val_acc, mean_val_bal_acc, mean_test_acc,
                    mean_test_bal_acc)

            out_files[i].write(model_final_text)

        if final_mean_s1_s2_corr is None:
            # 0.0 was not among the lam. values: every lam. entry stores the
            # same corr. values per simulation, so use the first one.
            final_mean_s1_s2_corr = round(
                get_mean([
                    dp[6] for dp in all_model_runs[0] if not np.isnan(dp[6])
                ]), 4)

        with open(
                run_dir + "all_test_averages/" + source_to_target_text +
                "; " + model_name + ".txt", "w+") as all_test_acc_file:
            all_test_acc_file.write(all_test_string + '\n')
    finally:
        # close every per-lam file, also when loading/training raised
        for out_file in out_files:
            out_file.close()

    return final_mean_test_acc_per_lam, final_mean_s1_s2_corr
def get_stacked_model_simulation(partition_n,
                                 target_train_size,
                                 target_data_name,
                                 source_data_name,
                                 target_cutoff_dict,
                                 source_cutoff_dict,
                                 model_name,
                                 n_simulations=5,
                                 n_epochs=10,
                                 batch_size=50,
                                 n_stacked_simulations=2,
                                 n_stacked_epochs=20,
                                 model_type="semi_supervised",
                                 data_dir="../data/",
                                 output_dir="../model_output/"):
    """ Simulation for the stacked model - which uses the features extracted from both the target model and the DA model.
    args:
        target_cutoff_dict: holds val acc cutoff for target-only model (key: [target_data_name])(invariant to source domain and model type)
        source_cutoff_dict: holds the val acc cutoff for DA model training (key: [source_data_name]_[target_data_name]_[model_name])
        model_name: name of the domain adaptation model (dann,mmd,coral)
        n_epochs: number of epochs to train the source DA model
        n_stacked_simulations: number of times to run final stacked model to average results for each trained source,target model pairing
        n_stacked_epochs: number of epochs to train the stacked model and the target model
    returns:
        (mean_test_acc, mean_mean_max_corr), each rounded to 4 decimals
    raises:
        ValueError: if model_name is not one of "dann", "mmd", "coral"
    """
    # Fail fast, before loading data: an unknown name would otherwise leave
    # `model` and `best_performance` unbound inside the training loop.
    if model_name not in ("dann", "mmd", "coral"):
        raise ValueError(
            "unknown model_name {!r}; expected 'dann', 'mmd' or 'coral'".format(model_name))

    all_data = load_data(target_dir=target_data_name + "_reviews",
                         source_dirs=[source_data_name + "_reviews"],
                         partition_n=partition_n,
                         target_train_size=target_train_size,
                         data_dir=data_dir)
    target_x, target_y, target_x_val, target_y_val, target_x_test, target_y_test, source_data = all_data
    source_x, source_y = source_data

    source_to_target_text = source_data_name + " -> " + target_data_name + "; " + model_name
    # file for outputting model runs; the with-block closes it even if training raises
    with open(output_dir + "stacked_" + model_type + "/" + source_to_target_text + ".txt", "w+") as out_file:
        print(source_to_target_text + "; " + model_type + "; stacked")

        source_cutoff = source_cutoff_dict[source_data_name + "_" + target_data_name + "_" + model_name]  # used for DA model
        target_cutoff = target_cutoff_dict[target_data_name]  # used for target-only model

        all_model_runs = []  # stores the best performance metrics for each run for later averaging; includes corr. metrics
        for _ in range(n_simulations):
            start = time.time()
            print("-", end='', flush=True)

            # first training the DA model:
            used_cutoff = False
            prev_best_cutoffs = []
            while not used_cutoff:  # keep running the model until the training was properly cutoff
                if model_name == "dann":
                    model = dann_model(input_dim=target_x.shape[-1])
                    run_simulation = get_single_dann_model_simulation
                elif model_name == "mmd":
                    model = mmd_model(input_dim=target_x.shape[-1])
                    run_simulation = get_single_mmd_model_simulation
                else:  # "coral" (validated above)
                    model = coral_model(input_dim=target_x.shape[-1])
                    run_simulation = get_single_coral_model_simulation
                model, best_performance, used_cutoff = run_simulation(
                    model, source_x, source_y, target_x, target_y,
                    target_x_val, target_y_val, target_x_test, target_y_test,
                    n_epochs, batch_size, model_type,
                    optimizer=Adam(lr=0.003), use_cutoff=True,
                    cutoff=source_cutoff)
                if not used_cutoff:  # debugging
                    print("s", end='', flush=True)
                    prev_best_cutoffs.append(best_performance[0])  # val acc
                    if len(prev_best_cutoffs) % 10 == 0:
                        # 10 runs without using the cutoff: reset it to their mean val acc
                        new_source_cutoff = round(get_mean(prev_best_cutoffs), 4)
                        print("[{},{}]".format(source_cutoff, new_source_cutoff), end='', flush=True)
                        source_cutoff = new_source_cutoff
                        prev_best_cutoffs = []

            source_model = Model(inputs=model.layers[0].input, outputs=model.layers[1].output)  # extracting feature extraction layers
            source_model.trainable = False

            # secondly training the target-domain model:
            used_cutoff = False
            while not used_cutoff:  # keep running the model until the training was properly cutoff
                model = compile_target_model(input_dim=target_x.shape[-1], optimizer=Adam(lr=0.003))
                model, _, used_cutoff = get_single_target_model_simulation(
                    model, target_x, target_y, target_x_val, target_y_val,
                    target_x_test, target_y_test, n_stacked_epochs, batch_size,
                    use_cutoff=True, cutoff=target_cutoff)
                if not used_cutoff:  # debugging
                    print("t", end='', flush=True)

            target_model = Model(inputs=model.layers[0].input, outputs=model.layers[1].output)  # extracting feature extraction layers
            target_model.trainable = False

            # getting correlation metrics between the feature representations of the source and target models for the target test set:
            target_model_h = target_model(target_x_test).numpy()
            source_model_h = source_model(target_x_test).numpy()
            corr_matrix = tfp.stats.correlation(target_model_h, source_model_h)
            corr_matrix = np.abs(corr_matrix.numpy())  # neg. and pos. correlation treated as equivalent
            corr_matrix[np.isnan(corr_matrix)] = -np.inf  # nan entries must never win the row maximum
            # row-wise maximum for however many rows there are (no fixed feature width)
            max_corr_values_per_row = np.max(corr_matrix, axis=1)
            # rows that were entirely nan are -inf now; leave them out of the mean
            valid_max_corr_values = max_corr_values_per_row[np.isfinite(max_corr_values_per_row)]
            if valid_max_corr_values.size:
                mean_max_corr_values = round(float(np.mean(valid_max_corr_values)), 4)
            else:
                mean_max_corr_values = float("nan")  # filtered out of the final average below

            # lastly training the stacked model:
            this_simulation_runs = []
            for _ in range(n_stacked_simulations):  # helps account for random initialization
                model = compile_stacked_model(source_model, target_model, input_dim=target_x.shape[-1], optimizer=Adam(lr=0.003))
                _, best_performance, _ = get_single_target_model_simulation(
                    model, target_x, target_y, target_x_val, target_y_val,
                    target_x_test, target_y_test, n_stacked_epochs, batch_size,
                    use_cutoff=False)
                this_simulation_runs.append(best_performance)

            run_val_acc = round(get_mean([dp[0] for dp in this_simulation_runs]), 4)
            run_val_bal_acc = round(get_mean([dp[1] for dp in this_simulation_runs]), 4)
            run_test_acc = round(get_mean([dp[2] for dp in this_simulation_runs]), 4)
            run_test_bal_acc = round(get_mean([dp[3] for dp in this_simulation_runs]), 4)
            all_model_runs.append((run_val_acc, run_val_bal_acc, run_test_acc, run_test_bal_acc, mean_max_corr_values))
            model_run_text = "Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}; CORR: mean:{}\n".format(run_val_acc, run_val_bal_acc, run_test_acc, run_test_bal_acc, mean_max_corr_values)
            out_file.write(model_run_text)
            took = str(round((time.time() - start) / 60, 2))  # minutes
            print(took, end='', flush=True)
        print()

        mean_val_acc = round(get_mean([dp[0] for dp in all_model_runs]), 4)
        mean_val_bal_acc = round(get_mean([dp[1] for dp in all_model_runs]), 4)
        mean_test_acc = round(get_mean([dp[2] for dp in all_model_runs]), 4)
        mean_test_bal_acc = round(get_mean([dp[3] for dp in all_model_runs]), 4)
        mean_mean_max_corr = round(get_mean([dp[4] for dp in all_model_runs if not np.isnan(dp[4])]), 4)

        model_final_text = "AVERAGE: Target; VAL: acc:{} bal_acc:{}; TEST: acc:{} bal_acc:{}; CORR: mean:{}\n".format(mean_val_acc, mean_val_bal_acc, mean_test_acc, mean_test_bal_acc, mean_mean_max_corr)
        out_file.write(model_final_text)

    return mean_test_acc, mean_mean_max_corr