def get_cv_cpv(x: str, percent: float) -> float:
    """Return the ``cv`` value at the PC count chosen for a CPV threshold.

    The number of principal components is read from the module-level
    ``df_selection`` table (row ``dataset_num``, column selected by
    ``percent``).  The results CSV at ``x`` is then searched for a row whose
    ``features_kept`` equals that number; if one exists its ``cv`` value is
    returned.  Otherwise the value is recomputed by fitting the PCA and
    clustering models for that dataset and passing the resulting cluster
    performances to ``variation``.

    Args:
        x: Path to a results CSV.  It is also handed to ``get_dataset_num``
            to derive the dataset number.
        percent: Cumulative-percent-variance threshold.  Exactly ``0.99``
            selects the "Cum. Perc. Var. (0.99)" column; any other value
            selects "Cum. Perc. Var. (0.8)".

    Returns:
        The stored ``cv`` value of the first matching row, or the freshly
        computed ``variation(cluster_performances)`` when no stored value
        is available.
    """
    dataset_num = get_dataset_num(x)

    # Number of PCs recorded in df_selection for the requested CPV threshold.
    # NOTE: every percent other than exactly 0.99 falls back to the 0.8 column.
    if percent == 0.99:
        cpv_column = "Cum. Perc. Var. (0.99)"
    else:
        cpv_column = "Cum. Perc. Var. (0.8)"
    pcs_cpv = df_selection.loc[dataset_num, cpv_column]

    # Look for an already-computed result for this number of PCs.
    df = pd.read_csv(x)
    matching_rows = df.loc[df.features_kept == pcs_cpv]
    try:
        return matching_rows.cv.values[0]
    except (IndexError, AttributeError):
        # IndexError: no row with features_kept == pcs_cpv.
        # AttributeError: the CSV has no "cv" column.
        # In both cases fall through and recompute.  Anything else
        # (including KeyboardInterrupt/SystemExit) is allowed to propagate.
        pass

    # No stored value: recompute it with a fixed seed.
    inputs = Inputs(paths)
    inputs.random_seed = 1969
    inputs.get_df_split(dataset_num)
    pca_model = get_pca_model(inputs)

    # NOTE(review): the meaning of the literal 100 depends on Clustering's
    # signature, which is not visible here -- confirm.
    cluster_model = Clustering(inputs.num_cluster, 100, inputs.random_seed)

    # .loc slicing is label-based and inclusive of the end label; this
    # presumably keeps the first pcs_cpv components if the PC columns are
    # labelled 0..k-1 -- TODO confirm against get_pca_model.
    last_pc_label = pcs_cpv - 1
    cluster_model.fit(pca_model.pcs_train.loc[:, :last_pc_label])
    cluster_prediction = cluster_model.predict(
        pca_model.pcs_test.loc[:, :last_pc_label]
    )

    # model_goal is a module-level setting that is only read here.
    cluster_performances = cluster_model.get_cluster_performances(
        inputs.df_test.copy(),
        cluster_prediction,
        pcs_cpv,
        inputs.num_cluster,
        model_goal=model_goal,
    )
    return variation(cluster_performances)