Example #1
0
def get_cv_cpv(x: str, percent: float) -> float:
    """Return the ``cv`` value for the PC count chosen by a CPV threshold.

    The number of principal components (PCs) is read from the module-level
    ``df_selection`` table for the dataset number derived from ``x``. If the
    results CSV at ``x`` has a row whose ``features_kept`` equals that number,
    the row's ``cv`` value is returned. Otherwise the value is recomputed by
    fitting a clustering model on the leading PCs and applying ``variation``
    to the resulting cluster performances.

    Args:
        x: Path to a results CSV file; also passed to ``get_dataset_num``.
        percent: Cumulative-percentage-of-variance threshold. A value of
            0.99 selects the "Cum. Perc. Var. (0.99)" column; any other
            value selects "Cum. Perc. Var. (0.8)".

    Returns:
        The stored ``cv`` value, or the recomputed ``variation`` of the
        cluster performances when no stored value is available.
    """
    import math

    # Get dataset number
    dataset_num = get_dataset_num(x)

    # Get number of pcs for CPV > 0.8 and CPV > 0.99.
    # isclose() keeps a computed 0.99 (e.g. 1 - 0.01) on the 0.99 branch.
    if math.isclose(percent, 0.99):
        pcs_cpv = df_selection.loc[dataset_num, "Cum. Perc. Var. (0.99)"]
    else:
        pcs_cpv = df_selection.loc[dataset_num, "Cum. Perc. Var. (0.8)"]

    # Get df_results
    df = pd.read_csv(x)
    idx = df.features_kept == pcs_cpv
    try:
        return df.loc[idx].cv.values[0]
    except (IndexError, AttributeError):
        # IndexError: no row was stored for this number of PCs.
        # AttributeError: the results file has no ``cv`` column.
        # Either way, fall through and recompute the value below; doing so
        # outside the handler keeps unrelated failures from being reported
        # as "during handling of another exception".
        pass

    inputs = Inputs(paths)
    inputs.random_seed = 1969
    inputs.get_df_split(dataset_num)

    pca_model = get_pca_model(inputs)

    cluster_model = Clustering(inputs.num_cluster, 100, inputs.random_seed)
    # NOTE(review): ``.loc`` slices by label and includes the end label, so
    # ``:pcs_cpv - 1`` presumably selects the first ``pcs_cpv`` PCs, assuming
    # the PC columns are labelled 0..n-1 -- confirm against get_pca_model.
    cluster_model.fit(pca_model.pcs_train.loc[:, :pcs_cpv - 1])
    cluster_prediction = cluster_model.predict(
        pca_model.pcs_test.loc[:, :pcs_cpv - 1])
    cluster_performances = cluster_model.get_cluster_performances(
        inputs.df_test.copy(),
        cluster_prediction,
        pcs_cpv,
        inputs.num_cluster,
        model_goal=model_goal)
    return variation(cluster_performances)