Example no. 1
0
                               0.2), int(len(kmeans_best.get_clustered) *
                                         0.8), 100)
        ]
        for train_l in train_l_list:
            forest = Forest(kmeans_best.get_clustered, 'clusters', train_l)
            forest.train()
            forest.predict()
            analyzer.make_features_rate(forest.get_feature_importances,
                                        kmeans_best.get_data.columns, train_l)
            forest_test_scores.append(
                forest.collect_test_score('cross_validate'))
            forest_train_scores.append(
                forest.collect_train_score('cross_validate'))
        analyzer.probability_per_cluster(kmeans_best.get_test)
        analyzer.normal_check()
        analyzer.calc_predictors_interval()
        test_len_list = [
            len(dp.get_dataset_no_useless) - i for i in train_l_list
        ]

        if type(forest_test_scores[0]) == numpy.ndarray:
            train_val = list(zip(*forest_train_scores))
            test_val = list(zip(*forest_test_scores))
            for counter, metrics in enumerate(train_val):
                vis.make_overfit_check_plot(
                    train_val[counter], test_val[counter], train_l_list,
                    f'forest_test/random_forest_for_best_clustering_{counter}_crossval'
                )
        else:
            vis.make_overfit_check_plot(
                forest_train_scores, forest_test_scores, train_l_list,