def find_best_single_feature_parameters(self, dataset):
    """Find, per suggested feature, the discretization that beats the baseline.

    Every feature in ``dataset.suggested_discretize_features`` is handled
    independently. Each candidate produced by
    ``self.generate_feature_parameters(feature)`` is passed to
    ``DataModel.generate_from_file`` as ``discretize_params`` and scored by
    repeated stratified k-fold evaluation: ``self.best_fold[dataset][0]`` is
    handed to ``generate_k_folds_stratified`` and the whole split is repeated
    ``self.best_fold[dataset][1]`` times. A candidate's score is the mean of
    all values returned by ``ModelEvaluator.get_f_score()``.

    A candidate wins only when its mean is strictly greater than both the
    baseline ``self.best_fcs[dataset]`` and every earlier candidate of the
    same feature. The first element of the winning candidate is appended to
    ``self.best_discretize_feature_params[dataset]``; nothing is appended
    when no candidate beats the baseline. ``self.best_fcs`` is only read.

    Progress messages are emitted with ``logging.error``, so they are shown
    even when logging has not been configured. The full candidate list is
    logged at DEBUG level instead of being printed to stdout.

    Args:
        dataset: Object exposing ``suggested_discretize_features``; also used
            as the key into ``self.best_fcs``, ``self.best_fold`` and
            ``self.best_discretize_feature_params``.

    Returns:
        None. Results are stored on ``self`` and written to the log.
    """
    for feature in dataset.suggested_discretize_features:
        permutations = self.generate_feature_parameters(feature)
        # Diagnostic dump only: keep it out of stdout and let the logging
        # configuration decide whether it is shown.
        logging.debug("[Parameters Tester][%s][%s] Candidate permutations: %s", dataset, feature, permutations)
        best_mean_fcs = self.best_fcs[dataset]
        best_perm = None
        for perm_no, perm in enumerate(permutations, start=1):
            logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Current permutation: {}".format(dataset, feature, perm_no, perm))
            dm = DataModel.generate_from_file(dataset, discretize_params=perm)
            classes_list = dm.get_classes_list()
            f_scores = []
            for _ in range(self.best_fold[dataset][1]):
                for train_set, test_set in dm.generate_k_folds_stratified(self.best_fold[dataset][0]):
                    model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                    model_evaluator.evaluate()
                    score = model_evaluator.get_f_score()
                    f_scores.append(score)
                    # len(f_scores) is the 1-based index of the run just finished.
                    logging.error("[Parameters Tester][{}][{}][Perm {:03d}][{:03d}] FCS: {}".format(dataset, feature, perm_no, len(f_scores), score))
            f_score_mean = sum(f_scores) / len(f_scores)
            logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Best FCS: {}, Mean FCS {}".format(dataset, feature, perm_no, max(f_scores), f_score_mean))
            if f_score_mean > best_mean_fcs:
                # Only the first element of the candidate is kept.
                # NOTE(review): presumably each single-feature candidate holds
                # exactly one parameter object -- confirm against
                # generate_feature_parameters.
                best_perm = perm[0]
                best_mean_fcs = f_score_mean
        if best_perm is not None:
            self.best_discretize_feature_params[dataset].append(best_perm)
        logging.error("[Parameters Tester][{}][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, feature, best_mean_fcs, best_perm))
Beispiel #2
0
def main():
    """Load the Pima diabetes dataset with three binned features and print the k-means models.

    ``DataModel.generate_from_file`` is called with ``smooth=True`` and one
    ``DiscretizeParam`` (``kbins_discretize``, 10 buckets) for each of 'Age',
    'SkinThickness' and 'Pregnancies'. Afterwards ``Discretizer.kmean_models``
    is printed to stdout.
    """
    binned_features = ('Age', 'SkinThickness', 'Pregnancies')
    bucket_count = 10
    params = [
        DiscretizeParam(feature_name, kbins_discretize, bucket_count)
        for feature_name in binned_features
    ]
    # The returned model is not used directly.
    # NOTE(review): presumably loading it is what populates
    # Discretizer.kmean_models -- confirm.
    _data_model = DataModel.generate_from_file(
        PIMA_DIABETES_DATASET,
        smooth=True,
        discretize_params=params,
    )
    print(Discretizer.kmean_models)
 def find_best_fold(self, dataset):
     """Evaluate every entry of ``FOLDS`` on *dataset* and remember the best one.

     The dataset is loaded once through ``DataModel.generate_from_file``.
     For each ``fold`` in ``FOLDS``, ``fold[0]`` is passed to
     ``generate_k_folds_stratified`` and the whole split is repeated
     ``fold[1]`` times; every train/test pair is scored with
     ``ModelEvaluator``. The mean of the collected scores is recorded via
     ``self.append_result`` (with ``'permutation'`` set to -1) and, when it is
     strictly greater than ``self.best_fcs[dataset]``, both
     ``self.best_fold[dataset]`` and ``self.best_fcs[dataset]`` are updated.

     Progress and the final outcome are written with ``logging.error``.

     Args:
         dataset: Passed to ``DataModel.generate_from_file``; its ``name``
             attribute is stored in the result rows and it is used as the key
             into ``self.best_fcs`` and ``self.best_fold``.

     Returns:
         None.
     """
     data_model = DataModel.generate_from_file(dataset)
     class_labels = data_model.get_classes_list()
     for fold in FOLDS:
         split_arg = fold[0]
         repeat_count = fold[1]
         scores = []
         for _ in range(repeat_count):
             for train_part, test_part in data_model.generate_k_folds_stratified(split_arg):
                 evaluator = ModelEvaluator(train_part, test_part, class_labels)
                 evaluator.evaluate()
                 latest = evaluator.get_f_score()
                 scores.append(latest)
                 # len(scores) doubles as the 1-based run counter.
                 logging.error("[Parameters Tester][{}][CV{:02d}][{:03d}] FCS: {}".format(dataset, split_arg, len(scores), latest))
         mean_score = sum(scores) / len(scores)
         logging.error("[Parameters Tester][{}][CV{:02d}] Best FCS: {}, Mean FCS {}".format(dataset, split_arg, max(scores), mean_score))
         record = {
             'dataset': dataset.name,
             'fold': split_arg,
             'f_score': mean_score,
             'permutation': -1,
         }
         self.append_result(record)
         if mean_score > self.best_fcs[dataset]:
             self.best_fold[dataset] = fold
             self.best_fcs[dataset] = mean_score
     logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best fold: {}".format(dataset, self.best_fcs[dataset], self.best_fold[dataset]))
 def find_best_parameters(self, dataset):
     """Score every permutation of discretization parameters and keep the best.

     Each permutation from ``self.generate_permutations(dataset)`` is passed
     to ``DataModel.generate_from_file`` as ``discretize_params`` and scored
     by repeated stratified k-fold evaluation driven by
     ``self.best_fold[dataset]`` (index 0 goes to
     ``generate_k_folds_stratified``, index 1 is the repeat count). The mean
     score is recorded through ``self.append_result`` once per parameter in
     the permutation. When the mean is strictly greater than
     ``self.best_fcs[dataset]``, the permutation is stored in
     ``self.best_discretize_parameters[dataset]`` and the best score is
     updated.

     Progress and the final outcome are written with ``logging.error``.

     Args:
         dataset: Passed to ``DataModel.generate_from_file``; its ``name``
             attribute is stored in the result rows and it is used as the key
             into ``self.best_fcs``, ``self.best_fold`` and
             ``self.best_discretize_parameters``.

     Returns:
         None.
     """
     candidates = self.generate_permutations(dataset)
     for perm_no, perm in enumerate(candidates, start=1):
         logging.error("[Parameters Tester][{}][Perm {:08d}] Current permutation: {}".format(dataset, perm_no, perm))
         data_model = DataModel.generate_from_file(dataset, discretize_params=perm)
         class_labels = data_model.get_classes_list()
         scores = []
         for _ in range(self.best_fold[dataset][1]):
             for train_part, test_part in data_model.generate_k_folds_stratified(self.best_fold[dataset][0]):
                 evaluator = ModelEvaluator(train_part, test_part, class_labels)
                 evaluator.evaluate()
                 latest = evaluator.get_f_score()
                 scores.append(latest)
                 # len(scores) doubles as the 1-based run counter.
                 logging.error("[Parameters Tester][{}][Perm {:08d}][{:03d}] FCS: {}".format(dataset, perm_no, len(scores), latest))
         mean_score = sum(scores) / len(scores)
         logging.error("[Parameters Tester][{}][Perm {:08d}] Best FCS: {}, Mean FCS {}".format(dataset, perm_no, max(scores), mean_score))
         # One result row per parameter, all sharing the permutation's mean.
         for param in perm:
             record = {
                 'dataset': dataset.name,
                 'fold': self.best_fold[dataset][0],
                 'f_score': mean_score,
                 'permutation': perm_no,
                 'feature': param.feature_name,
                 'function': param.discretize_function.__name__,
                 'bins': param.buckets_amount,
             }
             self.append_result(record)
         if mean_score > self.best_fcs[dataset]:
             self.best_discretize_parameters[dataset] = perm
             self.best_fcs[dataset] = mean_score
     logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, self.best_fcs[dataset], self.best_discretize_parameters[dataset]))
Beispiel #5
0
def visualize(dataset):
    """Load *dataset* and hand it to ``visualize_histograms``.

    The model comes from ``DataModel.generate_from_file(dataset)``; the
    second argument is the string ``'histograms-'`` followed by
    ``str(dataset)``.
    NOTE(review): presumably that string is an output name or title --
    confirm against ``visualize_histograms``.
    """
    data_model = DataModel.generate_from_file(dataset)
    label = 'histograms-{}'.format(str(dataset))
    visualize_histograms(data_model, label)