Esempio n. 1
0
def accuracy_for_iris():
    """Run the multinomial and Gaussian accuracy checks on the iris dataset.

    The iris data and targets are split into 3 cross-validation folds via
    ``tools.cross_validation_split`` and the resulting splits are handed to
    ``accuracy_of_multinomial`` and ``accuracy_of_gaussian``, each preceded
    by a printed heading. Nothing is returned.

    NOTE(review): the return values of both accuracy helpers are discarded
    here, and the helpers receive already-split data — confirm that this is
    what the helpers expect.
    """
    bunch = load_iris()
    fold_data, fold_labels = tools.cross_validation_split(
        dataset=bunch.data,
        labels=bunch.target,
        folds=3,
    )

    print("Multinomial:")
    accuracy_of_multinomial(fold_data, fold_labels)

    print("Gaussian:")
    accuracy_of_gaussian(fold_data, fold_labels)
def accuracy_of_multinomial(dataset, labels, num_of_bins=100, folds=3):
    """Compare the project's multinomial naive Bayes against scikit-learn's.

    The dataset is split into cross-validation folds, then both
    ``NaiveBayesMultinomial`` and ``sklearn``'s ``MultinomialNB`` are
    evaluated on those folds through ``tools.accuracy_of_method``.

    :param dataset: feature data passed to ``tools.cross_validation_split``
    :param labels: labels passed to ``tools.cross_validation_split``
    :param num_of_bins: forwarded to ``NaiveBayesMultinomial``
    :param folds: number of cross-validation folds (default 3, the value
        that was previously hard-coded)
    :return: tuple ``(mean accuracy of NaiveBayesMultinomial,
        mean accuracy of MultinomialNB)``
    """
    data_split, labels_split = tools.cross_validation_split(dataset=dataset,
                                                            labels=labels,
                                                            folds=folds)
    nb = NaiveBayesMultinomial(num_of_bins=num_of_bins)
    # Reference implementation used as the baseline for comparison.
    clf = MultinomialNB(alpha=0.0001, fit_prior=True)

    accuracies, sklearn_accuracies = tools.accuracy_of_method(
        data_split, labels_split, nb, sklearn_class=clf)
    return np.mean(accuracies), np.mean(sklearn_accuracies)
Esempio n. 3
0
def accuracy_for_letters():
    """Run the multinomial and Gaussian accuracy checks on the letter data.

    Reads ``letter-recognition.data`` with ``tools.load_text_file`` (label in
    column 0, labels flagged as non-numeric), splits the result into 3
    cross-validation folds and hands the splits to
    ``accuracy_of_multinomial`` (with ``num_of_bins=11``) and
    ``accuracy_of_gaussian``, each preceded by a printed heading.
    Nothing is returned.

    NOTE(review): the helpers' return values are discarded and they receive
    already-split data — confirm that this is what the helpers expect.
    """
    with open('../../data_sources/letter-recognition.data', 'r') as source:
        features, targets = tools.load_text_file(
            source, label_index=0, labels_numeric=False)
        fold_data, fold_labels = tools.cross_validation_split(
            dataset=features, labels=targets, folds=3)

        print("Multinomial:")
        accuracy_of_multinomial(fold_data, fold_labels, num_of_bins=11)

        print("Gaussian:")
        accuracy_of_gaussian(fold_data, fold_labels)
Esempio n. 4
0
def accuracy_for_cancer():
    """Run the multinomial and Gaussian accuracy checks on the cancer data.

    Reads ``kag_risk_factors_cervical_cancer.csv`` with
    ``tools.load_text_file`` (label in column 28, values loaded as float),
    splits the result into 3 cross-validation folds and hands the splits to
    ``accuracy_of_multinomial`` and ``accuracy_of_gaussian``, each preceded
    by a printed heading. Nothing is returned.

    NOTE(review): the helpers' return values are discarded and they receive
    already-split data — confirm that this is what the helpers expect.
    """
    csv_path = '../../data_sources/kag_risk_factors_cervical_cancer.csv'
    with open(csv_path, 'r') as source:
        features, targets = tools.load_text_file(
            source, label_index=28, dtype=float)
        fold_data, fold_labels = tools.cross_validation_split(
            dataset=features, labels=targets, folds=3)

        print("Multinomial:")
        accuracy_of_multinomial(fold_data, fold_labels)

        print("Gaussian:")
        accuracy_of_gaussian(fold_data, fold_labels)
Esempio n. 5
0
def accuracy_for_trees():
    """Run the multinomial and Gaussian accuracy checks on the covtype data.

    Reads ``covtype.csv`` with ``tools.load_text_file`` (label in the last
    column, values loaded as float, labels flagged as numeric), splits the
    result into 3 cross-validation folds and hands the splits to
    ``accuracy_of_multinomial`` and ``accuracy_of_gaussian``, each preceded
    by a printed heading. Nothing is returned.

    NOTE(review): the helpers' return values are discarded and they receive
    already-split data — confirm that this is what the helpers expect.
    """
    with open('../../data_sources/covtype.csv', 'r') as source:
        features, targets = tools.load_text_file(
            source, label_index=-1, dtype=float, labels_numeric=True)
        fold_data, fold_labels = tools.cross_validation_split(
            dataset=features, labels=targets, folds=3)

        print("Multinomial:")
        accuracy_of_multinomial(fold_data, fold_labels)

        print("Gaussian:")
        accuracy_of_gaussian(fold_data, fold_labels)
Esempio n. 6
0
    def accuracies_of_different_methods(self, dataset, labels, algorithm_classes, algorithm_kwargs):
        """
        Evaluate several algorithms on one dataset and return their mean accuracies.

        The dataset must not be split beforehand; it is split here into 3
        cross-validation folds, and the labels must be supplied separately.
        ``abs()`` is applied to the dataset before splitting
        (NOTE(review): presumably to keep feature values non-negative — confirm).

        :param dataset: un-split feature data
        :param labels: labels belonging to ``dataset``
        :param algorithm_classes: sequence of uninstantiated algorithm classes
        :param algorithm_kwargs: constructor keyword arguments, looked up by the
            position of the corresponding entry in ``algorithm_classes``
        :return: np array holding one mean accuracy per algorithm, in the order
            of ``algorithm_classes``
        """
        fold_data, fold_labels = tools.cross_validation_split(
            dataset=abs(dataset), labels=labels, folds=3)

        mean_scores = []
        for position, algorithm_cls in enumerate(algorithm_classes):
            model = algorithm_cls(**algorithm_kwargs[position])
            # Only the first returned sequence of accuracies is used; the
            # second element of the pair is ignored.
            own_scores, _ = tools.accuracy_of_method(fold_data, fold_labels, model)
            mean_scores.append(np.mean(own_scores))
        return np.array(mean_scores, dtype=float)