def accuracy_for_iris():
    """Run the multinomial and gaussian accuracy checks on the iris data.

    Prints a header before each evaluation. The values returned by the
    evaluation helpers are not used here.
    """
    iris = load_iris()
    folds = 3
    data_split, labels_split = tools.cross_validation_split(
        dataset=iris.data, labels=iris.target, folds=folds)

    print("Multinomial:")
    # accuracy_of_multinomial runs tools.cross_validation_split on its own
    # arguments, so it must receive the raw samples and targets; passing the
    # folds computed above would split the data a second time.
    accuracy_of_multinomial(iris.data, iris.target)

    print("Gaussian:")
    # NOTE(review): accuracy_of_gaussian is defined outside this section. If
    # it also splits internally, it should receive iris.data / iris.target
    # as well -- confirm against its definition.
    accuracy_of_gaussian(data_split, labels_split)
def accuracy_of_multinomial(dataset, labels, num_of_bins=100):
    """Cross-validate NaiveBayesMultinomial next to a MultinomialNB reference.

    The data is divided into three folds by ``tools.cross_validation_split``
    inside this function, so callers are expected to pass it unsplit.

    :param dataset: samples forwarded to ``tools.cross_validation_split``
    :param labels: labels forwarded to ``tools.cross_validation_split``
    :param num_of_bins: forwarded to the ``NaiveBayesMultinomial`` constructor
    :return: tuple ``(mean accuracy of NaiveBayesMultinomial,
        mean accuracy of the MultinomialNB reference)``
    """
    fold_data, fold_labels = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=3)

    own_model = NaiveBayesMultinomial(num_of_bins=num_of_bins)
    # Presumably scikit-learn's MultinomialNB, used as the baseline that
    # tools.accuracy_of_method scores alongside the project's classifier.
    reference_model = MultinomialNB(alpha=0.0001, fit_prior=True)

    own_scores, reference_scores = tools.accuracy_of_method(
        fold_data, fold_labels, own_model, sklearn_class=reference_model)

    mean_own = np.mean(own_scores)
    mean_reference = np.mean(reference_scores)
    return mean_own, mean_reference
def accuracy_for_letters():
    """Run the multinomial and gaussian accuracy checks on the letter data.

    Reads ``letter-recognition.data`` through ``tools.load_text_file`` with
    ``label_index=0`` and ``labels_numeric=False``, then prints a header
    before each evaluation. The multinomial check uses 11 bins. The values
    returned by the evaluation helpers are not used here.
    """
    with open('../../data_sources/letter-recognition.data', 'r') as f:
        dataset, labels = tools.load_text_file(f, label_index=0,
                                               labels_numeric=False)

    folds = 3
    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    # accuracy_of_multinomial runs tools.cross_validation_split on its own
    # arguments, so it must receive the raw dataset and labels; passing the
    # folds computed above would split the data a second time.
    accuracy_of_multinomial(dataset, labels, num_of_bins=11)

    print("Gaussian:")
    # NOTE(review): accuracy_of_gaussian is defined outside this section. If
    # it also splits internally, it should receive dataset / labels as
    # well -- confirm against its definition.
    accuracy_of_gaussian(data_split, labels_split)
def accuracy_for_cancer():
    """Run the multinomial and gaussian accuracy checks on the cancer data.

    Reads ``kag_risk_factors_cervical_cancer.csv`` through
    ``tools.load_text_file`` with ``label_index=28`` and ``dtype=float``,
    then prints a header before each evaluation. The values returned by the
    evaluation helpers are not used here.
    """
    with open('../../data_sources/kag_risk_factors_cervical_cancer.csv',
              'r') as f:
        dataset, labels = tools.load_text_file(f, label_index=28, dtype=float)

    folds = 3
    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    # accuracy_of_multinomial runs tools.cross_validation_split on its own
    # arguments, so it must receive the raw dataset and labels; passing the
    # folds computed above would split the data a second time.
    accuracy_of_multinomial(dataset, labels)

    print("Gaussian:")
    # NOTE(review): accuracy_of_gaussian is defined outside this section. If
    # it also splits internally, it should receive dataset / labels as
    # well -- confirm against its definition.
    accuracy_of_gaussian(data_split, labels_split)
def accuracy_for_trees():
    """Run the multinomial and gaussian accuracy checks on the covtype data.

    Reads ``covtype.csv`` through ``tools.load_text_file`` with
    ``label_index=-1``, ``dtype=float`` and ``labels_numeric=True``, then
    prints a header before each evaluation. The values returned by the
    evaluation helpers are not used here.
    """
    with open('../../data_sources/covtype.csv', 'r') as f:
        dataset, labels = tools.load_text_file(f, label_index=-1, dtype=float,
                                               labels_numeric=True)

    folds = 3
    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    # accuracy_of_multinomial runs tools.cross_validation_split on its own
    # arguments, so it must receive the raw dataset and labels; passing the
    # folds computed above would split the data a second time.
    accuracy_of_multinomial(dataset, labels)

    print("Gaussian:")
    # NOTE(review): accuracy_of_gaussian is defined outside this section. If
    # it also splits internally, it should receive dataset / labels as
    # well -- confirm against its definition.
    accuracy_of_gaussian(data_split, labels_split)
def accuracies_of_different_methods(self, dataset, labels, algorithm_classes,
                                    algorithm_kwargs):
    """Compute the mean cross-validated accuracy of several algorithms.

    The dataset must not be split beforehand: ``abs(dataset)`` is divided
    into three folds here. Labels must be supplied separately from the
    samples. ``self`` is not used by the body.

    :param dataset: unsplit samples; ``abs()`` is applied before splitting
    :param labels: labels matching ``dataset``
    :param algorithm_classes: sequence of classifier classes that have not
        been instantiated yet
    :param algorithm_kwargs: sequence indexed like ``algorithm_classes``;
        each entry holds the keyword arguments used to instantiate the
        class at the same position
    :return: numpy array with one mean accuracy per algorithm, in the order
        of ``algorithm_classes``
    """
    fold_data, fold_labels = tools.cross_validation_split(
        dataset=abs(dataset), labels=labels, folds=3)

    mean_scores = np.zeros(len(algorithm_classes))
    for position, algorithm_cls in enumerate(algorithm_classes):
        init_arguments = algorithm_kwargs[position]
        model = algorithm_cls(**init_arguments)
        # Only the first element of the returned pair is needed here.
        fold_scores, _ = tools.accuracy_of_method(fold_data, fold_labels,
                                                  model)
        mean_scores[position] = np.mean(fold_scores)

    return mean_scores