def test_auc_on_monks(self):
    """AUC returns exactly one score per learner for both cross-validation
    and proportion-test results on the monks-1 data set."""
    dataset = data.Table("monks-1")
    cv_results = testing.cross_validation([self.learner], dataset, folds=5)
    pt_results = testing.proportion_test([self.learner], dataset, times=1)
    for results in (cv_results, pt_results):
        scores = scoring.AUC(results)
        self.assertEqual(len(scores), 1)
def test_auc_on_iris_weighted_one_against_all(self):
    """Weighted one-against-all multiclass AUC yields a single score
    for a single learner on iris."""
    iris = data.Table("iris")
    results = testing.cross_validation([self.learner], iris, folds=5)
    scores = scoring.AUC(results,
                         multiclass=scoring.AUC.WeightedOneAgainstAll)
    self.assertEqual(len(scores), 1)
def test_split_by_classifier(self):
    """split_by_classifiers must produce one result object per classifier,
    preserving shared metadata, and CA/AUC computed on each split must
    match the corresponding entry of the combined scores."""
    learners = [random_learner, random_learner, random_learner]
    ds = data.Table("lenses")
    cv = testing.cross_validation(learners, ds, folds=5, store_examples=True)
    cv_split = scoring.split_by_classifiers(cv)
    ca_scores = scoring.CA(cv)
    auc_scores = scoring.AUC(cv)
    for i, cv1 in enumerate(cv_split):
        # Metadata shared across classifiers must carry over unchanged.
        self.assertEqual(cv1.class_values, cv.class_values)
        self.assertEqual(cv1.classifier_names, [cv.classifier_names[i]])
        self.assertEqual(cv1.number_of_iterations, cv.number_of_iterations)
        self.assertEqual(cv1.number_of_learners, 1)
        self.assertEqual(cv1.base_class, cv.base_class)
        self.assertEqual(cv1.weights, cv.weights)
        self.assertEqual(len(cv1.results), len(cv.results))
        self.assertEqual(cv1.examples, cv.examples)
        # Scores on the split must equal the combined per-classifier scores.
        ca_one = scoring.CA(cv1)[0]
        auc_one = scoring.AUC(cv1)[0]
        self.assertAlmostEqual(ca_scores[i], ca_one, delta=1e-10)
        # Fix: assertAlmostEquals is a deprecated alias of assertAlmostEqual
        # (removed in Python 3.12); use the canonical name consistently.
        self.assertAlmostEqual(auc_scores[i], auc_one, delta=1e-10)
def predict_wine_quality(table, n):
    """Discretize *table* into *n* equal-width bins, rebuild its domain with
    the first column as the class variable, then train a neural-network
    classifier and print 10-fold cross-validation accuracy and AUC."""
    # Bin every continuous variable into n equal-width intervals.
    discretizer = Discretize()
    discretizer.method = discretize.EqualWidth(n=n)
    table = discretizer(table)

    # Rebuild the domain: column 0 becomes the class, the rest are features.
    attrs = list(table.domain[1:])
    target = table.domain[0]
    table = Table.from_table(domain=Domain(attrs, target), source=table)

    # Train an NN learner and score it with 10-fold cross-validation.
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    eval_results = CrossValidation(table, [nn_learner], k=10)
    print("Accuracy of cross validation: {:.3f}".format(
        scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
# Compare the "easy" SVM wrapper against the plain SVM learner on the
# vehicle data set, reporting CA and AUC from 5-fold cross-validation.
from Orange import data
from Orange.classification import svm

vehicle = data.Table("vehicle.tab")
svm_easy = svm.SVMLearnerEasy(name="svm easy", folds=3)
svm_normal = svm.SVMLearner(name="svm")
learners = [svm_easy, svm_normal]

from Orange.evaluation import testing, scoring

results = testing.cross_validation(learners, vehicle, folds=5)
# Fix: the Python 2 print statement is a syntax error on Python 3 and
# inconsistent with the rest of the file; with a single argument,
# print(x) produces identical output on both Python 2 and 3.
print("Name CA AUC")
for learner, ca, auc in zip(learners, scoring.CA(results),
                            scoring.AUC(results)):
    print("%-8s %.2f %.2f" % (learner.name, ca, auc))
print("Applying learner on total data records {}".format(len(norm_data_table))) #Create a NN classifier learner then = datetime.datetime.now() ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),max_iter=4000 ) ann_classifier = ann_learner(norm_data_table) #Do the 10 folds cross validation eval_results = CrossValidation(norm_data_table, [ann_learner], k=10) now = datetime.datetime.now() tdelta = now - then print("Processing completed after: {} ".format(tdelta)) #Accuracy and area under (receiver operating characteristic, ROC) curve (AUC) print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0])) print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0])) #Remove minority classes value_counts = pd.Series(raw_data_table[:,0]).value_counts()[pd.Series(raw_data_table[:,0]).value_counts() > 10] value_counts1 = pd.Series(raw_data_table[:,0]).value_counts()[pd.Series(raw_data_table[:,0]).value_counts() < 10] print(value_counts1) first_elts = [x[0] for x in value_counts.keys().tolist()] sel = [i for i, d in enumerate(raw_data_table) if d["quality"] in first_elts] subset_data = raw_data_table[sel] subset_data_table = Table.from_table(domain=wine_domain, source=subset_data) norm_subset_data_table = normalize_table(subset_data_table) print("Applying learner on filtered data recordset, total data records {}".format(len(norm_subset_data_table))) then = datetime.datetime.now() ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),max_iter=4000 ) ann_classifier = ann_learner(norm_subset_data_table)
# Export a DataFrame to CSV, reload it as an Orange Table, train a
# scikit-learn decision tree on an 80/20 split, and report CV scores.
# NOTE(review): datae1, output, p (presumably pandas), Table, Domain,
# CrossValidation and scoring are defined earlier in the file/session.
datae1.to_csv(output, header=True, sep=",")
path1 = ("C:\\Users\\acer\\Desktop\\friends\\export.csv")
datae = p.read_csv(path1)
''' now fro b part'''
from Orange.classification import SklTreeLearner
td = Table.from_file("C:\\Users\\acer\\Desktop\\friends\\export.csv")
#print(data1.domain)
#print(d)
# Rebuild the domain: variables 1..6 are features, variable 7 is the class.
feature_vars = list(td.domain.variables[1:])
class_label_var = td.domain.variables[7]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
#print(d_dis[0])
td = Table.from_table(domain=md, source=td)
#print(.domain.variables[1:])
# 80/20 train/test split; 1360 is hard-coded as ~80% of the row count
# (the print below shows the computed 80% figure for comparison).
n1 = td.approx_len()
print(n1 * 80 / 100)
train_data_set = td[:1360]
test_data_set = td[1360:]
#print(train_data_set.domain)
#print(test_data_set.domain)
# Train the tree on the training split, then 10-fold CV on the full table.
tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
def test_auc_on_iris_by_weighted_pairs(self):
    """By-weighted-pairs multiclass AUC yields a single score
    for a single learner on iris."""
    iris = data.Table("iris")
    results = testing.cross_validation([self.learner], iris, folds=5)
    scores = scoring.AUC(results, multiclass=scoring.AUC.ByWeightedPairs)
    self.assertEqual(len(scores), 1)
def test_auc_on_iris(self):
    """Default multiclass AUC yields a single score for one learner."""
    iris = data.Table("iris")
    results = testing.cross_validation([self.learner], iris, folds=5)
    self.assertEqual(len(scoring.AUC(results)), 1)
# Discretize the white-wine quality column, train an NN classifier with
# 10-fold CV, then attempt to strip rare quality classes and repeat.
# NOTE(review): Table, Domain, NNClassificationLearner, CrossValidation
# and scoring are presumably imported earlier in the file — verify.
import Orange.preprocess
import pandas as pd
data = Table.from_file('white wine.csv')
# Discretize column 0 (quality) into 3 equal-width bins for the class.
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
newCLabel = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
CLabelSet = newCLabel.domain.variables
WineDomain = Domain(FeatureV, CLabelSet)
data = Table.from_table(domain=WineDomain, source=data)
TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)
evalR = CrossValidation(data, [TLearner], k=10)
print("Accuracy: {:.3f}".format(scoring.CA(evalR)[0]))
print("AUC: {:.3f}".format(scoring.AUC(evalR)[0]))
# Count rows in the rare classes (result unused beyond this line).
n = sum(1 for d in data if (d["quality"] == 1.0 or d["quality"] == 2.0 or d["quality"] == 9.0))
# NOTE(review): SubSet selects quality < 4 or > 8, which does not match
# the classes counted in n (== 1, 2, 9) — confirm intended filter.
SubSet = Table(data.domain, [d for d in data if (d["quality"] < 4.0 or d["quality"] > 8.0)])
# NOTE(review): deleting rows from `data` keyed by a row instance while
# iterating SubSet — semantics of Table.__delitem__ with an instance
# argument unverified here; looks fragile, confirm against Orange docs.
for d in SubSet:
    del data[d]
# Re-discretize and rebuild the domain on the filtered table.
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
DataSet = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
newCLabelSet = DataSet.domain.variables
wineDomain = Domain(FeatureV, newCLabelSet)
data = Table.from_table(domain=wineDomain, source=data)
TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)
def predict_cal_acc(test_dataset, data_table, tree_learner):
    """Predict on *test_dataset* and print 10-fold cross-validation
    accuracy and AUC of *tree_learner* on *data_table*.

    Returns the predictions for *test_dataset*.
    """
    # NOTE(review): ``decision_tree`` is a module-level global, not built
    # from ``tree_learner`` — presumably trained elsewhere in the script;
    # verify against the caller.
    y_pred = decision_tree(test_dataset)
    eval_results = CrossValidation(data_table, [tree_learner], k=10)
    print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
    # Fix: y_pred was computed but silently discarded; return it so the
    # prediction step is not dead code (backward-compatible — callers
    # ignoring the return value are unaffected).
    return y_pred