Example #1
    def test_auc_on_monks(self):
        ds = data.Table("monks-1")
        cv = testing.cross_validation([self.learner], ds, folds=5)
        pt = testing.proportion_test([self.learner], ds, times=1)

        auc = scoring.AUC(cv)
        self.assertEqual(len(auc), 1)

        auc = scoring.AUC(pt)
        self.assertEqual(len(auc), 1)
Example #2
    def test_auc_on_iris_weighted_one_against_all(self):
        ds = data.Table("iris")
        test_results = testing.cross_validation([self.learner], ds, folds=5)
        auc = scoring.AUC(test_results,
                          multiclass=scoring.AUC.WeightedOneAgainstAll)

        self.assertEqual(len(auc), 1)
Example #3
    def test_split_by_classifier(self):
        learners = [random_learner, random_learner, random_learner]
        ds = data.Table("lenses")
        cv = testing.cross_validation(learners,
                                      ds,
                                      folds=5,
                                      store_examples=True)
        cv_split = scoring.split_by_classifiers(cv)
        ca_scores = scoring.CA(cv)
        auc_scores = scoring.AUC(cv)
        for i, cv1 in enumerate(cv_split):
            self.assertEqual(cv1.class_values, cv.class_values)
            self.assertEqual(cv1.classifier_names, [cv.classifier_names[i]])
            self.assertEqual(cv1.number_of_iterations, cv.number_of_iterations)
            self.assertEqual(cv1.number_of_learners, 1)
            self.assertEqual(cv1.base_class, cv.base_class)
            self.assertEqual(cv1.weights, cv.weights)
            self.assertEqual(len(cv1.results), len(cv.results))
            self.assertEqual(cv1.examples, cv.examples)

            ca_one = scoring.CA(cv1)[0]
            auc_one = scoring.AUC(cv1)[0]
            self.assertAlmostEqual(ca_scores[i], ca_one, delta=1e-10)
            self.assertAlmostEqual(auc_scores[i], auc_one, delta=1e-10)
Example #4
# Imports assumed by this snippet (Orange 3)
from Orange.data import Table, Domain
from Orange.preprocess import Discretize, discretize
from Orange.classification import NNClassificationLearner
from Orange.evaluation import CrossValidation, scoring


def predict_wine_quality(table, n):
    # Make the continuous variables discrete
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=n)
    table = disc(table)
    #Define domain
    feature_vars = list(table.domain[1:])
    class_label_var = table.domain[0]
    wine_domain = Domain(feature_vars, class_label_var)
    table = Table.from_table(domain=wine_domain, source=table)
    # Construct the learner and evaluate it with 10-fold cross-validation
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    eval_results = CrossValidation(table, [nn_learner], k=10)
    print("Accuracy of cross validation: {:.3f}".format(
        scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
Example #5
# Orange 2 (Python 2) example
from Orange import data
from Orange.classification import svm

vehicle = data.Table("vehicle.tab")

svm_easy = svm.SVMLearnerEasy(name="svm easy", folds=3)
svm_normal = svm.SVMLearner(name="svm")
learners = [svm_easy, svm_normal]

from Orange.evaluation import testing, scoring

results = testing.cross_validation(learners, vehicle, folds=5)
print "Name     CA        AUC"
for learner, CA, AUC in zip(learners, scoring.CA(results),
                            scoring.AUC(results)):
    print "%-8s %.2f      %.2f" % (learner.name, CA, AUC)
Example #6
print("Applying learner on total data records {}".format(len(norm_data_table)))

# Create a neural-network classifier learner
then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10,), max_iter=4000)
ann_classifier = ann_learner(norm_data_table)

# Do 10-fold cross-validation
eval_results = CrossValidation(norm_data_table, [ann_learner], k=10)
now = datetime.datetime.now()
tdelta = now - then
print("Processing completed after: {} ".format(tdelta))

# Accuracy and area under the ROC (receiver operating characteristic) curve (AUC)
print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

# Remove minority classes: keep only quality values with more than 10 records
quality_counts = pd.Series(raw_data_table[:, 0]).value_counts()
value_counts = quality_counts[quality_counts > 10]
value_counts1 = quality_counts[quality_counts < 10]
print(value_counts1)
first_elts = [x[0] for x in value_counts.keys().tolist()]
sel = [i for i, d in enumerate(raw_data_table) if d["quality"] in first_elts]
subset_data = raw_data_table[sel]
subset_data_table = Table.from_table(domain=wine_domain, source=subset_data)
norm_subset_data_table = normalize_table(subset_data_table)
print("Applying learner on filtered data recordset, total data records {}".format(len(norm_subset_data_table)))

then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),max_iter=4000 )
ann_classifier = ann_learner(norm_subset_data_table)  
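# The snippet above relies on a `normalize_table` helper that is not shown.
# A minimal sketch of what it might look like (an assumption, not the original
# implementation), using Orange's Normalize preprocessor:
from Orange.preprocess import Normalize

def normalize_table(table):
    # Standardize continuous features to zero mean and unit variance
    return Normalize(norm_type=Normalize.NormalizeBySD)(table)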
Example #7
    datae1.to_csv(output, header=True, sep=",")

path1 = ("C:\\Users\\acer\\Desktop\\friends\\export.csv")
datae = p.read_csv(path1)
# Now for part (b)
from Orange.classification import SklTreeLearner
# Imports needed by the rest of this snippet
from Orange.data import Table, Domain
from Orange.evaluation import CrossValidation, scoring

td = Table.from_file("C:\\Users\\acer\\Desktop\\friends\\export.csv")
#print(data1.domain)
#print(d)
feature_vars = list(td.domain.variables[1:])
class_label_var = td.domain.variables[7]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
#print(d_dis[0])
td = Table.from_table(domain=md, source=td)
#print(.domain.variables[1:])

# Train/test split: print 80% of the row count, then hard-code 1360 as the cut-off
n1 = td.approx_len()
print(n1 * 80 / 100)
train_data_set = td[:1360]
test_data_set = td[1360:]
#print(train_data_set.domain)
#print(test_data_set.domain)
tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
Example #8
    def test_auc_on_iris_by_weighted_pairs(self):
        ds = data.Table("iris")
        test_results = testing.cross_validation([self.learner], ds, folds=5)
        auc = scoring.AUC(test_results, multiclass=scoring.AUC.ByWeightedPairs)

        self.assertEqual(len(auc), 1)
Example #9
    def test_auc_on_iris(self):
        ds = data.Table("iris")
        test_results = testing.cross_validation([self.learner], ds, folds=5)
        auc = scoring.AUC(test_results)

        self.assertEqual(len(auc), 1)
Example #10
import Orange.preprocess
import pandas as pd
# Imports needed by the rest of this snippet (Orange 3)
from Orange.data import Table, Domain
from Orange.classification import NNClassificationLearner
from Orange.evaluation import CrossValidation, scoring

data = Table.from_file('white wine.csv')
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
newCLabel = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
CLabelSet = newCLabel.domain.variables
WineDomain = Domain(FeatureV, CLabelSet)
data = Table.from_table(domain=WineDomain, source=data)

TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)
evalR = CrossValidation(data, [TLearner], k=10)
print("Accuracy: {:.3f}".format(scoring.CA(evalR)[0]))
print("AUC: {:.3f}".format(scoring.AUC(evalR)[0]))

n = sum(1 for d in data
        if (d["quality"] == 1.0 or d["quality"] == 2.0 or d["quality"] == 9.0))
# Drop the minority-quality rows. Deleting rows from an Orange Table by
# instance (`del data[d]`) is not supported, so rebuild the table from the
# rows we want to keep instead.
data = Table(data.domain,
             [d for d in data if not (d["quality"] < 4.0 or d["quality"] > 8.0)])
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
DataSet = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
newCLabelSet = DataSet.domain.variables
wineDomain = Domain(FeatureV, newCLabelSet)
data = Table.from_table(domain=wineDomain, source=data)
TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)


def predict_cal_acc(test_dataset, data_table, tree_learner):
    # `decision_tree` is a classifier fitted elsewhere in the original script;
    # its predictions are not used by the scores printed below.
    y_pred = decision_tree(test_dataset)
    eval_results = CrossValidation(data_table, [tree_learner], k=10)
    print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))