Beispiel #1
0
    def test_pickle(self):
        """Check that Bayes objects survive a pickle round trip.

        The object built from the iris data must give the same answer as its
        unpickled copy for every example of that data.
        """
        table = orange.ExampleTable("iris")
        fitted = orange.BayesLearner(table)
        restored = pickle.loads(pickle.dumps(fitted))
        for example in table:
            self.assertEqual(fitted(example), restored(example))

        # Smoke check only: a learner constructed without data must pickle and
        # unpickle without raising; nothing is asserted about the result.
        blank = orange.BayesLearner()
        pickle.loads(pickle.dumps(blank))
Beispiel #2
0
    def applyLearner(self):
        """Rebuild ``self.learner`` from the widget settings and publish it.

        Warning slot 0 is cleared first. A negative ``m`` or a LOESS window
        proportion outside [0, 1] sets warning 0 and leaves the learner as
        ``None``; otherwise an ``orange.BayesLearner`` is configured from the
        settings (and wrapped by the preprocessor when one is set). In every
        case the result is sent on the "Learner" channel, ``applyData`` is
        called and the ``changed`` flag is reset.
        """
        self.warning(0)

        problem = None
        if float(self.m_estimator.m) < 0:
            problem = "Parameter m should be positive"
        else:
            # Parsed only when m is acceptable, so a bad m is reported first.
            proportion = float(self.windowProportion)
            if proportion < 0 or proportion > 1:
                problem = "Window proportion for LOESS should be between 0.0 and 1.0"

        if problem is not None:
            self.warning(0, problem)
            self.learner = None
        else:
            self.learner = orange.BayesLearner(
                name=self.name, adjustThreshold=self.adjustThreshold)
            estimator = self.estMethods[self.probEstimation][1]
            self.learner.estimatorConstructor = estimator
            if self.condProbEstimation:
                cond_estimator = self.condEstMethods[self.condProbEstimation][1]
                self.learner.conditionalEstimatorConstructor = cond_estimator
                loess = orange.ConditionalProbabilityEstimatorConstructor_loess(
                    windowProportion=self.windowProportion,
                    nPoints=self.loessPoints)
                self.learner.conditionalEstimatorConstructorContinuous = loess

            if self.preprocessor:
                self.learner = self.preprocessor.wrapLearner(self.learner)

        self.send("Learner", self.learner)
        self.applyData()
        self.changed = False
Beispiel #3
0
    def createInstance(self):
        """Build an ``orange.BayesLearner`` configured from this object's attributes.

        Only attributes that are actually set on ``self`` are applied:

        * ``estimatorConstructor`` is copied to the learner; if ``m`` is set
          as well, it is stored on that constructor.
        * ``m`` alone selects ``orange.ProbabilityEstimatorConstructor_m``.
        * ``conditionalEstimatorConstructor`` is copied when set; otherwise,
          if the learner has an estimator constructor, a by-rows conditional
          constructor is created around it.
        * ``conditionalEstimatorConstructorContinuous`` is copied when set.

        Returns:
            The configured ``orange.BayesLearner``.

        Raises:
            AttributeError: if both ``estimatorConstructor`` and ``m`` are set
                but the estimator constructor has no ``m`` attribute.
        """
        bayes = orange.BayesLearner()
        if hasattr(self, "estimatorConstructor"):
            bayes.estimatorConstructor = self.estimatorConstructor
            if hasattr(self, "m"):
                # `m` can only be forwarded to a constructor that exposes it.
                # The test must be negated: rejecting constructors that DO
                # have `m` contradicts the error message below.
                if not hasattr(bayes.estimatorConstructor, "m"):
                    raise AttributeError(
                        "invalid combination of attributes: 'estimatorConstructor' does not expect 'm'"
                    )
                self.estimatorConstructor.m = self.m
        elif hasattr(self, "m"):
            bayes.estimatorConstructor = orange.ProbabilityEstimatorConstructor_m(
                m=self.m)

        if hasattr(self, "conditionalEstimatorConstructor"):
            bayes.conditionalEstimatorConstructor = self.conditionalEstimatorConstructor
        elif bayes.estimatorConstructor:
            # Reuse the unconditional estimator for the conditional estimates.
            bayes.conditionalEstimatorConstructor = orange.ConditionalProbabilityEstimatorConstructor_ByRows(
            )
            bayes.conditionalEstimatorConstructor.estimatorConstructor = bayes.estimatorConstructor

        if hasattr(self, "conditionalEstimatorConstructorContinuous"):
            bayes.conditionalEstimatorConstructorContinuous = self.conditionalEstimatorConstructorContinuous

        return bayes
Beispiel #4
0
 def __call__(self, data, weight=None):
     """Discretize ``data`` with entropy discretization and fit naive Bayes on it.

     The Bayes model is built with ``adjustThreshold=0`` and returned wrapped
     in a ``Classifier``.
     """
     discretizer = orange.EntropyDiscretization()
     discretized = orange.Preprocessor_discretize(data, method=discretizer)
     bayes_model = orange.BayesLearner(discretized, weight, adjustThreshold=0)
     return Classifier(classifier=bayes_model)
Beispiel #5
0
 def test_learning_cont(self):
     """Train naive Bayes on iris and require more than 125 correct predictions.

     Predictions are made on the same examples the classifier was trained on.
     """
     table = orange.ExampleTable("iris")
     classifier = orange.BayesLearner()(table)
     hits = sum(1 for example in table
                if classifier(example) == example.getclass())
     self.assertGreater(hits, 125)
Beispiel #6
0
 def test_Learner(self):
     """Check the default learner name and that arbitrary names can be assigned."""
     learner = orange.BayesLearner()
     self.assertEqual(learner.name, "bayes")
     # Each assigned name, including the empty string, must read back verbatim.
     for new_name in ("foo", "BayesLearner", "x.BayesLearner", ""):
         learner.name = new_name
         self.assertEqual(learner.name, new_name)
Beispiel #7
0
 def test_named_const(self):
     """Check that ``loess_distribution_method`` survives pickling.

     Both the initial value (``Fixed``) and an explicitly assigned value
     (``Uniform``) must be preserved by a pickle round trip.
     """
     Method = orange.BayesLearner.DistributionMethod

     def roundtrip(obj):
         return pickle.loads(pickle.dumps(obj))

     learner = orange.BayesLearner()
     self.assertEqual(learner.loess_distribution_method, Method.Fixed)
     self.assertEqual(roundtrip(learner).loess_distribution_method,
                      Method.Fixed)

     learner.loess_distribution_method = Method.Uniform
     self.assertEqual(roundtrip(learner).loess_distribution_method,
                      Method.Uniform)
Beispiel #8
0
def classify():
    """Load the audio train/test tables and fit a Bayes and a tree model.

    Returns a tuple ``(classifiers, trainData, testData)`` where
    ``classifiers`` is ``[bayes, tree]``, both built from the training table
    and named "bayes" and "tree".
    """
    import orange
    import orngTree

    testData = orange.ExampleTable('data/audioTest.tab')
    trainData = orange.ExampleTable('data/audioTrain.tab')

    classifiers = []
    for label, build in (("bayes", orange.BayesLearner),
                         ("tree", orngTree.TreeLearner)):
        model = build(trainData)
        model.name = label
        classifiers.append(model)

    return classifiers, trainData, testData
Beispiel #9
0
    def __call__(self, data, weight=0):
        """Induce features from ``data`` until none is left, then build a lookup classifier.

        ``data`` is first passed through ``orange.ExampleTable``. When no
        weight id is given, ``self.weight`` is used if present; otherwise a
        new meta id is created and added to the table. Each round replaces
        the data with a version containing the best induced feature (named
        ``c1``, ``c2``, ...). The loop ends when no feature is proposed or
        when the best one is bound to as many attributes as the domain has.

        Raises:
            SystemError: if ``self.alternativeMeasure`` is set.
        """
        import orngLookup

        if self.alternativeMeasure:
            raise SystemError("alternativeMeasure not implemented yet")

        keep_duplicates = getattr(self, "keepDuplicates", 0)

        table = orange.ExampleTable(data)
        weight_id = weight
        if not weight_id:
            # This is here for backward compatibility
            if hasattr(self, "weight"):
                weight_id = self.weight
            else:
                weight_id = orange.newmetaid()
                table.addMetaAttribute(weight_id)

        if self.redundancyRemover:
            table = self.redundancyRemover(table, weight_id)
        if not keep_duplicates:
            table.removeDuplicates(weight_id)

        generate = FeatureGenerator(
            featureInducer=self.featureInducer,
            subsetsGenerator=self.subsetsGenerator)

        induced_count = 0
        while True:
            candidates = generate(table, weight_id)
            if not candidates or not len(candidates):
                break

            winner = orngMisc.selectBest(candidates,
                                         orngMisc.compare2_lastBigger)[0]
            bound_size = len(winner.getValueFrom.boundset())
            if bound_size == len(table.domain.attributes):
                break

            induced_count += 1
            winner.name = "c%d" % induced_count

            table = replaceWithInduced(winner, table)
            if not keep_duplicates:
                table.removeDuplicates(weight_id)

        # Fall back to a plain naive Bayes learner when none was configured.
        fallback = self.learnerForUnknown or orange.BayesLearner()

        return orngLookup.lookupFromExamples(table, weight_id, fallback)
Beispiel #10
0
 def __call__(self, examples, weight = 0,fulldata=0):
     """Fit a BasicBayes classifier on ``examples``.

     The domain is validated first, then a ``DomainTranslation`` is analysed
     on ``fulldata`` when it is given (non-zero) and on ``examples``
     otherwise. The coefficients come from ``self._process`` applied to an
     ``orange.BayesLearner`` model of ``examples``.

     Returns:
         ``BasicBayesClassifier(beta, coeffs, translate)``.

     Raises:
         ValueError: if the class variable does not have ``varType == 1``
             with exactly two values, or if any attribute has a ``varType``
             other than 1.
     """
     # varType is compared with the literal 1 throughout; presumably that is
     # the code for discrete variables -- TODO confirm against orange.VarTypes.
     class_var = examples.domain.classVar
     if not (class_var.varType == 1 and len(class_var.values) == 2):
         # A bare string cannot be raised; use a real exception type.
         raise ValueError(
             "BasicBayes learner only works with binary discrete class.")
     for attr in examples.domain.attributes:
         if attr.varType != 1:
             raise ValueError(
                 "BasicBayes learner does not work with continuous attributes.")

     translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                              self.translation_mode_c)
     if fulldata != 0:
         translate.analyse(fulldata, weight)
     else:
         translate.analyse(examples, weight)
     translate.prepareLR()
     (beta, coeffs) = self._process(orange.BayesLearner(examples), examples)
     return BasicBayesClassifier(beta, coeffs, translate)
Beispiel #11
0
def bayes_classifier(sequence):
    """Classify ``sequence`` with a naive Bayes model trained on ``data3_v5.tab``.

    The features returned by ``bayes_pre(sequence)`` are written, after the
    contents of ``header.tab`` and an ``unknow`` label cell, to
    ``new_seq.tab``. That file is then loaded as a one-row table and its
    first row is classified.

    Returns:
        A two-element list: the predicted class as a string, and
        ``orange.GetProbabilities``.
    """
    # `with` guarantees both files are closed even if a read, a write or
    # bayes_pre() raises part-way through.
    with open('header.tab', 'r') as header_file:
        feature_header = header_file.read()

    with open('new_seq.tab', 'w') as out:
        out.write(feature_header)
        out.write('unknow\t')
        for feature in bayes_pre(sequence):
            out.write(str(feature))
            out.write('\t')

    data = orange.ExampleTable("data3_v5.tab")
    learner = orange.BayesLearner()
    classifier = learner(data)
    new_data = orange.ExampleTable("new_seq.tab")
    # NOTE(review): the second element is the GetProbabilities constant
    # itself, not the class probabilities of the example. Possibly
    # classifier(new_data[0], orange.GetProbabilities) was intended; kept
    # as-is because callers may rely on the current return shape.
    p = [str(classifier(new_data[0])), orange.GetProbabilities]
    return p
Beispiel #12
0
def cross_validation():
    """Run k-fold cross-validation k times with naive Bayes and print the mean accuracy.

    Uses the module-level ``data`` and ``split`` helper. For each of the
    ``k`` repetitions the examples are reshuffled and cut into chunks of
    ``len(data) // k`` examples; each of the first ``k`` chunks serves once
    as the validation set while all other chunks form the training set. The
    printed value is the accuracy averaged over all ``k * k`` runs.
    """
    k = 10
    d = list(data)
    # Fold size follows k instead of a second hard-coded 10.
    nps = len(d) // k
    acs = []
    for _ in range(k):
        random.shuffle(d)
        subsamples = list(split(d, nps))
        accuracy = 0
        for j in range(k):
            validation = subsamples[j]
            training = []
            # Exclude the validation fold by position. Comparing the folds by
            # value is slower and would also drop any other fold that happens
            # to compare equal.
            for position, fold in enumerate(subsamples):
                if position != j:
                    training += fold
            model = orange.BayesLearner(training)
            hits = sum(1 for ex in validation if ex.getclass() == model(ex))
            accuracy += float(hits) / len(validation)
        acs.append(accuracy)
    # Each entry of acs is a sum over k folds, hence the division by k twice.
    print(sum(acs) / float(k) / float(k))
Beispiel #13
0
def train_classifier(data, type, filter):
    """Train the classifier selected by ``type`` on ``data``.

    ``type`` accepts several spellings per learner: C4.5 tree, naive Bayes or
    linear SVM. Any other value prints an error and calls ``exit()``.
    When ``filter`` is 0 the learner is applied directly; otherwise it is
    wrapped in a ``FilteredLearner`` that uses ``FilterBestN(n=filter)``.
    """
    # NOTE: "logreg"/"regression" are not handled; they fall through to the
    # error branch like any other unknown value.
    if type in ("tree", "c4.5", "decision_tree"):
        learner = orange.C45Learner()
    elif type in ("bayes", "naive", "naive_bayes"):
        learner = orange.BayesLearner()
    elif type in ("svm", "linear_svm"):
        learner = Orange.classification.svm.LinearSVMLearner()
    else:
        print("Invalid Learner Type\n")
        exit()

    if filter == 0:
        return learner(data)

    best_n = Orange.feature.selection.FilterBestN(n=filter)
    filtered_learner = Orange.feature.selection.FilteredLearner(
        learner, filter=best_n, name='filtered')
    return filtered_learner(data)
Beispiel #14
0
 def __call__(self, data, weight=None):
   """Fit naive Bayes on the ``self.N`` best-scored attributes of ``data``.

   ``weight`` is accepted but not used. The result is a
   ``BayesFSS_Classifier`` carrying the model, ``N`` and the learner name.
   """
   scores = orngFSS.attMeasure(data)
   reduced = orngFSS.selectBestNAtts(data, scores, self.N)
   bayes_model = orange.BayesLearner(reduced)
   return BayesFSS_Classifier(classifier=bayes_model, N=self.N, name=self.name)
Beispiel #15
0
 def __init__(self, discr = orange.EntropyDiscretization(), learnr = orange.BayesLearner()):
     """Store the discretization method and the learner to use.

     NOTE(review): both defaults are created once, when this ``def`` is
     executed, so every instance built without arguments shares the same two
     objects.
     """
     self.disc, self.learner = discr, learnr
# Blank separator line in the output.
print


# Apply the current imputer to the whole table and show rows 20-24 before and
# after imputation. `imputer` and `data` are not assigned in this excerpt --
# presumably they are defined earlier in the script.
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print



print "\n*** BAYES and AVERAGE IMPUTATION ***\n"

# Model-based imputation: a majority learner for continuous attributes and
# naive Bayes for discrete ones.
imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = orange.MajorityLearner()
imputer.learnerDiscrete = orange.BayesLearner()
# Calling the constructor on the data yields the imputer; the name is reused.
imputer = imputer(data)

# Show a single example before and after imputation.
print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print


# Same before/after listing as above, now with the model-based imputer.
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print
Beispiel #17
0
def bayes(input_dict):
    """Return ``{'bayesout': learner}`` with a freshly created naive Bayes learner.

    ``input_dict`` is not read.
    """
    import orange
    learner = orange.BayesLearner(name="Naive Bayes (Orange)",
                                  hovername="DOlgo ime bajesa")
    return {'bayesout': learner}
Beispiel #18
0
import orange, orngWrap, orngTest, orngStat

data = orange.ExampleTable("bupa")

# The plain learner plus two threshold wrappers around it (one of them with
# the threshold fixed at 0.8), cross-validated together on the same data.
learner = orange.BayesLearner()
thresh = orngWrap.ThresholdLearner(learner=learner)
thresh80 = orngWrap.ThresholdLearner_fixed(learner=learner, threshold=.8)
res = orngTest.crossValidation([learner, thresh, thresh80], data)
CAs = orngStat.CA(res)

# One report line per learner, in the order passed to crossValidation.
templates = (
    "W/out threshold adjustement: %5.3f",
    "With adjusted thredhold: %5.3f",
    "With threshold at 0.80: %5.3f",
)
for position, template in enumerate(templates):
    print(template % CAs[position])
Beispiel #19
0
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.method == 0 and not \
                    isinstance(class_var, orange.EnumVariable):
                self.warning(0, "Cannot use 'Boosting' on non discrete class")
                return False
        return True
    
    def commit(self):
        """Send the wrapped learner and, when possible, a classifier built from it.

        The wrapper is the second item of ``self.METHODS[self.method]``,
        called with the base learner and ``t=self.t``. ``None`` is sent on
        "Learner" when there is no base learner, and on "Classifier" unless
        data and a wrapped learner are present and ``checkMethod()`` passes.
        """
        if self.learner:
            wrap = self.METHODS[self.method][1]
            wrapped = wrap(self.learner, t=self.t)
        else:
            wrapped = None
        self.send("Learner", wrapped)

        classifier = None
        if self.data and wrapped and self.checkMethod():
            classifier = wrapped(self.data)
        self.send("Classifier", classifier)
            
# Manual check: running this module directly opens the widget with a naive
# Bayes learner and the iris data set.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWEnsemble()
    w.setLearner(orange.BayesLearner())
    w.setData(orange.ExampleTable("../../doc/datasets/iris"))
    w.show()
    # Hand control to the Qt event loop.
    app.exec_()
            
        
        
          
Beispiel #20
0
    def __call__(self, examples, weight=0):
        """Fit logistic regression and one a-priori coefficient per attribute.

        A ``LogRegLearner`` is trained on ``examples``. Then, for every
        attribute, it is trained again on a table extended with low-weight
        rows that stand for an unknown value of that attribute; the
        coefficient of the last attribute of each such model is collected.
        The collected coefficients are rescaled so that their sum, added to
        the intercept, moves towards the class log-odds of a naive Bayes
        model of ``examples``.

        When ``weight`` is 0, a new meta attribute with value 1.0 is added to
        ``examples`` (the caller's table is modified).

        Returns:
            ``(orig_model, betas_ap)``: the model fitted on the original data
            and the list of rescaled per-attribute coefficients.
        """
        # The helper below builds, for each attribute, a copy of the data
        # extended with rows that stand for an unknown value of it.
        def createLogRegExampleTable(data, weightID):
            setsOfData = []
            for at in data.domain.attributes:
                # for each attribute, create a new example table newData;
                # for a continuous attribute, add a new continuous indicator
                # variable to the domain
                if at.varType == orange.VarTypes.Continuous:
                    atDisc = orange.FloatVariable(at.name + "Disc")
                    newDomain = orange.Domain(data.domain.attributes +
                                              [atDisc, data.domain.classVar])
                    newDomain.addmetas(data.domain.getmetas())
                    newData = orange.ExampleTable(newDomain, data)
                    altData = orange.ExampleTable(newDomain, data)
                    # original rows: indicator off, weight unchanged
                    for i, d in enumerate(newData):
                        d[atDisc] = 0
                        d[weightID] = 1 * data[i][weightID]
                    # "unknown" rows: indicator on, attribute zeroed, tiny weight
                    for i, d in enumerate(altData):
                        d[atDisc] = 1
                        d[at] = 0
                        d[weightID] = 0.000001 * data[i][weightID]
                elif at.varType == orange.VarTypes.Discrete:
                    # for a discrete attribute "at", add one more value whose
                    # name is the attribute name + "X"
                    atNew = orange.EnumVariable(at.name,
                                                values=at.values +
                                                [at.name + "X"])
                    newDomain = orange.Domain(
                        filter(lambda x: x != at, data.domain.attributes) +
                        [atNew, data.domain.classVar])
                    newDomain.addmetas(data.domain.getmetas())
                    newData = orange.ExampleTable(newDomain, data)
                    altData = orange.ExampleTable(newDomain, data)
                    # original rows: keep the original value, weight unchanged
                    for i, d in enumerate(newData):
                        d[atNew] = data[i][at]
                        d[weightID] = 1 * data[i][weightID]
                    # "unknown" rows: the extra value, tiny weight
                    for i, d in enumerate(altData):
                        d[atNew] = at.name + "X"
                        d[weightID] = 0.000001 * data[i][weightID]
                # NOTE(review): an attribute that is neither continuous nor
                # discrete reaches this point with newData/altData left over
                # from the previous attribute (or unassigned on the first one).
                newData.extend(altData)
                setsOfData.append(newData)
            return setsOfData

        learner = LogRegLearner(imputer=orange.ImputerConstructor_average(),
                                removeSingular=self.removeSingular)
        # get Original Model
        orig_model = learner(examples, weight)
        if orig_model.fit_status:
            print "Warning: model did not converge"

        # get extended Model (you should not change data)
        # NOTE(review): despite the remark above, a meta attribute is added to
        # `examples` itself when no weight id was supplied.
        if weight == 0:
            weight = orange.newmetaid()
            examples.addMetaAttribute(weight, 1.0)
        extended_set_of_examples = createLogRegExampleTable(examples, weight)
        extended_models = [learner(extended_examples, weight) \
                           for extended_examples in extended_set_of_examples]

        ##        print examples[0]
        ##        printOUT(orig_model)
        ##        print orig_model.domain
        ##        print orig_model.beta
        ##        print orig_model.beta[orig_model.continuizedDomain.attributes[-1]]
        ##        for i,m in enumerate(extended_models):
        ##            print examples.domain.attributes[i]
        ##            printOUT(m)

        # compute the deviations
        # get sum of all betas
        beta = 0
        betas_ap = []
        for m in extended_models:
            # coefficient of the last attribute of the continuized domain
            beta_add = m.beta[m.continuizedDomain.attributes[-1]]
            betas_ap.append(beta_add)
            beta = beta + beta_add

        # NOTE(review): an earlier comment here said "subtract it from
        # intercept", but the code adds the summed betas to the intercept.
        #print "beta", beta
        logistic_prior = orig_model.beta[0] + beta

        # compare it to bayes prior
        bayes = orange.BayesLearner(examples)
        bayes_prior = math.log(bayes.distribution[1] / bayes.distribution[0])

        # normalize errors
        ##        print "bayes", bayes_prior
        ##        print "lr", orig_model.beta[0]
        ##        print "lr2", logistic_prior
        ##        print "dist", orange.Distribution(examples.domain.classVar,examples)
        ##        print "prej", betas_ap

        # error normalization - to avoid errors due to assumption of independence of unknown values
        dif = bayes_prior - logistic_prior
        positives = sum(filter(lambda x: x >= 0, betas_ap))
        negatives = -sum(filter(lambda x: x < 0, betas_ap))
        # The rescaling is skipped entirely when there are no negative betas.
        if not negatives == 0:
            kPN = positives / negatives
            diffNegatives = dif / (1 + kPN)
            diffPositives = kPN * diffNegatives
            kNegatives = (negatives - diffNegatives) / negatives
            kPositives = positives / (positives - diffPositives)
            ##        print kNegatives
            ##        print kPositives

            for i, b in enumerate(betas_ap):
                if b < 0: betas_ap[i] *= kNegatives
                else: betas_ap[i] *= kPositives
        #print "potem", betas_ap

        # return the original model and the corresponding a-priori coefficients
        return (orig_model, betas_ap)
Beispiel #21
0
# Description: Demonstrates the use of discretization
# Category:    discretization
# Classes:     entropyDiscretization, DiscretizedLearner
# Uses:        iris.tab

import orange
import orngDisc

data = orange.ExampleTable("iris.tab")

# Discretize the whole table up front (the result is not used further below).
disc_data = orngDisc.entropyDiscretization(data)

# Compare naive Bayes wrapped in a discretizing learner with plain naive Bayes.
disc_learner = orngDisc.DiscretizedLearner(orange.BayesLearner(),
                                           name="disc-bayes")
learner = orange.BayesLearner(name="bayes")

learners = [learner, disc_learner]

import orngTest, orngStat

results = orngTest.crossValidation(learners, data)
# Score once: CA() returns one value per learner, so recomputing it for every
# printed row is wasted work.
accuracies = orngStat.CA(results)
print("Classification Accuracy:")
for i, current in enumerate(learners):
    # The % formatting is applied inside the parentheses so the line behaves
    # the same whether print is a statement or a function.
    print("%15s: %5.3f" % (current.name, accuracies[i]))
Beispiel #22
0
# Description: Demonstration of the use of cross-validation as provided in the orngTest module
# Category:    evaluation
# Uses:        voting.tab
# Classes:     orngTest.crossValidation
# Referenced:  c_performance.htm

import orange, orngTest, orngStat, orngTree

# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner(mForPruning=2)
bayes.name = "bayes"
tree.name = "tree"
learners = [bayes, tree]

# compute accuracies on data
data = orange.ExampleTable("voting")
results = orngTest.crossValidation(learners, data, folds=10)

# Score every learner once per measure; each call returns one value per
# learner, so recomputing all four for every printed row is wasted work.
CAs = orngStat.CA(results)
ISs = orngStat.IS(results)
Briers = orngStat.BrierScore(results)
AUCs = orngStat.AUC(results)

# output the results
print("Learner  CA     IS     Brier    AUC")
for i, current in enumerate(learners):
    print("%-8s %5.3f  %5.3f  %5.3f  %5.3f" % (
        current.name, CAs[i], ISs[i], Briers[i], AUCs[i]))
Beispiel #23
0
import orange, orngTest, orngStat
import random

# Data set shared by all tests in this script.
data = orange.ExampleTable("voting")

# Three named learners to compare; `names` keeps their labels in the same
# order for printResults().
bayes = orange.BayesLearner(name="bayes")
tree = orange.TreeLearner(name="tree")
majority = orange.MajorityLearner(name="default")
learners = [bayes, tree, majority]
names = [x.name for x in learners]


def printResults(res):
    CAs = orngStat.CA(res, reportSE=1)
    for i in range(len(names)):
        print "%s: %5.3f+-%5.3f   " % (names[i], CAs[i][0], 1.96 * CAs[i][1]),
    print


# Three runs without an explicit random generator.
print "\nproportionsTest that will always give the same results"
for i in range(3):
    res = orngTest.proportionTest(learners, data, 0.7)
    printResults(res)

# Three runs sharing one RandomGenerator instance.
print "\nproportionsTest that will give different results, but the same each time the script is run"
myRandom = orange.RandomGenerator()
for i in range(3):
    # NOTE(review): `res` is not printed in this loop within the visible
    # excerpt -- the snippet may be cut off here.
    res = orngTest.proportionTest(learners,
                                  data,
                                  0.7,
                                  randomGenerator=myRandom)
Beispiel #24
0
    else:
      return learner
    
  def __init__(self, name='Naive Bayes with FSS', N=5):
    """Remember the learner's name and how many attributes to keep.

    name -- label stored on the learner.
    N    -- number of attributes to select (default 5).
    """
    self.name = name
    # Store the argument rather than a hard-coded 5, so N is configurable.
    self.N = N
      
  def __call__(self, data, weight=None):
    """Fit naive Bayes on the ``self.N`` best-scored attributes of ``data``.

    ``weight`` is accepted but not used. The result is a
    ``BayesFSS_Classifier`` carrying the model, ``N`` and the learner name.
    """
    scores = orngFSS.attMeasure(data)
    reduced = orngFSS.selectBestNAtts(data, scores, self.N)
    bayes_model = orange.BayesLearner(reduced)
    return BayesFSS_Classifier(classifier=bayes_model, N=self.N, name=self.name)

class BayesFSS_Classifier:
  """Thin wrapper that forwards classification to the stored ``classifier``.

  Every keyword argument given to the constructor becomes an attribute.
  """

  def __init__(self, **kwds):
    for key, value in kwds.items():
      setattr(self, key, value)

  def __call__(self, example, resultType = orange.GetValue):
    """Classify ``example`` with the wrapped classifier, passing ``resultType`` through."""
    predict = self.classifier
    return predict(example, resultType)

# test the wrapper above on a data set
import orngStat, orngTest
data = orange.ExampleTable("voting")
learners = (orange.BayesLearner(name='Naive Bayes'), BayesFSS(name="with FSS"))
results = orngTest.crossValidation(learners, data)

# output the results
# Score once: CA() returns one value per learner, so recomputing it for every
# printed row is wasted work.
accuracies = orngStat.CA(results)
print("Learner      CA")
for i, current in enumerate(learners):
  print("%-12s %5.3f" % (current.name, accuracies[i]))
Beispiel #25
0
 def learner(self, data):
     """Return the result of calling ``orange.BayesLearner`` on ``data``."""
     trained = orange.BayesLearner(data)
     return trained
import matplotlib
# Scatter plot of three point sets, one marker/colour per set.
# NOTE(review): `plt` and the X.. arrays are not defined in this excerpt --
# presumably they are set up earlier in the script.
fig = plt.figure()
ax1 = fig.add_subplot(111)

ax1.scatter(X11, X12, s=10, c='b', marker="+")
ax1.scatter(X21, X22, s=10, c='c', marker="o")
ax1.scatter(X31, X32, s=10, c='y', marker="x")
plt.title('Plot of Three Classes of Data')
plt.show()

########################
# Build Classifier
########################

import orange, orngTest, orngStat, orngTree
# A model built directly from `data`; it is not used again in this excerpt.
classifier = orange.BayesLearner(data)
bayes = orange.BayesLearner()
bayes.name = "bayes"
learners = [bayes]

# 10-fold cross-validation of the single Bayes learner on `data_test`.
results = orngTest.crossValidation(learners, data_test, folds=10)

########################
# Compute the misclassified observations
########################

X, Y = data_test.to_numpy("A/C")
data_scored = []
for i in range(len(results.results)):
    # classes[0] is the prediction of the first (only) learner.
    # NOTE(review): only correct predictions append a value here; the
    # snippet appears to be cut off after this branch.
    if results.results[i].classes[0] == results.results[i].actual_class:
        data_scored.append(1)
Beispiel #27
0
    def __call__(self, examples, weight=0):
        """Fit logistic regression and a-priori coefficients from one extended table.

        A ``LogRegLearner`` is trained on ``examples`` and again on a single
        table in which, for every attribute, extra higher-weight rows stand
        for an unknown value of that attribute. The resulting coefficients
        are scaled towards the class log-odds of a naive Bayes model.

        When ``weight`` is 0, a new meta attribute with value 1.0 is added to
        ``examples`` (the caller's table is modified).

        Returns:
            ``(orig_model, betas_ap)``.

        NOTE(review): as written this method cannot complete -- see the note
        at the ``extended_models`` loop below.
        """
        # The helper below returns the data extended with rows that stand
        # for unknown values.
        def createLogRegExampleTable(data, weightID):
            finalData = orange.ExampleTable(data)
            origData = orange.ExampleTable(data)
            for at in data.domain.attributes:
                # for each attribute, create a new example table newData;
                # for a continuous attribute, add a new continuous indicator
                # variable to origData, finalData and newData
                if at.varType == orange.VarTypes.Continuous:
                    atDisc = orange.FloatVariable(at.name + "Disc")
                    newDomain = orange.Domain(origData.domain.attributes +
                                              [atDisc, data.domain.classVar])
                    # Take the metas from origData, as the discrete branch
                    # does: newData is not assigned yet when the first
                    # attribute is continuous, and is a stale table otherwise.
                    newDomain.addmetas(origData.domain.getmetas())
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    for d in origData:
                        d[atDisc] = 0
                    for d in finalData:
                        d[atDisc] = 0
                    # "unknown" rows: indicator on, attribute zeroed, weight x100
                    for i, d in enumerate(newData):
                        d[atDisc] = 1
                        d[at] = 0
                        d[weightID] = 100 * data[i][weightID]

                elif at.varType == orange.VarTypes.Discrete:
                    # for a discrete attribute "at", add one more value whose
                    # name is the attribute name + "X"
                    atNew = orange.EnumVariable(at.name,
                                                values=at.values +
                                                [at.name + "X"])
                    newDomain = orange.Domain(
                        filter(lambda x: x != at, origData.domain.attributes) +
                        [atNew, origData.domain.classVar])
                    newDomain.addmetas(origData.domain.getmetas())
                    temp_finalData = orange.ExampleTable(finalData)
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    temp_origData = orange.ExampleTable(origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    # carry the original values over to the replacement variable
                    for i, d in enumerate(origData):
                        d[atNew] = temp_origData[i][at]
                    for i, d in enumerate(finalData):
                        d[atNew] = temp_finalData[i][at]
                    # "unknown" rows: the extra value, weight x10
                    for i, d in enumerate(newData):
                        d[atNew] = at.name + "X"
                        d[weightID] = 10 * data[i][weightID]
                # NOTE(review): an attribute that is neither continuous nor
                # discrete reaches this point with newData left over from the
                # previous attribute (or unassigned on the first one).
                finalData.extend(newData)
            return finalData

        learner = LogRegLearner(imputer=orange.ImputerConstructor_average(),
                                removeSingular=self.removeSingular)
        # get Original Model
        orig_model = learner(examples, weight)

        # get extended Model (you should not change data)
        if weight == 0:
            weight = orange.newmetaid()
            examples.addMetaAttribute(weight, 1.0)
        extended_examples = createLogRegExampleTable(examples, weight)
        extended_model = learner(extended_examples, weight)

        ##        print examples[0]
        ##        printOUT(orig_model)
        ##        print orig_model.domain
        ##        print orig_model.beta

        ##        printOUT(extended_model)
        # compute the deviations
        # get sum of all betas
        beta = 0
        betas_ap = []
        # NOTE(review): `extended_models` is never assigned in this method --
        # only `extended_model` (singular) is -- so this loop raises NameError
        # as written. The intended iteration cannot be recovered from this
        # code, so it is flagged here rather than guessed at.
        for m in extended_models:
            beta_add = m.beta[m.continuizedDomain.attributes[-1]]
            betas_ap.append(beta_add)
            beta = beta + beta_add

        # NOTE(review): an earlier comment here said "subtract it from
        # intercept", but the code adds the summed betas to the intercept.
        #print "beta", beta
        logistic_prior = orig_model.beta[0] + beta

        # compare it to bayes prior
        bayes = orange.BayesLearner(examples)
        bayes_prior = math.log(bayes.distribution[1] / bayes.distribution[0])

        # normalize errors
        #print "bayes", bayes_prior
        #print "lr", orig_model.beta[0]
        #print "lr2", logistic_prior
        #print "dist", orange.Distribution(examples.domain.classVar,examples)
        k = (bayes_prior - orig_model.beta[0]) / (logistic_prior -
                                                  orig_model.beta[0])
        #print "prej", betas_ap
        betas_ap = [k * x for x in betas_ap]
        #print "potem", betas_ap

        # return the original model and the corresponding a-priori coefficients
        return (orig_model, betas_ap)
            it.moveBy(0, -header.height() - graph.height())
        self.showNomogram()

    # Callbacks
    def showNomogram(self):
        """Show and refresh the nomogram when both it and a classifier are set."""
        if not (self.bnomogram and self.cl):
            return
        nomogram = self.bnomogram
        nomogram.show()
        nomogram.update()


# test widget appearance
# Manual check: running this module directly opens the nomogram widget with a
# naive Bayes model of the heart-disease data set.
if __name__ == "__main__":
    import orngLR, orngSVM

    a = QApplication(sys.argv)
    ow = OWNomogram()
    a.setMainWidget(ow)
    data = orange.ExampleTable("../../doc/datasets/heart_disease.tab")

    bayes = orange.BayesLearner(data)
    # Attach the training data to the model before handing it to the widget.
    bayes.setattr("data", data)
    ow.classifier(bayes)

    # here you can test setting some stuff

    # Hand control to the Qt event loop.
    a.exec_()

    # save settings
    ow.saveSettings()
Beispiel #29
0
Datei: sqk.py Projekt: sloria/usv
# Per-channel settings keyed by the --channel value:
# (tag used in directory and file names, suffix of the segmented wav file,
#  use channel 1, use channel 2).
_CHANNEL_LAYOUTS = {
    0: ("ch1_2", "_call.wav", True, True),
    1: ("ch1", "_ch1_.wav", True, False),
    2: ("ch2", "_ch2_.wav", False, True),
}

# Learner names accepted by --learner (compared case-insensitively).
_LEARNER_NAMES = ("bayes", "knn", "svm", "tree", "forest")


def _build_option_parser():
    """Create the optparse parser that describes the sqk command line."""
    version = "%prog version 0.1"
    usage = "usage: %prog [options] [input] [options [classification]]"
    desc = ("QUICK START: To extract data from a trial, 'cd' to the "
            "trial's directory and type: 'sqk --classify'. To extract data "
            "from one channel of the trial (ch 1 in this case), type: "
            "'sqk --classify --channel=1'.")

    parser = optparse.OptionParser(usage, version=version, description=desc)
    parser.add_option("-C", "--classify",
                      dest="classify",
                      action="store_true",
                      default=False,
                      help="Classify the trial. IMPORTANT: Trial folder must "
                           "be the current directory.")
    parser.add_option("-m", "--channel", metavar="<CH>",
                      dest="channel",
                      action="store",
                      type="int",
                      default=0,
                      help="Specify which channel to extract data from. "
                           "Default (%default) extracts data from both "
                           "channels. Must choose 0 (both channels), 1, or 2.")
    parser.add_option("-l", "--log",
                      dest="log", action="store_true", default=False,
                      help="Parses a log file if it exists and adds time and"
                           " duration information to the data file.")
    parser.add_option("-T", "--traindata", metavar="<DATA_FILE>",
                      dest="trainData",
                      action="store",
                      default=os.path.join(TRAIN_PATH, 'traindata'),
                      help="Specify training data set. Default is %default")
    parser.add_option("-L", "--learner", metavar="<TYPE>",
                      dest="learner",
                      action="store",
                      default="svm",
                      help="Specify the classifier algorithm. Options include:"
                           " 'bayes' (Naive Bayes), 'knn' (k-Nearest Neighbor),"
                           " 'svm' (SVM), 'tree' (classification tree),"
                           " 'forest' (random forest). "
                           "Default is %default.")
    parser.add_option("-f", "--file", metavar="<AUDIO_FILE>",
                      dest="audio",
                      action="store",
                      help="Extract features and classify audio file (wav)")
    parser.add_option("-p", "--path", metavar="<PATH>",
                      dest="path",
                      action="store",
                      help="Extract features and classify all files in a "
                           "directory. To extract from current directory: "
                           "'usv.py -p .' ")
    parser.add_option("-r", "--rate", metavar="<SAMPLE_RATE>",
                      dest="sampleRate",
                      action="store",
                      default="11025",
                      help="Specify the sample rate of input files. Default is "
                           "%default (Hz).")
    parser.add_option("-t", "--train", metavar="<CLASS>",
                      dest="exampleClass",
                      action="store",
                      type='string',
                      help="Label the training example(s).")
    parser.add_option("-d", "--data", metavar="<DATA_FILE>",
                      dest="data",
                      action="store",
                      default="data.tab",
                      help="Write to data file (.tab format). Default is "
                           "'%default' or 'traindata.tab' for training data.")
    parser.add_option("-S", "--seg-resamp",
                      dest="segment",
                      action="store_true",
                      default=False,
                      help="Resample to 11025 Hz and split into multiple files "
                           "based on silence. IMPORTANT: Trial folder must "
                           "be the current directory.")
    return parser


def _segment_audio(audiofile, stem, channel, sample_rate):
    """Run the resample-and-segment commands for one wav file.

    *stem* is the prefix of both the output wav name and the output
    directory; *channel* must be a key of ``_CHANNEL_LAYOUTS``.
    """
    tag, wav_suffix, use_ch1, use_ch2 = _CHANNEL_LAYOUTS[channel]
    run_commands(
        seg_resamp(audiofile,
                   int(sample_rate),
                   outfile=stem + wav_suffix,
                   directory=stem + "_" + tag,
                   ch1=use_ch1,
                   ch2=use_ch2))


def _process_audio_file(opts, data):
    """Handle '--file': segment one wav file or append its features to *data*."""
    file_name = os.path.splitext(opts.audio)[0]
    if opts.segment:
        # No data file is open in segment mode, so nothing is written to it.
        print("Resampling and segmenting %s. . ." % (opts.audio))
        _segment_audio(opts.audio, file_name, opts.channel, opts.sampleRate)
        print("Wrote to './%s'." % (file_name + "_calls"))
        return

    # Add MFCC 1-12 to data.
    write_features(opts.audio, opts.sampleRate, data)
    if opts.exampleClass:
        data.write(opts.exampleClass.lower() + "\n")
        print("Classified %r as %r." % (opts.audio,
                                        opts.exampleClass.lower()))
    else:
        print("Invalid data for %r. Skipping. . ." % opts.audio)
        data.write('\n')


def _process_path(opts, data):
    """Handle '--path': process every wav file found below ``opts.path``."""
    audiofile = None
    try:
        for root, _dirs, files in os.walk(opts.path):
            for basename in files:
                if not fnmatch.fnmatch(basename, "*.[wW][aA][vV]"):
                    continue
                audiofile = os.path.join(root, basename)
                # Skip small files
                if os.path.getsize(audiofile) < 100:
                    continue
                if opts.segment:
                    # The data file is not open in segment mode; the original
                    # wrote the filename to it anyway and crashed.
                    stem = os.path.basename(os.path.splitext(audiofile)[0])
                    print("Resampling and segmenting %r. . ." % (audiofile))
                    _segment_audio(audiofile, stem, opts.channel,
                                   opts.sampleRate)
                    continue
                # Add MFCC 1-12 to data, followed by the filename.
                write_features(audiofile, opts.sampleRate, data)
                data.write(str(os.path.basename(audiofile)) + "\t")
                if opts.exampleClass:
                    data.write(opts.exampleClass.lower() + "\n")
                    print("Classified %r as %r." % (
                        audiofile, opts.exampleClass.lower()))
                else:
                    data.write('\n')
    except (FloatingPointError, IOError):
        # The original format string ("%. . .r") was invalid and raised
        # ValueError from inside this handler.
        print("An error occurred. Skipping %r. . ." % audiofile)


def _segment_trial(opts):
    """Handle '--seg-resamp' alone: segment every wav file in the current dir."""
    for audiofile in glob(os.path.join('./', "*.[wW][aA][vV]")):
        file_name = os.path.splitext(audiofile)[0]
        print("Resampling and segmenting %r. . ." % (file_name))
        _segment_audio(audiofile, file_name, opts.channel, opts.sampleRate)


def _train_classifier(learner_name, traindata):
    """Train and return the classifier selected by *learner_name*.

    Raises ValueError when the name is not one of ``_LEARNER_NAMES``.
    """
    learner = learner_name.lower()
    if learner == "bayes":
        classifier = orange.BayesLearner(traindata)
        classifier.name = "naive_bayes"
    elif learner == "knn":
        classifier = Orange.classification.knn.kNNLearner(traindata)
        classifier.name = "kNN"
    elif learner == "svm":
        svm = SVMLearner(name="SVM",
                         kernel_type=kernels.RBF,
                         C=128,
                         gamma=2,
                         nu=0.1)
        classifier = svm(traindata)
        classifier.name = "SVM"
    elif learner == "tree":
        classifier = orngTree.TreeLearner(traindata)
        classifier.name = "tree"
    elif learner == "forest":
        classifier = Orange.ensemble.forest.RandomForestLearner(traindata)
        classifier.name = "random_forest"
    else:
        raise ValueError("unknown learner: %r" % (learner_name,))
    return classifier


def _classify_directory(dir_name, tag, classifier, sample_rate):
    """Write features and a classification for every call in *dir_name*.

    Results go to ``<dir_name>/<dir_name>.tab``, which is truncated first.
    """
    tab_path = os.path.join(dir_name, dir_name + '.tab')
    data = open(tab_path, 'w+')
    try:
        calls = glob(os.path.join(dir_name, "*" + tag + "*.[wW][aA][vV]"))
        # Index of the row being classified.  Counted separately because
        # skipped files add no row, so calls.index(c) would point too far.
        row = 0
        for call in calls:
            # Skip small files
            if os.path.getsize(call) < 100:
                print("Skipping %s (not enough data)" % call)
                continue
            # Write feature data
            write_features(call, sample_rate, data)
            data.close()  # Ensures that data is saved
            # Re-read the table so the new row can be classified, then
            # append the filename and classification to that row.
            data = open(tab_path, 'a+')
            datatable = orange.ExampleTable(tab_path)
            classification = classifier(datatable[row])
            row += 1
            data.write(str(os.path.basename(call)) + '\t')
            data.write(str(classification))
            data.write('\n')
    finally:
        data.close()


def _summarize_directory(dir_name, tag, summary, totals, log):
    """Append one summary row per classified data file in *dir_name*.

    The per-class counts are also added to *totals* in place.  When *log*
    is not None the time and duration of the matching log event are
    appended to the row.
    """
    for tab_file in glob(os.path.join(dir_name, "*" + tag + ".tab")):
        if os.path.getsize(tab_file) == 0:
            continue
        file_name = os.path.splitext(os.path.basename(tab_file))[0]
        summary.write(file_name + '\t')
        callsdata = orange.ExampleTable(os.path.join("./", tab_file))
        # Vector of class counts
        counts = [0] * len(callsdata.domain.classVar.values)
        for example in callsdata:
            counts[int(example.getclass())] += 1
        # Write counts
        for i, count in enumerate(counts):
            summary.write(str(count) + "\t")
            totals[i] += count
        # Write log data
        if log is not None:
            tmp = str(os.path.basename(dir_name)).lower()
            entry = tmp[0:tmp.find("_")] + ".wav"
            summary.write(str(log.getevent(entry).time) + "\t")
            summary.write(log.getevent(entry).duration + "\t")
        summary.write('\n')


def _classify_trial(opts, parser):
    """Handle '--classify': classify all segmented calls of the trial.

    Writes one data file per call directory and a summary table next to
    ``opts.data``.  Exits through ``parser.error`` or ``sys.exit`` on
    invalid input.
    """
    # TODO: Should not be able to classify if no data files in folder
    if opts.learner.lower() not in _LEARNER_NAMES:
        parser.error("Unknown learner %r. Choose one of: %s."
                     % (opts.learner, ", ".join(_LEARNER_NAMES)))
    try:
        traindata = orange.ExampleTable(opts.trainData)
    except SystemError:
        print("Training data not found.")
        sys.exit(1)

    # The logger
    log = None
    if opts.log:
        logs = glob(os.path.join(os.getcwd(), "*.[lL][oO][gG]"))
        if len(logs) > 1:
            print("ERROR: Multiple log files.")
            sys.exit(1)
        if logs:
            log = usv.avisoftlog.RecLog(open(logs[0], 'r'))
        else:
            # --log is documented as "parses a log file if it exists".
            print("No log file found. Continuing without log data.")

    # The classifier
    print("Constructing %s classifier "
          "(may take several minutes). . ." % (opts.learner))
    classifier = _train_classifier(opts.learner, traindata)

    # Create data summary file
    tag = _CHANNEL_LAYOUTS[opts.channel][0]
    datasummary_name = os.path.splitext(opts.data)[0] + "_" + tag + ".tab"
    if os.path.exists(datasummary_name):
        print("Data file %r already exists." % (datasummary_name))
        print("Exiting . . .")
        sys.exit(1)
    summary = open(datasummary_name, "a+")
    try:
        class_values = traindata.domain.classVar.values
        # Write metadata
        summary.write("# data = %s\n" % (datasummary_name))
        summary.write("# channel = %d\n" % (opts.channel))
        summary.write("# sample_rate = %s\n" % (opts.sampleRate))
        summary.write("# classifier = %s\n" % (classifier.name))
        # Write header
        summary.write("FILE\t")
        for i in range(len(class_values)):
            summary.write(class_values[i].upper() + "\t")
        if log is not None:
            summary.write("start: " + str(log.start.time) + "\t")
            summary.write("Duration" + "\t")
        summary.write("\n")

        totals = [0] * len(class_values)
        found_dirs = False
        # NOTE(review): directory names are used relative to the current
        # directory, so directories nested deeper than one level are not
        # handled correctly -- confirm the expected trial layout.
        for _root, dirs, _files in os.walk(os.getcwd()):
            # For each file's directory in this trial
            for dir_name in dirs:
                found_dirs = True
                _classify_directory(dir_name, tag, classifier,
                                    opts.sampleRate)
            if not found_dirs:
                parser.error(
                    'No directories in this folder. Did you remember to segment the files?'
                )
            # Write class count data to summary table
            for dir_name in dirs:
                _summarize_directory(dir_name, tag, summary, totals, log)

        # Write totals. Exclude BGNOISE (the first class).
        summary.write("TOTAL" + "\t\t")
        for total in totals[1:]:
            summary.write(str(total) + "\t")
        if log is not None:
            summary.write("end: " + str(log.end.time) + "\t")
        summary.write("\n")
        # Write proportions. Exclude BGNOISE (the first class).
        summary.write("P" + "\t\t")
        call_total = sum(totals[1:])
        for total in totals[1:]:
            if call_total:
                proportion = float(total) / call_total
            else:
                proportion = 0.0
            summary.write("%.4f\t" % (proportion))
        summary.write("\n")
    finally:
        summary.close()
        # Close the log only after its last use (the "end:" entry above).
        if log is not None:
            log.close()

    # Open data file when finished.  An argument list avoids shell quoting
    # problems with unusual file names.
    try:
        subprocess.call(['open', datasummary_name])
    except OSError:
        print("Could not open %r automatically." % (datasummary_name))


def main():
    """Command-line entry point of sqk.

    Parses the options and then runs exactly one mode, checked in this
    order: a single audio file ('--file'), a directory tree ('--path'),
    trial segmentation ('--seg-resamp') or trial classification
    ('--classify').  Invalid option combinations end the program through
    ``parser.error``.
    """
    parser = _build_option_parser()
    (opts, args) = parser.parse_args()
    if opts.channel not in _CHANNEL_LAYOUTS:
        parser.error("'--channel' must be 0 (both channels), 1, or 2")
    if opts.channel and not (opts.classify or opts.segment):
        parser.error("'--channel' option requires '--classify' or "
                     "'--seg-resamp' option")
    if opts.log and not opts.classify:
        parser.error("'--log' option requires '--classify' option")

    # Labelled examples go to the training data file unless the user chose
    # a data file explicitly.
    if opts.exampleClass and opts.data == "data.tab":
        opts.data = os.path.join(TRAIN_PATH, 'traindata.tab')

    # The data file is only needed when features are extracted.
    data = None
    if opts.audio or opts.path:
        if not opts.segment:
            print('Opening %r. . .' % (opts.data))
            data = open(opts.data, "a+")
    elif opts.segment:
        print("Resampling and segmenting trial. . .")
    elif opts.classify:
        print("Classifying trial. . .")
    else:
        parser.error('No input file or path specified.')

    try:
        if opts.audio:
            _process_audio_file(opts, data)
        elif opts.path:
            _process_path(opts, data)
        elif opts.segment:
            _segment_trial(opts)
        elif opts.classify:
            _classify_trial(opts, parser)
    finally:
        if data is not None:
            data.close()
    print("Success!")
# Description: Demonstrates the use of classification scores
# Category:    evaluation
# Uses:        voting.tab
# Referenced:  orngStat.htm

import orange, orngTest, orngTree

# Learners to compare; each gets a short name that is printed in the
# results table below.
learners = [orange.BayesLearner(name = "bayes"),
            orngTree.TreeLearner(name="tree"),
            orange.MajorityLearner(name="majrty")]

# Cross-validate all learners on the "voting" data set.
voting = orange.ExampleTable("voting")
res = orngTest.crossValidation(learners, voting)

# Same evaluation on the "vehicle" data set (resVeh is not used in this
# part of the script).
vehicle = orange.ExampleTable("vehicle")
resVeh = orngTest.crossValidation(learners, vehicle)

import orngStat

# Score the "voting" results; each result is indexed by learner position
# in the loop below.
CAs = orngStat.CA(res)
APs = orngStat.AP(res)
Briers = orngStat.BrierScore(res)
ISs = orngStat.IS(res)

# Print a blank line, a header and one tab-separated row of scores per
# learner.
print
print "method\tCA\tAP\tBrier\tIS"
for l in range(len(learners)):
    print "%s\t%5.3f\t%5.3f\t%5.3f\t%6.3f" % (learners[l].name, CAs[l], APs[l], Briers[l], ISs[l])


CAs = orngStat.CA(res, reportSE=True)