def test_pickle(self):
    """Both a trained and an untrained BayesLearner must survive pickling."""
    # Trained classifier: predictions must be identical after a round-trip.
    data = orange.ExampleTable("iris")
    trained = orange.BayesLearner(data)
    restored = pickle.loads(pickle.dumps(trained))
    for example in data:
        self.assertEqual(trained(example), restored(example))
    # Untrained learner: the round-trip must at least not raise.
    untrained = orange.BayesLearner()
    restored = pickle.loads(pickle.dumps(untrained))
def applyLearner(self):
    # Validate widget settings, (re)build the naive Bayes learner, and send it
    # downstream.  On invalid settings the learner is set to None and a
    # warning is shown instead.
    self.warning(0)  # clear any previous warning in slot 0
    if float(self.m_estimator.m) < 0:
        self.warning(0, "Parameter m should be positive")
        self.learner = None
    elif float(self.windowProportion) < 0 or float(self.windowProportion) > 1:
        self.warning(0, "Window proportion for LOESS should be between 0.0 and 1.0")
        self.learner = None
    else:
        self.learner = orange.BayesLearner(name=self.name,
                                           adjustThreshold=self.adjustThreshold)
        # Unconditional probability estimator chosen from the combo box.
        self.learner.estimatorConstructor = self.estMethods[self.probEstimation][1]
        if self.condProbEstimation:
            # Conditional estimators: one for discrete, LOESS for continuous.
            self.learner.conditionalEstimatorConstructor = \
                self.condEstMethods[self.condProbEstimation][1]
            self.learner.conditionalEstimatorConstructorContinuous = \
                orange.ConditionalProbabilityEstimatorConstructor_loess(
                    windowProportion=self.windowProportion,
                    nPoints=self.loessPoints)
        if self.preprocessor:
            self.learner = self.preprocessor.wrapLearner(self.learner)
    # NOTE(review): indentation reconstructed — assuming the send/apply happen
    # regardless of validity so that a None learner is propagated; confirm.
    self.send("Learner", self.learner)
    self.applyData()
    self.changed = False
def createInstance(self):
    """Build an orange.BayesLearner configured from the optional attributes
    'estimatorConstructor', 'm', 'conditionalEstimatorConstructor' and
    'conditionalEstimatorConstructorContinuous' set on self.

    Raises AttributeError when 'm' is given together with an
    'estimatorConstructor' that does not accept an 'm' parameter.
    """
    bayes = orange.BayesLearner()
    if hasattr(self, "estimatorConstructor"):
        bayes.estimatorConstructor = self.estimatorConstructor
        if hasattr(self, "m"):
            # BUGFIX: the check was inverted — it raised exactly when the
            # supplied constructor DOES accept 'm'.  Raise only when it does
            # not, otherwise forward 'm' to the constructor.
            if not hasattr(bayes.estimatorConstructor, "m"):
                raise AttributeError(
                    "invalid combination of attributes: 'estimatorConstructor' does not expect 'm'"
                )
            else:
                self.estimatorConstructor.m = self.m
    elif hasattr(self, "m"):
        # Only 'm' given: use the standard m-estimator.
        bayes.estimatorConstructor = orange.ProbabilityEstimatorConstructor_m(
            m=self.m)
    if hasattr(self, "conditionalEstimatorConstructor"):
        bayes.conditionalEstimatorConstructor = self.conditionalEstimatorConstructor
    elif bayes.estimatorConstructor:
        # Derive the conditional estimator from the unconditional one.
        bayes.conditionalEstimatorConstructor = \
            orange.ConditionalProbabilityEstimatorConstructor_ByRows()
        bayes.conditionalEstimatorConstructor.estimatorConstructor = \
            bayes.estimatorConstructor
    if hasattr(self, "conditionalEstimatorConstructorContinuous"):
        bayes.conditionalEstimatorConstructorContinuous = \
            self.conditionalEstimatorConstructorContinuous
    return bayes
def __call__(self, data, weight=None):
    """Entropy-discretize continuous attributes, then fit naive Bayes on the
    discretized data and return it wrapped in a Classifier."""
    discretized = orange.Preprocessor_discretize(
        data, method=orange.EntropyDiscretization())
    bayes_model = orange.BayesLearner(discretized, weight, adjustThreshold=0)
    return Classifier(classifier=bayes_model)
def test_learning_cont(self):
    """Naive Bayes on iris (continuous attributes) should get >125/150 right."""
    data = orange.ExampleTable("iris")
    classifier = orange.BayesLearner()(data)
    hits = 0
    for example in data:
        hits += classifier(example) == example.getclass()
    self.assertGreater(hits, 125)
def test_Learner(self):
    """BayesLearner.name defaults to 'bayes' and accepts any reassignment."""
    learner = orange.BayesLearner()
    self.assertEqual(learner.name, "bayes")
    for new_name in ("foo", "BayesLearner", "x.BayesLearner", ""):
        learner.name = new_name
        self.assertEqual(learner.name, new_name)
def test_named_const(self):
    """The named constant loess_distribution_method must survive pickling."""
    DistributionMethod = orange.BayesLearner.DistributionMethod
    learner = orange.BayesLearner()
    # Default value round-trips.
    self.assertEqual(learner.loess_distribution_method, DistributionMethod.Fixed)
    clone = pickle.loads(pickle.dumps(learner))
    self.assertEqual(clone.loess_distribution_method, DistributionMethod.Fixed)
    # A changed value round-trips too.
    learner.loess_distribution_method = DistributionMethod.Uniform
    clone = pickle.loads(pickle.dumps(learner))
    self.assertEqual(clone.loess_distribution_method, DistributionMethod.Uniform)
def classify():
    """Load the audio train/test tables and build named bayes/tree classifiers.

    Returns (classifiers, train_data, test_data).
    """
    import orange, orngTree
    test_data = orange.ExampleTable('data/audioTest.tab')
    train_data = orange.ExampleTable('data/audioTrain.tab')
    bayes = orange.BayesLearner(train_data)
    bayes.name = "bayes"
    tree = orngTree.TreeLearner(train_data)
    tree.name = "tree"
    return [bayes, tree], train_data, test_data
def __call__(self, data, weight=0):
    # Constructive-induction learner: repeatedly induces new features from
    # attribute subsets, replaces the originals with the best induced feature,
    # and finally builds a lookup classifier from the transformed data.
    import orngLookup
    if self.alternativeMeasure:
        raise SystemError, "alternativeMeasure not implemented yet"
    keepDuplicates = getattr(self, "keepDuplicates", 0)
    data = orange.ExampleTable(data)  # private copy; original is not mutated
    if not weight:
        # This is here for backward compatibility
        if hasattr(self, "weight"):
            weight = self.weight
        else:
            # No weight meta-attribute yet: register a fresh one.
            weight = orange.newmetaid()
            data.addMetaAttribute(weight)
    if self.redundancyRemover:
        data = self.redundancyRemover(data, weight)
    if not keepDuplicates:
        data.removeDuplicates(weight)
    induced = 0
    featureGenerator = FeatureGenerator(
        featureInducer=self.featureInducer,
        subsetsGenerator=self.subsetsGenerator)
    while (1):
        newFeatures = featureGenerator(data, weight)
        if not newFeatures or not len(newFeatures):
            break  # nothing more to induce
        # Keep the highest-scoring candidate feature.
        best = orngMisc.selectBest(newFeatures, orngMisc.compare2_lastBigger)[0]
        if len(best.getValueFrom.boundset()) == len(data.domain.attributes):
            # The candidate is bound to ALL remaining attributes — stop.
            break
        induced += 1
        best.name = "c%d" % induced
        data = replaceWithInduced(best, data)
        if not keepDuplicates:
            data.removeDuplicates(weight)
    if self.learnerForUnknown:
        learnerForUnknown = self.learnerForUnknown
    else:
        # Fall back to naive Bayes for examples not covered by the lookup.
        learnerForUnknown = orange.BayesLearner()
    return orngLookup.lookupFromExamples(data, weight, learnerForUnknown)
def __call__(self, examples, weight=0, fulldata=0):
    """Train a BasicBayes classifier on a binary, all-discrete data set.

    When `fulldata` is given, the domain translation is derived from it
    instead of from `examples`.  Raises ValueError on unsupported domains.
    """
    # BUGFIX: the original raised plain strings ("raise '...'"), which is
    # itself a TypeError on Python >= 2.6; raise real exceptions instead.
    if not (examples.domain.classVar.varType == 1
            and len(examples.domain.classVar.values) == 2):
        raise ValueError("BasicBayes learner only works with binary discrete class.")
    for attr in examples.domain.attributes:
        if not attr.varType == 1:
            raise ValueError("BasicBayes learner does not work with continuous attributes.")
    translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                             self.translation_mode_c)
    if fulldata != 0:
        translate.analyse(fulldata, weight)
    else:
        translate.analyse(examples, weight)
    translate.prepareLR()
    # Derive (beta, coeffs) from a fitted naive Bayes model.
    (beta, coeffs) = self._process(orange.BayesLearner(examples), examples)
    return BasicBayesClassifier(beta, coeffs, translate)
def bayes_classifier(sequence):
    """Classify *sequence* with a naive Bayes model trained on data3_v5.tab.

    Returns [predicted_class_string, class_probability_distribution].
    """
    # Build a one-example table by prepending the shared feature header.
    with open('header.tab', 'r') as f:
        feature_header = f.read()
    # BUGFIX: file handles were never closed; use context managers.
    with open('new_seq.tab', 'w') as f:
        f.write(feature_header)
        f.write('unknow\t')  # placeholder class value for the unlabeled example
        for feature in bayes_pre(sequence):
            f.write(str(feature))
            f.write('\t')
    data = orange.ExampleTable("data3_v5.tab")
    classifier = orange.BayesLearner()(data)
    new_data = orange.ExampleTable("new_seq.tab")
    example = new_data[0]
    # BUGFIX: the original returned the constant orange.GetProbabilities
    # itself instead of calling the classifier with it to get the class
    # probability distribution.
    return [str(classifier(example)),
            classifier(example, orange.GetProbabilities)]
def cross_validation(): k = 10 d = list(data) nps = len(d) / 10 acs = [] for i in range(k): random.shuffle(d) subsamples = list(split(d, nps)) accurancy = 0 for j in range(k): validation = subsamples[j] training = [] for s in subsamples: if s == validation: continue training += s l = orange.BayesLearner(training) thisa = 0 for ex in validation: if ex.getclass() == l(ex): thisa += 1 accurancy += float(thisa) / len(validation) acs.append(accurancy) print sum(acs) / float(k) / float(k)
def train_classifier(data, type, filter): if type == "tree" or type == "c4.5" or type == "decision_tree": learner = orange.C45Learner() elif type == "bayes" or type == "naive" or type == "naive_bayes": learner = orange.BayesLearner() elif type == "svm" or type == "linear_svm": learner = Orange.classification.svm.LinearSVMLearner() #elif type == "logreg" or type == "regression": # learner = Orange.classification.logreg.LogRegLearner() else: print "Invalid Learner Type\n" exit() if filter == 0: classifier = learner(data) else: filtered_learner = Orange.feature.selection.FilteredLearner( learner, filter=Orange.feature.selection.FilterBestN(n=filter), name='filtered') classifier = filtered_learner(data) return classifier
def __call__(self, data, weight=None):
    """Score attributes, keep the best self.N, and fit naive Bayes on the
    reduced data; return the result wrapped in BayesFSS_Classifier."""
    scores = orngFSS.attMeasure(data)
    reduced = orngFSS.selectBestNAtts(data, scores, self.N)
    return BayesFSS_Classifier(classifier=orange.BayesLearner(reduced),
                               N=self.N, name=self.name)
def __init__(self, discr=None, learnr=None):
    """Store the discretizer and base learner to use.

    BUGFIX: the defaults used to be `orange.EntropyDiscretization()` and
    `orange.BayesLearner()` evaluated once at function-definition time, so
    every instance silently shared the same two objects.  Create fresh
    defaults per instance instead.
    """
    self.disc = discr if discr is not None else orange.EntropyDiscretization()
    self.learner = learnr if learnr is not None else orange.BayesLearner()
# Demo continued: run the current imputer over the whole table and print a
# few original rows next to their imputed counterparts.
print
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print

print "\n*** BAYES and AVERAGE IMPUTATION ***\n"
# Model-based imputation: naive Bayes fills missing discrete values, a
# majority/average model fills missing continuous values.
imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = orange.MajorityLearner()
imputer.learnerDiscrete = orange.BayesLearner()
imputer = imputer(data)
print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print
def bayes(input_dict):
    """Workflow widget entry point: return a dict holding a named Orange
    naive Bayes learner under the key 'bayesout'."""
    import orange
    learner = orange.BayesLearner(name="Naive Bayes (Orange)",
                                  hovername="DOlgo ime bajesa")
    return {'bayesout': learner}
# Compare plain naive Bayes against threshold-adjusted variants on bupa:
# one learner with an automatically tuned decision threshold and one with
# the threshold fixed at 0.8.
import orange, orngWrap, orngTest, orngStat

data = orange.ExampleTable("bupa")
learner = orange.BayesLearner()
thresh = orngWrap.ThresholdLearner(learner=learner)
thresh80 = orngWrap.ThresholdLearner_fixed(learner=learner, threshold=.8)
res = orngTest.crossValidation([learner, thresh, thresh80], data)
CAs = orngStat.CA(res)

print "W/out threshold adjustement: %5.3f" % CAs[0]
print "With adjusted thredhold: %5.3f" % CAs[1]
print "With threshold at 0.80: %5.3f" % CAs[2]
        # (tail of the method-validity check) 'Boosting' (method index 0)
        # only works with a discrete class; everything else is accepted.
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.method == 0 and not \
                    isinstance(class_var, orange.EnumVariable):
                self.warning(0, "Cannot use 'Boosting' on non discrete class")
                return False
        return True

    def commit(self):
        # Rebuild the wrapped ensemble learner and, when data is present and
        # the method is applicable, the trained classifier; send both.
        wrapped = None
        classifier = None
        if self.learner:
            wrapped = self.METHODS[self.method][1](self.learner, t=self.t)
        self.send("Learner", wrapped)
        if self.data and wrapped and self.checkMethod():
            classifier = wrapped(self.data)
        self.send("Classifier", classifier)


# Standalone smoke test of the widget.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWEnsemble()
    w.setLearner(orange.BayesLearner())
    w.setData(orange.ExampleTable("../../doc/datasets/iris"))
    w.show()
    app.exec_()
def __call__(self, examples, weight=0):
    # Fit a logistic regression model and, in addition, estimate per-attribute
    # a-priori corrections ("betas_ap") by comparing the logistic intercept
    # against the naive Bayes class prior.
    # Returns (orig_model, betas_ap).

    # next function changes data set to a extended with unknown values
    def createLogRegExampleTable(data, weightID):
        setsOfData = []
        for at in data.domain.attributes:
            # for each attribute build a new example table newData and add a
            # new continuous indicator attribute to it
            if at.varType == orange.VarTypes.Continuous:
                atDisc = orange.FloatVariable(at.name + "Disc")
                newDomain = orange.Domain(data.domain.attributes +
                                          [atDisc, data.domain.classVar])
                newDomain.addmetas(data.domain.getmetas())
                newData = orange.ExampleTable(newDomain, data)
                altData = orange.ExampleTable(newDomain, data)
                # Original rows keep full weight; "unknown" copies get a
                # vanishingly small weight.
                for i, d in enumerate(newData):
                    d[atDisc] = 0
                    d[weightID] = 1 * data[i][weightID]
                for i, d in enumerate(altData):
                    d[atDisc] = 1
                    d[at] = 0
                    d[weightID] = 0.000001 * data[i][weightID]
            elif at.varType == orange.VarTypes.Discrete:
                # add an extra value named "<attribute>X" to attribute "at"
                atNew = orange.EnumVariable(at.name,
                                            values=at.values + [at.name + "X"])
                newDomain = orange.Domain(
                    filter(lambda x: x != at, data.domain.attributes) +
                    [atNew, data.domain.classVar])
                newDomain.addmetas(data.domain.getmetas())
                newData = orange.ExampleTable(newDomain, data)
                altData = orange.ExampleTable(newDomain, data)
                for i, d in enumerate(newData):
                    d[atNew] = data[i][at]
                    d[weightID] = 1 * data[i][weightID]
                for i, d in enumerate(altData):
                    d[atNew] = at.name + "X"
                    d[weightID] = 0.000001 * data[i][weightID]
            newData.extend(altData)
            setsOfData.append(newData)
        return setsOfData

    learner = LogRegLearner(imputer=orange.ImputerConstructor_average(),
                            removeSingular=self.removeSingular)
    # get Original Model
    orig_model = learner(examples, weight)
    if orig_model.fit_status:
        print "Warning: model did not converge"

    # get extended Model (you should not change data)
    if weight == 0:
        weight = orange.newmetaid()
        examples.addMetaAttribute(weight, 1.0)
    extended_set_of_examples = createLogRegExampleTable(examples, weight)
    extended_models = [learner(extended_examples, weight)
                       for extended_examples in extended_set_of_examples]

    # Sum the betas of the appended indicator attributes (the last attribute
    # of each extended model's continuized domain).
    beta = 0
    betas_ap = []
    for m in extended_models:
        beta_add = m.beta[m.continuizedDomain.attributes[-1]]
        betas_ap.append(beta_add)
        beta = beta + beta_add

    # substract it from intercept
    logistic_prior = orig_model.beta[0] + beta

    # compare it to bayes prior
    bayes = orange.BayesLearner(examples)
    bayes_prior = math.log(bayes.distribution[1] / bayes.distribution[0])

    # error normalization - to avoid errors due to assumption of independence
    # of unknown values: scale positive and negative corrections separately so
    # the total matches the bayes/logistic prior difference.
    dif = bayes_prior - logistic_prior
    positives = sum(filter(lambda x: x >= 0, betas_ap))
    negatives = -sum(filter(lambda x: x < 0, betas_ap))
    if not negatives == 0:
        kPN = positives / negatives
        diffNegatives = dif / (1 + kPN)
        diffPositives = kPN * diffNegatives
        kNegatives = (negatives - diffNegatives) / negatives
        kPositives = positives / (positives - diffPositives)
        for i, b in enumerate(betas_ap):
            if b < 0:
                betas_ap[i] *= kNegatives
            else:
                betas_ap[i] *= kPositives

    # return the original model and the matching a-priori corrections
    return (orig_model, betas_ap)
# Description: Demonstrates the use of discretization # Category: discretization # Classes: entropyDiscretization, DiscretizedLearner # Uses: iris.tab import orange import orngDisc data = orange.ExampleTable("iris.tab") disc_data = orngDisc.entropyDiscretization(data) disc_learner = orngDisc.DiscretizedLearner(orange.BayesLearner(), name="disc-bayes") learner = orange.BayesLearner(name="bayes") learners = [learner, disc_learner] import orngTest, orngStat results = orngTest.crossValidation(learners, data) print "Classification Accuracy:" for i in range(len(learners)): print("%15s: %5.3f") % (learners[i].name, orngStat.CA(results)[i])
# Description: Demostration of use of cross-validation as provided in orngEval module # Category: evaluation # Uses: voting.tab # Classes: orngTest.crossValidation # Referenced: c_performance.htm import orange, orngTest, orngStat, orngTree # set up the learners bayes = orange.BayesLearner() tree = orngTree.TreeLearner(mForPruning=2) bayes.name = "bayes" tree.name = "tree" learners = [bayes, tree] # compute accuracies on data data = orange.ExampleTable("voting") results = orngTest.crossValidation(learners, data, folds=10) # output the results print "Learner CA IS Brier AUC" for i in range(len(learners)): print "%-8s %5.3f %5.3f %5.3f %5.3f" % (learners[i].name, \ orngStat.CA(results)[i], orngStat.IS(results)[i], orngStat.BrierScore(results)[i], orngStat.AUC(results)[i])
import orange, orngTest, orngStat
import random

data = orange.ExampleTable("voting")

# Learners compared throughout the demo.
bayes = orange.BayesLearner(name="bayes")
tree = orange.TreeLearner(name="tree")
majority = orange.MajorityLearner(name="default")
learners = [bayes, tree, majority]
names = [x.name for x in learners]


def printResults(res):
    # Print classification accuracy with a 95% confidence half-width per learner.
    CAs = orngStat.CA(res, reportSE=1)
    for i in range(len(names)):
        print "%s: %5.3f+-%5.3f " % (names[i], CAs[i][0], 1.96 * CAs[i][1]),
    print


# Without an explicit random generator, proportionTest is deterministic.
print "\nproportionsTest that will always give the same results"
for i in range(3):
    res = orngTest.proportionTest(learners, data, 0.7)
    printResults(res)

# A shared generator gives varying splits, but reproducibly so across runs.
print "\nproportionsTest that will give different results, but the same each time the script is run"
myRandom = orange.RandomGenerator()
for i in range(3):
    res = orngTest.proportionTest(learners, data, 0.7, randomGenerator=myRandom)
else: return learner def __init__(self, name='Naive Bayes with FSS', N=5): self.name = name self.N = 5 def __call__(self, data, weight=None): ma = orngFSS.attMeasure(data) filtered = orngFSS.selectBestNAtts(data, ma, self.N) model = orange.BayesLearner(filtered) return BayesFSS_Classifier(classifier=model, N=self.N, name=self.name) class BayesFSS_Classifier: def __init__(self, **kwds): self.__dict__.update(kwds) def __call__(self, example, resultType = orange.GetValue): return self.classifier(example, resultType) # test above wraper on a data set import orngStat, orngTest data = orange.ExampleTable("voting") learners = (orange.BayesLearner(name='Naive Bayes'), BayesFSS(name="with FSS")) results = orngTest.crossValidation(learners, data) # output the results print "Learner CA" for i in range(len(learners)): print "%-12s %5.3f" % (learners[i].name, orngStat.CA(results)[i])
def learner(self, data):
    """Train naive Bayes on *data* and return the resulting classifier."""
    classifier = orange.BayesLearner(data)
    return classifier
import matplotlib

# Scatter-plot the three classes of points.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(X11, X12, s=10, c='b', marker="+")
ax1.scatter(X21, X22, s=10, c='c', marker="o")
ax1.scatter(X31, X32, s=10, c='y', marker="x")
plt.title('Plot of Three Classes of Data')
plt.show()

########################
# Build Classifier
########################
import orange, orngTest, orngStat, orngTree

classifier = orange.BayesLearner(data)
bayes = orange.BayesLearner()
bayes.name = "bayes"
learners = [bayes]
results = orngTest.crossValidation(learners, data_test, folds=10)

########################
# Compute the misclassified observations
########################
X, Y = data_test.to_numpy("A/C")
data_scored = []
# Mark each test prediction: 1 when the first predicted class matches the
# actual class.  (Continues beyond this excerpt.)
for i in range(len(results.results)):
    if results.results[i].classes[0] == results.results[i].actual_class:
        data_scored.append(1)
def __call__(self, examples, weight=0):
    # Variant of the getPriors learner that builds ONE combined extended table
    # (instead of one table per attribute) and fits a single extended model.
    # Returns (orig_model, betas_ap).

    # next function changes data set to a extended with unknown values
    def createLogRegExampleTable(data, weightID):
        finalData = orange.ExampleTable(data)
        origData = orange.ExampleTable(data)
        for at in data.domain.attributes:
            # for each attribute extend the domain with an indicator for
            # "value unknown" and append heavily weighted unknown copies
            if at.varType == orange.VarTypes.Continuous:
                atDisc = orange.FloatVariable(at.name + "Disc")
                newDomain = orange.Domain(origData.domain.attributes +
                                          [atDisc, data.domain.classVar])
                # NOTE(review): 'newData' is read here before it is assigned in
                # the first continuous-attribute iteration — likely should be
                # origData.domain.getmetas(); confirm against upstream orngLR.
                newDomain.addmetas(newData.domain.getmetas())
                finalData = orange.ExampleTable(newDomain, finalData)
                newData = orange.ExampleTable(newDomain, origData)
                origData = orange.ExampleTable(newDomain, origData)
                for d in origData:
                    d[atDisc] = 0
                for d in finalData:
                    d[atDisc] = 0
                for i, d in enumerate(newData):
                    d[atDisc] = 1
                    d[at] = 0
                    d[weightID] = 100 * data[i][weightID]
            elif at.varType == orange.VarTypes.Discrete:
                # add an extra value "<name>X" (meaning "unknown") to "at"
                atNew = orange.EnumVariable(at.name,
                                            values=at.values + [at.name + "X"])
                newDomain = orange.Domain(
                    filter(lambda x: x != at, origData.domain.attributes) +
                    [atNew, origData.domain.classVar])
                newDomain.addmetas(origData.domain.getmetas())
                temp_finalData = orange.ExampleTable(finalData)
                finalData = orange.ExampleTable(newDomain, finalData)
                newData = orange.ExampleTable(newDomain, origData)
                temp_origData = orange.ExampleTable(origData)
                origData = orange.ExampleTable(newDomain, origData)
                for i, d in enumerate(origData):
                    d[atNew] = temp_origData[i][at]
                for i, d in enumerate(finalData):
                    d[atNew] = temp_finalData[i][at]
                for i, d in enumerate(newData):
                    d[atNew] = at.name + "X"
                    d[weightID] = 10 * data[i][weightID]
            finalData.extend(newData)
        return finalData

    learner = LogRegLearner(imputer=orange.ImputerConstructor_average(),
                            removeSingular=self.removeSingular)
    # get Original Model
    orig_model = learner(examples, weight)

    # get extended Model (you should not change data)
    if weight == 0:
        weight = orange.newmetaid()
        examples.addMetaAttribute(weight, 1.0)
    extended_examples = createLogRegExampleTable(examples, weight)
    extended_model = learner(extended_examples, weight)

    # get sum of all betas
    beta = 0
    betas_ap = []
    # NOTE(review): 'extended_models' is never defined in this variant (only
    # 'extended_model' above), so this loop raises NameError as written —
    # confirm whether it should read 'extended_model' directly.
    for m in extended_models:
        beta_add = m.beta[m.continuizedDomain.attributes[-1]]
        betas_ap.append(beta_add)
        beta = beta + beta_add

    # substract it from intercept
    logistic_prior = orig_model.beta[0] + beta

    # compare it to bayes prior
    bayes = orange.BayesLearner(examples)
    bayes_prior = math.log(bayes.distribution[1] / bayes.distribution[0])

    # normalize errors: scale all corrections by a single factor so that the
    # corrected intercept matches the bayes prior.
    k = (bayes_prior - orig_model.beta[0]) / \
        (logistic_prior - orig_model.beta[0])
    betas_ap = [k * x for x in betas_ap]

    # return the original model and the matching a-priori corrections
    return (orig_model, betas_ap)
        # (tail of the layout routine) shift the item above the header+graph
        # area, then repaint the nomogram.
        it.moveBy(0, -header.height() - graph.height())
        self.showNomogram()

    # Callbacks
    def showNomogram(self):
        # Repaint only when both a nomogram widget and a classifier exist.
        if self.bnomogram and self.cl:
            #self.bnomogram.hide()
            self.bnomogram.show()
            self.bnomogram.update()


# test widget appearance
if __name__ == "__main__":
    import orngLR, orngSVM
    a = QApplication(sys.argv)
    ow = OWNomogram()
    a.setMainWidget(ow)
    data = orange.ExampleTable("../../doc/datasets/heart_disease.tab")
    bayes = orange.BayesLearner(data)
    bayes.setattr("data", data)
    ow.classifier(bayes)
    # here you can test setting some stuff
    a.exec_()

    # save settings
    ow.saveSettings()
def main():
    # Command-line entry point of the sqk tool: parse options, then extract
    # audio features, resample/segment recordings, and/or classify a trial
    # directory with the chosen learner.
    # NOTE(review): the original indentation was lost; block structure below
    # is reconstructed from the control-flow text — verify against upstream.
    version = "%prog version 0.1"
    usage = "usage: %prog [options] [input] [options [classification]]"
    desc = "QUICK START: To extract data from a trial, 'cd' to the trial's directory and type: 'sqk --classify'. To extract data from one channel of the trial (ch 1 in this case), type: 'sqk --classify --channel=1'."
    # Parse command line options.
    parser = optparse.OptionParser(usage, version=version, description=desc)
    parser.add_option("-C", "--classify", dest="classify", action="store_true",
                      default=False,
                      help="Classify the trial. IMPORTANT: Trial folder must be the current directory.")
    parser.add_option("-m", "--channel", metavar="<CH>", dest="channel",
                      action="store", type="int", default=0,
                      help="Specify which channel to extract data from. Default (%default) extracts data from both channels. Must choose 0 (both channels), 1, or 2.")
    parser.add_option("-l", "--log", dest="log", action="store_true",
                      default=False,
                      help="Parses a log file if it exists and adds time and duration information to the data file.")
    parser.add_option("-T", "--traindata", metavar="<DATA_FILE>",
                      dest="trainData", action="store",
                      default=os.path.join(TRAIN_PATH, 'traindata'),
                      help="Specify training data set. Default is %default")
    parser.add_option("-L", "--learner", metavar="<TYPE>", dest="learner",
                      action="store", default="svm",
                      help="Specify the classifier algorithm. Options include: 'bayes' (Naive Bayes), 'knn' (k-Nearest Neighbor), 'svm' (SVM), 'forest' (random forest). Default is %default.")
    parser.add_option("-f", "--file", metavar="<AUDIO_FILE>", dest="audio",
                      action="store",
                      help="Extract features and classify audio file (wav)")
    parser.add_option("-p", "--path", metavar="<PATH>", dest="path",
                      action="store",
                      help="Extract features and classify all files in a directory. To extract from current directory: 'usv.py -p .' ")
    parser.add_option("-r", "--rate", metavar="<SAMPLE_RATE>",
                      dest="sampleRate", action="store", default="11025",
                      help="Specify the sample rate of input files. Default is %default (Hz).")
    parser.add_option("-t", "--train", metavar="<CLASS>", dest="exampleClass",
                      action="store", type='string',
                      help="Label the training example(s).")
    parser.add_option("-d", "--data", metavar="<DATA_FILE>", dest="data",
                      action="store", default="data.tab",
                      help="Write to data file (.tab format). Default is '%default' or 'traindata.tab' for training data.")
    parser.add_option("-S", "--seg-resamp", dest="segment",
                      action="store_true", default=False,
                      help="Resample to 11025 Hz and split into multiple files based on silence. IMPORTANT: Trial folder must be the current directory.")
    (opts, args) = parser.parse_args()

    # Option sanity checks.
    if opts.channel and not (opts.classify or opts.segment):
        parser.error("'--channel' option requires '--classify' option'")
    if opts.log and not opts.classify:
        parser.error("'--log' option requires '--classify' option'")

    # Open train data file or create it if it doesn't exist.
    if opts.exampleClass and opts.data == "data.tab":
        opts.data = os.path.join(TRAIN_PATH, 'traindata.tab')
    if opts.audio or opts.path:
        if not opts.segment:
            print 'Opening %r. . .' % (opts.data)
            data = open(opts.data, "a+")
        elif opts.segment:
            print "Resampling and segmenting trial. . ."
    elif opts.classify:
        print "Classifying trial. . ."
    else:
        parser.error('No input file or path specified.')

    # If user specifies an audio file (-f AUDIO_FILE)
    if opts.audio:
        file_name, ext = os.path.splitext(opts.audio)
        # Add MFCC 1-12 to data.
        if not opts.segment:
            write_features(opts.audio, opts.sampleRate, data)
        # If classification is specified, write to data.
        if opts.exampleClass:
            data.write(opts.exampleClass.lower() + "\n")
            print "Classified %r as %r." % (opts.audio,
                                            opts.exampleClass.lower())
        # Else if user chooses to segment file (-S)
        elif opts.segment:
            print "Resampling and segmenting %s. . ." % (opts.audio)
            if opts.channel == 0:
                run_commands(
                    seg_resamp(opts.audio, int(opts.sampleRate),
                               outfile=file_name + '_call.wav',
                               directory=file_name + "_ch1_2",
                               ch1=True, ch2=True))
            elif opts.channel == 1:
                run_commands(
                    seg_resamp(opts.audio, int(opts.sampleRate),
                               outfile=file_name + '_ch1_.wav',
                               directory=file_name + "_ch1",
                               ch1=True, ch2=False))
            elif opts.channel == 2:
                run_commands(
                    seg_resamp(opts.audio, int(opts.sampleRate),
                               outfile=file_name + '_ch2_.wav',
                               directory=file_name + "_ch2",
                               ch1=False, ch2=True))
            print "Wrote to './%s'." % (file_name + "_calls")
        else:
            print "Invalid data for %r. Skipping. . ." % opts.audio
            data.write('\n')

    # Else if user specifies path (-p PATH)
    elif opts.path:
        # Read all wav files in specified path
        try:
            for root, dirs, files in os.walk(opts.path):
                for basename in files:
                    if fnmatch.fnmatch(basename, "*.[wW][aA][vV]"):
                        audiofile = os.path.join(root, basename)
                        # Skip small files
                        if os.path.getsize(audiofile) < 100:
                            continue
                        file_name, ext = os.path.splitext(audiofile)
                        # Add MFCC 1-12 to data.
                        if not opts.segment:
                            write_features(audiofile, opts.sampleRate, data)
                            # Write filename
                            data.write(str(os.path.basename(audiofile)) + "\t")
                        # If classification is specified, write to file.
                        if opts.exampleClass:
                            data.write(opts.exampleClass.lower() + "\n")
                            print "Classified %r as %r." % (
                                audiofile, opts.exampleClass.lower())
                        # If user specifies resample and segment
                        elif opts.segment:
                            print "Resampling and segmenting %r. . ." % (
                                audiofile)
                            if opts.channel == 0:
                                run_commands(
                                    seg_resamp(
                                        audiofile, int(opts.sampleRate),
                                        outfile=os.path.basename(file_name) +
                                        '_call.wav',
                                        directory=os.path.basename(file_name) +
                                        "_ch1_2",
                                        ch1=True, ch2=True))
                            elif opts.channel == 1:
                                run_commands(
                                    seg_resamp(
                                        audiofile, int(opts.sampleRate),
                                        outfile=os.path.basename(file_name) +
                                        '_ch1_.wav',
                                        directory=os.path.basename(file_name) +
                                        "_ch1",
                                        ch1=True, ch2=False))
                            elif opts.channel == 2:
                                run_commands(
                                    seg_resamp(
                                        audiofile, int(opts.sampleRate),
                                        outfile=os.path.basename(file_name) +
                                        '_ch2_.wav',
                                        directory=os.path.basename(file_name) +
                                        "_ch2",
                                        ch1=False, ch2=True))
                        else:
                            data.write('\n')
        except (FloatingPointError, IOError):
            print "An error occurred. Skipping %. . .r" % audiofile

    # Else if user chooses to segment and resample the trial (current dir)
    elif opts.segment:
        for audiofile in glob(os.path.join('./', "*.[wW][aA][vV]")):
            file_name, ext = os.path.splitext(audiofile)
            print "Resampling and segmenting %r. . ." % (file_name)
            if opts.channel == 0:
                run_commands(
                    seg_resamp(audiofile, int(opts.sampleRate),
                               outfile=file_name + '_call.wav',
                               directory=file_name + "_ch1_2",
                               ch1=True, ch2=True))
            elif opts.channel == 1:
                run_commands(
                    seg_resamp(audiofile, int(opts.sampleRate),
                               outfile=file_name + '_ch1_.wav',
                               directory=file_name + "_ch1",
                               ch1=True, ch2=False))
            elif opts.channel == 2:
                run_commands(
                    seg_resamp(audiofile, int(opts.sampleRate),
                               outfile=file_name + '_ch2_.wav',
                               directory=file_name + "_ch2",
                               ch1=False, ch2=True))

    # Else if user chooses to classify the trial
    elif opts.classify:
        # TODO: Should not be able to classify if no data files in folder
        try:
            traindata = orange.ExampleTable(opts.trainData)
        except SystemError:
            print "Training data not found."
            sys.exit(1)
        # The logger
        if opts.log:
            logs = glob(os.path.join(os.getcwd(), "*.[lL][oO][gG]"))
            if len(logs) > 1:
                print "ERROR: Multiple log files."
                sys.exit(1)
            log = usv.avisoftlog.RecLog(open(logs[0], 'r'))
        # The classifier
        print "Constructing %s classifier (may take several minutes). . ." % (opts.learner)
        if opts.learner.lower() == "bayes":
            classifier = orange.BayesLearner(traindata)
            classifier.name = "naive_bayes"
        elif opts.learner.lower() == "knn":
            classifier = Orange.classification.knn.kNNLearner(traindata)
            classifier.name = "kNN"
        elif opts.learner.lower() == "svm":
            svm = SVMLearner(name="SVM", kernel_type=kernels.RBF, C=128,
                             gamma=2, nu=0.1)
            classifier = svm(traindata)
            classifier.name = "SVM"
        elif opts.learner.lower() == "tree":
            classifier = orngTree.TreeLearner(traindata)
            classifier.name = "tree"
        elif opts.learner.lower() == "forest":
            classifier = Orange.ensemble.forest.RandomForestLearner(traindata)
            classifier.name = "random_forest"

        # Create data summary file (name depends on the chosen channel).
        if opts.channel == 0:
            datasummary_name = os.path.splitext(opts.data)[0] + "_ch1_2.tab"
        elif opts.channel == 1:
            datasummary_name = os.path.splitext(opts.data)[0] + "_ch1.tab"
        elif opts.channel == 2:
            datasummary_name = os.path.splitext(opts.data)[0] + "_ch2.tab"
        if os.path.exists(datasummary_name):
            print "Data file %r already exists." % (datasummary_name)
            print "Exiting . . ."
            sys.exit(1)
        else:
            summary = open(datasummary_name, "a+")
        # Write metadata
        summary.write("# data = %s\n" % (datasummary_name))
        summary.write("# channel = %d\n" % (opts.channel))
        summary.write("# sample_rate = %s\n" % (opts.sampleRate))
        summary.write("# classifier = %s\n" % (classifier.name))
        # Write header
        summary.write("FILE\t")
        for i in range(len(traindata.domain.classVar.values)):
            summary.write(traindata.domain.classVar.values[i].upper() + "\t")
        if opts.log:
            summary.write("start: " + str(log.start.time) + "\t")
            summary.write("Duration" + "\t")
        summary.write("\n")
        totals = [0] * len(traindata.domain.classVar.values)
        proportions = [0.0] * len(totals)
        for root, dirs, files in os.walk(os.getcwd()):
            # For each file's directory in this trial
            for dir in dirs:
                data = open(os.path.join(dir, dir + '.tab'), 'w+')
                if opts.channel == 0:
                    calls = glob(os.path.join(dir, "*ch1_2*.[wW][aA][vV]"))
                elif opts.channel == 1:
                    calls = glob(os.path.join(dir, "*ch1*.[wW][aA][vV]"))
                elif opts.channel == 2:
                    calls = glob(os.path.join(dir, "*ch2*.[wW][aA][vV]"))
                # For each call
                for c in calls:
                    # Skip small files
                    if os.path.getsize(c) < 100:
                        print "Skipping %s (not enough data)" % c
                        continue
                    # Write feature data
                    write_features(c, opts.sampleRate, data)
                    data.close()  # Ensures that data is saved
                    # Write filenames and classifications
                    data = open(os.path.join(dir, dir + '.tab'), 'a+')
                    datatable = orange.ExampleTable(
                        os.path.join(dir, dir + '.tab'))
                    classification = classifier(datatable[calls.index(c)])
                    data.write(str(os.path.basename(c)) + '\t')
                    data.write(str(classification))
                    data.write('\n')
            try:
                data.close()
            except UnboundLocalError:
                parser.error(
                    'No directories in this folder. Did you remember to segment the files?'
                )
            # Write class count data to summary table
            for dir in dirs:
                if opts.channel == 0:
                    data_files = glob(os.path.join(dir, "*ch1_2.tab"))
                elif opts.channel == 1:
                    data_files = glob(os.path.join(dir, "*ch1.tab"))
                elif opts.channel == 2:
                    data_files = glob(os.path.join(dir, "*ch2.tab"))
                for c in data_files:
                    if os.path.getsize(c) == 0:
                        continue
                    file_name, ext = os.path.splitext(os.path.basename(c))
                    summary.write(file_name + '\t')
                    callsdata = orange.ExampleTable(os.path.join("./", c))
                    # Vector of class counts
                    counts = [0] * len(callsdata.domain.classVar.values)
                    for e in callsdata:
                        counts[int(e.getclass())] += 1
                    # Write counts
                    for i in range(len(callsdata.domain.classVar.values)):
                        summary.write(str(counts[i]) + "\t")
                        totals[i] += counts[i]
                    # Write log data
                    if opts.log:
                        tmp = str(os.path.basename(dir)).lower()
                        entry = tmp[0:tmp.find("_")] + ".wav"
                        summary.write(str(log.getevent(entry).time) + "\t")
                        summary.write(log.getevent(entry).duration + "\t")
                        log.close()
                    summary.write('\n')
        # Write totals. Exclude BGNOISE.
        summary.write("TOTAL" + "\t\t")
        for i in range(1, len(totals)):
            summary.write(str(totals[i]) + "\t")
        if opts.log:
            summary.write("end: " + str(log.end.time) + "\t")
        summary.write("\n")
        # Write proportions. Exclude BGNOISE.
        summary.write("P" + "\t\t")
        for i in range(1, len(proportions)):
            try:
                proportions[i] = float(
                    totals[i]) / float(sum(totals) - totals[0])
            except ZeroDivisionError:
                proportions[i] = 0.0
            summary.write("%.4f\t" % (proportions[i]))
        summary.write("\n")
        summary.close()
        # Open data file when finished
        subprocess.call('open %s' % (datasummary_name), shell=True)
    else:
        data.write("\n")
    if not opts.segment:
        data.close()
    print "Success!"
# Description: Demostrates the use of classification scores # Category: evaluation # Uses: voting.tab # Referenced: orngStat.htm import orange, orngTest, orngTree learners = [orange.BayesLearner(name = "bayes"), orngTree.TreeLearner(name="tree"), orange.MajorityLearner(name="majrty")] voting = orange.ExampleTable("voting") res = orngTest.crossValidation(learners, voting) vehicle = orange.ExampleTable("vehicle") resVeh = orngTest.crossValidation(learners, vehicle) import orngStat CAs = orngStat.CA(res) APs = orngStat.AP(res) Briers = orngStat.BrierScore(res) ISs = orngStat.IS(res) print print "method\tCA\tAP\tBrier\tIS" for l in range(len(learners)): print "%s\t%5.3f\t%5.3f\t%5.3f\t%6.3f" % (learners[l].name, CAs[l], APs[l], Briers[l], ISs[l]) CAs = orngStat.CA(res, reportSE=True)