def apply(self):
    """Build the SVM learner configured in the widget, fit it on the
    current data when the data is valid, and send the learner, the
    classifier and the support vectors to the output channels.
    """
    kernel_name = ("linear", "poly", "rbf", "sigmoid")[self.kernel_type]
    shared_kwargs = dict(
        kernel=kernel_name,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        tol=self.tol,
        # -1 lets the underlying solver iterate without bound.
        max_iter=self.max_iter if self.limit_iter else -1,
        probability=True,
        preprocessors=self.preprocessors,
    )
    if self.svmtype == 0:
        learner = SVMLearner(C=self.C, **shared_kwargs)
    else:
        learner = NuSVMLearner(nu=self.nu, **shared_kwargs)
    learner.name = self.learner_name

    classifier = None
    sv = None
    if self.data is not None:
        # Clear both error slots before re-validating the input.
        self.error([0, 1])
        if not learner.check_learner_adequacy(self.data.domain):
            self.error(0, learner.learner_adequacy_err_msg)
        elif len(np.unique(self.data.Y)) < 2:
            # A single-class sample cannot be fitted meaningfully.
            self.error(1, "Data contains only one target value.")
        else:
            classifier = learner(self.data)
            classifier.name = self.learner_name
            sv = self.data[classifier.skl_model.support_]

    self.send("Learner", learner)
    self.send("Classifier", classifier)
    self.send("Support vectors", sv)
def apply(self):
    """Build the SVM learner configured in the widget, fit it on the
    current data when the data is valid, and send the learner, the
    classifier and the support vectors to the output channels.
    """
    kernel = ["linear", "poly", "rbf", "sigmoid"][self.kernel_type]
    common_args = dict(
        kernel=kernel, degree=self.degree, gamma=self.gamma,
        coef0=self.coef0, tol=self.tol,
        # -1 lets the underlying solver iterate without bound.
        max_iter=self.max_iter if self.limit_iter else -1,
        probability=True, preprocessors=self.preprocessors
    )
    if self.svmtype == 0:
        learner = SVMLearner(C=self.C, **common_args)
    else:
        learner = NuSVMLearner(nu=self.nu, **common_args)
    learner.name = self.learner_name
    classifier = None
    sv = None
    if self.data is not None:
        # FIX: clear both error slots (slot 1 is used below), not just 0.
        self.error([0, 1])
        if not learner.check_learner_adequacy(self.data.domain):
            self.error(0, learner.learner_adequacy_err_msg)
        # FIX: guard against a single-class sample, consistent with the
        # sibling version of this method elsewhere in the project.
        elif len(np.unique(self.data.Y)) < 2:
            self.error(1, "Data contains only one target value.")
        else:
            classifier = learner(self.data)
            classifier.name = self.learner_name
            sv = self.data[classifier.skl_model.support_]
    self.send("Learner", learner)
    self.send("Classifier", classifier)
    self.send("Support vectors", sv)
class PolySVMTestCase(testing.LearnerTestCase):
    """Learner tests for the polynomial-kernel SVM."""

    LEARNER = SVMLearner(name="svm-poly", kernel_type=SVMLearner.Polynomial)

    @test_on_data
    def test_learner_on(self, dataset):
        # Run the generic learner checks, then the SVM-specific
        # binary-classifier check on the same dataset.
        testing.LearnerTestCase.test_learner_on(self, dataset)
        svm_test_binary_classifier(self, dataset)
class SigmoidSVMTestCase(testing.LearnerTestCase):
    """Learner tests for the sigmoid-kernel SVM."""

    LEARNER = SVMLearner(name="svm-sig", kernel_type=SVMLearner.Sigmoid)

    @test_on_data
    def test_learner_on(self, dataset):
        # Run the generic learner checks, then the SVM-specific
        # binary-classifier check on the same dataset.
        testing.LearnerTestCase.test_learner_on(self, dataset)
        svm_test_binary_classifier(self, dataset)
def __init__(self, **kwds):
    """Initialize the learner wrapper.

    Treatment options ("multinomialTreatment", "continuousTreatment",
    "classTreatment") arrive as strings in *kwds* and are evaluated;
    a missing or unevaluable option falls back to a DomainContinuizer
    default.  All keyword arguments are also forwarded to SVMLearner.
    """
    self.folds = 4
    self.verbose = True
    SVMLearner.__init__(self, **kwds)
    self.learner = SVMLearner(**kwds)
    # NOTE(review): eval() on option strings is unsafe for untrusted
    # input -- consider a lookup table of allowed values instead.
    self.multinomialTreatment = self._eval_option(
        kwds, "multinomialTreatment", DomainContinuizer.NValues)
    self.continuousTreatment = self._eval_option(
        kwds, "continuousTreatment", DomainContinuizer.NormalizeBySpan)
    self.classTreatment = self._eval_option(
        kwds, "classTreatment", DomainContinuizer.Ignore)

@staticmethod
def _eval_option(kwds, key, default):
    """Evaluate the string option kwds[key]; return *default* when the
    key is missing or the value does not evaluate.  (Replaces three
    duplicated bare ``except:`` blocks, which also swallowed
    SystemExit/KeyboardInterrupt.)
    """
    try:
        return eval(kwds[key])
    except Exception:
        return default
def create_learner(self):
    """Return an SVM learner matching the widget's current settings."""
    kernel_name = ("linear", "poly", "rbf", "sigmoid")[self.kernel_type]
    params = dict(
        kernel=kernel_name,
        degree=self.degree,
        # Fall back to the widget default when gamma is unset/falsy.
        gamma=self.gamma or self._default_gamma,
        coef0=self.coef0,
        tol=self.tol,
        # -1 lets the underlying solver iterate without bound.
        max_iter=self.max_iter if self.limit_iter else -1,
        probability=True,
        preprocessors=self.preprocessors,
    )
    if self.svmtype == OWSVMClassification.C_SVC:
        return SVMLearner(C=self.C, **params)
    return NuSVMLearner(nu=self.nu, **params)
class LinearSVMTestCase(testing.LearnerTestCase):
    """Tests for the linear-kernel SVM, including checks that the weights
    from get_linear_svm_weights reproduce the model's decision values
    (classification) and predictions (regression)."""

    LEARNER = SVMLearner(name="svm-lin", kernel_type=SVMLearner.Linear)

    @test_on_data
    def test_learner_on(self, dataset):
        # Generic learner checks plus the SVM binary-classifier check.
        testing.LearnerTestCase.test_learner_on(self, dataset)
        svm_test_binary_classifier(self, dataset)

    # Don't test on "monks" the coefs are really large and
    @test_on_datasets(datasets=["iris", "brown-selected", "lenses", "zoo"])
    def test_linear_classifier_weights_on(self, dataset):
        # Test get_linear_svm_weights
        classifier = self.LEARNER(dataset)
        # Exercise both modes; only the per-pair weights (sum=False)
        # are used in the reconstruction below.
        weights = get_linear_svm_weights(classifier, sum=True)
        weights = get_linear_svm_weights(classifier, sum=False)
        n_class = len(classifier.class_var.values)

        def class_pairs(n_class):
            # One (i, j) pair per one-vs-one binary sub-classifier.
            for i in range(n_class - 1):
                for j in range(i + 1, n_class):
                    yield i, j

        l_map = classifier._get_libsvm_labels_map()
        for inst in dataset[:20]:
            dec_values = classifier.get_decision_values(inst)
            per_pair = zip(dec_values, weights, classifier.rho,
                           class_pairs(n_class))
            for dec_v, weight, rho, pair in per_pair:
                t_inst = Orange.data.Instance(classifier.domain, inst)
                # The reconstructed decision value must agree with the
                # model's own, up to 4 decimal places.
                dec_v1 = example_weighted_sum(t_inst, weight) - rho
                self.assertAlmostEqual(dec_v, dec_v1, 4)

    @test_on_datasets(datasets=testing.REGRESSION_DATASETS)
    def test_linear_regression_weights_on(self, dataset):
        predictor = self.LEARNER(dataset)
        weights = get_linear_svm_weights(predictor)
        for inst in dataset[:20]:
            t_inst = Orange.data.Instance(predictor.domain, inst)
            prediction = predictor(inst)
            # Weighted sum minus the bias term must match the prediction.
            w_sum = example_weighted_sum(t_inst, weights)
            self.assertAlmostEqual(float(prediction),
                                   w_sum - predictor.rho[0], places=4)
# Command-line entry point for the 'sqk' audio-analysis tool (Python 2).
# Parses optparse options, then dispatches on them:
#   -f/--file    : extract features from (and optionally label) one wav file
#   -p/--path    : walk a directory tree and process every wav file in it
#   -S/--seg-resamp : resample to 11025 Hz and split files on silence
#   -C/--classify: train the chosen learner ('bayes'/'knn'/'svm'/'tree'/
#                  'forest') on the training data, classify every call in
#                  each sub-directory of the trial, and write a summary
#                  .tab file with per-class counts, totals and proportions
#                  (BGNOISE, class index 0, is excluded from totals).
# NOTE(review): relies on module-level names (TRAIN_PATH, write_features,
# seg_resamp, run_commands, orange/Orange, usv.avisoftlog, SVMLearner,
# kernels) defined elsewhere in this file/project -- not visible here.
# NOTE(review): several string literals below appear broken across line
# boundaries by the extraction; kept byte-identical rather than guessed at.
def main(): version = "%prog version 0.1" usage = "usage: %prog [options] [input] [options [classification]]" desc = "QUICK START: To extract data from a trial, 'cd' to the \ trial's directory and type: 'sqk --classify'. To extract data \ from one channel of the trial (ch 1 in this case), type: \ 'sqk --classify --channel=1'." # Parse command line options. parser = optparse.OptionParser(usage, version=version, description=desc) parser.add_option("-C", "--classify", dest="classify", action="store_true", default=False, help="Classify the trial. IMPORTANT: Trial folder must " \ "be the current directory.") parser.add_option("-m", "--channel", metavar="<CH>", dest="channel", action="store", type="int", default=0, help="Specify which channel to extract data from. " \ "Default (%default) extracts data from both " \ "channels. Must choose 0 (both channels), 1, or 2.") parser.add_option("-l", "--log", dest="log", action="store_true", default=False, help="Parses a log file if it exists and adds time and" \ " duration information to the data file.") parser.add_option("-T", "--traindata", metavar="<DATA_FILE>", dest="trainData", action="store", default=os.path.join(TRAIN_PATH, 'traindata'), help="Specify training data set. Default is %default") parser.add_option("-L", "--learner", metavar="<TYPE>", dest="learner", action="store", default="svm", help="Specify the classifier algorithm. Options include:" \ " 'bayes' (Naive Bayes), 'knn' (k-Nearest Neighbor)," \ " 'svm' (SVM), 'forest' (random forest). " \ "Default is %default.") parser.add_option("-f", "--file", metavar="<AUDIO_FILE>", dest="audio", action="store", help="Extract features and classify audio file (wav)") parser.add_option("-p", "--path", metavar="<PATH>", dest="path", action="store", help="Extract features and classify all files in a " \ "directory. To extract from current directory: " \ "'usv.py -p .' 
") parser.add_option("-r", "--rate", metavar="<SAMPLE_RATE>", dest="sampleRate", action="store", default="11025", help="Specify the sample rate of input files. Default is " \ "%default (Hz).") parser.add_option("-t", "--train", metavar="<CLASS>", dest="exampleClass", action="store", type='string', help="Label the training example(s).") parser.add_option("-d", "--data", metavar="<DATA_FILE>", dest="data", action="store", default="data.tab", help="Write to data file (.tab format). Default is " \ "'%default' or 'traindata.tab' for training data.") parser.add_option("-S", "--seg-resamp", dest="segment", action="store_true", default=False, help="Resample to 11025 Hz and split into multiple files " \ "based on silence. IMPORTANT: Trial folder must " \ "be the current directory.") (opts, args) = parser.parse_args() if opts.channel and not (opts.classify or opts.segment): parser.error("'--channel' option requires '--classify' option'") if opts.log and not opts.classify: parser.error("'--log' option requires '--classify' option'") # Open train data file or create it if it doesn't exist. if opts.exampleClass and opts.data == "data.tab": opts.data = os.path.join(TRAIN_PATH, 'traindata.tab') if opts.audio or opts.path: if not opts.segment: print 'Opening %r. . .' % (opts.data) data = open(opts.data, "a+") elif opts.segment: print "Resampling and segmenting trial. . ." elif opts.classify: print "Classifying trial. . ." else: parser.error('No input file or path specified.') # If user specifies an audio file (-f AUDIO_FILE) if opts.audio: file_name, ext = os.path.splitext(opts.audio) # Add MFCC 1-12 to data. if not opts.segment: write_features(opts.audio, opts.sampleRate, data) # If classification is specified, write to data. if opts.exampleClass: data.write(opts.exampleClass.lower() + "\n") print "Classified %r as %r." % (opts.audio, opts.exampleClass.lower()) # Else if user chooses to segment file (-S) elif opts.segment: print "Resampling and segmenting %s. . ." 
% (opts.audio) if opts.channel == 0: run_commands( seg_resamp(opts.audio, int(opts.sampleRate), outfile=file_name + '_call.wav', directory=file_name + "_ch1_2", ch1=True, ch2=True)) elif opts.channel == 1: run_commands( seg_resamp(opts.audio, int(opts.sampleRate), outfile=file_name + '_ch1_.wav', directory=file_name + "_ch1", ch1=True, ch2=False)) elif opts.channel == 2: run_commands( seg_resamp(opts.audio, int(opts.sampleRate), outfile=file_name + '_ch2_.wav', directory=file_name + "_ch2", ch1=False, ch2=True)) print "Wrote to './%s'." % (file_name + "_calls") else: print "Invalid data for %r. Skipping. . ." % opts.audio data.write('\n') # Else if user specifies path (-p PATH) elif opts.path: # Read all wav files in specified path try: for root, dirs, files in os.walk(opts.path): for basename in files: if fnmatch.fnmatch(basename, "*.[wW][aA][vV]"): audiofile = os.path.join(root, basename) # Skip small files if os.path.getsize(audiofile) < 100: continue file_name, ext = os.path.splitext(audiofile) # Add MFCC 1-12 to data. if not opts.segment: write_features(audiofile, opts.sampleRate, data) # Write filename data.write(str(os.path.basename(audiofile)) + "\t") # If classification is specified, write to file. if opts.exampleClass: data.write(opts.exampleClass.lower() + "\n") print "Classified %r as %r." % ( audiofile, opts.exampleClass.lower()) # If user specifies resample and segment elif opts.segment: print "Resampling and segmenting %r. . ." 
% ( audiofile) if opts.channel == 0: run_commands( seg_resamp( audiofile, int(opts.sampleRate), outfile=os.path.basename(file_name) + '_call.wav', directory=os.path.basename(file_name) + "_ch1_2", ch1=True, ch2=True)) elif opts.channel == 1: run_commands( seg_resamp( audiofile, int(opts.sampleRate), outfile=os.path.basename(file_name) + '_ch1_.wav', directory=os.path.basename(file_name) + "_ch1", ch1=True, ch2=False)) elif opts.channel == 2: run_commands( seg_resamp( audiofile, int(opts.sampleRate), outfile=os.path.basename(file_name) + '_ch2_.wav', directory=os.path.basename(file_name) + "_ch2", ch1=False, ch2=True)) else: data.write('\n') except (FloatingPointError, IOError): print "An error occurred. Skipping %. . .r" % audiofile # Else if user chooses to segment and resample the trial (current dir) elif opts.segment: for audiofile in glob(os.path.join('./', "*.[wW][aA][vV]")): file_name, ext = os.path.splitext(audiofile) print "Resampling and segmenting %r. . ." % (file_name) if opts.channel == 0: run_commands( seg_resamp(audiofile, int(opts.sampleRate), outfile=file_name + '_call.wav', directory=file_name + "_ch1_2", ch1=True, ch2=True)) elif opts.channel == 1: run_commands( seg_resamp(audiofile, int(opts.sampleRate), outfile=file_name + '_ch1_.wav', directory=file_name + "_ch1", ch1=True, ch2=False)) elif opts.channel == 2: run_commands( seg_resamp(audiofile, int(opts.sampleRate), outfile=file_name + '_ch2_.wav', directory=file_name + "_ch2", ch1=False, ch2=True)) # Else if user chooses to classify the trial elif opts.classify: # TODO: Should not be able to classify if no data files in folder try: traindata = orange.ExampleTable(opts.trainData) except SystemError: print "Training data not found." sys.exit(1) # The logger if opts.log: logs = glob(os.path.join(os.getcwd(), "*.[lL][oO][gG]")) if len(logs) > 1: print "ERROR: Multiple log files." 
sys.exit(1) log = usv.avisoftlog.RecLog(open(logs[0], 'r')) # The classifier print "Constructing %s classifier \ (may take several minutes). . ." % (opts.learner) if opts.learner.lower() == "bayes": classifier = orange.BayesLearner(traindata) classifier.name = "naive_bayes" elif opts.learner.lower() == "knn": classifier = Orange.classification.knn.kNNLearner(traindata) classifier.name = "kNN" elif opts.learner.lower() == "svm": svm = SVMLearner(name="SVM", kernel_type=kernels.RBF, C=128, gamma=2, nu=0.1) classifier = svm(traindata) classifier.name = "SVM" elif opts.learner.lower() == "tree": classifier = orngTree.TreeLearner(traindata) classifier.name = "tree" elif opts.learner.lower() == "forest": classifier = Orange.ensemble.forest.RandomForestLearner(traindata) classifier.name = "random_forest" # Create data summary file if opts.channel == 0: datasummary_name = os.path.splitext(opts.data)[0] + "_ch1_2.tab" elif opts.channel == 1: datasummary_name = os.path.splitext(opts.data)[0] + "_ch1.tab" elif opts.channel == 2: datasummary_name = os.path.splitext(opts.data)[0] + "_ch2.tab" if os.path.exists(datasummary_name): print "Data file %r already exists." % (datasummary_name) print "Exiting . . ." 
sys.exit(1) else: summary = open(datasummary_name, "a+") # Write metadata summary.write("# data = %s\n" % (datasummary_name)) summary.write("# channel = %d\n" % (opts.channel)) summary.write("# sample_rate = %s\n" % (opts.sampleRate)) summary.write("# classifier = %s\n" % (classifier.name)) # Write header summary.write("FILE\t") for i in range(len(traindata.domain.classVar.values)): summary.write(traindata.domain.classVar.values[i].upper() + "\t") if opts.log: summary.write("start: " + str(log.start.time) + "\t") summary.write("Duration" + "\t") summary.write("\n") totals = [0] * len(traindata.domain.classVar.values) proportions = [0.0] * len(totals) for root, dirs, files in os.walk(os.getcwd()): # For each file's directory in this trial for dir in dirs: data = open(os.path.join(dir, dir + '.tab'), 'w+') if opts.channel == 0: calls = glob(os.path.join(dir, "*ch1_2*.[wW][aA][vV]")) elif opts.channel == 1: calls = glob(os.path.join(dir, "*ch1*.[wW][aA][vV]")) elif opts.channel == 2: calls = glob(os.path.join(dir, "*ch2*.[wW][aA][vV]")) # For each call for c in calls: # Skip small files if os.path.getsize(c) < 100: print "Skipping %s (not enough data)" % c continue # Write feature data write_features(c, opts.sampleRate, data) data.close() # Ensures that data is saved # Write filenames and classifications data = open(os.path.join(dir, dir + '.tab'), 'a+') datatable = orange.ExampleTable( os.path.join(dir, dir + '.tab')) classification = classifier(datatable[calls.index(c)]) data.write(str(os.path.basename(c)) + '\t') data.write(str(classification)) data.write('\n') try: data.close() except UnboundLocalError: parser.error( 'No directories in this folder. Did you remember to segment the files?' 
) # Write class count data to summary table for dir in dirs: if opts.channel == 0: data_files = glob(os.path.join(dir, "*ch1_2.tab")) elif opts.channel == 1: data_files = glob(os.path.join(dir, "*ch1.tab")) elif opts.channel == 2: data_files = glob(os.path.join(dir, "*ch2.tab")) for c in data_files: if os.path.getsize(c) == 0: continue file_name, ext = os.path.splitext(os.path.basename(c)) summary.write(file_name + '\t') callsdata = orange.ExampleTable(os.path.join("./", c)) # Vector of class counts counts = [0] * len(callsdata.domain.classVar.values) for e in callsdata: counts[int(e.getclass())] += 1 # Write counts for i in range(len(callsdata.domain.classVar.values)): summary.write(str(counts[i]) + "\t") totals[i] += counts[i] # Write log data if opts.log: tmp = str(os.path.basename(dir)).lower() entry = tmp[0:tmp.find("_")] + ".wav" summary.write(str(log.getevent(entry).time) + "\t") summary.write(log.getevent(entry).duration + "\t") log.close() summary.write('\n') # Write totals. Exclude BGNOISE. summary.write("TOTAL" + "\t\t") for i in range(1, len(totals)): summary.write(str(totals[i]) + "\t") if opts.log: summary.write("end: " + str(log.end.time) + "\t") summary.write("\n") # Write proportions. Exclude BGNOISE. summary.write("P" + "\t\t") for i in range(1, len(proportions)): try: proportions[i] = float( totals[i]) / float(sum(totals) - totals[0]) except ZeroDivisionError: proportions[i] = 0.0 summary.write("%.4f\t" % (proportions[i])) summary.write("\n") summary.close() # Open data file when finished subprocess.call('open %s' % (datasummary_name), shell=True) else: data.write("\n") if not opts.segment: data.close() print "Success!"
class SigmoidSVMTestCase(testing.LearnerTestCase):
    """Runs the inherited learner checks against the sigmoid-kernel SVM."""

    # Learner instance picked up by the LearnerTestCase machinery.
    LEARNER = SVMLearner(name="svm-sig", kernel_type=SVMLearner.Sigmoid)
class RBFSVMTestCase(testing.LearnerTestCase):
    """Runs the inherited learner checks against the RBF-kernel SVM."""

    # Learner instance picked up by the LearnerTestCase machinery.
    LEARNER = SVMLearner(name="svm-RBF", kernel_type=SVMLearner.RBF)
class PolySVMTestCase(testing.LearnerTestCase):
    """Runs the inherited learner checks against the polynomial-kernel SVM."""

    # Learner instance picked up by the LearnerTestCase machinery.
    LEARNER = SVMLearner(name="svm-poly", kernel_type=SVMLearner.Polynomial)
class LinearSVMTestCase(testing.LearnerTestCase):
    """Runs the inherited learner checks against the linear-kernel SVM."""

    # Learner instance picked up by the LearnerTestCase machinery.
    LEARNER = SVMLearner(name="svm-lin", kernel_type=SVMLearner.Linear)
from Orange import data
from Orange import evaluation
from Orange.classification.svm import SVMLearner, kernels
from Orange.distance import Euclidean
from Orange.distance import Hamming

iris = data.Table("iris.tab")

# SVM with a custom RBF kernel over the Euclidean distance.
l1 = SVMLearner()
l1.kernel_func = kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5)
l1.kernel_type = SVMLearner.Custom
l1.probability = True
c1 = l1(iris)
l1.name = "SVM - RBF(Euclidean)"

# SVM with a custom RBF kernel over the Hamming distance.
l2 = SVMLearner()
l2.kernel_func = kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5)
l2.kernel_type = SVMLearner.Custom
l2.probability = True
c2 = l2(iris)
l2.name = "SVM - RBF(Hamming)"

# SVM with a composite kernel blending the two RBF kernels above.
l3 = SVMLearner()
l3.kernel_func = kernels.CompositeKernelWrapper(
    kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5),
    kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5), l=0.5)
l3.kernel_type = SVMLearner.Custom
l3.probability = True
# FIX: previously `c3 = l1(iris)` (copy-paste bug) -- the composite
# learner configured just above is l3, not l1.
c3 = l3(iris)
l3.name = "SVM - Composite"
svm = Orange.classification.svm.SVMLearner(trn_data) # Check the accuracy print "\n" + "*" * 50 print "DEFAULT SVM" print "Training Accuracy: " + str(compute_learner_accuracy(svm, trn_data)) print "Testing Accuracy: " + str(compute_learner_accuracy(svm, tst_data)) # Cross Validation from Orange.classification.svm import SVMLearner, kernels from Orange.distance import Euclidean from Orange.distance import Hamming svm1 = SVMLearner() svm2 = SVMLearner() svm2.kernel_func = kernels.RBFKernelWrapper(Hamming(trn_data), gamma=0.5) svm2.kernel_type = SVMLearner.Custom svm2.probability = True svm3 = SVMLearner(kernel_type=SVMLearner.Custom, kernel_func=kernels.CompositeKernelWrapper( kernels.RBFKernelWrapper(Euclidean(trn_data), gamma=0.5), kernels.RBFKernelWrapper(Hamming(trn_data), gamma=0.5), l=0.5), probability=False) res = Orange.evaluation.testing.cross_validation([svm1, svm2, svm3], trn_data, folds=5) print "\n" + "-" * 30 print "CROSS VALIDATION" print "SVM 1 Accuracy: %.2f" % Orange.evaluation.scoring.CA(res)[0] print "SVM 2 Accuracy: %.2f" % Orange.evaluation.scoring.CA(res)[1]
# Timing benchmarks: fit a Nu-SVM (rbf kernel, nu=0.10) on three datasets
# under the TimeTest harness from `timetest`.  The ORANGE3 flag (defined
# in timetest, presumably) selects between the Orange 3 and Orange 2
# learner APIs.
# NOTE(review): the final class (TestSVM_car.test_use) is truncated in
# this chunk -- its body lies beyond the visible source, so the code is
# kept byte-identical rather than reconstructed.
from timetest import * if ORANGE3: SVM = Orange.classification.NuSVMLearner(nu=0.10, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, tol=0.001) else: from Orange.classification.svm import SVMLearner SVM = SVMLearner(svm_type=SVMLearner.Nu_SVC, kernel_type=SVMLearner.RBF, kernel_func=None, gamma=0.0, degree=3, normalization=False, eps=0.001, nu=0.10) class TestSVM_iris(TimeTest): def setUp(self): self.data = Orange.data.Table("iris.tab") def test_use(self): SVM(self.data) class TestSVM_adult_sample(TimeTest): def setUp(self): self.data = Orange.data.Table("adult_sample.tab") def test_use(self): SVM(self.data) class TestSVM_car(TimeTest): def setUp(self): self.data = Orange.data.Table("car.tab") def test_use(self):