import random

import orange


def add_class_noise(data, noise_level, rnd_seed):
    """Adds class noise to an Orange dataset.

    :param data: Orange dataset
    :param noise_level: percentage of examples whose class label is corrupted
    :param rnd_seed: random seed used to select the corrupted examples
    :return: tuple (noise_indices, data) with the sorted list of indices of
        the corrupted examples and the modified dataset
    """
    # Register a "noise" meta attribute that marks the corrupted examples
    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")

    # Generate random indices for noise insertion
    percent = float(noise_level) / 100
    try:
        rnds = int(rnd_seed)
    except (TypeError, ValueError):
        rnds = 0
    print "Random Seed:", rnds
    orange.setrandseed(rnds)
    random.seed(rnds)  # random.sample below uses Python's RNG, so seed it too
    noise_indices = random.sample(range(len(data)),
                                  int(round(percent * len(data))))

    className = data.domain.classVar.name
    for index in noise_indices:
        data[index]["noise"] = "yes"
        temp = data[index][className]
        # Draw random class values until one differs from the current label
        # (for binary problems one could simply switch to the other value)
        new_label = data.domain.classVar.randomvalue()
        while new_label == temp:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label

    noise_indices.sort()
    return noise_indices, data
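# Minimal usage sketch for add_class_noise(). The "voting" dataset and the
# 20% noise level are illustrative assumptions, not values required by the
# function above.
def _demo_add_class_noise():
    voting = orange.ExampleTable("voting")
    noisy_idx, noisy_data = add_class_noise(voting, noise_level=20, rnd_seed=1)
    print "Corrupted %d of %d examples" % (len(noisy_idx), len(noisy_data))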
def entropyDiscretization(data):
    """Discretizes continuous attributes using entropy-based discretization.

    Attributes that end up discretized to a single interval (i.e. constant)
    are removed and their names are printed.

    :param data: Orange dataset
    :return: table of examples with discretized attributes
    """
    orange.setrandseed(0)
    tablen = orange.Preprocessor_discretize(
        data, method=orange.EntropyDiscretization())

    attrlist = []
    removed = []
    for attr in tablen.domain.attributes:
        if len(attr.values) > 1:
            attrlist.append(attr)
        else:
            removed.append(attr.name)
    if removed:
        print "Removed constant attributes:", ", ".join(removed)

    attrlist.append(tablen.domain.classVar)
    return tablen.select(attrlist)
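# Minimal usage sketch for entropyDiscretization(). The "iris" dataset is an
# illustrative assumption; any table with continuous attributes works.
def _demo_entropy_discretization():
    iris = orange.ExampleTable("iris")
    discretized = entropyDiscretization(iris)
    for attr in discretized.domain.attributes:
        print attr.name, attr.values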
import orange
import orngTree


def test_rnd_sampling(data, learners, p=0.7, n=10):
    # Repeated random sampling: split the data into a training and a test set
    # n times, train every learner, and average the classification accuracies.
    # The default split proportion p and repeat count n are assumed values.
    acc = [0.0] * len(learners)
    for i in range(n):
        selection = orange.MakeRandomIndices2(data, p)
        train_data = data.select(selection, 0)
        test_data = data.select(selection, 1)

        classifiers = []
        for l in learners:
            classifiers.append(l(train_data))
        acc1 = accuracy(test_data, classifiers)  # accuracy(): see sketch below
        print "%d: %s" % (i + 1, acc1)
        for j in range(len(learners)):
            acc[j] += acc1[j]

    # Average the accuracies over the n repetitions
    for j in range(len(learners)):
        acc[j] = acc[j] / n
    return acc


orange.setrandseed(0)

# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner()
# tree = orngTree.TreeLearner(mForPruning=2)
bayes.name = "bayes"
tree.name = "tree"
learners = [bayes, tree]

# compute accuracies on data
data = orange.ExampleTable("voting")
acc = test_rnd_sampling(data, learners)
print "Classification accuracies:"
for i in range(len(learners)):
    print learners[i].name, acc[i]
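# The script above calls accuracy(), which is not shown in this section. A
# minimal sketch of such a helper is given here as an assumption: it returns,
# for each classifier, the fraction of test examples it classifies correctly.
# If used, it has to be defined before the script above runs.
def accuracy(test_data, classifiers):
    correct = [0.0] * len(classifiers)
    for ex in test_data:
        for i in range(len(classifiers)):
            if classifiers[i](ex) == ex.getclass():
                correct[i] += 1
    return [c / len(test_data) for c in correct]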
import orange
import orngTree


def cross_validation(data, learners, k=10):
    # k-fold cross-validation: train on k-1 folds, compute the area under the
    # ROC curve on the held-out fold, and average the results over the folds.
    ar = [0.0] * len(learners)
    selection = orange.MakeRandomIndicesCV(data, folds=k)
    for test_fold in range(k):
        train_data = data.select(selection, test_fold, negate=1)
        test_data = data.select(selection, test_fold)

        classifiers = []
        for l in learners:
            classifiers.append(l(train_data))
        result = aroc(test_data, classifiers)  # aroc(): see sketch below
        for j in range(len(learners)):
            ar[j] += result[j]

    # Average the AUC estimates over the k folds
    for j in range(len(learners)):
        ar[j] = ar[j] / k
    return ar


orange.setrandseed(0)

# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner(mForPruning=2)
maj = orange.MajorityLearner()
bayes.name = "bayes"
tree.name = "tree"
maj.name = "majority"
learners = [bayes, tree, maj]

# compute area under ROC on data
data = orange.ExampleTable("voting")
acc = cross_validation(data, learners, k=10)
print "Area under ROC:"
for i in range(len(learners)):
    print learners[i].name, "%.2f" % acc[i]
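# The script above calls aroc(), which is not shown in this section. A minimal
# sketch of such a helper is given here as an assumption: it estimates, for
# each classifier, the area under the ROC curve on a binary-class test set by
# pairwise comparison of the predicted probabilities of the first class value.
# If used, it has to be defined before the script above runs.
def aroc(test_data, classifiers):
    results = []
    for c in classifiers:
        probs = [c(ex, orange.GetProbabilities)[0] for ex in test_data]
        correct, valid = 0.0, 0.0
        for i in range(len(test_data) - 1):
            for j in range(i + 1, len(test_data)):
                if test_data[i].getclass() == test_data[j].getclass():
                    continue  # only pairs with different true classes count
                valid += 1
                if probs[i] == probs[j]:
                    correct += 0.5
                elif (probs[i] > probs[j]) == (int(test_data[i].getclass()) == 0):
                    correct += 1
        results.append(correct / valid if valid else 0.0)
    return results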