def add_class_noise(data, noise_level, rnd_seed):
    """adds class Noise

    :param data: Orange dataset
    :param noise_level:
    :param rnd_seed:
    :return:
    """

    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")
    # Generate random indices for noise insertion
    percent = float(noise_level) / 100
    try:
        rnds = int(rnd_seed)
    except:
        rnds = 0
    print "Random Seed:", rnds
    orange.setrandseed(rnds)
    noise_indices = random.sample(range(len(data)),
                                  int(round(percent * len(data))))
    #print "Amount of added noise:", percent*100, "percent (", len(noise_indices), "examples ):"
    #print "Random indices for added noise:", noise_indices
    className = data.domain.classVar.name
    #print "Class name:", className
    for index in noise_indices:
        data[index]["noise"] = "yes"
        temp = data[index][className]
        ##        if len(data.domain.classVar.values) > 2:
        # random value + check if it is diferent from the current one
        new_label = data.domain.classVar.randomvalue()
        while new_label == temp:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label


##        else:
##            # switch the class value
##            data[index][className] = data.domain.classVar.nextvalue(data[index][className])
#print "\t", temp, "changed to:", data[index].getclass(), "(", index, ")"
#print "\n"
    noise_indices.sort()
    return noise_indices, data
def add_class_noise(data, noise_level, rnd_seed):
    """adds class Noise

    :param data: Orange dataset
    :param noise_level:
    :param rnd_seed:
    :return:
    """

    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")
    # Generate random indices for noise insertion
    percent = float(noise_level)/100
    try:
        rnds = int(rnd_seed)
    except:
        rnds = 0
    print "Random Seed:", rnds
    orange.setrandseed(rnds)
    noise_indices = random.sample(range(len(data)), int(round(percent*len(data))))
    #print "Amount of added noise:", percent*100, "percent (", len(noise_indices), "examples ):"
    #print "Random indices for added noise:", noise_indices
    className = data.domain.classVar.name
    #print "Class name:", className
    for index in noise_indices:
        data[index]["noise"] = "yes"
        temp = data[index][className]
##        if len(data.domain.classVar.values) > 2:
        # random value + check if it is diferent from the current one
        new_label = data.domain.classVar.randomvalue()
        while new_label == temp:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label
##        else:
##            # switch the class value
##            data[index][className] = data.domain.classVar.nextvalue(data[index][className])
        #print "\t", temp, "changed to:", data[index].getclass(), "(", index, ")"
    #print "\n"
    noise_indices.sort()
    return noise_indices, data
Ejemplo n.º 3
0
def entropyDiscretization(data):
    """
    Discretize the attributes of ``data`` with entropy-based discretization.

    Orange's random seed is reset to 0 first. After discretization, every
    attribute left with at most one value is dropped; the class variable is
    always kept as the last selected column. Nothing is printed.

    Arguments: data - table of examples to discretize
    Returns:   table of examples restricted to the discretized attributes
               that have more than one value, plus the class variable.
    """
    orange.setrandseed(0)
    discretized = orange.Preprocessor_discretize(
        data, method=orange.EntropyDiscretization())

    domain = discretized.domain
    # Keep only attributes that still distinguish between examples.
    kept = [attr for attr in domain.attributes if len(attr.values) > 1]
    kept.append(domain.classVar)
    return discretized.select(kept)
Ejemplo n.º 4
0
        selection = orange.MakeRandomIndices2(data, p)
        train_data = data.select(selection, 0)
        test_data = data.select(selection, 1)
        classifiers = []
        for l in learners:
            classifiers.append(l(train_data))
        acc1 = accuracy(test_data, classifiers)
        print "%d: %s" % (i + 1, acc1)
        for j in range(len(learners)):
            acc[j] += acc1[j]
    for j in range(len(learners)):
        acc[j] = acc[j] / n
    return acc


orange.setrandseed(0)
# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner()
#tree = orngTree.TreeLearner(mForPruning=2)
bayes.name = "bayes"
tree.name = "tree"
learners = [bayes, tree]

# compute accuracies on data
data = orange.ExampleTable("voting")
acc = test_rnd_sampling(data, learners)
print "Classification accuracies:"
for i in range(len(learners)):
    print learners[i].name, acc[i]
Ejemplo n.º 5
0
    ar = [0.0]*len(learners)
    selection = orange.MakeRandomIndicesCV(data, folds=k)
    for test_fold in range(k):
        train_data = data.select(selection, test_fold, negate=1)
        test_data = data.select(selection, test_fold)
        classifiers = []
        for l in learners:
            classifiers.append(l(train_data))
        result = aroc(test_data, classifiers)
        for j in range(len(learners)):
            ar[j] += result[j]
    for j in range(len(learners)):
        ar[j] = ar[j]/k
    return ar

orange.setrandseed(0)    
# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner(mForPruning=2)
maj = orange.MajorityLearner()
bayes.name = "bayes"
tree.name = "tree"
maj.name = "majority"
learners = [bayes, tree, maj]

# compute accuracies on data
data = orange.ExampleTable("voting")
acc = cross_validation(data, learners, k=10)
print "Area under ROC:"
for i in range(len(learners)):
    print learners[i].name, "%.2f" % acc[i]