Beispiel #1
0
    def __call__(self, table, bound, weightID=0):
        """Construct one discrete feature that combines the bound attributes.

        The new variable is named after the bound attributes (names joined
        with "-") and gets a lookup-table classifier as its ``getValueFrom``
        so that its value can be computed from the bound attributes.

        Args:
            table: example table; ``table.domain`` is used to resolve the
                entries of ``bound`` and the table is handed to
                ``self.measure``.
            bound: non-empty sequence of attribute references accepted by
                ``table.domain[...]``.
            weightID: not used by this method.

        Returns:
            A tuple ``(newVar, meas)`` where ``meas`` is
            ``self.measure(newVar, table)`` or 0 when ``self.measure`` is
            falsy.

        Raises:
            AttributeError: if ``bound`` is empty.
        """
        if not len(bound):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise AttributeError("no bound attributes")

        bound = [table.domain[a] for a in bound]
        newVar = orange.EnumVariable("-".join([a.name for a in bound]))

        if len(bound) == 1:
            # A single attribute: the new variable simply mirrors its values.
            newVar.values = list(bound[0].values)
            clsfr = orange.ClassifierByLookupTable(newVar, bound[0])
        else:
            import orngMisc
            # NOTE(review): LimitedCounter presumably enumerates every
            # combination of value indices, in the order the lookup table
            # expects -- confirm against orngMisc.
            for vs in orngMisc.LimitedCounter([len(a.values) for a in bound]):
                newVar.values.append("-".join(
                    [bound[i].values[v] for i, v in enumerate(vs)]))
            clsfr = orange.ClassifierByLookupTable(newVar, bound)

        # Identity mapping: table slot i yields the i-th value of newVar.
        for i in range(len(newVar.values)):
            clsfr.lookupTable[i] = orange.Value(newVar, i)

        newVar.getValueFrom = clsfr

        if self.measure:
            meas = self.measure(newVar, table)
        else:
            meas = 0
        return newVar, meas
Beispiel #2
0
    def __call__(self, gen, weightID=0):
        """Build a lookup-table classifier on the best-scoring attribute.

        Every attribute of ``gen.domain`` is scored with ``self.measure``;
        the winner (as reported by ``orngMisc.BestOnTheFly``) becomes the
        only attribute the returned classifier looks at.

        Args:
            gen: example generator/table providing ``domain`` and the data
                for the contingency matrix.
            weightID: passed through to ``self.measure`` and to
                ``orange.ContingencyAttrClass``.

        Returns:
            An ``orange.ClassifierByLookupTable`` predicting
            ``gen.domain.classVar`` with normalized class distributions.
        """
        attributes = gen.domain.attributes

        picker = orngMisc.BestOnTheFly()
        for candidate in attributes:
            score = self.measure(candidate, gen, None, weightID)
            picker.candidate(score)
        chosen = attributes[picker.winnerIndex()]

        lookup = orange.ClassifierByLookupTable(gen.domain.classVar, chosen)
        table = orange.ContingencyAttrClass(chosen, gen, weightID)

        # One slot per contingency row: predicted value plus distribution.
        for idx in range(len(table)):
            lookup.lookupTable[idx] = table[idx].modus()
            lookup.distributions[idx] = table[idx]

        # The last slot is filled from the overall (inner) distribution.
        # NOTE(review): presumably the slot used when the attribute value
        # is unknown -- confirm against the Orange documentation.
        overall = table.innerDistribution
        lookup.lookupTable[-1] = overall.modus()
        lookup.distributions[-1] = table.innerDistribution

        for dist in lookup.distributions:
            dist.normalize()

        return lookup
Beispiel #3
0
    def sortAttrValues(self, attr, interattr=None):
        """Return a variable whose values are in sorted order.

        Args:
            attr: the attribute the resulting variable is computed from.
            interattr: the attribute whose values are sorted; falls back to
                ``attr`` when falsy.

        Returns:
            ``interattr`` (or ``attr``) itself when its values are already
            sorted; otherwise a new ``orange.EnumVariable`` with the sorted
            values and a lookup-table ``getValueFrom`` based on ``attr``.
        """
        source = interattr if interattr else attr

        ordered = sorted(source.values)
        if list(source.values) == ordered:
            # Nothing to reorder; hand back the variable unchanged.
            return source

        result = orange.EnumVariable(source.name, values=ordered)
        result.getValueFrom = orange.ClassifierByLookupTable(result, attr)
        table = result.getValueFrom.lookupTable
        dists = result.getValueFrom.distributions

        # Map each value's position in attr onto the same value in the
        # sorted variable, and record it in the matching distribution.
        for value in source.values:
            slot = attr.values.index(value)
            table[slot] = value
            dists[slot][ordered.index(value)] += 1
        return result
# Description: Shows how to construct and use classifiers by lookup table to construct new features from existing ones
# Category:    classification, lookup classifiers, constructive induction, feature construction
# Classes:     ClassifierByLookupTable, ClassifierByLookupTable1, ClassifierByLookupTable2, ClassifierByLookupTable3
# Uses:        monk1
# Referenced:  lookup.htm

import orange

data = orange.ExampleTable("monk1")

a, b, e = data.domain["a"], data.domain["b"], data.domain["e"]

# New feature computed from attributes a and b through a 9-entry lookup
# table (one entry per listed combination of the two attributes' values).
ab = orange.EnumVariable("a==b", values=["no", "yes"])
ab.getValueFrom = orange.ClassifierByLookupTable(
    ab, a, b, ["yes", "no", "no", "no", "yes", "no", "no", "no", "yes"])

# New feature computed from attribute e alone; the last table entry is "?".
e1 = orange.EnumVariable("e==1", values=["no", "yes"])
e1.getValueFrom = orange.ClassifierByLookupTable(
    e1, e, ["yes", "no", "no", "no", "?"])

data2 = data.select([a, b, ab, e, e1, data.domain.classVar])

# Parenthesised single-argument print gives the same output on Python 2
# and also parses on Python 3.
for _ in range(5):
    print(data2.randomexample())

for _ in range(5):
    ex = data.randomexample()
    print("%s: ab %i, e1 %i " % (ex, ab.getValueFrom.getindex(ex),
                                 e1.getValueFrom.getindex(ex)))

# What follows is only for testing Orange...
Beispiel #5
0
    [meas(orange.ContingencyAttrClass(i, data), cdist) for i in range(attrs)]))
# Parenthesised single-argument print gives the same output on Python 2
# and also parses on Python 3.
print(fstr % (
    ("- by attribute name:", ) +
    tuple([meas(orange.ContingencyAttrClass(i, data), cdist) for i in names])))
print(fstr % (("- by attribute descriptor:", ) + tuple([
    meas(orange.ContingencyAttrClass(i, data), cdist)
    for i in data.domain.attributes
])))
print("")

# Build a feature with one value per combination of the values of
# attributes 2 and 3, computed through a lookup table.
values = [
    "v%i" % i
    for i in range(len(data.domain[2].values) * len(data.domain[3].values))
]
cartesian = orange.EnumVariable("cart", values=values)
cartesian.getValueFrom = orange.ClassifierByLookupTable(
    cartesian, data.domain[2], data.domain[3], values)

print("Information gain of Cartesian product of %s and %s: %6.4f" % (
    data.domain[2].name, data.domain[3].name, meas(cartesian, data)))

# Register a two-valued meta attribute and fill it with seeded random
# values so the output is reproducible.
mid = orange.newmetaid()
data.domain.addmeta(mid, orange.EnumVariable(values=["v0", "v1"]))
data.addMetaAttribute(mid)

rg = random.Random()
rg.seed(0)
for ex in data:
    ex[mid] = orange.Value(rg.randint(0, 1))

print("Information gain for a random meta attribute: %6.4f" % meas(mid, data))