Beispiel #1
0
    def setOutput(self):
        """Split ``self.data`` with the enabled conditions and send both parts.

        When ``self.data`` is truthy, a filter is built from the lists returned
        by ``self.getFilterList`` (each list becomes a conjunction, the
        conjunctions are joined in a disjunction) and applied twice: once
        normally and once with ``negate=1``.  If ``self.purgeAttributes`` or
        ``self.purgeClasses`` is set, each result is converted to the domain
        returned by ``orange.RemoveUnusedValues`` whenever that domain differs
        from the table's own.

        When ``self.data`` is falsy, ``self.data`` itself is sent on
        "Matching Data" and ``None`` on "Unmatched Data".

        The matching table is also passed to ``self.updateInfoOut``.
        """
        matchingOutput = self.data
        nonMatchingOutput = None
        if self.data:
            filterList = self.getFilterList(self.data.domain, self.Conditions, enabledOnly=True)
            if len(filterList) > 0:
                dataFilter = orange.Filter_disjunction([orange.Filter_conjunction(l) for l in filterList])
            else:
                dataFilter = orange.Filter_conjunction([])  # a filter that does nothing
            matchingOutput = dataFilter(self.data, 1)
            matchingOutput.name = self.data.name
            nonMatchingOutput = dataFilter(self.data, 1, negate=1)
            nonMatchingOutput.name = self.data.name

            if self.purgeAttributes or self.purgeClasses:
                remover = orange.RemoveUnusedValues(removeOneValued=True)

                newDomain = remover(matchingOutput, 0, True, self.purgeClasses)
                if newDomain != matchingOutput.domain:
                    matchingOutput = orange.ExampleTable(newDomain, matchingOutput)

                newDomain = remover(nonMatchingOutput, 0, True, self.purgeClasses)
                if newDomain != nonMatchingOutput.domain:
                    # Rebind the variable that is actually sent below; a
                    # misspelled name here would silently drop the purged table.
                    nonMatchingOutput = orange.ExampleTable(newDomain, nonMatchingOutput)

        self.send("Matching Data", matchingOutput)
        self.send("Unmatched Data", nonMatchingOutput)

        self.updateInfoOut(matchingOutput)
Beispiel #2
0
def domainPurger(examples, purgeClasses):
    """Return ``examples`` in the domain produced by ``orange.RemoveUnusedValues``.

    A remover created with ``removeOneValued=True`` is called as
    ``remover(examples, 0, True, purgeClasses)``.  If the domain it returns
    differs from ``examples.domain``, a new ``orange.ExampleTable`` built from
    that domain and ``examples`` is returned; otherwise ``examples`` is
    returned unchanged.
    """
    import orange

    remover = orange.RemoveUnusedValues(removeOneValued=True)
    purgedDomain = remover(examples, 0, True, purgeClasses)

    if purgedDomain != examples.domain:
        return orange.ExampleTable(purgedDomain, examples)
    return examples
Beispiel #3
0
    def checkDomain(self, data, selection=None):
        """Return a domain with a reduced class variable, or ``None``.

        If ``data`` has a class variable, ``orange.RemoveUnusedValues`` is
        applied to it with ``removeOneValued=1``, using ``selection`` when it
        is truthy and ``data`` otherwise.  When the result differs from the
        original class variable, a new ``orange.Domain`` is built from the
        original attributes and the new class, the meta attributes of
        ``data.domain`` are copied onto it, and it is returned.  In every
        other case ``None`` is returned.
        """
        originalClass = data.domain.classVar
        reducedClass = originalClass

        if originalClass:
            examples = selection if selection else data
            reducedClass = orange.RemoveUnusedValues(originalClass,
                                                     examples,
                                                     removeOneValued=1)

        # A new domain is needed only when the class variable changed
        # (fewer values, or removed altogether, i.e. None).
        if reducedClass != originalClass:
            newDomain = orange.Domain(data.domain.attributes, reducedClass)
            for metaId, metaVar in data.domain.getmetas().items():
                newDomain.addmeta(metaId, metaVar)
            return newDomain

        return None
Beispiel #4
0
def discretizeDomain(data, removeUnusedValues=1, numberOfIntervals=2):
    """Return ``data`` translated to discretized versions of its attributes.

    Returns ``None`` when ``data`` is falsy or empty.

    A continuous class variable is first discretized with
    ``orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)``; if
    that raises ``orange.KernelException`` a warning is issued and the data
    is rebuilt without a class.

    Each continuous attribute is discretized with
    ``orange.EntropyDiscretization`` when the class is discrete, and with the
    EquiN discretizer otherwise; other attributes are used as they are.  When
    ``removeUnusedValues`` is truthy, ``orange.RemoveUnusedValues`` is applied
    to each resulting attribute.  Attributes for which this processing raises
    ``orange.KernelException`` (including the "No values" case raised here
    when ``RemoveUnusedValues`` returns ``None``) are skipped with a warning.

    The result is ``data.translate(attrs, True)`` where ``attrs`` holds the
    kept attributes followed by the class variable, if there is one.
    """
    entropyDiscretizer = orange.EntropyDiscretization()
    equiNDiscretizer = orange.EquiNDiscretization(
        numberOfIntervals=numberOfIntervals)
    discAttrs = []

    if not data or len(data) == 0:
        return None

    # Name of the class variable, or None when there is no (named) class.
    className = (data.domain.classVar and data.domain.classVar.name) or None

    # A continuous class has to be discretized before the attributes can be.
    if className and data.domain.classVar.varType == orange.VarTypes.Continuous:
        try:
            discretizedClass = equiNDiscretizer(data.domain.classVar.name, data)
            discretizedClass.name = className
        except orange.KernelException as ex:
            warnings.warn("Could not discretize class variable '%s'. %s" %
                          (data.domain.classVar.name, ex.message))
            discretizedClass = None
            className = None
        data = orange.ExampleTable(
            orange.Domain(data.domain.attributes, discretizedClass), data)

    for attr in data.domain.attributes:
        try:
            originalName = attr.name
            if attr.varType != orange.VarTypes.Continuous:
                candidate = attr
            elif data.domain.classVar and data.domain.classVar.varType == orange.VarTypes.Discrete:
                # Entropy discretization is used only with a discrete class.
                candidate = entropyDiscretizer(attr, data)
            else:
                candidate = equiNDiscretizer(attr, data)

            if removeUnusedValues:
                candidate = orange.RemoveUnusedValues(candidate, data)
                if candidate is None:
                    raise orange.KernelException("No values")

            candidate.name = originalName
            discAttrs.append(candidate)
        except orange.KernelException as ex:
            # E.g. all values missing: the attribute is left out of the result.
            warnings.warn("Could not discretize %s attribute. %s" %
                          (attr.name, ex.message))

    if className:
        discAttrs.append(data.domain.classVar)
    return data.translate(discAttrs, True)
Beispiel #5
0
def discretizeDomain(data, removeUnusedValues=1, numberOfIntervals=2):
    """Return ``data`` restricted to discretized versions of its attributes.

    Returns ``None`` when ``data`` is falsy or empty.

    A continuous class variable is first discretized with
    ``orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)`` and
    the data is rebuilt with the new class.

    Each continuous attribute is discretized with
    ``orange.EntropyDiscretization`` when the class is discrete, and with the
    EquiN discretizer otherwise; other attributes are used as they are.  When
    ``removeUnusedValues`` is truthy, ``orange.RemoveUnusedValues`` is applied
    to each resulting attribute.

    An attribute is left out of the result, with a warning, when processing
    it raises ``orange.KernelException`` or when ``RemoveUnusedValues``
    returns ``None`` for it.  Any other exception propagates to the caller.

    The result is ``data.select(attrs)`` where ``attrs`` holds the kept
    attributes followed by the class variable, if there is one.
    """
    # Imported locally so this function does not rely on a module-level import.
    import warnings

    entroDisc = orange.EntropyDiscretization()
    equiDisc = orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)
    discAttrs = []

    className = data and len(data) > 0 and data.domain.classVar and data.domain.classVar.name or None

    if not data or len(data) == 0:
        return None

    # if we have a continuous class we have to discretize it before we can discretize the attributes
    if className and data.domain.classVar.varType == orange.VarTypes.Continuous:
        newClass = equiDisc(data.domain.classVar.name, data)
        newClass.name = className
        newDomain = orange.Domain(data.domain.attributes, newClass)
        data = orange.ExampleTable(newDomain, data)

    for attr in data.domain.attributes:
        try:
            if attr.varType == orange.VarTypes.Continuous:
                # Entropy discretization is used only with a discrete class.
                if data.domain.classVar and data.domain.classVar.varType == orange.VarTypes.Discrete:
                    newAttr = entroDisc(attr, data)
                else:
                    newAttr = equiDisc(attr, data)
            else:
                newAttr = attr
            if removeUnusedValues:
                newAttr = orange.RemoveUnusedValues(newAttr, data)
        except orange.KernelException:
            # E.g. all values missing: skip the attribute, but say so instead
            # of hiding every possible error behind a bare ``except``.
            warnings.warn("Could not discretize %s attribute." % attr.name)
            continue

        if newAttr is None:
            # RemoveUnusedValues found nothing to keep for this attribute.
            warnings.warn("Could not discretize %s attribute. No values" % attr.name)
            continue

        newAttr.name = attr.name
        discAttrs.append(newAttr)

    if className:
        discAttrs.append(data.domain.classVar)
    return data.select(discAttrs)
Beispiel #6
0
import orange
data = orange.ExampleTable("unusedValues")

newattrs = [
    orange.RemoveUnusedValues(attr, data) for attr in data.domain.variables
]

print
for attr in range(len(data.domain)):
    print data.domain[attr],
    if newattrs[attr] == data.domain[attr]:
        print "retained as is"
    elif newattrs[attr]:
        print "reduced, new values are", newattrs[attr].values
    else:
        print "removed"

filteredattrs = filter(bool, newattrs)
newdata = orange.ExampleTable(orange.Domain(filteredattrs), data)

print "\nOriginal example table"
for ex in data:
    print ex

print "\nReduced example table"
for ex in newdata:
    print ex

print "\nRemoval with 'removedOneValued=true'"
reducer = orange.RemoveUnusedValues(removeOneValued=1)
newattrs = [reducer(attr, data) for attr in data.domain.variables]
Beispiel #7
0
    def process(self):
        """Build a purged/sorted copy of ``self.data`` and send it on "Data".

        Does nothing when ``self.data`` is ``None``.  Otherwise the counters
        ``reducedAttrs``, ``removedAttrs`` and ``resortedAttrs`` are reset and
        recomputed while the attributes are examined, and ``self.classAttr``
        is set to a short text describing what happened to the class
        variable.  A new ``orange.ExampleTable`` is created only when some
        counter is non-zero or the class variable changed; otherwise
        ``self.data`` itself is sent.  Finally ``self.dataChanged`` is set to
        ``False``.
        """
        if self.data == None:
            return

        self.reducedAttrs = 0
        self.removedAttrs = 0
        self.resortedAttrs = 0
        # NOTE(review): the rest of this method writes ``self.classAttr``,
        # not ``self.classAttribute`` -- confirm both attributes are intended.
        self.classAttribute = 0

        # NOTE(review): this gate checks ``removeAttributes`` while the body
        # below checks ``removeValues`` -- confirm which flags are intended.
        if self.removeAttributes or self.sortValues:
            newattrs = []
            for attr in self.data.domain.attributes:
                # Continuous attributes are kept only if the data has at
                # least two values for them; otherwise counted as removed.
                if attr.varType == orange.VarTypes.Continuous:
                    if orange.RemoveRedundantOneValue.has_at_least_two_values(
                            self.data, attr):
                        newattrs.append(attr)
                    else:
                        self.removedAttrs += 1
                    continue

                # Attributes that are neither continuous nor discrete are
                # passed through untouched.
                if attr.varType != orange.VarTypes.Discrete:
                    newattrs.append(attr)
                    continue

                if self.removeValues:
                    newattr = orange.RemoveUnusedValues(attr, self.data)
                    # A falsy result means the attribute is dropped entirely.
                    if not newattr:
                        self.removedAttrs += 1
                        continue

                    if newattr != attr:
                        self.reducedAttrs += 1
                else:
                    newattr = attr

                # With value removal enabled, attributes left with fewer
                # than two values are dropped as well.
                if self.removeValues and len(newattr.values) < 2:
                    self.removedAttrs += 1
                    continue

                if self.sortValues:
                    newnewattr = self.sortAttrValues(attr, newattr)
                    if newnewattr != newattr:
                        self.resortedAttrs += 1
                        newattr = newnewattr

                newattrs.append(newattr)
        else:
            # Nothing to check: reuse the original attribute list.
            newattrs = self.data.domain.attributes

        klass = self.data.domain.classVar
        classChanged = False
        if not klass:
            newclass = klass
            self.classAttr = "No class"
        elif klass.varType != orange.VarTypes.Discrete:
            newclass = klass
            self.classAttr = "Class is not discrete"
        elif not (self.removeClassAttribute or self.sortClasses):
            newclass = klass
            self.classAttr = "Class is not checked"
        else:
            # Empty string means "no change recorded yet"; it is filled in
            # below as the class is reduced, removed and/or sorted.
            self.classAttr = ""

            if self.removeClasses:
                newclass = orange.RemoveUnusedValues(klass, self.data)
            else:
                newclass = klass

            # The class is dropped when RemoveUnusedValues returned a falsy
            # result, or when removal is enabled and fewer than two values
            # remain.
            if not newclass or self.removeClassAttribute and len(
                    newclass.values) < 2:
                newclass = None
                self.classAttr = "Class is removed"
            elif len(newclass.values) != len(klass.values):
                self.classAttr = "Class is reduced"

            if newclass and self.sortClasses:
                newnewclass = self.sortAttrValues(klass, newclass)
                if newnewclass != newclass:
                    # A non-empty classAttr at this point can only be
                    # "Class is reduced", since newclass is truthy.
                    if self.classAttr:
                        self.classAttr = "Class is reduced and sorted"
                    else:
                        self.classAttr = "Class is sorted"
                    newclass = newnewclass

            if not self.classAttr:
                self.classAttr = "Class is unchanged"

        # Rebuild the table only if something actually changed.
        if self.reducedAttrs or self.removedAttrs or self.resortedAttrs or newclass != klass:
            newDomain = orange.Domain(newattrs, newclass)
            newData = orange.ExampleTable(newDomain, self.data)
        else:
            newData = self.data

        self.send("Data", newData)

        self.dataChanged = False