def setOutput(self):
    """Split ``self.data`` by the enabled conditions and send both parts.

    The condition groups returned by ``self.getFilterList`` are combined into
    one filter: conditions inside a group are AND-ed, the groups are OR-ed.
    The matching examples go to the "Matching Data" channel and the remaining
    ones to "Unmatched Data".  When ``self.purgeAttributes`` or
    ``self.purgeClasses`` is set, both tables are converted to the domain
    produced by ``orange.RemoveUnusedValues`` whenever it differs from their
    current domain.

    If ``self.data`` is falsy, it is forwarded unchanged as the matching
    output and ``None`` is sent as the unmatched output.
    """
    matchingOutput = self.data
    nonMatchingOutput = None

    if self.data:
        filterList = self.getFilterList(self.data.domain, self.Conditions, enabledOnly=True)
        if len(filterList) > 0:
            dataFilter = orange.Filter_disjunction(
                [orange.Filter_conjunction(group) for group in filterList])
        else:
            dataFilter = orange.Filter_conjunction([])  # a filter that does nothing

        matchingOutput = dataFilter(self.data, 1)
        matchingOutput.name = self.data.name
        nonMatchingOutput = dataFilter(self.data, 1, negate=1)
        nonMatchingOutput.name = self.data.name

        if self.purgeAttributes or self.purgeClasses:
            remover = orange.RemoveUnusedValues(removeOneValued=True)

            newDomain = remover(matchingOutput, 0, True, self.purgeClasses)
            if newDomain != matchingOutput.domain:
                matchingOutput = orange.ExampleTable(newDomain, matchingOutput)

            newDomain = remover(nonMatchingOutput, 0, True, self.purgeClasses)
            if newDomain != nonMatchingOutput.domain:
                # The purged table must replace the one that is sent below;
                # assigning it to a differently spelled name would silently
                # drop the purge for the unmatched output.
                nonMatchingOutput = orange.ExampleTable(newDomain, nonMatchingOutput)

    self.send("Matching Data", matchingOutput)
    self.send("Unmatched Data", nonMatchingOutput)

    self.updateInfoOut(matchingOutput)
def domainPurger(examples, purgeClasses):
    """Return ``examples`` converted to a domain without unused values.

    ``orange.RemoveUnusedValues`` (with ``removeOneValued=True``) is asked for
    a purged domain; ``purgeClasses`` is forwarded to it unchanged.  When the
    resulting domain differs from ``examples.domain`` a new table in that
    domain is returned, otherwise ``examples`` itself is returned.
    """
    import orange

    purge = orange.RemoveUnusedValues(removeOneValued=True)
    purgedDomain = purge(examples, 0, True, purgeClasses)

    domainChanged = purgedDomain != examples.domain
    if domainChanged:
        return orange.ExampleTable(purgedDomain, examples)
    return examples
def checkDomain(self, data, selection=None):
    """Return a domain with a reduced class variable, or ``None``.

    The class variable of ``data`` is passed through
    ``orange.RemoveUnusedValues`` (with ``removeOneValued=1``), evaluated on
    ``selection`` when it is truthy and on ``data`` otherwise.  If the result
    differs from the original class variable, a new domain with the same
    attributes, the new class and the same meta attributes is returned.
    ``None`` is returned when the class is unchanged or ``data`` has no class.
    """
    originalClass = data.domain.classVar
    reducedClass = originalClass

    if originalClass:
        examples = selection if selection else data
        reducedClass = orange.RemoveUnusedValues(originalClass, examples, removeOneValued=1)

    # Construct a new domain only if the class has changed
    # (i.e. to a lesser number of values or to one value (alias None))
    if reducedClass != originalClass:
        reducedDomain = orange.Domain(data.domain.attributes, reducedClass)
        metaAttributes = data.domain.getmetas()
        for metaId in metaAttributes:
            reducedDomain.addmeta(metaId, metaAttributes[metaId])
        return reducedDomain

    return None
def discretizeDomain(data, removeUnusedValues=1, numberOfIntervals=2):
    """Return ``data`` translated to a domain of discretized attributes.

    A continuous class is first discretized with
    ``orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)``; if
    that raises ``orange.KernelException`` a warning is issued and the class
    is dropped.  Continuous attributes are then discretized with
    ``orange.EntropyDiscretization`` when the (possibly rebuilt) data has a
    discrete class and with the EquiN discretizer otherwise; other attributes
    are taken as they are.  When ``removeUnusedValues`` is true each resulting
    attribute is additionally passed through ``orange.RemoveUnusedValues``.
    Attributes for which any of this raises ``orange.KernelException`` are
    skipped with a warning.

    Returns ``None`` when ``data`` is falsy or empty.  Examples with a missing
    class value are not filtered out here.
    """
    entropyDiscretizer = orange.EntropyDiscretization()
    equiNDiscretizer = orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)

    if not data or len(data) == 0:
        return None

    classVar = data.domain.classVar
    className = (classVar and classVar.name) or None

    # if we have a continuous class we have to discretize it before we can
    # discretize the attributes
    if className and classVar.varType == orange.VarTypes.Continuous:
        try:
            newClass = equiNDiscretizer(classVar.name, data)
            newClass.name = className
        except orange.KernelException as ex:
            warnings.warn("Could not discretize class variable '%s'. %s" % (classVar.name, ex.message))
            newClass = None
            className = None
        data = orange.ExampleTable(orange.Domain(data.domain.attributes, newClass), data)

    # ``data`` may have been rebuilt above, so look at its current class.
    currentClass = data.domain.classVar
    classIsDiscrete = currentClass and currentClass.varType == orange.VarTypes.Discrete
    # continuous attributes use entropy discretization when the class is discrete
    continuousDiscretizer = entropyDiscretizer if classIsDiscrete else equiNDiscretizer

    discAttrs = []
    for attr in data.domain.attributes:
        try:
            originalName = attr.name
            if attr.varType == orange.VarTypes.Continuous:
                new_attr = continuousDiscretizer(attr, data)
            else:
                new_attr = attr

            if removeUnusedValues:
                new_attr = orange.RemoveUnusedValues(new_attr, data)
                if new_attr is None:
                    raise orange.KernelException("No values")

            new_attr.name = originalName
            discAttrs.append(new_attr)
        except orange.KernelException as ex:
            # if all values are missing, entropy discretization will throw an
            # exception. in such cases ignore the attribute
            warnings.warn("Could not discretize %s attribute. %s" % (attr.name, ex.message))

    if className:
        discAttrs.append(data.domain.classVar)

    return data.translate(discAttrs, True)
def discretizeDomain(data, removeUnusedValues = 1, numberOfIntervals = 2):
    """Return ``data`` restricted to discretized versions of its attributes.

    A continuous class is first discretized with
    ``orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)`` and
    the data is rebuilt in a domain with that class.  Continuous attributes
    are then discretized with ``orange.EntropyDiscretization`` when the data
    has a discrete class and with the EquiN discretizer otherwise.  When
    ``removeUnusedValues`` is true every attribute is additionally passed
    through ``orange.RemoveUnusedValues``.

    Attributes that cannot be processed (the discretizer raises, or
    ``RemoveUnusedValues`` returns ``None``) are silently left out of the
    result; this is a deliberate best-effort behaviour.

    Returns ``None`` when ``data`` is falsy or empty, otherwise
    ``data.select(...)`` on the collected attributes (plus the class, if any).
    Examples with a missing class value are not filtered out here.
    """
    entroDisc = orange.EntropyDiscretization()
    equiDisc = orange.EquiNDiscretization(numberOfIntervals = numberOfIntervals)
    discAttrs = []

    className = data and len(data) > 0 and data.domain.classVar and data.domain.classVar.name or None

    if not data or len(data) == 0:
        return None

    # if we have a continuous class we have to discretize it before we can
    # discretize the attributes
    if className and data.domain.classVar.varType == orange.VarTypes.Continuous:
        newClass = equiDisc(data.domain.classVar.name, data)
        newClass.name = className
        newDomain = orange.Domain(data.domain.attributes, newClass)
        data = orange.ExampleTable(newDomain, data)

    for attr in data.domain.attributes:
        try:
            name = attr.name
            if attr.varType == orange.VarTypes.Continuous:
                # if continuous attribute then use entropy discretization
                if data.domain.classVar and data.domain.classVar.varType == orange.VarTypes.Discrete:
                    attr = entroDisc(attr, data)
                else:
                    attr = equiDisc(attr, data)
            if removeUnusedValues:
                attr = orange.RemoveUnusedValues(attr, data)
                if attr is None:
                    # nothing left of the attribute; skip it explicitly
                    # instead of relying on an AttributeError below
                    continue
            attr.name = name
            discAttrs.append(attr)
        except Exception:
            # if all values are missing, entropy discretization will throw an
            # exception. in such cases ignore the attribute.  Only Exception
            # is caught so KeyboardInterrupt/SystemExit still propagate.
            continue

    if className:
        discAttrs.append(data.domain.classVar)
    return data.select(discAttrs)
import orange data = orange.ExampleTable("unusedValues") newattrs = [ orange.RemoveUnusedValues(attr, data) for attr in data.domain.variables ] print for attr in range(len(data.domain)): print data.domain[attr], if newattrs[attr] == data.domain[attr]: print "retained as is" elif newattrs[attr]: print "reduced, new values are", newattrs[attr].values else: print "removed" filteredattrs = filter(bool, newattrs) newdata = orange.ExampleTable(orange.Domain(filteredattrs), data) print "\nOriginal example table" for ex in data: print ex print "\nReduced example table" for ex in newdata: print ex print "\nRemoval with 'removedOneValued=true'" reducer = orange.RemoveUnusedValues(removeOneValued=1) newattrs = [reducer(attr, data) for attr in data.domain.variables]
def process(self):
    """Purge and/or sort the domain of ``self.data`` and send the result.

    Does nothing when ``self.data`` is ``None``.  Otherwise the counters
    ``reducedAttrs``, ``removedAttrs`` and ``resortedAttrs`` are reset and
    updated while the attributes are processed, ``classAttr`` is set to a
    short description of what happened to the class, and the (possibly
    converted) data is sent on the "Data" channel.  A new table is built only
    if some attribute or the class actually changed; otherwise ``self.data``
    itself is sent.  ``self.dataChanged`` is cleared at the end.
    """
    if self.data is None:
        return

    self.reducedAttrs = 0
    self.removedAttrs = 0
    self.resortedAttrs = 0
    # NOTE(review): this resets `classAttribute`, while the rest of the method
    # reports through `classAttr` -- confirm this attribute is still needed.
    self.classAttribute = 0

    # NOTE(review): `removeValues` is only consulted when `removeAttributes`
    # or `sortValues` is set -- confirm that this is intended.
    if self.removeAttributes or self.sortValues:
        newattrs = []
        for attr in self.data.domain.attributes:
            if attr.varType == orange.VarTypes.Continuous:
                # continuous attributes are kept only if they are not constant
                if orange.RemoveRedundantOneValue.has_at_least_two_values(self.data, attr):
                    newattrs.append(attr)
                else:
                    self.removedAttrs += 1
                continue

            if attr.varType != orange.VarTypes.Discrete:
                # neither continuous nor discrete: pass through untouched
                newattrs.append(attr)
                continue

            if self.removeValues:
                newattr = orange.RemoveUnusedValues(attr, self.data)
                if not newattr:
                    self.removedAttrs += 1
                    continue
                if newattr != attr:
                    self.reducedAttrs += 1
            else:
                newattr = attr

            if self.removeValues and len(newattr.values) < 2:
                self.removedAttrs += 1
                continue

            if self.sortValues:
                newnewattr = self.sortAttrValues(attr, newattr)
                if newnewattr != newattr:
                    self.resortedAttrs += 1
                    newattr = newnewattr

            newattrs.append(newattr)
    else:
        newattrs = self.data.domain.attributes

    klass = self.data.domain.classVar
    if not klass:
        newclass = klass
        self.classAttr = "No class"
    elif klass.varType != orange.VarTypes.Discrete:
        newclass = klass
        self.classAttr = "Class is not discrete"
    elif not (self.removeClassAttribute or self.sortClasses):
        newclass = klass
        self.classAttr = "Class is not checked"
    else:
        # An empty description means "nothing happened to the class so far".
        self.classAttr = ""

        if self.removeClasses:
            newclass = orange.RemoveUnusedValues(klass, self.data)
        else:
            newclass = klass

        if not newclass or (self.removeClassAttribute and len(newclass.values) < 2):
            newclass = None
            self.classAttr = "Class is removed"
        elif len(newclass.values) != len(klass.values):
            self.classAttr = "Class is reduced"

        if newclass and self.sortClasses:
            newnewclass = self.sortAttrValues(klass, newclass)
            if newnewclass != newclass:
                if self.classAttr:
                    self.classAttr = "Class is reduced and sorted"
                else:
                    self.classAttr = "Class is sorted"
                newclass = newnewclass

        if not self.classAttr:
            self.classAttr = "Class is unchanged"

    if self.reducedAttrs or self.removedAttrs or self.resortedAttrs or newclass != klass:
        newDomain = orange.Domain(newattrs, newclass)
        newData = orange.ExampleTable(newDomain, self.data)
    else:
        newData = self.data

    self.send("Data", newData)

    self.dataChanged = False