예제 #1
0
 def __call__(self, data, weight=None):
     """Fit a Bayes model on an entropy-discretized copy of ``data``.

     ``data`` is passed through ``orange.Preprocessor_discretize`` with an
     ``orange.EntropyDiscretization`` method.  ``orange.BayesLearner`` is
     then called on the discretized table with ``weight`` and
     ``adjustThreshold=0``, and the resulting model is returned wrapped in
     ``Classifier``.
     """
     entropy_method = orange.EntropyDiscretization()
     discretized = orange.Preprocessor_discretize(data, method=entropy_method)
     bayes_model = orange.BayesLearner(
         discretized, weight, adjustThreshold=0)
     return Classifier(classifier=bayes_model)
예제 #2
0
  def __calculateMeasures(self):
    """Evaluate the classifier on the hold-out set and log the scores.

    Loads the "train" and "hold" tables through ``__loadDataFromES``,
    entropy-discretizes the training table, converts the hold-out table to
    the discretized domain and classifies every hold-out row.  The label
    "1" is counted as the "good" class and any other label as "bad".
    Precision, recall and balanced F-measure are logged as percentages for
    both classes.

    A measure whose denominator is zero (e.g. nothing was predicted as
    "1", or the hold-out set is empty) is reported as 0.0 instead of
    raising ZeroDivisionError.

    Side effects: rebinds ``self.trainD`` and ``self.holdOutD``, and loads
    ``self.classifier`` from ``self.classifierFilePath`` if it is unset.
    """
    def percentage(hits, total):
      # Share of ``hits`` in ``total`` as a percentage; 0.0 when undefined.
      if total == 0:
        return 0.0
      return 100.0 * hits / total

    def fMeasure(precision, recall):
      # Harmonic mean of precision and recall; 0.0 when both are zero.
      if precision + recall == 0:
        return 0.0
      return 2.0 * precision * recall / (precision + recall)

    truePositives = 0
    trueNegatives = 0
    totalPositives = 0
    totalNegatives = 0
    totalHoldOutGoodPhrases = 0
    totalHoldOutBadPhrases = 0

    self.trainD = self.__loadDataFromES("train", None)
    self.holdOutD = self.__loadDataFromES("hold", self.trainD.domain)
    self.trainD = orange.Preprocessor_discretize(self.trainD, method=orange.EntropyDiscretization())
    # Re-express the hold-out rows in the discretized training domain.
    self.holdOutD = orange.ExampleTable(self.trainD.domain, self.holdOutD)

    for row in self.holdOutD:
      actualClassType = row[-1].value

      featureSet = {}
      for i, feature in enumerate(self.features):
        featureSet[feature["name"]] = row[i].value

      if self.classifier is None:
        # NOTE(review): pickle.load can execute arbitrary code; the
        # classifier file must come from a trusted location.
        with open(self.classifierFilePath) as classifierFile:
          self.classifier = pickle.load(classifierFile)
      classType = self.classifier.classify(featureSet)

      if classType == "1":
        totalPositives += 1
        if classType == actualClassType:
          truePositives += 1
      else:
        totalNegatives += 1
        if classType == actualClassType:
          trueNegatives += 1

      if actualClassType == "1":
        totalHoldOutGoodPhrases += 1
      else:
        totalHoldOutBadPhrases += 1

    precisionOfGood = percentage(truePositives, totalPositives)
    recallOfGood = percentage(truePositives, totalHoldOutGoodPhrases)
    fMeasureOfGood = fMeasure(precisionOfGood, recallOfGood)
    precisionOfBad = percentage(trueNegatives, totalNegatives)
    recallOfBad = percentage(trueNegatives, totalHoldOutBadPhrases)
    fMeasureOfBad = fMeasure(precisionOfBad, recallOfBad)
    self.logger.info("\nPrecision of Good: " + str(round(precisionOfGood, 2)) + "%")
    self.logger.info("Recall of Good: " + str(round(recallOfGood, 2)) + "%")
    self.logger.info("Balanced F-measure of Good: " + str(round(fMeasureOfGood, 2)) + "%")
    self.logger.info("Precision of Bad: " + str(round(precisionOfBad, 2)) + "%")
    self.logger.info("Recall of Bad: " + str(round(recallOfBad, 2)) + "%")
    self.logger.info("Balanced F-measure of Bad: " + str(round(fMeasureOfBad, 2)) + "%")
예제 #3
0
  def classify(self):
    """Serve classification requests from ``self.worker`` until killed.

    Each received message is handled by its ``content``:

    * ``"kill"``: the message is closed; if no dispatchers are registered
      the worker is ended and the loop exits, otherwise a new "kill" is
      sent to ``self.workerName`` and the loop keeps running.
    * a dict whose ``"type"`` is ``"classify"``: the sender is registered
      as a dispatcher if unknown, the train/test tables are loaded and
      discretized, and every test row is classified, indexed through
      ``self.esClient`` and answered with a "classified" reply.

    Messages of any other type are ignored.
    """
    while True:
      message = self.worker.receive()
      content = message["content"]

      if content == "kill":
        message["responseId"] = message["requestId"]
        self.worker.close(message)
        if len(self.dispatchers) != 0:
          # Dispatchers are still registered: send "kill" again and go on.
          self.worker.send(content="kill", to=self.workerName)
          continue
        self.worker.end()
        break

      if content["type"] != "classify":
        continue

      sender = content["from"]
      if sender not in self.dispatchers:
        self.dispatchers[sender] = RemoteChannel(sender, self.config)
        self.dispatchers[sender].listen(self.unregisterDispatcher)
      self.phraseId = content["phraseId"]

      # Train lazily the first time a classification is requested.
      if self.classifier == None:
        self.trainD = self.__loadDataFromES("train", None)
        self.trainD = orange.Preprocessor_discretize(self.trainD, method=orange.EntropyDiscretization())
        self.__train()

      self.trainD = self.__loadDataFromES("train", None)
      testD = self.__loadDataFromES("test", self.trainD.domain)

      self.trainD = orange.Preprocessor_discretize(self.trainD, method=orange.EntropyDiscretization())
      # Re-express the test rows in the discretized training domain.
      testD = orange.ExampleTable(self.trainD.domain, testD)

      for row in testD:
        phrase = row.getmetas().values()[0].value
        featureSet = dict(
          (feature["name"], row[i].value)
          for i, feature in enumerate(self.features))

        prob = self.classifier.prob_classify(featureSet).prob("1")
        classType = self.classifier.classify(featureSet)
        source = self.phraseData["_source"]
        source["prob"] = prob
        source["class_type"] = classType
        self.logger.info("Classified '" + phrase + "' as " + classType + " with probability " + str(prob))
        self.esClient.index(index=self.processorIndex, doc_type=self.processorPhraseType, id=self.phraseId, body=self.phraseData["_source"])
        self.worker.reply(message, {"phraseId": self.phraseId, "status" : "classified", "type" : "reply"}, 120000000)

    self.logger.info("Terminating classification worker")
예제 #4
0
    def __call__(self, data, targetClass, num_of_rules=0):
        '''Induce CN2-SD rules for ``targetClass`` by weighted covering.

        Continuous attributes, if any, are first replaced by their
        entropy-discretized versions.  Rules are then requested from
        ``self.rbf`` one at a time; after each rule the example weights are
        decreased, until a rule has non-positive quality or
        ``num_of_rules`` rules were collected (0 means no limit).

        Returns an ``SDRules`` object labelled "CN2-SD".  When the data was
        discretized the rules are converted back with ``getUndiscretized``
        so that they apply to the original data.
        '''
        original_data = data
        data_discretized = False
        if data.domain.hasContinuousAttributes():
            data_discretized = True
            discretize = orange.EntropyDiscretization(forceAttribute=True)
            # Swap every continuous attribute for its discretized version;
            # all other attributes are kept as they are.
            new_domain = [
                discretize(attribute, data)
                if attribute.varType == orange.VarTypes.Continuous
                else attribute
                for attribute in data.domain.attributes
            ]
            data = original_data.select(
                new_domain + [original_data.domain.classVar])

        self.data = data
        self.max_rules = num_of_rules
        rules = []

        tc = orange.Value(data.domain.classVar, targetClass)

        # Weighted covering bookkeeping (per the original comments: weights
        # of all examples start at 1, counters at 0).
        self.data.addMetaAttribute(self.weightID)
        self.data.addMetaAttribute(self.counter)

        # This rule is rebuilt before returning; the construction is kept
        # here as in the original code.
        targetClassRule = SDRule(data, targetClass, conditions=[], g=1)

        tmpRule = self.rbf(data, self.weightID, targetClass, None)
        while tmpRule.quality > 0:
            if self.max_rules != 0 and len(rules) >= self.max_rules:
                break
            bestRule = SDRule(self.data, tc, tmpRule.filter.conditions)
            bestRule.quality = tmpRule.quality
            self.decreaseExampleWeights(bestRule)
            rules.append(bestRule)
            tmpRule = self.rbf(data, self.weightID, targetClass, None)

        rule_data = original_data if data_discretized else data
        targetClassRule = SDRule(rule_data, targetClass, conditions=[], g=1)
        if data_discretized:
            # Translate the rules so that they apply to the original data.
            rules = [rule.getUndiscretized(original_data) for rule in rules]

        return SDRules(rules, targetClassRule, "CN2-SD")
예제 #5
0
 def getDiscretizer(self):
     """Build the preprocessor selected by ``self.discInd``.

     * 0 -- ``Preprocessor_discretizeEntropy`` with entropy discretization.
     * 1, 2 -- ``Preprocessor_discretize`` using the discretizer class
       listed in ``self.DISCRETIZERS``; each keyword argument is read from
       the attribute of the same name on ``self``, falling back to the
       default stored in ``DISCRETIZERS``.
     * 3 -- ``Preprocessor_removeContinuous``.

     Raises:
         ValueError: if ``self.discInd`` is none of the values above
             (previously this surfaced as an UnboundLocalError).
     """
     if self.discInd == 0:
         return Preprocessor_discretizeEntropy(
             method=orange.EntropyDiscretization())
     if self.discInd in [1, 2]:
         name, disc, kwds = self.DISCRETIZERS[self.discInd]
         options = dict((key, getattr(self, key, val))
                        for key, val in kwds.items())
         return Preprocessor_discretize(method=disc(**options))
     if self.discInd == 3:
         return Preprocessor_removeContinuous()
     raise ValueError(
         "Unsupported discretization index: %r" % (self.discInd,))
예제 #6
0
def discretizeDomain(data, removeUnusedValues=1, numberOfIntervals=2):
    """Return ``data`` translated to a domain without continuous attributes.

    A continuous class variable is first discretized into
    ``numberOfIntervals`` equal-frequency intervals.  Continuous attributes
    are discretized with entropy discretization when the class is
    discrete, and with equal-frequency discretization otherwise.  When
    ``removeUnusedValues`` is true, every attribute is additionally passed
    through ``orange.RemoveUnusedValues``.

    Attributes that raise ``orange.KernelException`` (or end up with no
    values) are skipped with a warning.  Returns ``None`` for missing or
    empty ``data``.
    """
    entroDisc = orange.EntropyDiscretization()
    equiDisc = orange.EquiNDiscretization(numberOfIntervals=numberOfIntervals)

    if not data or len(data) == 0:
        return None

    class_var = data.domain.classVar
    className = (class_var and class_var.name) or None

    # A continuous class has to be discretized before the attributes can be.
    if className and data.domain.classVar.varType == orange.VarTypes.Continuous:
        try:
            newClass = equiDisc(data.domain.classVar.name, data)
            newClass.name = className
        except orange.KernelException as ex:
            warnings.warn("Could not discretize class variable '%s'. %s" %
                          (data.domain.classVar.name, ex.message))
            newClass = None
            className = None
        data = orange.ExampleTable(
            orange.Domain(data.domain.attributes, newClass), data)

    # Entropy discretization needs a discrete class; otherwise fall back to
    # equal-frequency intervals.
    class_var = data.domain.classVar
    if class_var and class_var.varType == orange.VarTypes.Discrete:
        discretize = entroDisc
    else:
        discretize = equiDisc

    discAttrs = []
    for attr in data.domain.attributes:
        try:
            name = attr.name
            is_continuous = attr.varType == orange.VarTypes.Continuous
            new_attr = discretize(attr, data) if is_continuous else attr
            if removeUnusedValues:
                new_attr = orange.RemoveUnusedValues(new_attr, data)
                if new_attr is None:
                    raise orange.KernelException("No values")

            new_attr.name = name
            discAttrs.append(new_attr)
        except orange.KernelException as ex:
            # E.g. all values missing: the attribute is left out.
            warnings.warn("Could not discretize %s attribute. %s" %
                          (attr.name, ex.message))

    if className:
        discAttrs.append(data.domain.classVar)
    return data.translate(discAttrs, True)
예제 #7
0
    def __call__(self, data, targetClass, num_of_rules ):
        """Run the SD beam search and return the rules found.

        Continuous attributes, if any, are first replaced by their
        entropy-discretized versions.  The beam is then refined repeatedly:
        every rule in the beam is extended with every attribute/value
        condition, and an extended rule replaces the worst rule of the new
        beam when it has enough support, is better than that worst rule and
        is relevant.  The search stops after a pass without improvement.

        Arguments:
            data: example table to learn from.
            targetClass: class value the rules describe.
            num_of_rules: when non-zero, the beam is reduced with
                ``ruleSubsetSelection`` to this many rules.

        Returns:
            An ``SDRules`` object labelled "SD", or ``None`` when
            ``self.dataOK(data)`` is false.
        """
        # Per the original comment, dataOK checks whether targetClass is discrete.
        if not self.dataOK(data):
            return None

        data_discretized = False
        # If any of the attributes are continuous, discretize them
        if data.domain.hasContinuousAttributes():
            original_data = data
            data_discretized = True
            new_domain = []
            discretize = orange.EntropyDiscretization(forceAttribute=True)
            for attribute in data.domain.attributes:
                if attribute.varType == orange.VarTypes.Continuous:
                    new_domain.append(discretize(attribute, data))
                else:
                    new_domain.append(attribute)
            data = original_data.select(new_domain + [original_data.domain.classVar])

        # initialization of beams (each list repeats one SDRule instance)
        beam = [SDRule(data=data, targetClass=targetClass,  g=self.g)] * self.beamWidth
        newBeam = [SDRule(data=data, targetClass=targetClass,  g=self.g)] * self.beamWidth
        worstRuleIndex = 0

        # The original used the undefined names ``true``/``false`` here,
        # which raises NameError; Python's booleans are True/False.
        improvements = True
        while improvements:
            improvements = False
            for rule in beam:
                for attr in data.domain.attributes:
                    value = attr.firstvalue()
                    while(value):
                        newRule = rule.cloneAndAddCondition(attr,value)
                        if newRule.support > self.minSupport and self.betterThanWorstRule(newRule, newBeam, worstRuleIndex) and self.isRelevant(newRule, newBeam):
                            worstRuleIndex = self.replaceWorstRule(newRule, newBeam, worstRuleIndex)
                            improvements = True
                        value = attr.nextvalue(value)
            beam = newBeam

        # perform rule subset selection
        if num_of_rules != 0:
            beam = self.ruleSubsetSelection(beam, num_of_rules, data)

        if data_discretized:
            targetClassRule = SDRule(original_data, targetClass, conditions=[], g=1)
            # change beam so the rules apply to original data
            beam = [rule.getUndiscretized(original_data) for rule in beam]
        else:
            targetClassRule = SDRule(data, targetClass, conditions=[], g =1)

        return  SDRules(beam, targetClassRule, "SD")
예제 #8
0
def entropyDiscretization(data):
    """Discretize ``data`` with entropy-based discretization.

    The random seed is reset to 0 first.  Attributes that end up with at
    most one value after discretization are dropped.

    Arguments: data
    Returns:   table of examples containing the remaining discretized
               attributes followed by the class variable.
    """
    orange.setrandseed(0)
    discretized = orange.Preprocessor_discretize(
        data, method=orange.EntropyDiscretization())

    # Keep only attributes that still distinguish between examples.
    kept = [attr for attr in discretized.domain.attributes
            if len(attr.values) > 1]
    kept.append(discretized.domain.classVar)
    return discretized.select(kept)
예제 #9
0
def discretizeDomain(data, removeUnusedValues = 1, numberOfIntervals = 2):
    """Return ``data`` restricted to discretized versions of its attributes.

    A continuous class variable is first discretized into
    ``numberOfIntervals`` equal-frequency intervals.  Continuous attributes
    are discretized with entropy discretization when the class is
    discrete, and with equal-frequency discretization otherwise.  When
    ``removeUnusedValues`` is true, every attribute is additionally passed
    through ``orange.RemoveUnusedValues``.

    An attribute that cannot be processed (for example because all of its
    values are missing) is left out and a warning is issued.  Returns
    ``None`` for missing or empty ``data``.
    """
    import warnings  # local import: not guaranteed to be imported at file level

    entroDisc = orange.EntropyDiscretization()
    equiDisc  = orange.EquiNDiscretization(numberOfIntervals = numberOfIntervals)
    discAttrs = []

    if not data or len(data) == 0:
        return None

    classVar = data.domain.classVar
    className = (classVar and classVar.name) or None

    # if we have a continuous class we have to discretize it before we can discretize the attributes
    if className and data.domain.classVar.varType == orange.VarTypes.Continuous:
        newClass = equiDisc(data.domain.classVar.name, data)
        newClass.name = className
        newDomain = orange.Domain(data.domain.attributes, newClass)
        data = orange.ExampleTable(newDomain, data)

    for attr in data.domain.attributes:
        try:
            name = attr.name
            new_attr = attr
            if attr.varType == orange.VarTypes.Continuous:
                if data.domain.classVar and data.domain.classVar.varType == orange.VarTypes.Discrete:
                    new_attr = entroDisc(attr, data)
                else:
                    new_attr = equiDisc(attr, data)
            if removeUnusedValues:
                new_attr = orange.RemoveUnusedValues(new_attr, data)
            new_attr.name = name
            discAttrs.append(new_attr)
        except Exception as ex:
            # Best effort, as before: the attribute is ignored.  Unlike the
            # former bare ``except: pass`` this no longer swallows
            # KeyboardInterrupt/SystemExit and it reports what was skipped.
            warnings.warn("Could not discretize %s attribute. %s" %
                          (attr.name, ex))

    if className: discAttrs.append(data.domain.classVar)
    return data.select(discAttrs)
예제 #10
0
    def _prepare(self, t):
        """Return a version of table ``t`` prepared in two steps.

        1. Attributes whose ``varType`` equals 2 are replaced by discretized
           versions (entropy discretization, falling back to two
           equal-frequency intervals).
        2. Every column in which some example has a special value is
           replaced by a new enumerated variable with an extra value '.',
           which takes the place of the special values.

        A warning is issued for every attribute that is replaced.  ``t`` is
        returned unchanged when neither step applies.
        """
        # prepares an Orange table so that it doesn't contain continuous
        # attributes or missing values

        ### DISCRETIZE VARIABLES ###

        newatt = []  # discretized replacements
        oldatt = []  # attributes kept as they are
        entroD = orange.EntropyDiscretization()
        equiD = orange.EquiNDiscretization(numberOfIntervals=2)
        for i in t.domain.attributes:
            # NOTE(review): 2 is presumably orange.VarTypes.Continuous -- confirm
            if i.varType == 2:
                d = entroD(i, t)
                if len(d.values) < 2:
                    # prevent discretization into a single value
                    d = equiD(i, t)
                    d.name = 'E' + d.name
                warnings.warn('Discretizing %s into %s with %d values.' %
                              (i.name, d.name, len(d.values)))
                newatt.append(d)
            else:
                oldatt.append(i)
        if len(newatt) > 0:
            # Untouched attributes come first, then the discretized ones,
            # then the class variable.
            t = t.select(oldatt + newatt + [t.domain.classVar])

        ### FIX MISSING VALUES ###

        # indices (into all_attributes) of columns holding a special value
        special_attributes = []

        # 2006-08-23: fixed by PJ: append classVar only if it exists
        ##        all_attributes = [i for i in t.domain.attributes]+[t.domain.classVar]
        all_attributes = [i for i in t.domain.attributes]
        if t.domain.classVar:
            all_attributes += [t.domain.classVar]

        for i in range(len(all_attributes)):
            for j in t:
                if j[i].isSpecial():
                    special_attributes.append(i)
                    break  # one special value is enough to flag the column
        # create new attributes
        if len(special_attributes) > 0:
            # prepare attributes
            newatts = []
            for i in range(len(all_attributes)):
                old = all_attributes[i]
                if i in special_attributes:
                    oldv = [v for v in old.values]
                    # '.' is reserved as the stand-in for special values
                    assert ('.' not in oldv)
                    new = orange.EnumVariable(name='M_' + old.name,
                                              values=oldv + ['.'])
                    warnings.warn('Removing special values from %s into %s.' %
                                  (old.name, new.name))
                    newatts.append(new)
                else:
                    newatts.append(old)
            # convert table
            exs = []

            # 2006-08-23: added by PJ: add a class variable (if not already existing)
            if not t.domain.classVar:
                newatts.append(orange.EnumVariable("class", values=["."]))
                t = orange.ExampleTable(
                    orange.Domain(t.domain.attributes, newatts[-1]), t)

            newd = orange.Domain(newatts)
            for ex in t:
                nex = []
                for i in range(len(newatts)):
                    if ex[i].isSpecial():
                        v = newatts[i]('.')
                    else:
                        # the value is carried over via its integer form
                        v = newatts[i](int(ex[i]))
                    nex.append(v)
                exs.append(orange.Example(newd, nex))
            t = orange.ExampleTable(exs)
        return t
예제 #11
0
# Description: Entropy based discretization compared to discretization with equal-frequency
#              of instances in intervals
# Category:    preprocessing
# Uses:        iris.tab
# Classes:     Preprocessor_discretize, EntropyDiscretization
# Referenced:  o_categorization.htm

import orange


def show_values(data, heading):
    """Print ``heading`` and then one "name: v1, v2, ..." line per attribute.

    The values of every attribute in ``data.domain.attributes`` are joined
    with ", ".  An attribute without values prints an empty list instead
    of raising TypeError, as ``reduce`` did on an empty sequence.
    """
    print(heading)
    for a in data.domain.attributes:
        print("%s: %s" % (a.name, ", ".join(list(a.values))))


# Load the data set named "iris".
data = orange.ExampleTable("iris")

# Discretize with entropy-based discretization and list the resulting
# values of every attribute.
data_ent = orange.Preprocessor_discretize(
    data, method=orange.EntropyDiscretization())
show_values(data_ent, "Entropy based discretization")
print

# Same table discretized with EquiNDiscretization into 3 intervals per
# attribute, for comparison.
data_n = orange.Preprocessor_discretize(
    data, method=orange.EquiNDiscretization(numberOfIntervals=3))
show_values(data_n, "Equal-frequency intervals")
예제 #12
0
 def __init__(self, discr=None, learnr=None):
     """Store the discretization method and the learner to use.

     Arguments:
         discr: discretization object; when omitted, a new
             ``orange.EntropyDiscretization()`` is created.
         learnr: learner object; when omitted, a new
             ``orange.BayesLearner()`` is created.

     The defaults are created per instance.  Previously they were built
     once when the method was defined and shared by every instance, so
     changing a setting on one object's default affected all others.
     """
     self.disc = orange.EntropyDiscretization() if discr is None else discr
     self.learner = orange.BayesLearner() if learnr is None else learnr
예제 #13
0
파일: disc3.py 프로젝트: stefie10/slu_hri
# Description: Attribute-based discretization. Shows how different attributes may be discretized with different categorization methods, and how the default value names assigned by these methods can simply be replaced by a list of user-defined names.
# Category:    preprocessing
# Uses:        iris
# Classes:     EquiNDiscretization, EntropyDiscretization
# Referenced:  o_categorization.htm


def printexamples(data, inxs, msg="%i examples"):
    """Print a heading and the examples of ``data`` at the given indices.

    ``msg`` is formatted with the number of indices and printed first.
    Each following line holds an index and ``data[index]`` separated by a
    space; an empty line closes the listing.
    """
    print(msg % len(inxs))
    for position in inxs:
        print("%s %s" % (position, data[position]))
    print("")


import orange
# Load the data set named "iris".
iris = orange.ExampleTable("iris")

equiN = orange.EquiNDiscretization(numberOfIntervals=4)
entropy = orange.EntropyDiscretization()

# Discretize two attributes into 4 intervals each and replace the value
# names generated by the discretizer with user-defined ones.
pl = equiN("petal length", iris)
sl = equiN("sepal length", iris)
pl.values = sl.values = ["very low", "low", "high", "very high"]
# The same attribute discretized with the entropy-based method.
sl_ent = entropy("sepal length", iris)

inxs = [0, 15, 35, 50, 98]
# New table mixing original attributes with their discretized versions.
d_iris = iris.select(
    ["sepal width", pl, "sepal length", sl, sl_ent, iris.domain.classVar])
printexamples(iris, inxs, "%i examples before discretization")
# d_iris is the discretized table, so its heading must say "after".
printexamples(d_iris, inxs, "%i examples after discretization")
예제 #14
0
    def computeDiscretizer(self, i, idx, onlyDefaults=False):
        """Compute and store the discretizer for the attribute at ``idx``.

        The method and interval count come from ``self.indiData[idx]``; a
        method of 0 means "use the widget-wide default"
        (``self.discretization`` / ``self.intervals``).  A custom method
        without usable cut-off points also falls back to the default.

        Arguments:
            i: position of the attribute in ``self.indiLabels``.
            idx: index of the attribute in ``self.data.domain``.
            onlyDefaults: when true, attributes that do not use the default
                method are left untouched.

        Side effects: sets ``self.discretizers[idx]`` (``None`` = leave
        continuous, ``False`` = removed or not discretizable), updates
        ``self.indiLabels[i]``, resets ``self.attrList`` and, for the
        selected attribute, updates the split points shown in
        ``self.graph``.
        """
        attr = self.data.domain[idx]
        indiData = self.indiData[idx]

        discType, intervals = indiData[:2]
        discName = self.shortDiscNames[discType]

        defaultUsed = not discType

        if defaultUsed:
            discType = self.discretization+1
            intervals = self.intervals

        if discType >= self.D_N_METHODS + 1:
            # Custom cut-off points; anything unparsable counts as "none".
            try:
                customs = [float(r) for r in indiData[discType-self.D_N_METHODS+1]]
            except Exception:
                customs = []

            if not customs:
                discType = self.discretization+1
                intervals = self.intervals
                discName = "%s ->%s)" % (self.shortDiscNames[indiData[0]][:-1], self.shortDiscNames[discType][2:-1])
                defaultUsed = True

        if onlyDefaults and not defaultUsed:
            return

        discType -= 1
        try:
            if discType == self.D_LEAVE: # leave continuous
                discretizer = None
            elif discType == self.D_ENTROPY:
                discretizer = orange.EntropyDiscretization(attr, self.data)
            elif discType == self.D_FREQUENCY:
                discretizer = orange.EquiNDiscretization(attr, self.data, numberOfIntervals = intervals)
            elif discType == self.D_WIDTH:
                discretizer = orange.EquiDistDiscretization(attr, self.data, numberOfIntervals = intervals)
            elif discType == self.D_REMOVE:
                discretizer = False
            else:
                discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(attr)
        except Exception:
            # Any failure to discretize marks the attribute as not
            # discretizable; KeyboardInterrupt/SystemExit now propagate.
            discretizer = False


        self.discretizers[idx] = discretizer

        if discType == self.D_LEAVE:
            discInts = ""
        elif discType == self.D_REMOVE:
            discInts = ""
        elif not discretizer:
            discInts = ": "+"<can't discretize>"
        else:
            points = discretizer.getValueFrom.transformer.points
            if points:
                discInts = ": " + ", ".join([str(attr(x)) for x in points])
            else:
                discInts = ": "+"<removed>"
        self.indiLabels[i] = discInts + discName
        self.attrList.reset()

        if i == self.selectedAttr:
            if discretizer:
                splits = discretizer.getValueFrom.transformer.points or []
            else:
                splits = []
            self.graph.setSplits(splits)
예제 #15
0
    def __init__(self, parent=None, signalManager=None, name="Preprocess"):
        """Build the Preprocess widget.

        Declares the widget's input and output signals, creates the
        default schema, lays out the "Preprocessors", "Saved Schemas" and
        "Output" boxes in the control area together with their actions,
        prepares the stacked layout of the main area and finally loads and
        activates the saved settings.
        """
        OWWidget.__init__(self, parent, signalManager, name)

        self.inputs = [("Example Table", ExampleTable, self.setData)
                       ]  #, ("Learner", orange.Learner, self.setLearner)]
        self.outputs = [("Preprocess", orngWrap.PreprocessedLearner),
                        ("Preprocessed Example Table", ExampleTable)
                        ]  #, ("Preprocessor", orange.Preprocessor)]

        self.autoCommit = False
        self.changedFlag = False

        #        self.allSchemas = [PreprocessorSchema("Default" , [Preprocessor_discretize(method=orange.EntropyDiscretization()), Preprocessor_dropMissing()])]
        self.allSchemas = [("Default", [
            Preprocessor_discretizeEntropy(
                method=orange.EntropyDiscretization()),
            Preprocessor_dropMissing()
        ], 0)]

        self.lastSelectedSchemaIndex = 0

        # --- "Preprocessors" box: list of preprocessors in the schema ---
        self.preprocessorsList = PyListModel([], self)

        box = OWGUI.widgetBox(self.controlArea, "Preprocessors", addSpace=True)
        box.layout().setSpacing(1)

        self.setStyleSheet("QListView::item { margin: 1px;}")
        self.preprocessorsListView = QListView()
        self.preprocessorsListSelectionModel = ListSingleSelectionModel(
            self.preprocessorsList, self)
        self.preprocessorsListView.setItemDelegate(
            PreprocessorItemDelegate(self))
        self.preprocessorsListView.setModel(self.preprocessorsList)

        self.preprocessorsListView.setSelectionModel(
            self.preprocessorsListSelectionModel)
        self.preprocessorsListView.setSelectionMode(QListView.SingleSelection)

        self.connect(self.preprocessorsListSelectionModel,
                     SIGNAL("selectedIndexChanged(QModelIndex)"),
                     self.onPreprocessorSelection)
        # The handler must *call* commitIf; the original lambda only
        # returned the bound method, so nothing happened on dataChanged.
        self.connect(self.preprocessorsList,
                     SIGNAL("dataChanged(QModelIndex, QModelIndex)"),
                     lambda arg1, arg2: self.commitIf())

        box.layout().addWidget(self.preprocessorsListView)

        self.addPreprocessorAction = QAction("+", self)
        self.addPreprocessorAction.pyqtConfigure(
            toolTip="Add a new preprocessor to the list")
        self.removePreprocessorAction = QAction("-", self)
        self.removePreprocessorAction.pyqtConfigure(
            toolTip="Remove selected preprocessor from the list")
        self.removePreprocessorAction.setEnabled(False)

        # "Remove" is only available while a preprocessor is selected.
        self.connect(
            self.preprocessorsListSelectionModel,
            SIGNAL("selectedIndexChanged(QModelIndex)"),
            lambda index: self.removePreprocessorAction.setEnabled(
                index.isValid()))

        actionsWidget = ModelActionsWidget(
            [self.addPreprocessorAction, self.removePreprocessorAction])
        actionsWidget.layout().setSpacing(1)
        actionsWidget.layout().addStretch(10)

        box.layout().addWidget(actionsWidget)

        self.connect(self.addPreprocessorAction, SIGNAL("triggered()"),
                     self.onAddPreprocessor)
        self.connect(self.removePreprocessorAction, SIGNAL("triggered()"),
                     self.onRemovePreprocessor)

        # --- "Saved Schemas" box: filterable list of stored schemas ---
        box = OWGUI.widgetBox(self.controlArea, "Saved Schemas", addSpace=True)

        self.schemaFilterEdit = OWGUIEx.LineEditFilter(self)
        box.layout().addWidget(self.schemaFilterEdit)

        self.schemaList = PyListModel([],
                                      self,
                                      flags=Qt.ItemIsSelectable
                                      | Qt.ItemIsEditable | Qt.ItemIsEnabled)
        self.schemaListProxy = PySortFilterProxyModel(filter_fmt="{0.name}",
                                                      parent=self)
        self.schemaListProxy.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.schemaListProxy.setSourceModel(self.schemaList)
        self.schemaListView = QListView()
        self.schemaListView.setItemDelegate(PreprocessorSchemaDelegate(self))
        #        self.schemaListView.setModel(self.schemaList)
        self.schemaListView.setModel(self.schemaListProxy)
        self.connect(self.schemaFilterEdit, SIGNAL("textEdited(QString)"),
                     self.schemaListProxy.setFilterRegExp)
        box.layout().addWidget(self.schemaListView)

        self.schemaListSelectionModel = ListSingleSelectionModel(
            self.schemaListProxy, self)
        self.schemaListView.setSelectionMode(QListView.SingleSelection)
        self.schemaListView.setSelectionModel(self.schemaListSelectionModel)

        self.connect(self.schemaListSelectionModel,
                     SIGNAL("selectedIndexChanged(QModelIndex)"),
                     self.onSchemaSelection)

        self.addSchemaAction = QAction("+", self)
        self.addSchemaAction.pyqtConfigure(
            toolTip="Add a new preprocessor schema")
        self.updateSchemaAction = QAction("Update", self)
        self.updateSchemaAction.pyqtConfigure(
            toolTip="Save changes made in the current schema")
        self.removeSchemaAction = QAction("-", self)
        self.removeSchemaAction.pyqtConfigure(toolTip="Remove selected schema")

        self.updateSchemaAction.setEnabled(False)
        self.removeSchemaAction.setEnabled(False)

        actionsWidget = ModelActionsWidget([])
        actionsWidget.addAction(self.addSchemaAction)
        actionsWidget.addAction(self.updateSchemaAction).setSizePolicy(
            QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        actionsWidget.addAction(self.removeSchemaAction)
        actionsWidget.layout().setSpacing(1)

        box.layout().addWidget(actionsWidget)

        self.connect(self.addSchemaAction, SIGNAL("triggered()"),
                     self.onAddSchema)
        self.connect(self.updateSchemaAction, SIGNAL("triggered()"),
                     self.onUpdateSchema)
        self.connect(self.removeSchemaAction, SIGNAL("triggered()"),
                     self.onRemoveSchema)

        # One action per known preprocessor type.  ``pp`` and ``kwargs``
        # are bound as lambda defaults so that every action keeps its own
        # pair instead of the last one of the loop.
        self.addPreprocessorsMenuActions = actions = []
        for name, pp, kwargs in self.preprocessors:
            action = QAction(name, self)
            self.connect(action,
                         SIGNAL("triggered()"),
                         lambda pp=pp, kwargs=kwargs: self.addPreprocessor(
                             pp(**kwargs)))
            actions.append(action)

        # --- "Output" box: auto-commit toggle and commit button ---
        box = OWGUI.widgetBox(self.controlArea, "Output")
        cb = OWGUI.checkBox(box,
                            self,
                            "autoCommit",
                            "Commit on any change",
                            callback=self.commitIf)
        b = OWGUI.button(box, self, "Commit", callback=self.commit)
        OWGUI.setStopper(self, b, cb, "changedFlag", callback=self.commitIf)

        self.mainAreaStack = QStackedLayout()
        self.stackedEditorsCache = {}

        OWGUI.widgetBox(self.mainArea, orientation=self.mainAreaStack)

        self.data = None
        self.learner = None

        self.loadSettings()
        self.activateLoadedSettings()
예제 #16
0
 def __init__(self, method=None):
     """Store the entropy discretization method to use.

     Arguments:
         method: an ``orange.EntropyDiscretization`` instance; when
             omitted, a new one is created for this object.  Previously a
             single default instance was built when the method was
             defined and shared by every object.

     Raises:
         TypeError: if ``method`` is not an
             ``orange.EntropyDiscretization``.  The former ``assert`` was
             skipped entirely when Python runs with ``-O``.
     """
     if method is None:
         method = orange.EntropyDiscretization()
     if not isinstance(method, orange.EntropyDiscretization):
         raise TypeError(
             "method must be an orange.EntropyDiscretization, got %r"
             % (method,))
     self.method = method
예제 #17
0
# Description: Shows the usage of different classes for discretization, including manual discretization
# Category:    discretization, categorization, preprocessing
# Classes:     EntropyDiscretization, EquiDistDiscretization, BiModalDiscretization, Discretization, IntervalDiscretizer, Discretizer, BiModalDiscretizer
# Uses:        iris
# Referenced:  discretization.htm

import orange

# All examples below work on the iris table.
data = orange.ExampleTable("iris")

# Single-argument print(...) is used throughout: under the Python 2 print
# statement it writes exactly the same text as the unparenthesized form.
print("\nEntropy discretization, first 10 examples")
sep_w = orange.EntropyDiscretization("sepal width", data)

# Select the continuous attribute, the attribute built from it and the class.
width_columns = [data.domain["sepal width"], sep_w, data.domain.classVar]
data2 = data.select(width_columns)
for example in data2[:10]:
    print(example)

width_source = sep_w.getValueFrom
print("\nDiscretized attribute: %s" % (sep_w,))
print("Continuous attribute: %s" % (width_source.whichVar,))
print("Cut-off points: %s" % (width_source.transformer.points,))

print("\nManual construction of IntervalDiscretizer - single attribute")
idisc = orange.IntervalDiscretizer(points=[3.0, 5.0])
sep_l = idisc.constructVariable(data.domain["sepal length"])
length_columns = [data.domain["sepal length"], sep_l, data.domain.classVar]
data2 = data.select(length_columns)
for example in data2[:10]:
    print(example)

print("\nManual construction of IntervalDiscretizer - all attributes")
idisc = orange.IntervalDiscretizer(points=[3.0, 5.0])
# One constructed variable per attribute of the original domain.
newattrs = []
for attr in data.domain.attributes:
    newattrs.append(idisc.constructVariable(attr))
예제 #18
0
 def __call__(self, data, weight=None):
     """Discretize ``data``, fit ``orange.BayesLearner`` and wrap the result.

     data   -- examples passed to ``orange.Preprocessor_discretize``
     weight -- forwarded unchanged to ``orange.BayesLearner``

     Returns ``Classifier(classifier=<fitted model>)``.
     """
     entropy_method = orange.EntropyDiscretization()
     discretized = orange.Preprocessor_discretize(data, method=entropy_method)
     bayes_model = orange.BayesLearner(discretized, weight)
     return Classifier(classifier=bayes_model)
예제 #19
0
    def __call__(self, data, targetClass, max_rules=0):
        """Induce rules for ``targetClass`` and return them as ``SDRules``.

        Steps, in order:
          1. continuous attributes, if any, are replaced by entropy-discretized
             ones;
          2. classification association rules are induced with support
             ``self.minSup``;
          3. rules are kept only if their right-hand side is ``targetClass``
             and their confidence is at least ``self.minConf``;
          4. rules are picked one by one by weighted covering;
          5. picked rules are converted to ``SDRule`` objects and, when step 1
             changed the data, mapped back onto the original data.

        data        -- example table to learn from
        targetClass -- class value the selected rules must predict
        max_rules   -- maximum number of rules to select; 0 (default) means
                       no limit

        Side effects: sets ``self.data`` and ``self.rulesSD`` and adds the
        meta attribute ``self.weightID`` to ``self.data``.

        Returns ``SDRules(beam, targetClassRule, "Apriori-SD")``.
        """
        # Keep a handle on the table as passed in; it is the same object as
        # `data` unless discretization replaces `data` below.
        original_data = data
        data_discretized = False

        # If any of the attributes are continuous, discretize them
        if data.domain.hasContinuousAttributes():
            data_discretized = True
            discretize = orange.EntropyDiscretization(forceAttribute=True)
            new_domain = []
            for attribute in original_data.domain.attributes:
                if attribute.varType == orange.VarTypes.Continuous:
                    # NOTE: attributes discretized into a single interval are
                    # kept as well; no filtering on the number of cut-off
                    # points is applied here.
                    new_domain.append(discretize(attribute, original_data))
                else:
                    new_domain.append(attribute)
            data = original_data.select(
                new_domain + [original_data.domain.classVar])

        self.data = data
        self.rulesSD = []

        # build association classification rules
        rules = orange.AssociationRulesInducer(data,
                                               support=self.minSup,
                                               classificationRules=1,
                                               maxItemSets=10000000)

        #_______________________________ post-processing step 1
        # select rules that classify in the target class
        right = orange.Example(
            data.domain,
            [orange.Value(orange.VarTypes.Discrete, orange.ValueTypes.DK)] *
            len(data.domain))
        right.setclass(targetClass)
        rules = rules.filter(lambda rule: rule.right == right)

        # select rules with confidence >= minConfidence
        rules = rules.filter(lambda rule: rule.confidence >= self.minConf)

        #________________________________ post processing step 2
        # weighted covering
        self.data.addMetaAttribute(
            self.weightID)  # set weights of all examples to 1
        # Any positive start value works: it only has to let the loop
        # condition pass on the first iteration.
        bestRuleWRacc = 100
        while (len(rules) > 0
               and self.uncoveredExamples() > 0
               and bestRuleWRacc > 0
               and (max_rules == 0 or len(self.rulesSD) < max_rules)):
            (bestRule, bestRuleWRacc) = self.findBestRule(rules)
            rules.remove(bestRule)
            self.removeSimilarRules(bestRule, rules)
            self.decreaseExampleWeights(bestRule)
            self.rulesSD.append(bestRule)

        #____________________________ transform rules to SD format
        beam = []
        for r in self.rulesSD:
            cond = []
            for i in range(len(r.left)):
                # Positions holding a "don't care" value impose no condition.
                if not orange.Value.is_DC(r.left[i]):
                    cond.append(
                        orange.ValueFilter_discrete(
                            position=i,
                            values=[
                                orange.Value(data.domain.attributes[i],
                                             r.left[i])
                            ]))
            beam.append(SDRule(data, targetClass, cond))

        # The condition-free rule for the target class is built once, on the
        # table the caller passed in (identical to `data` when nothing was
        # discretized).
        targetClassRule = SDRule(original_data,
                                 targetClass,
                                 conditions=[],
                                 g=1)
        if data_discretized:
            # change beam so the rules apply to original data
            beam = [rule.getUndiscretized(original_data) for rule in beam]

        return SDRules(beam, targetClassRule, "Apriori-SD")
# Description: Shows how to assess the quality of attributes not in the dataset
# Category:    attribute quality
# Classes:     EntropyDiscretization, MeasureAttribute, MeasureAttribute_info
# Uses:        iris
# Referenced:  MeasureAttribute.htm

import orange
data = orange.ExampleTable("iris")

# Score an attribute that is not part of the data set: the variable built by
# entropy discretization of "petal length".
d1 = orange.EntropyDiscretization("petal length", data)
d1_relief = orange.MeasureAttribute_relief(d1, data)
print(d1_relief)

# Single-argument print(...) writes the same text as the Python 2 print
# statement used elsewhere in this file.
meas = orange.MeasureAttribute_relief()
threshold_table = meas.thresholdFunction("petal length", data)
for t in threshold_table:
    print("%5.3f: %5.3f" % t)

best = meas.bestThreshold("petal length", data)
thresh, score, distr = best
print("\nBest threshold: %5.3f (score %5.3f)" % (thresh, score))
예제 #21
0
class TestDiscretizeEntropy(testing.PreprocessorTestCase):
    """Preprocessor test case for ``Preprocessor_discretize`` configured
    with ``orange.EntropyDiscretization``."""

    # Preprocessor instance exposed to the base test case.
    PREPROCESSOR = Preprocessor_discretize(
        method=orange.EntropyDiscretization(),
    )
예제 #22
0
파일: disc1.py 프로젝트: stefie10/slu_hri
# Description: Entropy based discretization compared to discretization with equal-frequency
#              of instances in intervals
# Category:    preprocessing
# Uses:        wdbc.tab
# Classes:     Preprocessor_discretize, EntropyDiscretization
# Referenced:  o_categorization.htm

import orange

def show_values(data, heading):
  """Print one line per attribute: ``<name>/<number of values>: <values>``.

  data    -- object whose ``domain.attributes`` are iterated; each attribute
             must provide ``name`` and a sequence of strings ``values``
  heading -- accepted for caller compatibility; it is not printed
  """
  for a in data.domain.attributes:
    # str.join also copes with an attribute that has no values, where
    # reduce() without an initial value raises TypeError.
    print("%s/%d: %s" % (a.name, len(a.values), ', '.join(a.values)))
        
data = orange.ExampleTable("../datasets/wdbc")
n_original = len(data.domain.attributes)
# Single-argument print(...) writes the same text as the Python 2 print
# statement.
print('%d features in original data set, discretized:' % n_original)
entropy_method = orange.EntropyDiscretization()
data_ent = orange.Preprocessor_discretize(data, method=entropy_method)
show_values(data_ent, "Entropy based discretization")

print('\nFeatures with sole value after discretization:')
for a in data_ent.domain.attributes:
  # Only attributes left with exactly one value are listed.
  if len(a.values) != 1:
    continue
  print(a.name)

import orngDisc
# Re-execute the module so the current version of orngDisc is used.
reload(orngDisc)
data_ent2 = orngDisc.entropyDiscretization(data)
n_remaining = len(data_ent2.domain.attributes)
print('%d features after removing features discretized to a constant value' % n_remaining)