def __init__(self, measure=orange.MeasureAttribute_relief(), filter=None, limit=10):
    """Store the scoring measure, the attribute filter and the limit.

    measure -- attribute scoring object, stored as self.measure.  The default
               ReliefF instance is created once, when this function is
               defined, so it is shared by all callers relying on it.
    filter  -- stored as self.filter; when None, self.bestN is used instead.
    limit   -- stored unchanged as self.limit.
    """
    self.measure = measure
    if filter is None:
        # No explicit filter given: fall back to this object's bestN method.
        self.filter = self.bestN
    else:
        self.filter = filter
    self.limit = limit
def __call__(self, data, weight):
    """Replace attributes of `data` by induced attributes with fewer values.

    The inducer is self.inducer when present; otherwise FeatureByMinError()
    if self.m is set, else FeatureByMinComplexity().  Attributes are visited
    in the order produced by orngEvalAttr.OrderAttributesByMeasure (using
    self.measure when present, otherwise ReliefF with m=5, k=10).  For each
    attribute the inducer is applied to that single attribute; when the
    induced attribute has fewer values than the original, the original is
    dropped from the data and the induced one is kept only if it still has
    more than one value (it is then renamed to the original name plus "'").

    Arguments:
        data    example table to process
        weight  weight argument, passed through to the measure and inducer

    Returns the (possibly reduced) data.

    Raises TypeError when self.m is given but the inducer has no `m`
    attribute.
    """
    if hasattr(self, "inducer"):
        inducer = self.inducer
    elif hasattr(self, "m"):
        inducer = FeatureByMinError()
    else:
        inducer = FeatureByMinComplexity()

    if hasattr(self, "m"):
        # hasattr needs the attribute *name*; the bare identifier `m` used
        # here previously was undefined and raised NameError.
        if not hasattr(inducer, "m"):
            raise TypeError("invalid combination of arguments ('m' is given, but 'inducer' does not need it)")
        inducer.m = self.m

    import orngEvalAttr

    # Build the default measure only when it is actually needed.
    if hasattr(self, "measure"):
        measure = self.measure
    else:
        measure = orange.MeasureAttribute_relief(m=5, k=10)
    ordered = orngEvalAttr.OrderAttributesByMeasure(measure)(data, weight)

    for attr in ordered:
        newattr = inducer(data, [attr], weight)[0]
        if len(newattr.values) < len(attr.values):
            # A real list is required because it is appended to below.
            newset = [x for x in data.domain.attributes if x != attr]
            if len(newattr.values) > 1:
                newset.append(newattr)
                newattr.name = attr.name + "'"
            data = data.select(newset + [data.domain.classVar])
    return data
def cforange_score_estimation(input_dict):
    """Score the attributes of a data set with ReliefF.

    Reads 'dataset', 'k' and 'm' from input_dict ('k' and 'm' are converted
    with int()) and returns {'results': ...} holding whatever
    orngFSS.attMeasure returns for that data and measure.
    """
    import orange
    import orngFSS

    dataset = input_dict['dataset']
    relief = orange.MeasureAttribute_relief(k=int(input_dict['k']),
                                            m=int(input_dict['m']))
    return {'results': orngFSS.attMeasure(dataset, relief)}
def __call__(self, data, weight):
    """Return the attributes of `data` ordered by increasing score.

    self.measure is used when it is truthy; otherwise a ReliefF measure
    with m=5 and k=10 is created.  Every attribute in data.domain.attributes
    is scored as measure(attr, data, None, weight); attributes with equal
    scores keep their original relative order.
    """
    scorer = self.measure or orange.MeasureAttribute_relief(m=5, k=10)
    scored = [(attr, scorer(attr, data, None, weight))
              for attr in data.domain.attributes]
    return [attr for attr, _ in sorted(scored, key=itemgetter(1))]
def cforange_filter_relieff(input_dict):
    """Filter a data set with orngFSS.filterRelieff using a ReliefF measure.

    Reads 'dataset', 'k', 'm' and 'margin' from input_dict ('k' and 'm' are
    converted with int(), 'margin' with float()) and returns
    {'new_dataset': ...} holding the result of orngFSS.filterRelieff.
    """
    import orange
    import orngFSS

    dataset = input_dict['dataset']
    relief = orange.MeasureAttribute_relief(k=int(input_dict['k']),
                                            m=int(input_dict['m']))
    threshold = float(input_dict['margin'])
    return {'new_dataset': orngFSS.filterRelieff(dataset, relief, threshold)}
def __call__(self, data, weight):
    """Return the attributes of `data` ordered by increasing score.

    self.measure is used when it is truthy; otherwise a ReliefF measure
    with m=5 and k=10 is created.  Every attribute in data.domain.attributes
    is scored as measure(attr, data, None, weight).

    Arguments:
        data    example table whose attributes are ranked
        weight  weight argument passed through to the measure

    Returns the list of attributes, lowest score first; attributes with
    equal scores keep their original relative order.
    """
    if self.measure:
        measure = self.measure
    else:
        measure = orange.MeasureAttribute_relief(m=5, k=10)

    measured = [(attr, measure(attr, data, None, weight))
                for attr in data.domain.attributes]
    # Key-based sort: same stable ascending order as the former
    # cmp(x[1], y[1]) comparator, but also accepted by Python 3, where
    # list.sort() no longer takes a comparison function.
    measured.sort(key=lambda pair: pair[1])
    return [attr for attr, _score in measured]
def segmine_gene_ranker(input_dict, widget):
    """Rank genes by ReliefF quality and by a two-group t-like score.

    Reads from input_dict:
        'microarrayTable'  the example table; its attributes are the genes
        'k'                number of neighbours for ReliefF (int()-converted)
        'm'                number of reference examples for ReliefF
                           (int()-converted); 0 is mapped to -1, which the
                           original comments describe as "all examples"

    `widget` receives progress updates: widget.progress is set and
    widget.save() is called at the start, after every 100th attribute and
    at the end.

    Returns a dict with
        'geneRanks'  list of (gene name, ReliefF quality), best first
        'tScores'    list of (gene name, score) sorted by decreasing score,
                     where score = (mean(data) - mean(control)) /
                     sqrt(var(control)/n_control + var(data)/n_data).
                     The score is 0.0 when either group is empty or the
                     pooled variance is not positive.
    """
    import orange
    from numpy import mean, var
    from math import sqrt, floor

    CONTROL_GROUP_KEY = 'control group'
    DATA_GROUP_KEY = 'data group'
    CLASS_ATRR_NAME = 'group'

    table = input_dict['microarrayTable']
    k = int(input_dict['k'])
    m = int(input_dict['m'])
    if m == 0:  # special value
        m = -1  # all examples

    attributes = table.domain.attributes
    total = len(attributes)

    # ReliefF parameters:
    #   - number of neighbours: k (from input_dict)
    #   - number of reference examples: m (from input_dict; -1 means all)
    #   - checksum computation: none (the data do not change)
    ranker = orange.MeasureAttribute_relief(k=k, m=m, checkCachedData=False)
    ranks = [(ranker(attr, table), attr.name) for attr in attributes]

    # Tuples sort on their first element, the attribute's quality.
    ranks.sort(reverse=True)
    # The result lists (name, quality) rather than (quality, name).
    geneRanks = [(name, quality) for quality, name in ranks]

    tScores = {}
    control = table.selectref({CLASS_ATRR_NAME: CONTROL_GROUP_KEY})
    data = table.selectref({CLASS_ATRR_NAME: DATA_GROUP_KEY})

    widget.progress = 0
    widget.save()

    for i, attr in enumerate(attributes):
        geneID = attr.name
        controlValues = [float(example[attr]) for example in control]
        dataValues = [float(example[attr]) for example in data]

        # NumPy scalars do not raise ZeroDivisionError: dividing by zero
        # yields inf/nan (with a warning), which would poison the sort
        # below.  Guard the degenerate cases explicitly instead of relying
        # on an exception handler that never fires.
        score = 0.0
        if controlValues and dataValues:
            variance = (var(controlValues) / len(controlValues) +
                        var(dataValues) / len(dataValues))
            if variance > 0:
                average = mean(dataValues) - mean(controlValues)
                score = average / sqrt(variance)
        tScores[geneID] = score

        if (i + 1) % 100 == 0:
            widget.progress = floor((i + 1) / float(total) * 100)
            widget.save()

    widget.progress = 100
    widget.save()

    sortedTScores = sorted(tScores.items(), reverse=True, key=lambda x: x[1])
    return {'geneRanks': geneRanks, 'tScores': sortedTScores}
def __call__(self, examples, weight=0, fulldata=0):
    """Fit a logistic classifier on `examples`.

    Arguments:
        examples  example table with a discrete class
        weight    weight argument passed to the domain translation
        fulldata  when not 0, the domain translation is analysed on this
                  table instead of `examples`

    Returns a BasicLogisticClassifier when the class has at most two
    values.  Otherwise one model is fitted for every class value except the
    most frequent one (each against the most frequent class) and an
    ArrayLogisticClassifier wrapping them is returned.

    Raises ValueError when the class variable's varType is not 1.
    """
    # NOTE(review): per the error message, varType 1 denotes a discrete
    # variable -- confirm against orange.VarTypes.
    if examples.domain.classVar.varType != 1:
        # String exceptions are not valid exceptions; raise a real one so
        # that callers see this message.
        raise ValueError("Logistic learner only works with discrete class.")

    translate = orng2Array.DomainTranslation(self.translation_mode_d, self.translation_mode_c)
    if fulldata != 0:
        translate.analyse(fulldata, weight, warning=0)
    else:
        translate.analyse(examples, weight, warning=0)
    translate.prepareLR()
    mdata = translate.transform(examples)

    # get the attribute importances
    importance = []
    for i, attr in enumerate(examples.domain.attributes):
        qi = orange.MeasureAttribute_relief(attr, examples)
        importance.append((qi, i))
    importance.sort()

    freqs = list(orange.Distribution(examples.domain.classVar, examples))
    s = 1.0 / sum(freqs)
    freqs = [x * s for x in freqs]  # normalize

    rl = RobustBLogisticLearner(regularization=self.regularization)
    if len(examples.domain.classVar.values) > 2:
        # Form several experiments: the most frequent class value is the
        # base, and every other value is contrasted against it in turn.
        tfreqs = [(freq, i) for i, freq in enumerate(freqs)]
        tfreqs.sort()
        base_freq = tfreqs[-1][0]  # frequency of the most frequent class
        classifiers = []
        for alt_freq, alter in tfreqs[:-1]:
            cfreqs = [base_freq, alt_freq]  # 0=base,1=alternative
            # Relabel every row: 1 for the alternative class, 0 otherwise.
            for j in range(len(mdata)):
                c = int(examples[j].getclass())
                if c == alter:
                    mdata[j][-1] = 1
                else:
                    mdata[j][-1] = 0
            classifiers.append(rl(mdata, translate, importance, cfreqs))
        return ArrayLogisticClassifier(classifiers, translate, tfreqs, examples.domain.classVar, len(mdata))
    else:
        r = rl(mdata, translate, importance, freqs)
        return BasicLogisticClassifier(r, translate)
def attMeasure(data, measure=orange.MeasureAttribute_relief(k=20, m=50)):
    """
    Assesses the quality of attributes using the given measure and returns
    a sorted list of tuples (attribute name, measure).

    Arguments:
        data            example table
        measure         an attribute scoring function (derived from
                        orange.MeasureAttribute); called as
                        measure(attribute, data)

    Result:
        a list of tuples (attribute name, measure), highest score first;
        attributes with equal scores keep their order in the domain
    """
    measl = [(attr.name, measure(attr, data)) for attr in data.domain.attributes]
    # Stable descending sort; equivalent to the former cmp(y[1], x[1])
    # comparator, which Python 3's list.sort() no longer accepts.
    measl.sort(key=lambda pair: pair[1], reverse=True)
    return measl
def filterRelieff(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """
    Repeatedly drops the lowest-scoring attribute of the data set.

    The attributes are scored with attMeasure; while attributes remain and
    the last (lowest) score is strictly below the margin, the data is
    reduced with selectBestNAtts to one attribute fewer and scored again.

    Arguments:
        data            an example table
        measure         an attribute measure passed on to attMeasure
                        (ReliefF by default)
        margin          an attribute whose score is not below the margin
                        is not removed

    Result:
        the data set restricted to the surviving attributes
    """
    scores = attMeasure(data, measure)
    while True:
        remaining = len(data.domain.attributes)
        if remaining == 0:
            break
        worst_score = scores[-1][1]
        if not worst_score < margin:
            break
        data = selectBestNAtts(data, scores, remaining - 1)
        scores = attMeasure(data, measure)
    return data
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    """Split attribute names into shown and hidden lists.

    Arguments:
        data           example table
        attrContOrder  ordering for continuous attributes: "None",
                       "ReliefF", "Fisher discriminant", "Signal to Noise"
                       or "Signal to Noise For Each Class"
        attrDiscOrder  ordering for discrete attributes: "None",
                       "GainRatio", "Gini", "ReliefF" or
                       "Oblivious decision graphs"
        projections    not used by this function

    Returns a tuple (shown, hidden, maxIndex).  When the data has no class
    or the class is not discrete, all attribute names are shown, nothing is
    hidden and the index is 0.  Otherwise `shown` starts with the class
    name; scored attributes are divided into shown and hidden by
    getTopAttrs(scores, 0.95).  The third element is always 0.

    An unrecognised ordering name is reported on standard output and the
    corresponding attributes are left out of both lists.
    """
    classVar = data.domain.classVar
    if classVar is None or classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    # initialize outputs
    shown = [classVar.name]
    hidden = []
    maxIndex = 0

    # both are RELIEF
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # both are NONE
    elif attrContOrder == "None" and attrDiscOrder == "None":
        shown.extend([item.name for item in data.domain.attributes])
        return (shown, hidden, 0)

    # disc and cont attribute list
    discAttrs = []
    contAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    ###############################
    # sort continuous attributes
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in ["ReliefF", "Fisher discriminant", "Signal to Noise",
                           "Signal to Noise For Each Class"]:
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    else:
        # Single-argument print(...) emits the same text as the former
        # print statement (including the doubled space) and is valid on
        # both Python 2 and Python 3.
        print("Unknown value for attribute order:  %s" % (attrContOrder,))

    ###############################
    # sort discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF":
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    elif attrDiscOrder == "Oblivious decision graphs":
        attrs = getFunctionalList(data)
        for item in attrs:
            shown.append(item)
        # Every discrete attribute that was not selected is hidden.
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print("Unknown value for attribute order:  %s" % (attrDiscOrder,))

    return (shown, hidden, maxIndex)
# NOTE(review): this excerpt starts mid-script.  `nulls`, `attr`, `data` and
# `data2` are defined before the visible part; this loop presumably sits
# inside an enclosing loop over `attr` -- confirm its indentation against the
# full file.
# Mark the listed examples of data2 as having an unknown value for `attr`.
for e in nulls[attr]:
    data2[e][attr]="?"

names = [a.name for a in data.domain.attributes]
attrs = len(names)

# Table header: an empty 30-character label column followed by one
# 15-character column per attribute name.
print
print ("%30s"+"%15s"*attrs) % (("",) + tuple(names))

# Row format: a 30-character label followed by one score per attribute.
fstr = "%30s" + "%15.4f"*attrs

def printVariants(meas):
    """Print the scores of all attributes under each unknownsTreatment.

    The first row scores `data`; the remaining rows score `data2` after
    setting meas.unknownsTreatment to IgnoreUnknowns, ReduceByUnknowns and
    UnknownsToCommon in turn.  Note that `meas` is modified in place.
    """
    print fstr % (("- no unknowns:",) + tuple([meas(i, data) for i in range(attrs)]))
    meas.unknownsTreatment = meas.IgnoreUnknowns
    print fstr % (("- ignore unknowns:",) + tuple([meas(i, data2) for i in range(attrs)]))
    meas.unknownsTreatment = meas.ReduceByUnknowns
    print fstr % (("- reduce unknowns:",) + tuple([meas(i, data2) for i in range(attrs)]))
    meas.unknownsTreatment = meas.UnknownsToCommon
    print fstr % (("- unknowns to common:",) + tuple([meas(i, data2) for i in range(attrs)]))
    # NOTE(review): assumed to be the last statement of printVariants (a
    # blank line after each table); the collapsed source does not show
    # whether it belongs to the function or to the module level -- confirm.
    print

print "MSE"
printVariants(orange.MeasureAttribute_MSE())

# Relief is scored once on `data` and once on `data2`; no unknownsTreatment
# is set on this measure.
print "Relief"
meas = orange.MeasureAttribute_relief()
print fstr % (("- no unknowns:",) + tuple([meas(i, data) for i in range(attrs)]))
print fstr % (("- with unknowns:",) + tuple([meas(i, data2) for i in range(attrs)]))
print
def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Remember the attribute measure and the margin.

    measure -- stored as self.measure; the default ReliefF instance
               (k=20, m=50) is created once, when this function is defined.
    margin  -- stored as self.margin.
    """
    self.measure, self.margin = measure, margin
def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), n=5):
    """Remember the attribute measure and the count `n`.

    measure -- stored as self.measure; the default ReliefF instance
               (k=20, m=50) is created once, when this function is defined.
    n       -- stored as self.n.
    """
    self.measure, self.n = measure, n
def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), threshold=0.0):
    """Remember the attribute measure and the threshold.

    measure   -- stored as self.measure; the default ReliefF instance
                 (k=20, m=50) is created once, when this function is defined.
    threshold -- stored as self.threshold.
    """
    self.measure, self.threshold = measure, threshold
def __init__(self, parent=None, signalManager=None, name="Interactive Discretization"):
    """Set default settings, declare the signals and build the widget's GUI.

    The arguments are forwarded unchanged to OWWidget.__init__.  The GUI is
    built in this order: the default-discretization box, the individual
    attribute treatment box, the class-discretization box and the commit
    box in the control area, followed by the "Individual attribute
    settings" box (graph, graph options, attribute list and per-attribute
    method buttons) in the main area.
    """
    OWWidget.__init__(self, parent, signalManager, name)

    # Default values of the settings and internal state.  They are assigned
    # before loadSettings() is called.
    # NOTE(review): loadSettings() presumably overrides these with stored
    # values -- confirm.
    self.showBaseLine=1
    self.showLookaheadLine=1
    self.showTargetClassProb=1
    self.showRug=0
    self.snap=1
    self.measure=0
    self.targetClass=0
    self.discretization = self.classDiscretization = self.indiDiscretization = 1
    self.intervals = self.classIntervals = self.indiIntervals = 3
    self.outputOriginalClass = True
    self.indiData = []
    self.indiLabels = []
    self.resetIndividuals = 0
    self.customClassSplits = ""
    self.selectedAttr = 0
    self.customSplits = ["", "", ""]
    self.autoApply = True
    self.dataChanged = False
    self.autoSynchronize = True
    self.pointsChanged = False
    self.customLineEdits = []
    self.needsDiscrete = []
    self.data = self.originalData = None
    self.loadSettings()

    # Signals: one data input handled by setData, one data output.
    self.inputs=[("Data", ExampleTable, self.setData)]
    self.outputs=[("Data", ExampleTable)]

    # (label, measure) pairs; the labels fill the "Split gain measure"
    # combo box below, which is bound to the "measure" attribute.
    self.measures=[("Information gain", orange.MeasureAttribute_info()),
                   #("Gain ratio", orange.MeasureAttribute_gainRatio),
                   ("Gini", orange.MeasureAttribute_gini()),
                   ("chi-square", orange.MeasureAttribute_chiSquare()),
                   ("chi-square prob.", orange.MeasureAttribute_chiSquare(computeProbabilities=1)),
                   ("Relevance", orange.MeasureAttribute_relevance()),
                   ("ReliefF", orange.MeasureAttribute_relief())]

    # Labels of the radio buttons in the three method groups.
    self.discretizationMethods=["Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove continuous attributes"]
    self.classDiscretizationMethods=["Equal-frequency discretization", "Equal-width discretization"]
    self.indiDiscretizationMethods=["Default", "Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove attribute"]

    self.mainHBox = OWGUI.widgetBox(self.mainArea, orientation=0)

    # --- Control area: default discretization ---
    vbox = self.controlArea
    # All methods except the last are added here; the last one is appended
    # after the interval slider so that the slider sits above it.
    box = OWGUI.radioButtonsInBox(vbox, self, "discretization", self.discretizationMethods[:-1], "Default discretization", callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    self.needsDiscrete.append(box.buttons[1])
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    # The indentation derived from this button is reused for later boxes.
    indent = OWGUI.checkButtonOffsetHint(self.needsDiscrete[-1])
    self.interBox = OWGUI.widgetBox(OWGUI.indentedBox(box, sep=indent))
    OWGUI.widgetLabel(self.interBox, "Number of intervals (for equal width/frequency)")
    OWGUI.separator(self.interBox, height=4)
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(self.interBox), self, "intervals", None, 2, 10, callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    OWGUI.appendRadioButton(box, self, "discretization", self.discretizationMethods[-1])
    OWGUI.separator(vbox)

    # --- Control area: individual attribute treatment ---
    ribg = OWGUI.radioButtonsInBox(vbox, self, "resetIndividuals", ["Use default discretization for all attributes", "Explore and set individual discretizations"], "Individual attribute treatment", callback = self.setAllIndividuals)
    ll = QWidget(ribg)
    ll.setFixedHeight(1)
    OWGUI.widgetLabel(ribg, "Set discretization of all attributes to")
    hcustbox = OWGUI.widgetBox(OWGUI.indentedBox(ribg), 0, 0)
    # Three "Custom N" buttons that belong to the same radio group but are
    # placed into the separate box created above.
    for c in range(1, 4):
        OWGUI.appendRadioButton(ribg, self, "resetIndividuals", "Custom %i" % c, insertInto = hcustbox)
    OWGUI.separator(vbox)

    # --- Control area: class discretization ---
    box = self.classDiscBox = OWGUI.radioButtonsInBox(vbox, self, "classDiscretization", self.classDiscretizationMethods, "Class discretization", callback=[self.clearLineEditFocus, self.classMethodChanged])
    cinterBox = OWGUI.widgetBox(box)
    # NOTE(review): this rebinds self.intervalSlider, so the reference to
    # the "intervals" slider created above is lost -- confirm whether a
    # separate attribute name was intended.
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(cinterBox, sep=indent), self, "classIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.classMethodChanged], label="Number of intervals")
    hbox = OWGUI.widgetBox(box, orientation = 0)
    # NOTE(review): this button is appended to the class discretization box
    # but names the "discretization" attribute, not "classDiscretization"
    # -- confirm that this is intended.
    OWGUI.appendRadioButton(box, self, "discretization", "Custom" + " ", insertInto = hbox)
    self.classCustomLineEdit = OWGUI.lineEdit(hbox, self, "customClassSplits", callback = self.classCustomChanged, focusInCallback = self.classCustomSelected) # Can't validate - need to allow spaces
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    OWGUI.separator(box)
    self.classIntervalsLabel = OWGUI.widgetLabel(box, "Current splits: ")
    OWGUI.separator(box)
    OWGUI.checkBox(box, self, "outputOriginalClass", "Output original class", callback = self.commitIf)
    OWGUI.widgetLabel(box, "("+"Widget always uses discretized class internally."+")")
    OWGUI.separator(vbox)
    #OWGUI.rubber(vbox)

    # --- Control area: commit ---
    box = OWGUI.widgetBox(vbox, "Commit")
    applyButton = OWGUI.button(box, self, "Commit", callback = self.commit, default=True)
    autoApplyCB = OWGUI.checkBox(box, self, "autoApply", "Commit automatically", callback=[self.clearLineEditFocus])
    OWGUI.setStopper(self, applyButton, autoApplyCB, "dataChanged", self.commit)
    OWGUI.rubber(vbox)

    # --- Main area: individual attribute settings ---
    #self.mainSeparator = OWGUI.separator(self.mainHBox, width=25) # space between control and main area
    self.mainIABox = OWGUI.widgetBox(self.mainHBox, "Individual attribute settings")
    self.mainBox = OWGUI.widgetBox(self.mainIABox, orientation=0)
    OWGUI.separator(self.mainIABox)#, height=30)
    graphBox = OWGUI.widgetBox(self.mainIABox, "", orientation=0)
    # self.needsDiscrete.append(graphBox)
    graphOptBox = OWGUI.widgetBox(graphBox)
    OWGUI.separator(graphBox, width=10)
    graphGraphBox = OWGUI.widgetBox(graphBox)
    # The graph must exist before the option widgets below, because their
    # callbacks are bound methods of self.graph.
    self.graph = DiscGraph(self, graphGraphBox)
    graphGraphBox.layout().addWidget(self.graph)
    reportButton2 = OWGUI.button(graphGraphBox, self, "Report Graph", callback = self.reportGraph, debuggingEnabled=0)
    #graphOptBox.layout().setSpacing(4)

    # Graph options: split gain measure.
    box = OWGUI.widgetBox(graphOptBox, "Split gain measure", addSpace=True)
    self.measureCombo=OWGUI.comboBox(box, self, "measure", orientation=0, items=[e[0] for e in self.measures], callback=[self.clearLineEditFocus, self.graph.invalidateBaseScore, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showBaseLine", "Show discretization gain", callback=[self.clearLineEditFocus, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showLookaheadLine", "Show lookahead gain", callback=self.clearLineEditFocus)
    self.needsDiscrete.append(box)

    # Graph options: target class.
    box = OWGUI.widgetBox(graphOptBox, "Target class", addSpace=True)
    self.targetCombo=OWGUI.comboBox(box, self, "targetClass", orientation=0, callback=[self.clearLineEditFocus, self.graph.targetClassChanged])
    stc = OWGUI.checkBox(box, self, "showTargetClassProb", "Show target class probability", callback=[self.clearLineEditFocus, self.graph.plotProbCurve])
    OWGUI.checkBox(box, self, "showRug", "Show rug (may be slow)", callback=[self.clearLineEditFocus, self.graph.plotRug])
    self.needsDiscrete.extend([self.targetCombo, stc])

    # Graph options: editing.
    box = OWGUI.widgetBox(graphOptBox, "Editing", addSpace=True)
    OWGUI.checkBox(box, self, "snap", "Snap to grid", callback=[self.clearLineEditFocus])
    syncCB = OWGUI.checkBox(box, self, "autoSynchronize", "Apply on the fly", callback=self.clearLineEditFocus)
    syncButton = OWGUI.button(box, self, "Apply", callback = self.synchronizePressed)
    OWGUI.setStopper(self, syncButton, syncCB, "pointsChanged", self.synchronize)
    OWGUI.rubber(graphOptBox)

    # Attribute list.
    self.attrList = OWGUI.listBox(self.mainBox, self, callback = self.individualSelected)
    self.attrList.setItemDelegate(CustomListItemDelegate(self.attrList))
    self.attrList.setFixedWidth(300)
    self.defaultMethodChanged()
    OWGUI.separator(self.mainBox, width=10)

    # Per-attribute discretization method.  As above, the last method is
    # appended after the interval slider.
    box = OWGUI.radioButtonsInBox(OWGUI.widgetBox(self.mainBox), self, "indiDiscretization", [], callback=[self.clearLineEditFocus, self.indiMethodChanged])
    #hbbox = OWGUI.widgetBox(box)
    #hbbox.layout().setSpacing(4)
    for meth in self.indiDiscretizationMethods[:-1]:
        OWGUI.appendRadioButton(box, self, "indiDiscretization", meth)
    self.needsDiscrete.append(box.buttons[2])
    self.indiInterBox = OWGUI.indentedBox(box, sep=indent, orientation = "horizontal")
    OWGUI.widgetLabel(self.indiInterBox, "Num. of intervals: ")
    self.indiIntervalSlider = OWGUI.hSlider(self.indiInterBox, self, "indiIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.indiMethodChanged], width = 100)
    OWGUI.rubber(self.indiInterBox)
    OWGUI.appendRadioButton(box, self, "indiDiscretization", self.indiDiscretizationMethods[-1])
    #OWGUI.rubber(hbbox)
    #OWGUI.separator(box)
    #hbbox = OWGUI.widgetBox(box)
    # Three custom rows: radio button, line edit and a "CC" tool button.
    # The lambdas bind the row index as a default argument (w=i) so that
    # each callback keeps its own index.
    for i in range(3):
        hbox = OWGUI.widgetBox(box, orientation = "horizontal")
        OWGUI.appendRadioButton(box, self, "indiDiscretization", "Custom %i" % (i+1) + " ", insertInto = hbox)
        le = OWGUI.lineEdit(hbox, self, "", callback = lambda w=i: self.customChanged(w), focusInCallback = lambda w=i: self.customSelected(w))
        le.setFixedWidth(110)
        self.customLineEdits.append(le)
        OWGUI.toolButton(hbox, self, "CC", width=30, callback = lambda w=i: self.copyToCustom(w))
        OWGUI.rubber(hbox)
    OWGUI.rubber(box)
    #self.controlArea.setFixedWidth(0)

    self.contAttrIcon = self.createAttributeIconDict()[orange.VarTypes.Continuous]
    self.setAllIndividuals()
# Description: Shows how to assess the quality of attributes not in the dataset
# Category:    attribute quality
# Classes:     EntropyDiscretization, MeasureAttribute, MeasureAttribute_info
# Uses:        iris
# Referenced:  MeasureAttribute.htm

import orange

data = orange.ExampleTable("iris")

# Score an attribute that is not part of the data set: the discretized
# version of "petal length".
d1 = orange.EntropyDiscretization("petal length", data)
print(orange.MeasureAttribute_relief(d1, data))

# List every item returned by thresholdFunction for the continuous
# attribute, one per line ...
meas = orange.MeasureAttribute_relief()
for t in meas.thresholdFunction("petal length", data):
    print("%5.3f: %5.3f" % t)

# ... and report the best threshold with its score.
thresh, score, distr = meas.bestThreshold("petal length", data)
print("\nBest threshold: %5.3f (score %5.3f)" % (thresh, score))
# Description: Shows why ReliefF needs to check the cached neighbours
# Category:    statistics
# Classes:     MeasureAttribute_relief
# Uses:        iris
# Referenced:  MeasureAttribute.htm

import orange

data = orange.ExampleTable("iris")

# r1 uses the default settings, r2 is created with checkCachedData=False.
r1 = orange.MeasureAttribute_relief()
r2 = orange.MeasureAttribute_relief(checkCachedData=False)

# Scores of the first attribute on the untouched data.
print("%.3f\t%.3f" % (r1(0, data), r2(0, data)))

# Overwrite the first attribute of every example, then score again with the
# same two measure objects.
for ex in data:
    ex[0] = 0
print("%.3f\t%.3f" % (r1(0, data), r2(0, data)))