def checkAttRelevance(self, data, margin):
    """Print attribute scores for ``data`` before and after filtering.

    First the number of attributes in ``data.domain.attributes`` is printed,
    followed by one line per entry returned by ``orngFSS.attMeasure(data)``.
    Then the same listing is printed for the data set returned by
    ``Fss.filtering(self, data, margin)``.

    Each listing line shows the entry's second item formatted as ``%5.3f``
    followed by its first item (presumably score and attribute name --
    confirm against orngFSS.attMeasure).

    :param data: data set handed to orngFSS.attMeasure and Fss.filtering.
    :param margin: passed through unchanged to Fss.filtering.
    """
    row = "%5.3f %s"

    print("Before feature subset selection (%d attributes):" % len(data.domain.attributes))
    for entry in orngFSS.attMeasure(data):
        print(row % (entry[1], entry[0]))

    print("\nRelevance of best attributes")
    filtered = Fss.filtering(self, data, margin)
    for entry in orngFSS.attMeasure(filtered):
        print(row % (entry[1], entry[0]))
def cforange_score_estimation(input_dict):
    """Score the attributes of a data set with the Relief measure.

    :param input_dict: mapping that must contain
        ``'dataset'`` -- the data handed to ``orngFSS.attMeasure``,
        ``'k'`` and ``'m'`` -- values converted with ``int()`` and passed as
        the ``k`` and ``m`` arguments of ``orange.MeasureAttribute_relief``.
    :returns: ``{'results': <value returned by orngFSS.attMeasure>}``.
    :raises KeyError: if one of the three keys is missing.
    :raises ValueError: if ``'k'`` or ``'m'`` cannot be converted to int.
    """
    # Imported lazily so that merely importing this module does not need Orange.
    import orange
    import orngFSS

    data = input_dict['dataset']
    relief = orange.MeasureAttribute_relief(k=int(input_dict['k']),
                                            m=int(input_dict['m']))
    return {'results': orngFSS.attMeasure(data, relief)}
def feature_selection(data, classify, k=5, m=100):
    '''
    Perform feature selection using orange.

    For more details see `orange feature selection
    <http://orange.biolab.si/doc/modules/orngFSS.htm>`_ and `orange measure
    attribute <http://orange.biolab.si/doc/reference/MeasureAttribute.htm>`_

    The default measure is ReliefF (MeasureAttribute_relief in Orange).

    :param data: data from :meth:`perform_experiments`.
    :param classify: function for classifying runs.
    :param k: the number of neighbors for each example (default 5).
    :param m: number of examples to use; set to -1 to use all (default 100).
    :rtype: sorted list of tuples with uncertainty names and reliefF attribute
            scores.

    Orange provides other metrics for feature selection

    * Information Gain
    * Gain ratio
    * Gini index
    * Relevance of attributes
    * Costs

    If you want to use any of these instead of ReliefF, use the code
    supplied here as a template, but modify the measure. That is replace::

        measure = orange.MeasureAttribute_relief(k=k, m=m)

    with the measure of choice. See the above provided links for more details.

    '''
    data = build_orange_data(data, classify)

    info("executing feature selection")
    measure = orange.MeasureAttribute_relief(k=k, m=m)
    scores = orngFSS.attMeasure(data, measure)

    # Order by (score, name) descending, then hand the pairs back as
    # (name, score). The loop variable no longer reuses the name of the
    # ``m`` parameter.
    ranked = sorted(((entry[1], entry[0]) for entry in scores), reverse=True)
    return [(name, score) for score, name in ranked]
def report_relevance(data):
    """Print one line per entry returned by ``orngFSS.attMeasure(data)``.

    Every line shows the entry's second item formatted as ``%5.3f`` followed
    by its first item (presumably score and attribute name -- confirm against
    orngFSS.attMeasure).

    :param data: data set handed to orngFSS.attMeasure.
    """
    row = "%5.3f %s"
    for entry in orngFSS.attMeasure(data):
        print(row % (entry[1], entry[0]))
# Description: Ranking and selection of best N attributes
# Category:    preprocessing
# Uses:        voting
# Referenced:  orngFSS.htm
# Classes:     orngFSS.attMeasure, orngFSS.bestNAtts

import orange, orngFSS

data = orange.ExampleTable("voting")

# Part 1: list the first three entries of the attMeasure result
# (called without an explicit measure, so its default is used).
print('Attribute scores for best three attributes:')
ma = orngFSS.attMeasure(data)
for m in ma[:3]:
    print("%5.3f %s" % (m[1], m[0]))

# Part 2: let bestNAtts pick n entries from the same result and print
# whatever it returns, one item per line.
n = 3
best = orngFSS.bestNAtts(ma, n)
print('\nBest %d attributes:' % n)
for s in best:
    print(s)
# Description: Ranking of attributes with two different measures (Relief and gain ratio)
# Category:    preprocessing
# Uses:        voting.tab
# Referenced:  orngFSS.htm
# Classes:     orngFSS.attMeasure, MeasureAttribute_gainRatio

import orange, orngFSS

data = orange.ExampleTable("voting")

print('Relief GainRt Attribute')
ma_def = orngFSS.attMeasure(data)
gainRatio = orange.MeasureAttribute_gainRatio()
ma_gr = orngFSS.attMeasure(data, gainRatio)

# The two lists come from separate attMeasure calls and are not guaranteed to
# list the attributes in the same order (per the orngFSS documentation each is
# ordered by its own score). Pairing them by position can therefore print the
# gain ratio of a different attribute than the one named on the row, so the
# gain ratio is looked up by attribute name instead.
gr_by_name = dict(ma_gr)

# Slice instead of range(5) so that data sets with fewer than five attributes
# do not raise IndexError.
for name, relief in ma_def[:5]:
    print("%5.3f %5.3f %s" % (relief, gr_by_name[name], name))
def __call__(self, data, weight=None):
    """Build a BayesFSS_Classifier from ``data``.

    The attributes are scored with ``orngFSS.attMeasure``; the scores and
    ``self.N`` are handed to ``orngFSS.selectBestNAtts`` (presumably keeping
    the N best-scored attributes -- confirm against orngFSS), and
    ``orange.BayesLearner`` is run on the resulting data.

    :param data: training data.
    :param weight: accepted for interface compatibility; not used here.
    :returns: ``BayesFSS_Classifier`` wrapping the learned model together
        with ``self.N`` and ``self.name``.
    """
    scores = orngFSS.attMeasure(data)
    reduced = orngFSS.selectBestNAtts(data, scores, self.N)
    bayes = orange.BayesLearner(reduced)
    return BayesFSS_Classifier(classifier=bayes, N=self.N, name=self.name)
# Description: Demonstrates the use of attribute evaluation
# Category:    feature scoring, FSS
# Classes:     MeasureAttribute_Distance, MeasureAttribute_MDL
# Uses:        zoo.tab

import orange
import orngEvalAttr
import orngCI
import orngFSS

data = orange.ExampleTable("../datasets/zoo")

print('Distance(1-D) MDL Attribute')
distance = orngEvalAttr.MeasureAttribute_Distance()
ma_d = orngFSS.attMeasure(data, distance)

mdl = orngEvalAttr.MeasureAttribute_MDL()
ma_mdl = orngFSS.attMeasure(data, mdl)

# The two lists come from separate attMeasure calls and are not guaranteed to
# list the attributes in the same order (per the orngFSS documentation each is
# ordered by its own score). Pairing them by position can therefore print the
# MDL score of a different attribute than the one named on the row, so the MDL
# score is looked up by attribute name instead.
mdl_by_name = dict(ma_mdl)

# Slice instead of range(5) so that data sets with fewer than five attributes
# do not raise IndexError.
for name, dist in ma_d[:5]:
    print("%5.3f %5.3f %s" % (dist, mdl_by_name[name], name))
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    """Split the attribute names of ``data`` into a shown and a hidden list.

    :param data: data set; ``data.domain.classVar`` and
        ``data.domain.attributes`` are read, ``data.select`` is used to
        build per-type subsets.
    :param attrContOrder: ordering for continuous attributes. One of
        ``"None"``, ``"ReliefF"``, ``"Fisher discriminant"``,
        ``"Signal to Noise"``, ``"Signal to Noise For Each Class"``.
    :param attrDiscOrder: ordering for discrete attributes. One of
        ``"None"``, ``"GainRatio"``, ``"Gini"``, ``"ReliefF"``,
        ``"Oblivious decision graphs"``.
    :param projections: accepted for interface compatibility; not used.
    :returns: ``(shown, hidden, 0)``. Without a discrete class variable all
        attribute names are shown and nothing is hidden; otherwise ``shown``
        starts with the class variable's name.

    An unrecognised ordering value is reported on stdout and the attributes
    of that type are left out of both lists. The shown/hidden split of scored
    attributes is delegated to ``getTopAttrs(scores, 0.95)``; the meaning of
    the 0.95 argument is defined by that helper.
    """
    classVar = data.domain.classVar
    # Identity test: ``== None`` would go through the variable object's own
    # comparison instead of simply asking whether a class variable exists.
    if classVar is None or classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [classVar.name]
    hidden = []

    def rankSubset(names, measure):
        # Score only the named attributes (plus the class) and split them.
        subset = data.select(names + [classVar])
        return getTopAttrs(orngFSS.attMeasure(subset, measure), 0.95)

    # Same measure requested for both types: score everything in one go.
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        scores = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(scores, 0.95)
        return (shown + s, hidden + h, 0)

    # No ordering at all: keep the domain order.
    if attrContOrder == "None" and attrDiscOrder == "None":
        shown.extend(attr.name for attr in data.domain.attributes)
        return (shown, hidden, 0)

    contAttrs = []
    discAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    # Factories are wrapped in lambdas so a measure class is only looked up
    # and instantiated when its ordering is actually requested.
    contMeasures = {
        "ReliefF": lambda: orange.MeasureAttribute_relief(k=10, m=50),
        "Fisher discriminant": lambda: MeasureFisherDiscriminant(),
        "Signal to Noise": lambda: S2NMeasure(),
        "Signal to Noise For Each Class": lambda: S2NMeasureMix(),
    }
    discMeasures = {
        "GainRatio": lambda: orange.MeasureAttribute_gainRatio(),
        "Gini": lambda: orange.MeasureAttribute_gini(),
        "ReliefF": lambda: orange.MeasureAttribute_relief(),
    }

    # --- continuous attributes ---
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in contMeasures:
        s, h = rankSubset(contAttrs, contMeasures[attrContOrder]())
        shown += s
        hidden += h
    else:
        # Matches the output of the two-argument Python 2 print statement.
        print("%s %s" % ("Unknown value for attribute order: ", attrContOrder))

    # --- discrete attributes ---
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder in discMeasures:
        s, h = rankSubset(discAttrs, discMeasures[attrDiscOrder]())
        shown += s
        hidden += h
    elif attrDiscOrder == "Oblivious decision graphs":
        shown.extend(getFunctionalList(data))
        # Every discrete attribute that was not picked above is hidden.
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print("%s %s" % ("Unknown value for attribute order: ", attrDiscOrder))

    return (shown, hidden, 0)
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    """Decide which attribute names of ``data`` are shown and which hidden.

    :param data: data set providing ``domain.classVar``,
        ``domain.attributes`` and ``select``.
    :param attrContOrder: how to order continuous attributes: ``"None"``,
        ``"ReliefF"``, ``"Fisher discriminant"``, ``"Signal to Noise"`` or
        ``"Signal to Noise For Each Class"``.
    :param attrDiscOrder: how to order discrete attributes: ``"None"``,
        ``"GainRatio"``, ``"Gini"``, ``"ReliefF"`` or
        ``"Oblivious decision graphs"``.
    :param projections: not used by this function.
    :returns: tuple ``(shown, hidden, 0)``. If the data has no class variable
        or the class is not discrete, every attribute name is shown.
        Otherwise ``shown`` begins with the class variable's name.

    Any other ordering value prints a message to stdout and contributes no
    attributes of that type. Scored attributes are divided into shown/hidden
    by ``getTopAttrs(scores, 0.95)``; what 0.95 means is up to that helper.
    """
    domain = data.domain

    # ``is None`` rather than ``== None``: an identity check cannot be
    # affected by how the class variable object implements comparison.
    if domain.classVar is None or domain.classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in domain.attributes], [], 0)

    shown = [domain.classVar.name]
    hidden = []

    # Both orderings are ReliefF: one scoring pass over the whole data set.
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # Neither type is ordered: show everything in domain order.
    if attrContOrder == "None" and attrDiscOrder == "None":
        for attr in domain.attributes:
            shown.append(attr.name)
        return (shown, hidden, 0)

    # Names by variable type; attributes of any other type are ignored.
    contAttrs = [attr.name for attr in domain.attributes
                 if attr.varType == orange.VarTypes.Continuous]
    discAttrs = [attr.name for attr in domain.attributes
                 if attr.varType == orange.VarTypes.Discrete]

    ###############################
    # continuous attributes
    contChoices = ("ReliefF", "Fisher discriminant", "Signal to Noise",
                   "Signal to Noise For Each Class")
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in contChoices:
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [domain.classVar])
        s, h = getTopAttrs(orngFSS.attMeasure(dataNew, measure), 0.95)
        shown += s
        hidden += h
    else:
        # Same text as the two-argument Python 2 print statement.
        print("%s %s" % ("Unknown value for attribute order: ", attrContOrder))

    ###############################
    # discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder in ("GainRatio", "Gini", "ReliefF"):
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [domain.classVar])
        s, h = getTopAttrs(orngFSS.attMeasure(dataNew, measure), 0.95)
        shown += s
        hidden += h
    elif attrDiscOrder == "Oblivious decision graphs":
        for item in getFunctionalList(data):
            shown.append(item)
        # Discrete attributes not selected above go to the hidden list.
        hidden += [attr.name for attr in domain.attributes
                   if attr.name not in shown
                   and attr.varType == orange.VarTypes.Discrete]
    else:
        print("%s %s" % ("Unknown value for attribute order: ", attrDiscOrder))

    return (shown, hidden, 0)