def __init__(self,parent=None, signalManager = None): OWWidget.__init__(self, parent, signalManager, "Sieve Multigram", wantGraph = True, wantStatusBar = True) self.inputs = [("Data", ExampleTable, self.setData), ("Features", AttributeList, self.setShownAttributes)] self.outputs = [] #add a graph widget self.graph = OWSieveMultigramGraph(self.mainArea) self.graph.useAntialiasing = 1 self.mainArea.layout().addWidget(self.graph) #set default settings self.graphCanvasColor = str(QColor(Qt.white).name()) self.data = None self.graph.lineWidth = 3 self.graph.minPearson = 2 self.graph.maxPearson = 10 self.showAllAttributes = 0 # add a settings dialog and initialize its values self.loadSettings() #GUI # add a settings dialog and initialize its values self.tabs = OWGUI.tabWidget(self.controlArea) self.GeneralTab = OWGUI.createTabPage(self.tabs, "Main") self.SettingsTab = OWGUI.createTabPage(self.tabs, "Settings") #add controls to self.controlArea widget self.createShowHiddenLists(self.GeneralTab, callback = self.updateGraph) OWGUI.button(self.GeneralTab, self, "Find Interesting Attr.", callback = self.interestingSubsetSelection, debuggingEnabled = 0) OWGUI.hSlider(self.SettingsTab, self, 'graph.lineWidth', box = 1, label = "Max line width:", minValue=1, maxValue=10, step=1, callback = self.updateGraph) residualBox = OWGUI.widgetBox(self.SettingsTab, "Attribute independence (Pearson residuals)") OWGUI.hSlider(residualBox, self, 'graph.maxPearson', label = "Max residual:", minValue=4, maxValue=12, step=1, callback = self.updateGraph) OWGUI.hSlider(residualBox, self, 'graph.minPearson', label = "Min residual:", minValue=0, maxValue=4, step=1, callback = self.updateGraph, tooltip = "The minimal absolute residual value that will be shown in graph") self.SettingsTab.layout().addStretch(100) self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile) self.resize(800, 600)
def __init__(self, parent=None, signalManager=None): OWWidget.__init__(self, parent, signalManager, "Sieve Multigram", wantGraph=True, wantStatusBar=True) self.inputs = [("Data", ExampleTable, self.setData), ("Features", AttributeList, self.setShownAttributes)] self.outputs = [] #add a graph widget self.graph = OWSieveMultigramGraph(self.mainArea) self.graph.useAntialiasing = 1 self.mainArea.layout().addWidget(self.graph) #set default settings self.graphCanvasColor = str(QColor(Qt.white).name()) self.data = None self.graph.lineWidth = 3 self.graph.minPearson = 2 self.graph.maxPearson = 10 self.showAllAttributes = 0 # add a settings dialog and initialize its values self.loadSettings() #GUI # add a settings dialog and initialize its values self.tabs = OWGUI.tabWidget(self.controlArea) self.GeneralTab = OWGUI.createTabPage(self.tabs, "Main") self.SettingsTab = OWGUI.createTabPage(self.tabs, "Settings") #add controls to self.controlArea widget self.createShowHiddenLists(self.GeneralTab, callback=self.updateGraph) OWGUI.button(self.GeneralTab, self, "Find Interesting Attr.", callback=self.interestingSubsetSelection, debuggingEnabled=0) OWGUI.hSlider(self.SettingsTab, self, 'graph.lineWidth', box=1, label="Max line width:", minValue=1, maxValue=10, step=1, callback=self.updateGraph) residualBox = OWGUI.widgetBox( self.SettingsTab, "Attribute independence (Pearson residuals)") OWGUI.hSlider(residualBox, self, 'graph.maxPearson', label="Max residual:", minValue=4, maxValue=12, step=1, callback=self.updateGraph) OWGUI.hSlider( residualBox, self, 'graph.minPearson', label="Min residual:", minValue=0, maxValue=4, step=1, callback=self.updateGraph, tooltip= "The minimal absolute residual value that will be shown in graph") self.SettingsTab.layout().addStretch(100) self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile) self.resize(800, 600)
class OWSieveMultigram(OWVisWidget): settingsList = [ "graph.lineWidth", "graph.maxPearson", "graph.minPearson", "showAllAttributes" ] contextHandlers = { "": DomainContextHandler("", [ ContextField("shownAttributes", DomainContextHandler.RequiredList, selected="selectedShown", reservoir="hiddenAttributes") ]) } def __init__(self, parent=None, signalManager=None): OWWidget.__init__(self, parent, signalManager, "Sieve Multigram", wantGraph=True, wantStatusBar=True) self.inputs = [("Data", ExampleTable, self.setData), ("Features", AttributeList, self.setShownAttributes)] self.outputs = [] #add a graph widget self.graph = OWSieveMultigramGraph(self.mainArea) self.graph.useAntialiasing = 1 self.mainArea.layout().addWidget(self.graph) #set default settings self.graphCanvasColor = str(QColor(Qt.white).name()) self.data = None self.graph.lineWidth = 3 self.graph.minPearson = 2 self.graph.maxPearson = 10 self.showAllAttributes = 0 # add a settings dialog and initialize its values self.loadSettings() #GUI # add a settings dialog and initialize its values self.tabs = OWGUI.tabWidget(self.controlArea) self.GeneralTab = OWGUI.createTabPage(self.tabs, "Main") self.SettingsTab = OWGUI.createTabPage(self.tabs, "Settings") #add controls to self.controlArea widget self.createShowHiddenLists(self.GeneralTab, callback=self.updateGraph) OWGUI.button(self.GeneralTab, self, "Find Interesting Attr.", callback=self.interestingSubsetSelection, debuggingEnabled=0) OWGUI.hSlider(self.SettingsTab, self, 'graph.lineWidth', box=1, label="Max line width:", minValue=1, maxValue=10, step=1, callback=self.updateGraph) residualBox = OWGUI.widgetBox( self.SettingsTab, "Attribute independence (Pearson residuals)") OWGUI.hSlider(residualBox, self, 'graph.maxPearson', label="Max residual:", minValue=4, maxValue=12, step=1, callback=self.updateGraph) OWGUI.hSlider( residualBox, self, 'graph.minPearson', label="Min residual:", minValue=0, maxValue=4, step=1, callback=self.updateGraph, tooltip= "The minimal absolute residual value that will be shown in graph") self.SettingsTab.layout().addStretch(100) self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile) self.resize(800, 600) def sendReport(self): self.reportImage(self.graph.saveToFileDirect, QSize(500, 500)) # receive new data and update all fields def setData(self, data): self.closeContext() self.information(0) self.error([0, 1]) self.data = None if data: data = orange.Preprocessor_dropMissing(data) if len(data) == 0: self.error(0, "No examples without missing values") data = None if not self.isDataWithClass(data): self.error(1, "Data is without class") data = None if data and data.domain.hasContinuousAttributes(): data = discretizeDomain(data, 1) self.information( 0, "Continuous attributes were discretized using entropy discretization." ) self.data = data self.computeProbabilities() self.setShownAttributeList() self.openContext("", self.data) self.resetAttrManipulation() self.updateGraph() def sendShownAttributes(self): pass def updateGraph(self, *args): self.graph.updateData(self.data, self.getShownAttributeList(), self.probabilities) def interestingSubsetSelection(self): labels = self.getShownAttributeList() interestingList = [] data = self.data # create a list of interesting attributes for attrXindex in range(len(labels)): attrXName = labels[attrXindex] for attrYindex in range(attrXindex + 1, len(labels)): attrYName = labels[attrYindex] for valXindex in range(len(data.domain[attrXName].values)): valX = data.domain[attrXName].values[valXindex] for valYindex in range(len(data.domain[attrYName].values)): valY = data.domain[attrYName].values[valYindex] ((nameX, countX), (nameY, countY), actual, sum) = self.probabilities['%s+%s:%s+%s' % (attrXName, valX, attrYName, valY)] expected = float(countX * countY) / float(sum) if actual == expected == 0: continue elif expected == 0: pearson = actual / sqrt(actual) else: pearson = (actual - expected) / sqrt(expected) if abs( pearson ) > self.graph.minPearson and attrXName not in interestingList: interestingList.append(attrXName) if abs( pearson ) > self.graph.minPearson and attrYName not in interestingList: interestingList.append(attrYName) # remove attributes that are not in interestingList from visible attribute list self.setShownAttributeList(interestingList) self.updateGraph() def computeProbabilities(self): self.probabilities = {} if not self.data: return self.setStatusBarText("Please wait. Computing...") total = len(self.data) conts = {} dc = [] for i in range(len(self.data.domain)): dc.append( orange.ContingencyAttrAttr(self.data.domain[i], self.data.domain[i], self.data)) for i in range(len(self.data.domain)): if self.data.domain[i].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes cont = dc[i] # distribution of X attribute vals = [] # compute contingency of x attribute for key in cont.keys(): sum = 0 try: for val in cont[key]: sum += val except: pass vals.append(sum) conts[self.data.domain[i].name] = (cont, vals) for attrX in range(len(self.data.domain)): if self.data.domain[attrX].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes for attrY in range(attrX, len(self.data.domain)): if self.data.domain[ attrY].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes (contX, valsX) = conts[self.data.domain[attrX].name] (contY, valsY) = conts[self.data.domain[attrY].name] # create cartesian product of selected attributes and compute contingency (cart, profit) = FeatureByCartesianProduct( self.data, [self.data.domain[attrX], self.data.domain[attrY]]) tempData = self.data.select(list(self.data.domain) + [cart]) contXY = orange.ContingencyAttrClass( cart, tempData) # distribution of X attribute # compute probabilities for i in range(len(valsX)): valx = valsX[i] for j in range(len(valsY)): valy = valsY[j] actualCount = 0 try: for val in contXY['%s-%s' % (contX.keys()[i], contY.keys()[j])]: actualCount += val except: pass self.probabilities['%s+%s:%s+%s' % ( self.data.domain[attrX].name, contX.keys()[i], self.data.domain[attrY].name, contY.keys()[j])] = ( (contX.keys()[i], valx), (contY.keys()[j], valy), actualCount, total) self.probabilities['%s+%s:%s+%s' % ( self.data.domain[attrY].name, contY.keys()[j], self.data.domain[attrX].name, contX.keys()[i])] = ( (contY.keys()[j], valy), (contX.keys()[i], valx), actualCount, total) self.setStatusBarText("") # receive info about which attributes to show def setShownAttributes(self, list): self.setShownAttributeList(list) self.updateGraph()
class OWSieveMultigram(OWVisWidget): settingsList = ["graph.lineWidth", "graph.maxPearson", "graph.minPearson", "showAllAttributes"] contextHandlers = {"": DomainContextHandler("", [ContextField("shownAttributes", DomainContextHandler.RequiredList, selected="selectedShown", reservoir="hiddenAttributes")])} def __init__(self,parent=None, signalManager = None): OWWidget.__init__(self, parent, signalManager, "Sieve Multigram", wantGraph = True, wantStatusBar = True) self.inputs = [("Data", ExampleTable, self.setData), ("Features", AttributeList, self.setShownAttributes)] self.outputs = [] #add a graph widget self.graph = OWSieveMultigramGraph(self.mainArea) self.graph.useAntialiasing = 1 self.mainArea.layout().addWidget(self.graph) #set default settings self.graphCanvasColor = str(QColor(Qt.white).name()) self.data = None self.graph.lineWidth = 3 self.graph.minPearson = 2 self.graph.maxPearson = 10 self.showAllAttributes = 0 # add a settings dialog and initialize its values self.loadSettings() #GUI # add a settings dialog and initialize its values self.tabs = OWGUI.tabWidget(self.controlArea) self.GeneralTab = OWGUI.createTabPage(self.tabs, "Main") self.SettingsTab = OWGUI.createTabPage(self.tabs, "Settings") #add controls to self.controlArea widget self.createShowHiddenLists(self.GeneralTab, callback = self.updateGraph) OWGUI.button(self.GeneralTab, self, "Find Interesting Attr.", callback = self.interestingSubsetSelection, debuggingEnabled = 0) OWGUI.hSlider(self.SettingsTab, self, 'graph.lineWidth', box = 1, label = "Max line width:", minValue=1, maxValue=10, step=1, callback = self.updateGraph) residualBox = OWGUI.widgetBox(self.SettingsTab, "Attribute independence (Pearson residuals)") OWGUI.hSlider(residualBox, self, 'graph.maxPearson', label = "Max residual:", minValue=4, maxValue=12, step=1, callback = self.updateGraph) OWGUI.hSlider(residualBox, self, 'graph.minPearson', label = "Min residual:", minValue=0, maxValue=4, step=1, callback = self.updateGraph, tooltip = "The minimal absolute residual value that will be shown in graph") self.SettingsTab.layout().addStretch(100) self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile) self.resize(800, 600) def sendReport(self): self.reportImage(self.graph.saveToFileDirect, QSize(500,500)) # receive new data and update all fields def setData(self, data): self.closeContext() self.information(0) self.error([0, 1]) self.data = None if data: data = orange.Preprocessor_dropMissing(data) if len(data) == 0: self.error(0, "No examples without missing values") data = None if not self.isDataWithClass(data): self.error(1, "Data is without class") data = None if data and data.domain.hasContinuousAttributes(): data = discretizeDomain(data, 1) self.information(0, "Continuous attributes were discretized using entropy discretization.") self.data = data self.computeProbabilities() self.setShownAttributeList() self.openContext("", self.data) self.resetAttrManipulation() self.updateGraph() def sendShownAttributes(self): pass def updateGraph(self, *args): self.graph.updateData(self.data, self.getShownAttributeList(), self.probabilities) def interestingSubsetSelection(self): labels = self.getShownAttributeList() interestingList = [] data = self.data # create a list of interesting attributes for attrXindex in range(len(labels)): attrXName = labels[attrXindex] for attrYindex in range(attrXindex+1, len(labels)): attrYName = labels[attrYindex] for valXindex in range(len(data.domain[attrXName].values)): valX = data.domain[attrXName].values[valXindex] for valYindex in range(len(data.domain[attrYName].values)): valY = data.domain[attrYName].values[valYindex] ((nameX, countX),(nameY, countY), actual, sum) = self.probabilities['%s+%s:%s+%s' %(attrXName, valX, attrYName, valY)] expected = float(countX*countY)/float(sum) if actual == expected == 0: continue elif expected == 0: pearson = actual/sqrt(actual) else: pearson = (actual - expected) / sqrt(expected) if abs(pearson) > self.graph.minPearson and attrXName not in interestingList: interestingList.append(attrXName) if abs(pearson) > self.graph.minPearson and attrYName not in interestingList: interestingList.append(attrYName) # remove attributes that are not in interestingList from visible attribute list self.setShownAttributeList(interestingList) self.updateGraph() def computeProbabilities(self): self.probabilities = {} if not self.data: return self.setStatusBarText("Please wait. Computing...") total = len(self.data) conts = {} dc = [] for i in range(len(self.data.domain)): dc.append(orange.ContingencyAttrAttr(self.data.domain[i], self.data.domain[i], self.data)) for i in range(len(self.data.domain)): if self.data.domain[i].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes cont = dc[i] # distribution of X attribute vals = [] # compute contingency of x attribute for key in cont.keys(): sum = 0 try: for val in cont[key]: sum += val except: pass vals.append(sum) conts[self.data.domain[i].name] = (cont, vals) for attrX in range(len(self.data.domain)): if self.data.domain[attrX].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes for attrY in range(attrX, len(self.data.domain)): if self.data.domain[attrY].varType == orange.VarTypes.Continuous: continue # we can only check discrete attributes (contX, valsX) = conts[self.data.domain[attrX].name] (contY, valsY) = conts[self.data.domain[attrY].name] # create cartesian product of selected attributes and compute contingency (cart, profit) = FeatureByCartesianProduct(self.data, [self.data.domain[attrX], self.data.domain[attrY]]) tempData = self.data.select(list(self.data.domain) + [cart]) contXY = orange.ContingencyAttrClass(cart, tempData) # distribution of X attribute # compute probabilities for i in range(len(valsX)): valx = valsX[i] for j in range(len(valsY)): valy = valsY[j] actualCount = 0 try: for val in contXY['%s-%s' %(contX.keys()[i], contY.keys()[j])]: actualCount += val except: pass self.probabilities['%s+%s:%s+%s' %(self.data.domain[attrX].name, contX.keys()[i], self.data.domain[attrY].name, contY.keys()[j])] = ((contX.keys()[i], valx), (contY.keys()[j], valy), actualCount, total) self.probabilities['%s+%s:%s+%s' %(self.data.domain[attrY].name, contY.keys()[j], self.data.domain[attrX].name, contX.keys()[i])] = ((contY.keys()[j], valy), (contX.keys()[i], valx), actualCount, total) self.setStatusBarText("") # receive info about which attributes to show def setShownAttributes(self, list): self.setShownAttributeList(list) self.updateGraph()