class OWLearningCurveC(OWWidget):
    """Widget that draws and tabulates learning curves for the input learners.

    Input signals:
      - "Data": handled by :meth:`dataset`
      - "Learner" (multiple sources): handled by :meth:`learner`

    The learners are evaluated with ``orngTest.learningCurveN`` on the
    proportions stored in ``self.curvePoints`` and scored with the function
    selected by ``self.scoringF``.

    The widget attaches the following extra attributes to every learner
    object it receives:
      - ``isSelected``: the learner is selected (its curve is displayed)
      - ``curve``: graph curve of the learner (set once a curve was drawn)
      - ``score``: list of scores, one per learning curve point
      - ``color``: color assigned to the learner in :meth:`updatellb`
    """

    settingsList = ["folds", "steps", "scoringF", "commitOnChange",
                    "graphPointSize", "graphDrawLines", "graphShowGrid"]

    def __init__(self, parent=None, signalManager=None):
        """Set default settings, load stored ones and build the GUI."""
        OWWidget.__init__(self, parent, signalManager, 'LearningCurveC')

        self.inputs = [("Data", ExampleTable, self.dataset),
                       ("Learner", orange.Learner, self.learner, Multiple)]

        self.folds = 5  # cross validation folds
        self.steps = 10  # points in the learning curve
        self.scoringF = 0  # scoring function
        self.commitOnChange = 1  # compute curve on any change of parameters
        self.graphPointSize = 5  # size of points in the graphs
        self.graphDrawLines = 1  # draw lines between points in the graph
        self.graphShowGrid = 1  # show gridlines in the graph
        self.selectedLearners = []
        self.loadSettings()

        warnings.filterwarnings("ignore", ".*builtin attribute.*",
                                orange.AttributeWarning)

        # sets self.curvePoints: self.steps equidistant points
        # from 1/self.steps to 1
        self.setCurvePoints()
        self.scoring = [("Classification Accuracy", orngStat.CA),
                        ("AUC", orngStat.AUC),
                        ("BrierScore", orngStat.BrierScore),
                        ("Information Score", orngStat.IS),
                        ("Sensitivity", orngStat.sens),
                        ("Specificity", orngStat.spec)]
        # list of current learners from input channel, tuples (id, learner)
        self.learners = []
        # data on which to construct the learning curve
        self.data = None
        # list of evaluation results (one per learning curve point)
        self.curves = []
        # list of current scores, one list of scores per learner
        self.scores = []

        # GUI
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoa = OWGUI.widgetLabel(box, 'No data on input.')
        self.infob = OWGUI.widgetLabel(box, 'No learners.')

        ## class selection (classQLB)
        OWGUI.separator(self.controlArea)
        self.cbox = OWGUI.widgetBox(self.controlArea, "Learners")
        self.llb = OWGUI.listBox(self.cbox, self, "selectedLearners",
                                 selectionMode=QListWidget.MultiSelection,
                                 callback=self.learnerSelectionChanged)

        self.llb.setMinimumHeight(50)
        # set while the list box is rebuilt so that selection callbacks
        # triggered by the rebuild are ignored
        self.blockSelectionChanges = 0

        OWGUI.separator(self.controlArea)
        box = OWGUI.widgetBox(self.controlArea, "Evaluation Scores")
        scoringNames = [x[0] for x in self.scoring]
        OWGUI.comboBox(box, self, "scoringF", items=scoringNames,
                       callback=self.computeScores)

        OWGUI.separator(self.controlArea)
        box = OWGUI.widgetBox(self.controlArea, "Options")
        OWGUI.spin(box, self, 'folds', 2, 100, step=1,
                   label='Cross validation folds:  ',
                   callback=lambda: self.computeCurve(self.commitOnChange))
        OWGUI.spin(box, self, 'steps', 2, 100, step=1,
                   label='Learning curve points:  ',
                   callback=[self.setCurvePoints,
                             lambda: self.computeCurve(self.commitOnChange)])

        OWGUI.checkBox(box, self, 'commitOnChange',
                       'Apply setting on any change')
        self.commitBtn = OWGUI.button(box, self, "Apply Setting",
                                      callback=self.computeCurve, disabled=1)

        # start of content (right) area
        tabs = OWGUI.tabWidget(self.mainArea)

        # graph widget
        tab = OWGUI.createTabPage(tabs, "Graph")
        self.graph = OWGraph(tab)
        self.graph.setAxisAutoScale(QwtPlot.xBottom)
        self.graph.setAxisAutoScale(QwtPlot.yLeft)
        tab.layout().addWidget(self.graph)
        self.setGraphGrid()

        # table widget
        tab = OWGUI.createTabPage(tabs, "Table")
        self.table = OWGUI.table(tab, selectionMode=QTableWidget.NoSelection)

        self.resize(550, 200)

    ##############################################################################
    # slots: handle input signals

    def dataset(self, data):
        """Handle the "Data" input signal.

        With data, the learning curves of all current learners are recomputed
        and redrawn.  Without data (``None`` or anything falsy), the stored
        data set, results and drawn curves are discarded.
        """
        if data:
            self.infoa.setText('%d instances in input data set' % len(data))
            self.data = data
            if len(self.learners):
                self.computeCurve()
            self.replotGraph()
        else:
            self.infoa.setText('No data on input.')
            # Drop the previous data set as well; otherwise later learner
            # signals would silently be evaluated on data that is gone.
            self.data = None
            self.curves = []
            self.scores = []
            self.graph.removeDrawingCurves()
            self.graph.replot()
        self.commitBtn.setEnabled(self.data is not None)

    def learner(self, learner, id=None):
        """Handle the "Learner" input signal.

        ``learner`` is ``None`` when the source identified by ``id`` removed
        its learner; otherwise the learner either replaces the one previously
        received from the same ``id`` or is added as a new one.
        """
        ids = [x[0] for x in self.learners]
        if learner is None:  # remove a learner and corresponding results
            if id not in ids:
                return  # no such learner, removed before
            indx = ids.index(id)
            # Without data there are no curves/scores yet, so only touch
            # what actually exists.
            for results in self.curves:
                results.remove(indx)
            if indx < len(self.scores):
                del self.scores[indx]
            self._detachCurve(self.learners[indx][1])
            del self.learners[indx]
            self.setTable()
            self.updatellb()
        elif id in ids:  # update (already seen a learner from this source)
            indx = ids.index(id)
            prevLearner = self.learners[indx][1]
            learner.isSelected = prevLearner.isSelected
            self.learners[indx] = (id, learner)
            # updatellb assigns learner.color, which drawing the curve
            # needs, so it has to run before drawLearningCurve.
            self.updatellb()
            if self.data:
                curve = self.getLearningCurve([learner])
                score = [self.scoring[self.scoringF][1](x)[0] for x in curve]
                self.scores[indx] = score
                for i in range(self.steps):
                    self.curves[i].add(curve[i], 0, replace=indx)
                learner.score = score
                self._detachCurve(prevLearner)
                self.drawLearningCurve(learner)
        else:  # add new learner
            learner.isSelected = 1
            self.learners.append((id, learner))
            if self.data:
                curve = self.getLearningCurve([learner])
                score = [self.scoring[self.scoringF][1](x)[0] for x in curve]
                self.scores.append(score)
                if len(self.curves):
                    for i in range(self.steps):
                        self.curves[i].add(curve[i], 0)
                else:
                    self.curves = curve
                learner.score = score
            self.updatellb()
            self.drawLearningCurve(learner)

        if len(self.learners):
            self.infob.setText("%d learners on input." % len(self.learners))
        else:
            self.infob.setText("No learners.")
        self.commitBtn.setEnabled(bool(self.learners))
        if self.data:
            self.setTable()

    ##############################################################################
    # learning curve table, callbacks

    def computeCurve(self, condition=1):
        """Recompute the learning curves of all learners and their scores.

        Nothing is done when ``condition`` is false (used by the spin box
        callbacks to honour ``commitOnChange``) or when there is no data.
        """
        if not condition or self.data is None:
            return
        learners = [x[1] for x in self.learners]
        self.curves = self.getLearningCurve(learners)
        self.computeScores()

    def computeScores(self):
        """Score the stored results with the selected scoring function.

        Fills ``self.scores`` (one list per learner), stores each list in the
        learner's ``score`` attribute and refreshes the table and the graph.
        """
        self.scores = [[] for i in range(len(self.learners))]
        scoringFunction = self.scoring[self.scoringF][1]
        for x in self.curves:
            for (i, s) in enumerate(scoringFunction(x)):
                self.scores[i].append(s)
        for (i, l) in enumerate(self.learners):
            l[1].score = self.scores[i]
        self.setTable()
        self.replotGraph()

    def getLearningCurve(self, learners):
        """Return the results of ``orngTest.learningCurveN`` for ``learners``.

        The evaluation uses ``self.data``, ``self.folds`` and
        ``self.curvePoints``; progress is reported through a progress bar.
        """
        pb = OWGUI.ProgressBar(self, iterations=self.steps * self.folds)
        curve = orngTest.learningCurveN(learners, self.data,
                                        folds=self.folds,
                                        proportions=self.curvePoints,
                                        callback=pb.advance)
        pb.finish()
        return curve

    def setCurvePoints(self):
        """Set ``self.curvePoints`` to ``self.steps`` equidistant proportions
        from ``1/self.steps`` to 1."""
        self.curvePoints = [(x + 1.) / self.steps for x in range(self.steps)]

    def setTable(self):
        """Rebuild the score table: one column per learner, one row per
        learning curve point."""
        self.table.setColumnCount(0)
        self.table.setColumnCount(len(self.learners))
        self.table.setRowCount(self.steps)

        # set the headers
        self.table.setHorizontalHeaderLabels(
            [l.name for i, l in self.learners])
        self.table.setVerticalHeaderLabels(
            ["%4.2f" % p for p in self.curvePoints])

        # set the table contents; only cells with an available score are
        # filled, so the table can be rebuilt before scores were computed
        for col, scores in enumerate(self.scores[:len(self.learners)]):
            for row, score in enumerate(scores[:self.steps]):
                OWGUI.tableItem(self.table, row, col, "%7.5f" % score)

        for i in range(len(self.learners)):
            self.table.setColumnWidth(i, 80)

    # management of learner selection

    def updatellb(self):
        """Rebuild the learner list box and assign a color to each learner."""
        self.blockSelectionChanges = 1
        self.llb.clear()
        colors = ColorPaletteHSV(len(self.learners))
        for (i, lt) in enumerate(self.learners):
            l = lt[1]
            item = QListWidgetItem(ColorPixmap(colors[i]), l.name)
            self.llb.addItem(item)
            item.setSelected(l.isSelected)
            l.color = colors[i]
        self.blockSelectionChanges = 0

    def learnerSelectionChanged(self):
        """Show or hide curves after the selection in the list box changed."""
        if self.blockSelectionChanges:
            return
        for (i, lt) in enumerate(self.learners):
            l = lt[1]
            selected = i in self.selectedLearners
            if l.isSelected != selected:
                if l.isSelected:  # learner was deselected
                    self._detachCurve(l)
                else:  # learner was selected
                    self.drawLearningCurve(l)
                self.graph.replot()
            l.isSelected = selected

    # Graph specific methods

    def setGraphGrid(self):
        """Show or hide the graph grid lines according to ``graphShowGrid``."""
        self.graph.enableGridYL(self.graphShowGrid)
        self.graph.enableGridXB(self.graphShowGrid)

    def setGraphStyle(self, learner):
        """Apply line style, point symbol and pen color to the learner's
        curve; requires ``learner.curve`` and ``learner.color``."""
        curve = learner.curve
        if self.graphDrawLines:
            curve.setStyle(QwtPlotCurve.Lines)
        else:
            curve.setStyle(QwtPlotCurve.NoCurve)
        curve.setSymbol(QwtSymbol(QwtSymbol.Ellipse,
                                  QBrush(QColor(0, 0, 0)),
                                  QPen(QColor(0, 0, 0)),
                                  QSize(self.graphPointSize,
                                        self.graphPointSize)))
        curve.setPen(QPen(learner.color, 5))

    def drawLearningCurve(self, learner):
        """Add the learner's curve to the graph; does nothing without data."""
        if not self.data:
            return
        curve = self.graph.addCurve(learner.name, xData=self.curvePoints,
                                    yData=learner.score, autoScale=True)
        learner.curve = curve
        self.setGraphStyle(learner)
        self.graph.replot()

    def replotGraph(self):
        """Remove all drawn curves and draw the curve of every learner."""
        self.graph.removeDrawingCurves()
        for l in self.learners:
            self.drawLearningCurve(l[1])

    def _detachCurve(self, learner):
        """Detach the learner's curve from the graph if one was ever drawn."""
        curve = getattr(learner, "curve", None)
        if curve is not None:
            curve.detach()
class OWCorrespondenceAnalysis(OWWidget):
    """Widget that plots a correspondence analysis of two discrete attributes.

    Input signal "Data" is handled by :meth:`setData`.  The row and column
    points of the analysis (computed by ``orngCA.CA`` from the contingency of
    the two chosen attributes) are drawn in ``self.graph``; examples matching
    the selected points are sent on "Selected Data", all others on
    "Remaining Data".
    """

    # The field names must match the widget attributes they store.
    contextHandlers = {
        "": DomainContextHandler(
            "", ["colAttr", "rowAttr", "xPrincipalAxis", "yPrincipalAxis"])
    }

    settingsList = ["pointSize", "alpha", "jitter", "showGridlines"]

    def __init__(self, parent=None, signalManager=None,
                 name="Correspondence Analysis"):
        """Set default settings, load stored ones and build the GUI."""
        OWWidget.__init__(self, parent, signalManager, name, wantGraph=True)

        self.inputs = [("Data", ExampleTable, self.setData)]
        self.outputs = [("Selected Data", ExampleTable),
                        ("Remaining Data", ExampleTable)]

        self.colAttr = 0
        self.rowAttr = 1
        self.xPrincipalAxis = 0
        self.yPrincipalAxis = 1
        self.pointSize = 6
        self.alpha = 240
        self.jitter = 0
        self.showGridlines = 0
        self.percCol = 100
        self.percRow = 100
        self.autoSend = 0

        self.loadSettings()

        # Analysis state; initialised here so that GUI callbacks fired
        # before any data arrives find the attributes they read.
        self.data = None
        self.allAttrs = []
        self.CA = None
        self.contingency = None
        self.contColAttr = None
        self.contRowAttr = None

        # GUI
        self.graph = OWGraph(self)
        self.graph.sendData = self.sendData
        self.mainArea.layout().addWidget(self.graph)

        self.controlAreaTab = OWGUI.tabWidget(self.controlArea)

        # Graph tab
        self.graphTab = graphTab = OWGUI.createTabPage(self.controlAreaTab,
                                                       "Graph")
        self.colAttrCB = OWGUI.comboBox(graphTab, self, "colAttr",
                                        "Column Attribute",
                                        tooltip="Column attribute",
                                        callback=self.runCA)

        self.rowAttrCB = OWGUI.comboBox(graphTab, self, "rowAttr",
                                        "Row Attribute",
                                        tooltip="Row attribute",
                                        callback=self.runCA)

        self.xAxisCB = OWGUI.comboBox(graphTab, self, "xPrincipalAxis",
                                      "Principal Axis X",
                                      tooltip="Principal axis X",
                                      callback=self.updateGraph)

        self.yAxisCB = OWGUI.comboBox(graphTab, self, "yPrincipalAxis",
                                      "Principal Axis Y",
                                      tooltip="Principal axis Y",
                                      callback=self.updateGraph)

        box = OWGUI.widgetBox(graphTab, "Contribution to Inertia")
        self.contributionInfo = OWGUI.widgetLabel(box, "NA\nNA")

        OWGUI.hSlider(graphTab, self, "percCol", "Percent of Column Points",
                      1, 100, 1,
                      callback=self.updateGraph,
                      tooltip="The percent of column points with the largest contribution to inertia")

        OWGUI.hSlider(graphTab, self, "percRow", "Percent of Row Points",
                      1, 100, 1,
                      callback=self.updateGraph,
                      tooltip="The percent of row points with the largest contribution to inertia")

        self.zoomSelect = ZoomSelectToolbar(self, graphTab, self.graph,
                                            self.autoSend)
        OWGUI.rubber(graphTab)

        # Settings tab
        self.settingsTab = settingsTab = OWGUI.createTabPage(
            self.controlAreaTab, "Settings")
        OWGUI.hSlider(settingsTab, self, "pointSize", "Point Size", 3, 20,
                      step=1, callback=self.setPointSize)

        OWGUI.hSlider(settingsTab, self, "alpha", "Transparancy", 1, 255,
                      step=1, callback=self.updateAlpha)

        OWGUI.hSlider(settingsTab, self, "jitter", "Jitter Points", 0, 20,
                      step=1, callback=self.updateGraph)

        box = OWGUI.widgetBox(settingsTab, "General Settings")
        OWGUI.checkBox(box, self, "showGridlines", "Show gridlines",
                       tooltip="Show gridlines in the plot.",
                       callback=self.updateGridlines)
        OWGUI.rubber(settingsTab)

        self.connect(self.graphButton, SIGNAL("clicked()"),
                     self.graph.saveToFile)

        self.resize(800, 600)

    def setData(self, data=None):
        """Handle the "Data" input signal.

        Collects the discrete (``orange.EnumVariable``) variables and meta
        attributes of ``data``, fills the attribute combo boxes with them
        and runs the analysis.  Data without discrete variables only
        produces a warning.
        """
        self.closeContext("")
        self.clear()
        self.data = data
        self.warning([0])
        if data is None:
            return

        attrs = list(data.domain.variables) + \
            list(data.domain.getmetas().values())
        attrs = [attr for attr in attrs
                 if isinstance(attr, orange.EnumVariable)]
        if not attrs:
            self.warning(0, "Data has no discrete variables!")
            self.clear()
            return

        self.allAttrs = attrs
        self.colAttrCB.clear()
        self.rowAttrCB.clear()
        icons = OWGUI.getAttributeIcons()
        for attr in attrs:
            self.colAttrCB.addItem(QIcon(icons[attr.varType]), attr.name)
            self.rowAttrCB.addItem(QIcon(icons[attr.varType]), attr.name)

        # Clamp the stored indices to the available attributes.
        self.colAttr = max(min(len(attrs) - 1, self.colAttr), 0)
        self.rowAttr = max(min(len(attrs) - 1, self.rowAttr),
                           min(1, len(attrs) - 1))

        self.openContext("", data)
        self.runCA()

    def clear(self):
        """Reset the widget to the "no data" state and send ``None`` on both
        outputs."""
        self.data = None
        # Forget the previous analysis so that graph callbacks do not
        # redraw results of data that is no longer on input.
        self.CA = None
        self.colAttrCB.clear()
        self.rowAttrCB.clear()
        self.xAxisCB.clear()
        self.yAxisCB.clear()

        self.contributionInfo.setText("NA\nNA")
        self.graph.removeDrawingCurves(True, True, True)

        self.send("Selected Data", None)
        self.send("Remaining Data", None)
        self.allAttrs = []

    def runCA(self):
        """Compute the correspondence analysis for the chosen attributes.

        Builds the contingency of the selected row and column attribute,
        runs ``orngCA.CA`` on it, refreshes the principal axis combo boxes
        and redraws the graph.

        Raises ``numpy.linalg.LinAlgError`` (after showing the error in the
        widget) when the analysis cannot be computed.
        """
        self.contColAttr = colAttr = self.allAttrs[self.colAttr]
        self.contRowAttr = rowAttr = self.allAttrs[self.rowAttr]
        self.contingency = orange.ContingencyAttrAttr(rowAttr, colAttr,
                                                      self.data)
        self.error(0)
        try:
            self.CA = orngCA.CA([[c for c in row]
                                 for row in self.contingency])
        except numpy.linalg.LinAlgError as ex:
            self.error(0, "Could not compute the mapping! " + str(ex))
            self.graph.removeDrawingCurves(True, True, True)
            raise

        self.rowItems = [s for s, v
                         in self.contingency.outerDistribution.items()]
        self.colItems = [s for s, v
                         in self.contingency.innerDistribution.items()]

        self.xAxisCB.clear()
        self.yAxisCB.clear()

        self.axisCount = min(self.CA.D.shape)
        axisLabels = [str(i + 1) for i in range(self.axisCount)]
        self.xAxisCB.addItems(axisLabels)
        self.yAxisCB.addItems(axisLabels)

        self.xPrincipalAxis = min(self.xPrincipalAxis, self.axisCount - 1)
        self.yPrincipalAxis = min(self.yPrincipalAxis, self.axisCount - 1)

        self.updateGraph()

    def _pointBounds(self, rowpoints, colpoints):
        """Return ``(minx, maxx, miny, maxy)`` over all non-empty point
        arrays."""
        stacked = ((rowpoints,) if rowpoints.size else ()) + \
                  ((colpoints,) if colpoints.size else ())
        allpoints = numpy.vstack(stacked)
        maxx, maxy = numpy.max(allpoints, axis=0)
        minx, miny = numpy.min(allpoints, axis=0)
        return minx, maxx, miny, maxy

    def updateGraph(self):
        """Redraw row and column points on the two selected principal axes.

        Only the first ``percRow`` / ``percCol`` percent of the points
        returned by ``PointsWithMostInertia`` are drawn (presumably those
        contributing most to inertia, as the slider tooltips state).  Does
        nothing while no analysis is available.
        """
        if self.CA is None:
            # Slider/combo callback fired before any analysis was computed.
            return

        self.graph.removeAllSelections()
        self.graph.removeDrawingCurves(True, True, True)

        colors = ColorPaletteHSV(2)
        axes = (self.xPrincipalAxis, self.yPrincipalAxis)

        rowcor = self.CA.getPrincipalRowProfilesCoordinates(axes)
        numRows = int(math.ceil(len(rowcor) * float(self.percRow) / 100.0))
        indices = self.CA.PointsWithMostInertia(rowColumn=0,
                                                axis=axes)[:numRows]
        rowpoints = numpy.array([rowcor[i] for i in indices])
        rowlabels = [self.rowItems[i] for i in indices]

        colcor = self.CA.getPrincipalColProfilesCoordinates(axes)
        numCols = int(math.ceil(len(colcor) * float(self.percCol) / 100.0))
        indices = self.CA.PointsWithMostInertia(rowColumn=1,
                                                axis=axes)[:numCols]
        colpoints = numpy.array([colcor[i] for i in indices])
        collabels = [self.colItems[i] for i in indices]

        minx, maxx, miny, maxy = self._pointBounds(rowpoints, colpoints)
        spanx = maxx - minx
        spany = maxy - miny

        # Fixed seed: the same jitter is applied on every redraw.
        random = numpy.random.mtrand.RandomState(0)

        if self.jitter > 0:
            rowpoints[:, 0] += random.normal(0, spanx * self.jitter / 100.0,
                                             (len(rowpoints),))
            rowpoints[:, 1] += random.normal(0, spany * self.jitter / 100.0,
                                             (len(rowpoints),))

            colpoints[:, 0] += random.normal(0, spanx * self.jitter / 100.0,
                                             (len(colpoints),))
            colpoints[:, 1] += random.normal(0, spany * self.jitter / 100.0,
                                             (len(colpoints),))

        # Plot the points
        self.graph.addCurve("Row points", brushColor=colors[0],
                            penColor=colors[0], size=self.pointSize,
                            enableLegend=True,
                            xData=rowpoints[:, 0], yData=rowpoints[:, 1],
                            autoScale=True, brushAlpha=self.alpha)

        for label, point in zip(rowlabels, rowpoints):
            self.graph.addMarker(label, point[0], point[1],
                                 alignment=Qt.AlignCenter | Qt.AlignBottom)

        self.graph.addCurve("Column points", brushColor=colors[1],
                            penColor=colors[1], size=self.pointSize,
                            enableLegend=True,
                            xData=colpoints[:, 0], yData=colpoints[:, 1],
                            autoScale=True, brushAlpha=self.alpha)

        for label, point in zip(collabels, colpoints):
            self.graph.addMarker(label, point[0], point[1],
                                 alignment=Qt.AlignCenter | Qt.AlignBottom)

        if self.jitter > 0:
            # Update min, max, span values again due to jittering
            minx, maxx, miny, maxy = self._pointBounds(rowpoints, colpoints)
            spanx = maxx - minx
            spany = maxy - miny

        self.graph.setAxisScale(QwtPlot.xBottom,
                                minx - spanx * 0.05, maxx + spanx * 0.05)
        self.graph.setAxisScale(QwtPlot.yLeft,
                                miny - spany * 0.05, maxy + spany * 0.05)

        self.graph.setAxisTitle(QwtPlot.xBottom,
                                "Axis %i" % (self.xPrincipalAxis + 1))
        self.graph.setAxisTitle(QwtPlot.yLeft,
                                "Axis %i" % (self.yPrincipalAxis + 1))

        # Store labeled points for selection (as lists, because sendData
        # may iterate them any number of times)
        self.colPointsLabeled = list(zip(colpoints, collabels))
        self.rowPointsLabeled = list(zip(rowpoints, rowlabels))

        inertia = self.CA.InertiaOfAxis(1)
        fmt = ("<table><tr><td>Axis %i:</td><td>%.3f%%</td></tr> "
               "<tr><td>Axis %i:</td><td>%.3f%%</td></tr></table> ")

        self.contributionInfo.setText(
            fmt % (self.xPrincipalAxis + 1, inertia[self.xPrincipalAxis],
                   self.yPrincipalAxis + 1, inertia[self.yPrincipalAxis]))
        self.graph.replot()

    def setPointSize(self):
        """Apply ``self.pointSize`` to the symbols of all plotted curves."""
        for curve in self.graph.itemList():
            if isinstance(curve, QwtPlotCurve):
                symbol = curve.symbol()
                symbol.setSize(self.pointSize)
                # NOTE(review): from Qwt 5.2 on the symbol apparently has to
                # be set back on the curve to take effect - confirm.
                if QWT_VERSION_STR >= "5.2":
                    curve.setSymbol(symbol)
        self.graph.replot()

    def updateAlpha(self):
        """Apply ``self.alpha`` to the brush and pen of all curve symbols."""
        for curve in self.graph.itemList():
            if isinstance(curve, QwtPlotCurve):
                brushColor = curve.symbol().brush().color()
                penColor = curve.symbol().pen().color()
                brushColor.setAlpha(self.alpha)
                brush = QBrush(curve.symbol().brush())
                brush.setColor(brushColor)
                penColor.setAlpha(self.alpha)
                symbol = curve.symbol()
                symbol.setBrush(brush)
                symbol.setPen(QPen(penColor))
                if QWT_VERSION_STR >= "5.2":
                    curve.setSymbol(symbol)
        self.graph.replot()

    def updateGridlines(self):
        """Show or hide the grid lines according to ``showGridlines``."""
        self.graph.enableGridXB(self.showGridlines)
        self.graph.enableGridYL(self.showGridlines)

    def sendData(self, *args):
        """Send the examples matching the selected points.

        An example is selected when its column attribute value is among the
        selected column points and/or its row attribute value is among the
        selected row points: both must match when points of both kinds are
        selected, one suffices when only one kind is selected.  Without a
        contingency or data, ``None`` is sent on both outputs.
        """
        def selectedLabels(points_labels):
            return [label for (x, y), label in points_labels
                    if self.graph.isPointSelected(x, y)]

        if self.contingency and self.data:
            colLabels = set(selectedLabels(self.colPointsLabeled))
            rowLabels = set(selectedLabels(self.rowPointsLabeled))
            colAttr = self.allAttrs[self.colAttr]
            rowAttr = self.allAttrs[self.rowAttr]

            if colLabels and rowLabels:
                def test(ex):
                    return str(ex[colAttr]) in colLabels and \
                        str(ex[rowAttr]) in rowLabels
            elif colLabels or rowLabels:
                def test(ex):
                    return str(ex[colAttr]) in colLabels or \
                        str(ex[rowAttr]) in rowLabels
            else:
                def test(ex):
                    return False

            selected = []
            remaining = []
            for ex in self.data:
                if test(ex):
                    selected.append(ex)
                else:
                    remaining.append(ex)

            # An empty example list is replaced by an empty table
            # constructed from the domain alone.
            if selected:
                selected = orange.ExampleTable(self.data.domain, selected)
            else:
                selected = orange.ExampleTable(self.data.domain)

            if remaining:
                remaining = orange.ExampleTable(self.data.domain, remaining)
            else:
                remaining = orange.ExampleTable(self.data.domain)

            self.send("Selected Data", selected)
            self.send("Remaining Data", remaining)
        else:
            self.send("Selected Data", None)
            self.send("Remaining Data", None)
class OWLearningCurveC(OWWidget):
    """Widget that draws and tabulates learning curves for the input learners.

    Input signals:
      - "Data": handled by :meth:`dataset`
      - "Learner" (multiple sources): handled by :meth:`learner`

    The learners are evaluated with
    ``Orange.evaluation.testing.learning_curve_n`` on the proportions stored
    in ``self.curvePoints`` and scored with the function selected by
    ``self.scoringF``.

    The widget attaches the following extra attributes to every learner
    object it receives:
      - ``isSelected``: the learner is selected (its curve is displayed)
      - ``curve``: graph curve of the learner (set once a curve was drawn)
      - ``score``: list of scores, one per learning curve point
      - ``color``: color assigned to the learner in :meth:`updatellb`
    """

    settingsList = ["folds", "steps", "scoringF", "commitOnChange",
                    "graphPointSize", "graphDrawLines", "graphShowGrid"]

    def __init__(self, parent=None, signalManager=None):
        """Set default settings, load stored ones and build the GUI."""
        OWWidget.__init__(self, parent, signalManager, 'LearningCurveC')

        self.inputs = [("Data", Orange.data.Table, self.dataset),
                       ("Learner", Orange.classification.Learner,
                        self.learner, Multiple)]

        self.folds = 5  # cross validation folds
        self.steps = 10  # points in the learning curve
        self.scoringF = 0  # scoring function
        self.commitOnChange = 1  # compute curve on any change of parameters
        self.graphPointSize = 5  # size of points in the graphs
        self.graphDrawLines = 1  # draw lines between points in the graph
        self.graphShowGrid = 1  # show gridlines in the graph
        self.selectedLearners = []

        self.loadSettings()

        warnings.filterwarnings("ignore", ".*builtin attribute.*",
                                Orange.core.AttributeWarning)

        # sets self.curvePoints: self.steps equidistant points
        # from 1/self.steps to 1
        self.updateCurvePoints()

        self.scoring = [("Classification Accuracy",
                         Orange.evaluation.scoring.CA),
                        ("AUC", Orange.evaluation.scoring.AUC),
                        ("BrierScore", Orange.evaluation.scoring.Brier_score),
                        ("Information Score", Orange.evaluation.scoring.IS),
                        ("Sensitivity", Orange.evaluation.scoring.sens),
                        ("Specificity", Orange.evaluation.scoring.spec)]
        # list of current learners from input channel, tuples (id, learner)
        self.learners = []
        # data on which to construct the learning curve
        self.data = None
        # list of evaluation results (one per learning curve point)
        self.curves = []
        # list of current scores, one list of scores per learner
        self.scores = []

        # GUI
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoa = OWGUI.widgetLabel(box, 'No data on input.')
        self.infob = OWGUI.widgetLabel(box, 'No learners.')

        ## class selection (classQLB)
        OWGUI.separator(self.controlArea)

        # ~SPHINX start color cb~
        self.cbox = OWGUI.widgetBox(self.controlArea, "Learners")
        self.llb = OWGUI.listBox(self.cbox, self, "selectedLearners",
                                 selectionMode=QListWidget.MultiSelection,
                                 callback=self.learnerSelectionChanged)

        self.llb.setMinimumHeight(50)
        self.blockSelectionChanges = 0
        # ~SPHINX end color cb~

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Evaluation Scores")
        scoringNames = [x[0] for x in self.scoring]
        OWGUI.comboBox(box, self, "scoringF", items=scoringNames,
                       callback=self.computeScores)

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Options")
        OWGUI.spin(box, self, 'folds', 2, 100, step=1,
                   label='Cross validation folds:  ',
                   callback=lambda: self.computeCurve()
                   if self.commitOnChange else None)
        OWGUI.spin(box, self, 'steps', 2, 100, step=1,
                   label='Learning curve points:  ',
                   callback=[self.updateCurvePoints,
                             lambda: self.computeCurve()
                             if self.commitOnChange else None])

        OWGUI.checkBox(box, self, 'commitOnChange',
                       'Apply setting on any change')
        self.commitBtn = OWGUI.button(box, self, "Apply Setting",
                                      callback=self.computeCurve, disabled=1)

        # ~SPHINX start main area tabs~
        # start of content (right) area
        tabs = OWGUI.tabWidget(self.mainArea)

        # graph tab
        tab = OWGUI.createTabPage(tabs, "Graph")
        self.graph = OWGraph(tab)
        self.graph.setAxisAutoScale(QwtPlot.xBottom)
        self.graph.setAxisAutoScale(QwtPlot.yLeft)
        tab.layout().addWidget(self.graph)
        self.setGraphGrid()

        # table tab
        tab = OWGUI.createTabPage(tabs, "Table")
        self.table = OWGUI.table(tab, selectionMode=QTableWidget.NoSelection)
        # ~SPHINX end main area tabs~

        self.resize(550, 200)

    ##############################################################################
    # slots: handle input signals

    def dataset(self, data):
        """Handle the "Data" input signal.

        With data, the learning curves of all current learners are recomputed
        and redrawn.  With ``None``, the stored data set, results and drawn
        curves are discarded.
        """
        if data is not None:
            self.infoa.setText('%d instances in input data set' % len(data))
            self.data = data
            if len(self.learners):
                self.computeCurve()
            self.replotGraph()
        else:
            self.infoa.setText('No data on input.')
            # Drop the previous data set as well; otherwise later learner
            # signals would silently be evaluated on data that is gone.
            self.data = None
            self.curves = []
            self.scores = []
            self.graph.removeDrawingCurves()
            self.graph.replot()
        self.commitBtn.setEnabled(self.data is not None)

    def learner(self, learner, id=None):
        """Handle the "Learner" input signal.

        ``learner`` is ``None`` when the source identified by ``id`` removed
        its learner; otherwise the learner either replaces the one previously
        received from the same ``id`` or is added as a new one.
        """
        ids = [x[0] for x in self.learners]
        if learner is None:  # remove a learner and corresponding results
            if id not in ids:
                return  # no such learner, removed before
            indx = ids.index(id)
            # Without data there are no curves/scores yet, so only touch
            # what actually exists.
            for results in self.curves:
                results.remove(indx)
            if indx < len(self.scores):
                del self.scores[indx]
            self._detachCurve(self.learners[indx][1])
            del self.learners[indx]
            self.updateTable()
            self.updatellb()
        elif id in ids:  # update (already seen a learner from this source)
            indx = ids.index(id)
            prevLearner = self.learners[indx][1]
            learner.isSelected = prevLearner.isSelected
            self.learners[indx] = (id, learner)
            # updatellb assigns learner.color, which drawing the curve
            # needs, so it has to run before drawLearningCurve.
            self.updatellb()
            if self.data:
                curve = self.getLearningCurve([learner])
                score = [self.scoring[self.scoringF][1](x)[0] for x in curve]
                self.scores[indx] = score
                for i in range(self.steps):
                    self.curves[i].add(curve[i], 0, replace=indx)
                learner.score = score
                self._detachCurve(prevLearner)
                self.drawLearningCurve(learner)
        else:  # add new learner
            learner.isSelected = 1
            self.learners.append((id, learner))
            if self.data:
                curve = self.getLearningCurve([learner])
                score = [self.scoring[self.scoringF][1](x)[0] for x in curve]
                self.scores.append(score)
                if len(self.curves):
                    for i in range(self.steps):
                        self.curves[i].add(curve[i], 0)
                else:
                    self.curves = curve
                learner.score = score
            self.updatellb()
            self.drawLearningCurve(learner)

        if len(self.learners):
            self.infob.setText("%d learners on input." % len(self.learners))
        else:
            self.infob.setText("No learners.")
        self.commitBtn.setEnabled(bool(self.learners))
        if self.data:
            self.updateTable()

    ##############################################################################
    # learning curve table, callbacks

    def computeCurve(self):
        """Recompute the learning curves of all learners and their scores.

        Nothing is done while there is no data on input.
        """
        if self.data is None:
            return
        learners = [x[1] for x in self.learners]
        self.curves = self.getLearningCurve(learners)
        self.computeScores()

    def computeScores(self):
        """Score the stored results with the selected scoring function.

        Fills ``self.scores`` (one list per learner), stores each list in the
        learner's ``score`` attribute and refreshes the table and the graph.
        """
        self.scores = [[] for i in range(len(self.learners))]
        scoringFunction = self.scoring[self.scoringF][1]
        for x in self.curves:
            for (i, s) in enumerate(scoringFunction(x)):
                self.scores[i].append(s)
        for (i, l) in enumerate(self.learners):
            l[1].score = self.scores[i]
        self.updateTable()
        self.replotGraph()

    def getLearningCurve(self, learners):
        """Return the results of ``learning_curve_n`` for ``learners``.

        The evaluation uses ``self.data``, ``self.folds`` and
        ``self.curvePoints``; progress is reported through a progress bar.
        """
        pb = OWGUI.ProgressBar(self, iterations=self.steps * self.folds)
        curve = Orange.evaluation.testing.learning_curve_n(
            learners, self.data, folds=self.folds,
            proportions=self.curvePoints,
            callback=pb.advance)
        pb.finish()
        return curve

    def updateCurvePoints(self):
        """Set ``self.curvePoints`` to ``self.steps`` equidistant proportions
        from ``1/self.steps`` to 1."""
        self.curvePoints = [(x + 1.) / self.steps for x in range(self.steps)]

    def updateTable(self):
        """Rebuild the score table: one column per learner, one row per
        learning curve point."""
        self.table.setColumnCount(0)
        self.table.setColumnCount(len(self.learners))
        self.table.setRowCount(self.steps)

        # set the headers
        self.table.setHorizontalHeaderLabels(
            [l.name for i, l in self.learners])
        self.table.setVerticalHeaderLabels(
            ["%4.2f" % p for p in self.curvePoints])

        # set the table contents; only cells with an available score are
        # filled, so the table can be rebuilt before scores were computed
        for col, scores in enumerate(self.scores[:len(self.learners)]):
            for row, score in enumerate(scores[:self.steps]):
                OWGUI.tableItem(self.table, row, col, "%7.5f" % score)

        for i in range(len(self.learners)):
            self.table.setColumnWidth(i, 80)

    # management of learner selection

    def updatellb(self):
        """Rebuild the learner list box and assign a color to each learner."""
        self.blockSelectionChanges = 1
        self.llb.clear()
        colors = ColorPaletteHSV(len(self.learners))
        for (i, lt) in enumerate(self.learners):
            l = lt[1]
            item = QListWidgetItem(ColorPixmap(colors[i]), l.name)
            self.llb.addItem(item)
            item.setSelected(l.isSelected)
            l.color = colors[i]
        self.blockSelectionChanges = 0

    def learnerSelectionChanged(self):
        """Show or hide curves after the selection in the list box changed."""
        if self.blockSelectionChanges:
            return
        for (i, lt) in enumerate(self.learners):
            l = lt[1]
            selected = i in self.selectedLearners
            if l.isSelected != selected:
                if l.isSelected:  # learner was deselected
                    self._detachCurve(l)
                else:  # learner was selected
                    self.drawLearningCurve(l)
                self.graph.replot()
            l.isSelected = selected

    # Graph specific methods

    def setGraphGrid(self):
        """Show or hide the graph grid lines according to ``graphShowGrid``."""
        self.graph.enableGridYL(self.graphShowGrid)
        self.graph.enableGridXB(self.graphShowGrid)

    def setGraphStyle(self, learner):
        """Apply line style, point symbol and pen color to the learner's
        curve; requires ``learner.curve`` and ``learner.color``."""
        curve = learner.curve
        if self.graphDrawLines:
            curve.setStyle(QwtPlotCurve.Lines)
        else:
            curve.setStyle(QwtPlotCurve.NoCurve)

        curve.setSymbol(
            QwtSymbol(QwtSymbol.Ellipse,
                      QBrush(QColor(0, 0, 0)), QPen(QColor(0, 0, 0)),
                      QSize(self.graphPointSize, self.graphPointSize)))

        curve.setPen(QPen(learner.color, 5))

    def drawLearningCurve(self, learner):
        """Add the learner's curve to the graph; does nothing without data."""
        if not self.data:
            return
        curve = self.graph.addCurve(
            learner.name,
            xData=self.curvePoints,
            yData=learner.score,
            autoScale=True)

        learner.curve = curve
        self.setGraphStyle(learner)
        self.graph.replot()

    def replotGraph(self):
        """Remove all drawn curves and draw the curve of every learner."""
        self.graph.removeDrawingCurves()
        for l in self.learners:
            self.drawLearningCurve(l[1])

    def _detachCurve(self, learner):
        """Detach the learner's curve from the graph if one was ever drawn."""
        curve = getattr(learner, "curve", None)
        if curve is not None:
            curve.detach()
class OWCorrespondenceAnalysis(OWWidget):
    """Correspondence analysis of two discrete attributes of the input data.

    A contingency table of the chosen row and column attributes is passed to
    orngCA.CA; the row and column profile coordinates on two selectable
    principal axes are plotted.  Examples matching the selected points are
    sent on "Selected Data", the rest on "Remaining Data".
    """

    # The attribute choices and both principal axes are stored per domain.
    contextHandlers = {"": DomainContextHandler("", ["colAttr", "rowAttr", "xPrincipalAxis", "yPrincipalAxis"])}
    settingsList = ["pointSize", "alpha", "jitter", "showGridlines"]

    def __init__(self, parent=None, signalManager=None, name="Correspondence Analysis"):
        OWWidget.__init__(self, parent, signalManager, name, wantGraph=True)

        self.inputs = [("Data", ExampleTable, self.setData)]
        self.outputs = [("Selected Data", ExampleTable), ("Remaining Data", ExampleTable)]

        self.colAttr = 0
        self.rowAttr = 1
        self.xPrincipalAxis = 0
        self.yPrincipalAxis = 1
        self.pointSize = 6
        self.alpha = 240
        self.jitter = 0
        self.showGridlines = 0
        self.percCol = 100
        self.percRow = 100
        self.autoSend = 0

        self.loadSettings()

        # State that callbacks read; initialised here so that moving a slider
        # before any data arrives does not raise AttributeError.
        self.data = None
        self.allAttrs = []
        self.CA = None
        self.rowPointsLabeled = []
        self.colPointsLabeled = []

        # GUI
        self.graph = OWGraph(self)
        self.graph.sendData = self.sendData
        self.mainArea.layout().addWidget(self.graph)

        self.controlAreaTab = OWGUI.tabWidget(self.controlArea)

        # Graph tab
        self.graphTab = graphTab = OWGUI.createTabPage(self.controlAreaTab, "Graph")
        self.colAttrCB = OWGUI.comboBox(graphTab, self, "colAttr", "Column Attribute",
                                        tooltip="Column attribute", callback=self.runCA)
        self.rowAttrCB = OWGUI.comboBox(graphTab, self, "rowAttr", "Row Attribute",
                                        tooltip="Row attribute", callback=self.runCA)
        self.xAxisCB = OWGUI.comboBox(graphTab, self, "xPrincipalAxis", "Principal Axis X",
                                      tooltip="Principal axis X", callback=self.updateGraph)
        self.yAxisCB = OWGUI.comboBox(graphTab, self, "yPrincipalAxis", "Principal Axis Y",
                                      tooltip="Principal axis Y", callback=self.updateGraph)

        box = OWGUI.widgetBox(graphTab, "Contribution to Inertia")
        self.contributionInfo = OWGUI.widgetLabel(box, "NA\nNA")

        OWGUI.hSlider(graphTab, self, "percCol", "Percent of Column Points", 1, 100, 1,
                      callback=self.updateGraph,
                      tooltip="The percent of column points with the largest contribution to inertia")
        OWGUI.hSlider(graphTab, self, "percRow", "Percent of Row Points", 1, 100, 1,
                      callback=self.updateGraph,
                      tooltip="The percent of row points with the largest contribution to inertia")

        self.zoomSelect = ZoomSelectToolbar(self, graphTab, self.graph, self.autoSend)
        OWGUI.rubber(graphTab)

        # Settings tab
        self.settingsTab = settingsTab = OWGUI.createTabPage(self.controlAreaTab, "Settings")
        OWGUI.hSlider(settingsTab, self, "pointSize", "Point Size", 3, 20, step=1,
                      callback=self.setPointSize)
        OWGUI.hSlider(settingsTab, self, "alpha", "Transparancy", 1, 255, step=1,
                      callback=self.updateAlpha)
        OWGUI.hSlider(settingsTab, self, "jitter", "Jitter Points", 0, 20, step=1,
                      callback=self.updateGraph)

        box = OWGUI.widgetBox(settingsTab, "General Settings")
        OWGUI.checkBox(box, self, "showGridlines", "Show gridlines",
                       tooltip="Show gridlines in the plot.", callback=self.updateGridlines)
        OWGUI.rubber(settingsTab)

        self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile)

        self.contingency = None
        self.contColAttr = None
        self.contRowAttr = None

        self.resize(800, 600)

    def setData(self, data=None):
        """Input handler: take new data, offer its discrete attributes, run CA.

        A warning is shown (and the widget cleared) when the data has no
        discrete variable among its variables and meta attributes.
        """
        self.closeContext("")
        self.clear()
        self.data = data
        self.warning([0])
        if data is None:
            return

        # list() keeps the concatenation independent of the container types
        candidates = list(data.domain.variables) + list(data.domain.getmetas().values())
        attrs = [attr for attr in candidates if isinstance(attr, orange.EnumVariable)]
        if not attrs:
            self.warning(0, "Data has no discrete variables!")
            self.clear()
            return

        self.allAttrs = attrs
        self.colAttrCB.clear()
        self.rowAttrCB.clear()
        icons = OWGUI.getAttributeIcons()
        for attr in attrs:
            self.colAttrCB.addItem(QIcon(icons[attr.varType]), attr.name)
            self.rowAttrCB.addItem(QIcon(icons[attr.varType]), attr.name)

        # clamp the stored indices to the attributes that are available
        last = len(attrs) - 1
        self.colAttr = max(min(last, self.colAttr), 0)
        self.rowAttr = max(min(last, self.rowAttr), min(1, last))

        self.openContext("", data)
        self.runCA()

    def clear(self):
        """Reset data, combo boxes, info label, plot and both outputs."""
        self.data = None
        self.CA = None  # do not keep plotting the previous data's result
        self.colAttrCB.clear()
        self.rowAttrCB.clear()
        self.xAxisCB.clear()
        self.yAxisCB.clear()
        self.contributionInfo.setText("NA\nNA")
        self.graph.removeDrawingCurves(True, True, True)
        self.send("Selected Data", None)
        self.send("Remaining Data", None)
        self.allAttrs = []

    def runCA(self):
        """Build the contingency table, run the analysis and redraw the plot.

        Raises numpy.linalg.LinAlgError (after reporting it through
        self.error and clearing the plot) when the decomposition fails.
        """
        if self.data is None or not self.allAttrs:
            return
        self.contColAttr = colAttr = self.allAttrs[self.colAttr]
        self.contRowAttr = rowAttr = self.allAttrs[self.rowAttr]
        self.contingency = orange.ContingencyAttrAttr(rowAttr, colAttr, self.data)
        self.error(0)
        try:
            self.CA = orngCA.CA([[c for c in row] for row in self.contingency])
        except numpy.linalg.LinAlgError as ex:
            # `ex` must be bound here; without it the message below raised
            # NameError and hid the real failure.
            self.CA = None
            self.error(0, "Could not compute the mapping! " + str(ex))
            self.graph.removeDrawingCurves(True, True, True)
            raise

        self.rowItems = [s for s, v in self.contingency.outerDistribution.items()]
        self.colItems = [s for s, v in self.contingency.innerDistribution.items()]

        self.xAxisCB.clear()
        self.yAxisCB.clear()

        self.axisCount = min(self.CA.D.shape)
        axisLabels = [str(i + 1) for i in range(self.axisCount)]
        self.xAxisCB.addItems(axisLabels)
        self.yAxisCB.addItems(axisLabels)

        self.xPrincipalAxis = min(self.xPrincipalAxis, self.axisCount - 1)
        self.yPrincipalAxis = min(self.yPrincipalAxis, self.axisCount - 1)

        self.updateGraph()

    def _topInertiaPoints(self, coords, items, rowColumn, percent, axes):
        """Return (points, labels) for the top `percent` of points by inertia."""
        count = int(math.ceil(len(coords) * float(percent) / 100.0))
        indices = self.CA.PointsWithMostInertia(rowColumn=rowColumn, axis=axes)[:count]
        points = numpy.array([coords[i] for i in indices])
        labels = [items[i] for i in indices]
        return points, labels

    def _pointBounds(self, rowpoints, colpoints):
        """Return (minx, miny, maxx, maxy) over the non-empty point arrays."""
        stacked = numpy.vstack(tuple(pts for pts in (rowpoints, colpoints) if pts.size))
        maxx, maxy = numpy.max(stacked, axis=0)
        minx, miny = numpy.min(stacked, axis=0)
        return minx, miny, maxx, maxy

    def updateGraph(self):
        """Redraw row and column points for the selected principal axes."""
        if self.CA is None:
            # nothing computed yet (no data, or the last analysis failed)
            return

        self.graph.removeAllSelections()
        self.graph.removeDrawingCurves(True, True, True)

        axes = (self.xPrincipalAxis, self.yPrincipalAxis)
        colors = ColorPaletteHSV(2)

        rowpoints, rowlabels = self._topInertiaPoints(
            self.CA.getPrincipalRowProfilesCoordinates(axes), self.rowItems, 0, self.percRow, axes)
        colpoints, collabels = self._topInertiaPoints(
            self.CA.getPrincipalColProfilesCoordinates(axes), self.colItems, 1, self.percCol, axes)

        if not (rowpoints.size or colpoints.size):
            self.colPointsLabeled = []
            self.rowPointsLabeled = []
            self.graph.replot()
            return

        minx, miny, maxx, maxy = self._pointBounds(rowpoints, colpoints)
        spanx = maxx - minx
        spany = maxy - miny

        if self.jitter > 0:
            # fixed seed: the same jitter is produced on every redraw
            rng = numpy.random.mtrand.RandomState(0)
            for points in (rowpoints, colpoints):
                if points.size:
                    points[:, 0] += rng.normal(0, spanx * self.jitter / 100.0, (len(points),))
                    points[:, 1] += rng.normal(0, spany * self.jitter / 100.0, (len(points),))

        # Plot the points
        groups = (("Row points", colors[0], rowpoints, rowlabels),
                  ("Column points", colors[1], colpoints, collabels))
        for title, color, points, labels in groups:
            if not points.size:
                continue
            self.graph.addCurve(title, brushColor=color, penColor=color,
                                size=self.pointSize, enableLegend=True,
                                xData=points[:, 0], yData=points[:, 1],
                                autoScale=True, brushAlpha=self.alpha)
            for label, point in zip(labels, points):
                self.graph.addMarker(label, point[0], point[1],
                                     alignment=Qt.AlignCenter | Qt.AlignBottom)

        if self.jitter > 0:
            # Update min, max, span values again due to jittering
            minx, miny, maxx, maxy = self._pointBounds(rowpoints, colpoints)
            spanx = maxx - minx
            spany = maxy - miny

        self.graph.setAxisScale(QwtPlot.xBottom, minx - spanx * 0.05, maxx + spanx * 0.05)
        self.graph.setAxisScale(QwtPlot.yLeft, miny - spany * 0.05, maxy + spany * 0.05)

        self.graph.setAxisTitle(QwtPlot.xBottom, "Axis %i" % (self.xPrincipalAxis + 1))
        self.graph.setAxisTitle(QwtPlot.yLeft, "Axis %i" % (self.yPrincipalAxis + 1))

        # Store labeled points for selection; lists, because sendData reads
        # them every time the selection changes
        self.colPointsLabeled = list(zip(colpoints, collabels))
        self.rowPointsLabeled = list(zip(rowpoints, rowlabels))

        inertia = self.CA.InertiaOfAxis(1)
        fmt = """<table><tr><td>Axis %i:</td><td>%.3f%%</td></tr>
                 <tr><td>Axis %i:</td><td>%.3f%%</td></tr></table>
                 """
        self.contributionInfo.setText(fmt % (self.xPrincipalAxis + 1, inertia[self.xPrincipalAxis],
                                             self.yPrincipalAxis + 1, inertia[self.yPrincipalAxis]))
        self.graph.replot()

    def setPointSize(self):
        """Apply self.pointSize to the symbol of every plotted curve."""
        for curve in self.graph.itemList():
            if isinstance(curve, QwtPlotCurve):
                symbol = curve.symbol()
                symbol.setSize(self.pointSize)
                if QWT_VERSION_STR >= "5.2":
                    curve.setSymbol(symbol)
        self.graph.replot()

    def updateAlpha(self):
        """Apply self.alpha to the brush and pen of every curve symbol."""
        for curve in self.graph.itemList():
            if isinstance(curve, QwtPlotCurve):
                brushColor = curve.symbol().brush().color()
                penColor = curve.symbol().pen().color()
                brushColor.setAlpha(self.alpha)
                brush = QBrush(curve.symbol().brush())
                brush.setColor(brushColor)
                penColor.setAlpha(self.alpha)
                symbol = curve.symbol()
                symbol.setBrush(brush)
                symbol.setPen(QPen(penColor))
                if QWT_VERSION_STR >= "5.2":
                    curve.setSymbol(symbol)
        self.graph.replot()

    def updateGridlines(self):
        """Show or hide both grids according to self.showGridlines."""
        self.graph.enableGridXB(self.showGridlines)
        self.graph.enableGridYL(self.showGridlines)

    def sendData(self, *args):
        """Send the examples matching the selected points, and the rest.

        With both row and column points selected an example must match both
        a selected column value and a selected row value; with only one kind
        selected a match on that kind suffices.  Without a contingency table
        or data, None is sent on both outputs.
        """
        if not (self.contingency and self.data):
            self.send("Selected Data", None)
            self.send("Remaining Data", None)
            return

        def selectedLabels(points_labels):
            return [label for (x, y), label in points_labels
                    if self.graph.isPointSelected(x, y)]

        colLabels = set(selectedLabels(self.colPointsLabeled))
        rowLabels = set(selectedLabels(self.rowPointsLabeled))
        colAttr = self.allAttrs[self.colAttr]
        rowAttr = self.allAttrs[self.rowAttr]

        if colLabels and rowLabels:
            def test(ex):
                return str(ex[colAttr]) in colLabels and str(ex[rowAttr]) in rowLabels
        elif colLabels or rowLabels:
            def test(ex):
                return str(ex[colAttr]) in colLabels or str(ex[rowAttr]) in rowLabels
        else:
            def test(ex):
                return False

        selected = []
        remaining = []
        for ex in self.data:
            if test(ex):
                selected.append(ex)
            else:
                remaining.append(ex)

        domain = self.data.domain
        # an empty list is not passed to the constructor; build an empty table
        selected = orange.ExampleTable(domain, selected) if selected else \
            orange.ExampleTable(domain)
        remaining = orange.ExampleTable(domain, remaining) if remaining else \
            orange.ExampleTable(domain)

        self.send("Selected Data", selected)
        self.send("Remaining Data", remaining)