class AnalyzerApp(model.Background): def on_initialize(self, event): i = InputDescriptor() i.addContinuousInput(200, False) self.agent = Agent(i, 1) self.agent.applyIdealInitialNoiseToTrainableWeights() learningRate = 0.01 eTraceTimeConstant = 0.1 discountTimeConstant = 0.1 self.agent.setLearningRate(learningRate) self.agent.setETraceTimeConstant(eTraceTimeConstant) self.agent.setTDDiscountTimeConstant(discountTimeConstant) self.stepSize = 0.1 self.components.updateTimeTextField.text = str(self.stepSize) self.elapsedSeconds = 0.0 self.timeBuffer = 0.0 # Assume one tick mark per second. self.tickWidth = 200 self.tickXBase = 0 self.ticksPerScreen = self.components.canvas.size[0] / self.tickWidth self.incrementWidth = self.tickWidth / (1 / self.stepSize) comp = self.components comp.canvas.foregroundColor = (0, 0, 0) comp.canvas.backgroundColor = (200, 200, 200) # y = 0 at the bottom of the canvas. self.inputPlot = Plot(comp.canvas, 700, 160, Color(128, 128, 128), self.incrementWidth) self.rewardPlot = Plot(comp.canvas, 500, 160, Color(0, 0, 255), self.incrementWidth) self.valEstPlot = Plot(comp.canvas, 300, 160, Color(0, 255, 0), self.incrementWidth) self.tdErrorPlot = Plot(comp.canvas, 100, 160, Color(255, 0, 0), self.incrementWidth) # Initialize parts of the GUI that need special starting values. comp.learningRateTextField.text = str(learningRate) comp.learningRateSlider.value = 100 * learningRate comp.eTraceTCTextField.text = str(eTraceTimeConstant) comp.eTraceTCSlider.value = 100 * eTraceTimeConstant comp.discountTCTextField.text = str(discountTimeConstant) comp.discountTCSlider.value = 100 * discountTimeConstant # Draw the initial border and axes. self.redraw() def drawBorder(self): canvas = self.components.canvas canvas.drawLine((0, 0), (canvas.size[0]-1, 0)) canvas.drawLine((canvas.size[0]-1, 0), (canvas.size[0]-1, canvas.size[1]-1)) canvas.drawLine((canvas.size[0]-1, canvas.size[1]-1), (0, canvas.size[1]-1)) canvas.drawLine((0, canvas.size[1]-1), (0, 0)) def redraw(self): self.components.canvas.clear() self.components.canvas.foregroundColor = (0, 0, 0) self.drawBorder() self.drawTickMarks() self.inputPlot.draw() self.rewardPlot.draw() self.valEstPlot.draw() self.tdErrorPlot.draw() def shiftLeft(self): self.tickXBase -= self.incrementWidth if self.tickXBase < -self.tickWidth: self.tickXBase = 0 self.inputPlot.shiftLeft() self.rewardPlot.shiftLeft() self.valEstPlot.shiftLeft() self.tdErrorPlot.shiftLeft() def drawTickMarks(self): canvas = self.components.canvas x = self.tickXBase while x < canvas.size[0]: canvas.drawLine((x, canvas.size[1] - 1), (x, canvas.size[1] - 10)) x += self.tickWidth def on_learningRateSlider_select(self, event): comp = self.components newValue = 0.01 * float(comp.learningRateSlider.value) self.agent.setLearningRate(newValue) comp.learningRateTextField.text = str(newValue) def on_eTraceTCSlider_select(self, event): comp = self.components newValue = 0.01 * float(comp.eTraceTCSlider.value) self.agent.setETraceTimeConstant(newValue) comp.eTraceTCTextField.text = str(newValue) def on_discountTCSlider_select(self, event): comp = self.components newValue = 0.01 * float(comp.discountTCSlider.value) self.agent.setTDDiscountTimeConstant(newValue) comp.discountTCTextField.text = str(newValue) def on_inputSlider_select(self, event): self.components.inputTextField.text = str(0.01 * float(self.components.inputSlider.value)) def on_rewardSlider_select(self, event): self.components.rewardTextField.text = str(0.01 * float(self.components.rewardSlider.value)) ## def on_inputTextField_textUpdate(self, event): ## pass ## def on_incrBtn_mouseClick(self, event): ## pass ## #startValue = int(self.components.field1.text) ## #endValue = startValue + 1 ## #self.components.field1.text = str(endValue) ## ## def on_decrBtn_mouseClick(self, event): ## pass ## #startValue = int(self.components.field1.text) ## #endValue = startValue - 1 ## #self.components.field1.text = str(endValue) def on_updateButton_mouseClick(self, event): input = float(self.components.inputTextField.text) reward = float(self.components.rewardTextField.text) self.agent.setContinuousInput(0, input) valueEst = 0 tdError = 0 self.timeBuffer += float(self.components.updateTimeTextField.text) while self.timeBuffer >= self.stepSize: self.agent.update(self.stepSize, reward) valueEst = self.agent.getCurrentValueEstimation() tdError = self.agent.getTDError() self.inputPlot.addPoint(input) self.rewardPlot.addPoint(reward) self.valEstPlot.addPoint(valueEst) self.tdErrorPlot.addPoint(tdError) self.timeBuffer -= self.stepSize self.elapsedSeconds += self.stepSize # Shift the graph left if we get close to the right side. if self.elapsedSeconds > 0.9 * self.ticksPerScreen: self.shiftLeft() self.components.valEstTextField.text = '%.2f' % valueEst self.components.tdErrorTextField.text = '%.2f' % tdError self.redraw()