def draw_state(self, state, treward, creward):
    """Redraw the grid for ``state`` and show the rewards on the control panel.

    Every cell in ``self.cells`` is emptied, an agent icon is added for each
    of the first ``NUM_AGENTS`` entries of ``state`` and a wumpus icon for the
    entry at index ``NUM_AGENTS``.  A state entry ``p`` is drawn in cell
    ``ROWS * COLS - p``.  Cells that ``gp.isLegalPosition`` reports as illegal
    are painted black, every cell is refreshed, and ``treward`` / ``creward``
    are written to ``self.rewLbl`` / ``self.crewLbl``.
    """
    cell_count = ROWS * COLS

    # start from an empty board
    for cell in self.cells:
        cell.removeAll()

    # translate state entries into cell indices: agents first, ...
    agent_cells = [cell_count - state[agent] for agent in range(NUM_AGENTS)]
    # ... then the wumpus, which is stored right after the agents
    wumpus_cell = cell_count - state[NUM_AGENTS]

    # place the icons, scaled by 0.6
    for cell_index in agent_cells:
        self.cells[cell_index].add(
            JLabel(self.scale(self.aIcon.getImage(), 0.6)))
    self.cells[wumpus_cell].add(
        JLabel(self.scale(self.wIcon.getImage(), 0.6)))

    # black out illegal positions and refresh every cell
    for cell_index, cell in enumerate(self.cells):
        # explicit comparison kept: only a result equal to False blacks a cell
        if gp.isLegalPosition(cell_index) == False:
            cell.setBackground(Color.BLACK)
        cell.updateUI()

    # total and cumulative reward on the control panel
    self.rewLbl.setText(Double.toString(treward))
    self.crewLbl.setText(Double.toString(creward))
def update(self, ss, when):
    """Print one CSV line with the latest bar of every tracked symbol.

    The line starts with ``when.getTimeInMillis()`` and is followed, for each
    symbol in ``bricSymbols`` and then for ``'XAG'``, by five comma-separated
    fields: the last OPEN, HIGH, LOW and CLOSE price and the last volume, all
    read from ``market``.  A symbol that is not contained in ``ss``
    contributes five ``-`` placeholders instead.
    """
    price_suffixes = ['-OPEN', '-HIGH', '-LOW', '-CLOSE']

    def bar_columns(symbol):
        # Text appended to the line for one symbol, leading comma included.
        if symbol not in ss:
            return ',-,-,-,-,-'
        values = [
            Double.toString(market.getLastPrice(0, symbol + suffix)).encode('utf-8')
            for suffix in price_suffixes
        ]
        values.append(
            Long.toString(market.getLastVolume(0, symbol)).encode('utf-8'))
        return ',' + ','.join(values)

    chunks = [Long.toString(when.getTimeInMillis()).encode('utf-8')]
    for symbol in bricSymbols:
        chunks.append(bar_columns(symbol))
    chunks.append(bar_columns('XAG'))

    print(''.join(chunks))
def update(self, ss, when):
    """Print one CSV line with the latest XAG bar.

    Fields, in order: ``when.getTimeInMillis()``, then the last XAG-OPEN,
    XAG-HIGH, XAG-LOW and XAG-CLOSE price and the last XAG volume, all read
    from ``market``.  ``ss`` is accepted but not consulted.
    """
    fields = [Long.toString(when.getTimeInMillis()).encode('utf-8')]

    for price_key in ['XAG-OPEN', 'XAG-HIGH', 'XAG-LOW', 'XAG-CLOSE']:
        price_text = Double.toString(market.getLastPrice(0, price_key))
        fields.append(price_text.encode('utf-8'))

    volume_text = Long.toString(market.getLastVolume(0, 'XAG'))
    fields.append(volume_text.encode('utf-8'))

    print(','.join(fields))
def update(self, ss, when):
    """Print one CSV line with the latest bars of symbols ``X`` and ``Y``.

    The line starts with ``when.getTimeInMillis()``.  For ``'X'`` and then
    ``'Y'`` it carries the last OPEN, HIGH, LOW and CLOSE price and the last
    volume read from ``market`` when the symbol is contained in ``ss``, and
    five ``-`` placeholders otherwise.
    """
    line = Long.toString(when.getTimeInMillis()).encode('utf-8')

    for symbol in ['X', 'Y']:
        if symbol not in ss:
            # nothing reported for this symbol: keep the column count stable
            line = line + ',-,-,-,-,-'
            continue

        for suffix in ['-OPEN', '-HIGH', '-LOW', '-CLOSE']:
            price = market.getLastPrice(0, symbol + suffix)
            line = line + ',' + Double.toString(price).encode('utf-8')

        volume = market.getLastVolume(0, symbol)
        line = line + ',' + Long.toString(volume).encode('utf-8')

    print(line)
def draw_state(self, state, treward, creward):
    """Paint ``state`` onto the cell grid and display the two reward values.

    ``state`` holds one position per agent (indices ``0 .. NUM_AGENTS - 1``)
    followed by the wumpus position at index ``NUM_AGENTS``; a position ``p``
    maps to cell ``ROWS * COLS - p`` of ``self.cells``.  ``treward`` and
    ``creward`` are shown on ``self.rewLbl`` and ``self.crewLbl``.
    """
    board = self.cells
    total_cells = ROWS * COLS

    # remove everything drawn by the previous call
    for index in range(len(board)):
        board[index].removeAll()

    # agents' positions
    agent_positions = []
    for agent in range(NUM_AGENTS):
        agent_positions.append(total_cells - state[agent])

    # the last element of the state is the wumpus' position
    wumpus_position = total_cells - state[NUM_AGENTS]

    # one scaled agent icon per agent
    for position in agent_positions:
        agent_icon = self.scale(self.aIcon.getImage(), 0.6)
        board[position].add(JLabel(agent_icon))

    # the scaled wumpus icon
    wumpus_icon = self.scale(self.wIcon.getImage(), 0.6)
    board[wumpus_position].add(JLabel(wumpus_icon))

    # illegal positions turn black; every cell is refreshed either way
    for index in range(len(board)):
        legal = gp.isLegalPosition(index)
        # explicit comparison kept: only a result equal to False blacks a cell
        if legal == False:
            board[index].setBackground(Color.BLACK)
        board[index].updateUI()

    # total and cumulative reward labels on the control panel
    self.rewLbl.setText(Double.toString(treward))
    self.crewLbl.setText(Double.toString(creward))
def __str__(self):
    """Return ``self.theMatrix`` formatted as a bracketed list of rows.

    Each entry is rendered with ``Double.toString``; entries and rows are
    separated by ``", "``, e.g. ``[[a, b], [c, d]]``.  A matrix without rows
    yields the empty string, and a row without columns contributes no
    brackets of its own.
    """
    matrix = self.theMatrix

    rendered_rows = []
    for row in range(matrix.getRowDimension()):
        entries = [
            Double.toString(matrix.get(row, col))
            for col in range(matrix.getColumnDimension())
        ]
        if entries:
            rendered_rows.append("[" + ", ".join(entries) + "]")
        else:
            # the row brackets are only emitted around actual entries
            rendered_rows.append("")

    if not rendered_rows:
        return ""
    return "[" + ", ".join(rendered_rows) + "]"
def __str__(self):
    """Render ``self.theMatrix`` as text of the form ``[[a, b], [c, d]]``.

    Entries are converted with ``Double.toString``.  The outer brackets are
    only produced when the matrix has at least one row, and a row's brackets
    only when it has at least one column.
    """
    matrix = self.theMatrix
    row_count = matrix.getRowDimension()

    pieces = []
    for row in range(row_count):
        # outer opening bracket before the first row, separator before the rest
        if row == 0:
            pieces.append("[")
        else:
            pieces.append(", ")

        col_count = matrix.getColumnDimension()
        for col in range(col_count):
            # same scheme for the entries inside a row
            if col == 0:
                pieces.append("[")
            else:
                pieces.append(", ")
            pieces.append(Double.toString(matrix.get(row, col)))
        if col_count > 0:
            pieces.append("]")

    if row_count > 0:
        pieces.append("]")
    return "".join(pieces)
def run_sim(self):
    """Run the simulation with the direct (0-step lookahead) controller.

    Starting from ``initBelief`` / ``initState``, each step asks
    ``valueFunction`` for the value of the current belief and an action,
    samples the next state and an observation from ``pomdpProblem``, reads the
    reward, refreshes the GUI labels and advances the belief with
    ``pomdpProblem.tao``.  When the step counter reaches 100 the discounted
    return is appended to ``stats`` and belief, state and counters are reset;
    the loop ends once the run counter exceeds 100, after which ``stats`` is
    printed.  If ``self.stepChk`` is selected, each step waits until
    ``self.stepBtn`` is no longer enabled.
    """

    def observation_index(observation):
        # Encode the factored observation; the label lookup and the belief
        # update both use the encoded value minus one.
        return pomdpProblem.sencode(observation,
                                    pomdpProblem.getnrObsV(),
                                    pomdpProblem.getobsArity()) - 1

    # starting belief state
    belief = initBelief
    current_state = initState
    step = 1
    discounted_return = 0
    run = 1

    # execution loop (termination on catching the wumpus is disabled; runs
    # are cut by the step counter below)
    while run <= 100:
        # extract action from the direct controller, i.e. 0-step lookahead
        expected_value = valueFunction.V(belief)
        action = valueFunction.directControl(belief)
        print("value of b %f and selected action %d" % (expected_value, action))

        # show the action that is about to be executed
        self.nactLbl.setText(pomdpProblem.getactStr(action))
        time.sleep(1)

        # sample new state and observation, and calculate the reward
        print(current_state)
        next_state = pomdpProblem.sampleNextState(current_state, action)
        print(" sampled state is")
        print(next_state)
        observation = pomdpProblem.sampleObservation(current_state,
                                                     next_state, action)
        print(" sampled o is")
        print(observation)
        reward = pomdpProblem.getReward(current_state, action)
        print(reward)

        # draw the new state and display what is happening
        self.draw_state(next_state)
        self.lactLbl.setText(pomdpProblem.getactStr(action))
        self.nactLbl.setText("nil")
        self.rewLbl.setText(Double.toString(reward))
        discounted_return = discounted_return + reward * gamma ** step
        self.crewLbl.setText(Double.toString(discounted_return))
        self.obsLbl.setText(
            pomdpProblem.getobsStr(observation_index(observation)))

        # iterate
        belief = pomdpProblem.tao(belief, action,
                                  observation_index(observation))
        current_state = next_state
        step = step + 1

        # step button: wait until the button is no longer enabled
        if self.stepChk.isSelected():
            self.stepBtn.setEnabled(True)
            while self.stepBtn.isEnabled():
                time.sleep(1)

        # smooth the sim a little in case we're not stepping
        time.sleep(1)

        # close the run once the step counter reaches 100
        if step == 100:
            run = run + 1
            stats.append(discounted_return)
            belief = initBelief
            current_state = initState
            discounted_return = 0
            step = 1

    print(stats)
def run_sim(self):
    """Simulate repeated runs of the direct controller and collect returns.

    Every step: evaluate ``valueFunction`` on the current belief, pick the
    action from ``valueFunction.directControl``, sample the next state and an
    observation from ``pomdpProblem``, read the reward, update the GUI labels
    and move the belief forward with ``pomdpProblem.tao``.  The discounted
    return accumulates ``reward * gamma ** step``.  When the step counter
    hits 100 the return is stored in ``stats`` and everything is reset for
    the next run; after the run counter passes 100 ``stats`` is printed.
    """
    # starting belief state
    belief = initBelief
    state_now = initState
    step_no = 1
    total_return = 0
    run_no = 1

    # execution loop; ending a run on a catch is disabled, so runs are
    # bounded by the step counter only
    while run_no <= 100:
        # 0-step lookahead: value of the belief and the greedy action
        belief_value = valueFunction.V(belief)
        chosen = valueFunction.directControl(belief)
        print("value of b %f and selected action %d" % (belief_value, chosen))

        # announce the upcoming action
        self.nactLbl.setText(pomdpProblem.getactStr(chosen))
        time.sleep(1)

        # sample new state, observation, and calculate reward
        print(state_now)
        state_next = pomdpProblem.sampleNextState(state_now, chosen)
        print(" sampled state is")
        print(state_next)
        sampled_obs = pomdpProblem.sampleObservation(state_now, state_next,
                                                     chosen)
        print(" sampled o is")
        print(sampled_obs)
        reward = pomdpProblem.getReward(state_now, chosen)
        print(reward)

        # draw new state, and display what's happening
        self.draw_state(state_next)
        self.lactLbl.setText(pomdpProblem.getactStr(chosen))
        self.nactLbl.setText("nil")
        self.rewLbl.setText(Double.toString(reward))
        total_return = total_return + reward * gamma ** step_no
        self.crewLbl.setText(Double.toString(total_return))

        # observation label: encoded observation, shifted down by one
        label_obs = pomdpProblem.sencode(sampled_obs,
                                         pomdpProblem.getnrObsV(),
                                         pomdpProblem.getobsArity()) - 1
        self.obsLbl.setText(pomdpProblem.getobsStr(label_obs))

        # belief update with the same encoding, computed again as before
        update_obs = pomdpProblem.sencode(sampled_obs,
                                          pomdpProblem.getnrObsV(),
                                          pomdpProblem.getobsArity()) - 1
        belief = pomdpProblem.tao(belief, chosen, update_obs)
        state_now = state_next
        step_no = step_no + 1

        # stepping mode: enable the button and wait until it is disabled again
        if self.stepChk.isSelected():
            self.stepBtn.setEnabled(True)
            while self.stepBtn.isEnabled():
                time.sleep(1)

        # smooth the sim a little in case we're not stepping
        time.sleep(1)

        # finish the run once the step counter reaches 100
        if step_no == 100:
            run_no = run_no + 1
            stats.append(total_return)
            belief = initBelief
            state_now = initState
            total_return = 0
            step_no = 1

    print(stats)