def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run a single episode, calling agent.update after every transition.

    Args:
        agent: learner exposing update(state, action, nextState, reward).
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction.
        discount: per-step discount factor applied to the accumulated return.
        decision: callable state -> action (usually the agent's policy).
        display, message, pause: UI callbacks (render state, log text, wait).
        episode: episode number, used only in log messages.

    Returns:
        The discounted return accumulated over the episode.

    Raises:
        Exception: if `decision` returns None instead of an action.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE (no actions available)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        agent.update(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, testFlag=0, foldN=0):
    """Run one training (testFlag=0) or evaluation (testFlag!=0) episode.

    Args:
        agent: learner; observeTransition is called only when training.
        environment: world whose reset takes (testFlag, foldN) and whose
            doAction returns (nextState, reward, AOP, explore).
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message: unused here (display is intentionally disabled).
        pause: UI callback invoked each step.
        episode: episode number (unused in this variant).
        testFlag: 0 for a training episode, anything else for testing.
        foldN: cross-validation fold index forwarded to environment.reset.

    Returns:
        (returns, reward, AOP, explore): the discounted return plus the last
        transition's reward / AOP / exploration flag from doAction, or None
        for each if no transition happened before the terminal state.
    """
    returns = 0
    totalDiscount = 1.0
    # BUG FIX: initialise the per-step outputs so the terminal return below
    # cannot raise UnboundLocalError when the very first state is terminal.
    reward = AOP = explore = None
    environment.reset(testFlag, foldN)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            return returns, reward, AOP, explore  # returns: accumulative rewards, reward:

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward, AOP, explore = environment.doAction(action)

        if testFlag == 0:
            # training episode: UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
        else:
            # testing episode: no learning update
            pass

        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, train=False):
    """Run one episode over rich state objects, optionally training the agent.

    States are expected to expose x, y, direction, num_explored, history and
    is_terminal (assumption drawn from the attribute accesses below — confirm
    against the environment's state class).

    Args:
        agent: learner; observeTransition is called only when train is True
            and the next state is non-terminal.
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.
        train: when True, feed transitions to the learner.

    Returns:
        The discounted return accumulated over the episode.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # Snapshot the state before acting: the environment may mutate it in
        # place, and the learner must see the pre-action state.
        state_copy = copy.deepcopy(state)
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: TOOK " + str(len(state.history)) +
                    "STEPS; RETURN WAS " + str(returns) + "\n")
            message(str(state.history))
            message(str(len(state.history)))
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state_copy.x) + str(state_copy.y) +
                str(state_copy.direction) + str(state_copy.num_explored) + " " +
                str(len(state_copy.history)) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState.x) + str(nextState.y) +
                str(nextState.direction) + str(nextState.num_explored) + " " +
                str(len(nextState.history)) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        # print_grid(state_copy.explored_grid)
        # print_grid(nextState.explored_grid)
        if 'observeTransition' in dir(agent) and not nextState.is_terminal and train:
            agent.observeTransition(state_copy, action, nextState, reward)
            # Weight dump only makes sense for a learning agent during training.
            print(agent.weights)
        print("############################################")
        print("############################################\n")
        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, qtype, environment, discount, decision, display, message, pause, episode):
    """Run one episode where the environment may substitute the chosen action.

    Args:
        agent: learner; observeTransition(state, action2, nextState, reward,
            environment) is called if the agent defines it.
        qtype: opaque mode flag forwarded to environment.doAction.
        environment: world whose doAction(action, qtype) returns
            (nextState, reward, action2) — action2 is the action actually taken.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        The discounted return. Tuple rewards are summed before accumulation.

    Raises:
        Exception: if `decision` returns None.
    """
    ###########################
    # GET THE GRIDWORLD
    ###########################
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    #for state in mdp.getStates():
    #    display(state)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION; the environment may take a different action (action2)
        nextState, reward, action2 = environment.doAction(action, qtype)
        message("Started in state: "+str(state)+
                "\nSpecified action: "+str(action)+
                "\nTook action: "+str(action2)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")

        # UPDATE LEARNER with the action actually executed
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action2, nextState, reward, environment)

        # Multi-component rewards are collapsed to their sum for the return.
        if isinstance(reward, tuple):
            reward = sum(reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode, capped at MAX_TIMESTEPS steps.

    Args:
        agent: learner; observeTransition is called if the agent defines it.
            It is also passed to environment.reset.
        environment: world whose reset takes the agent.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message: UI callbacks (pause is intentionally disabled).
        pause: unused in this variant.
        episode: episode number; episode 1 blocks on input() before starting
            (interactive debugging aid).

    Returns:
        The discounted return (also returned early when the step cap is hit).

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    timestep = 0
    MAX_TIMESTEPS = 20
    while True:
        # Hard cap on episode length.
        if timestep >= MAX_TIMESTEPS:
            return returns

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        #pause()
        # Wait for the user before the very first step of the first episode.
        if timestep == 0 and episode == 1:
            input("")

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        #message("Started in state: "+str(state)+
        #        "\nTook action: "+str(action)+
        #        "\nEnded in state: "+str(nextState)+
        #        "\nGot reward: "+str(reward)+"\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode, additionally probing the learner's value at state (2, 2).

    Args:
        agent: learner; observeTransition's return value is captured when the
            current state equals (2, 2).
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        (returns, stateAndValues) where stateAndValues is -1 if state (2, 2)
        was never visited, else [0, val, 0, 0] with val being the last
        observeTransition result at that state.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    stateAndValues = -1  # sentinel: state (2, 2) not visited yet
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return (returns, stateAndValues)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER, capturing its value at the probe state (2, 2)
        val = -1
        if 'observeTransition' in dir(agent):
            val = agent.observeTransition(state, action, nextState, reward)
        if state == (2, 2):
            stateAndValues = [0, val, 0, 0]

        returns += reward * totalDiscount
        totalDiscount *= discount
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run a single episode and return its discounted return.

    Args:
        agent: learner; startEpisode / observeTransition / stopEpisode are
            called if the agent defines them.
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        The discounted return accumulated over the episode.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception("Error: Agent returned None action")

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message(
            "Started in state: " + str(state) +
            "\nTook action: " + str(action) +
            "\nEnded in state: " + str(nextState) +
            "\nGot reward: " + str(reward) + "\n"
        )

        # UPDATE LEARNER
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run a single episode and return its discounted return.

    Args:
        agent: learner; startEpisode / observeTransition / stopEpisode are
            called if the agent defines them.
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction.
        discount: per-step discount factor.
        decision: callable state -> action.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        The discounted return accumulated over the episode.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # Display current state
        state = environment.getCurrentState()
        display(state)
        pause()

        # End if in a terminal state
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # Get action (usually from agent)
        action = decision(state)
        # PEP 8: compare to None with `is`, not `==`.
        if action is None:
            raise Exception("Error: Agent returned None action")

        # Execute action
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # Update learner
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, rates, offset, pmNum, locks, pllock, q, plinfo):
    """Run one episode for a VM-placement environment, threading shared state.

    Args:
        agent: learner; startEpisode / observeTransition are called if defined.
        environment: world exposing isTerminal / getCurrentState / doAction
            and a baeplatform.pm handle (used only for log output).
        discount: per-step discount factor.
        decision: callable state -> action.
        display: unused in this variant.
        message, pause: UI callbacks.
        episode: episode number, used in log messages.
        rates: rate table (unused here beyond being advanced via offset —
            presumably consumed inside the environment; confirm with callers).
        offset: running index, incremented once per step and returned.
        pmNum, locks, pllock, q, plinfo: shared-resource handles forwarded
            verbatim to environment.doAction.

    Returns:
        (returns, offset): the discounted return and the advanced offset.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    #message("BEGINNING EPISODE: "+str(episode)+"\n")
    pm = environment.baeplatform.pm
    while True:
        # END IF IN A TERMINAL STATE
        if environment.isTerminal():
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns, offset
        #print rates[offset%len(rates)]

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        pause()
        offset += 1

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION (shared locks/queue handles are forwarded verbatim)
        nextState, reward = environment.doAction(state, action, pmNum, locks, pllock, q, plinfo)
        message("VM " + str(pm.id) + " Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, agent2, environment, discount, decision, decision2, display, message, pause, episode):
    """Run one episode with two agents acting simultaneously.

    Args:
        agent, agent2: the two learners; startEpisode / observeTransition /
            stopEpisode are called if defined.
        environment: world exposing reset, getCurrentState(playerIndex),
            getPossibleActions, and twoAgentDoAction(a1, a2) returning
            (next1, next2, reward1, reward2).
        discount: per-step discount factor.
        decision, decision2: per-agent policies, state -> action.
        display: callback taking both players' states.
        message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        Agent 1's discounted return once either player reaches a terminal
        state. (NOTE(review): agent 2's return and totalDiscount2 are computed
        but never returned / never used — presumably leftover; confirm.)

    Raises:
        Exception: if decision2 returns None.
    """
    returns = 0
    returns2 = 0
    totalDiscount2 = 0
    totalDiscount = 1.0
    a1Done = False
    a2Done = False
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    if 'startEpisode' in dir(agent2):
        agent2.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE (one state per player)
        state = environment.getCurrentState(1)
        state2 = environment.getCurrentState(2)
        display(state, state2)
        pause()

        # Mark each player done once it has no actions left.
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            a1Done = True
        actions = environment.getPossibleActions(state2)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            a2Done = True

        player1NextState = None
        player2NextState = None
        reward1 = None
        reward2 = None
        action1 = None
        action2 = None
        if (a1Done == False and a2Done == False):
            action1 = decision(state)
            action2 = decision2(state2)
            # BUG FIX: the original referenced an undefined name `env` here.
            result = environment.twoAgentDoAction(action1, action2)
            player1NextState = result[0]
            player2NextState = result[1]
            reward1 = result[2]
            reward2 = result[3]

            # EXECUTE ACTION — agent 1's update and bookkeeping
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action1, player1NextState, reward1)
            message("Agent 1 Started in state: " + str(state) +
                    "\nTook action: " + str(action1) +
                    "\n.Ended in state: " + str(player1NextState) +
                    "\nGot reward: " + str(reward1) + "\n")
            returns += reward1 * totalDiscount
            totalDiscount *= discount

            # GET ACTION (USUALLY FROM AGENT)
            action = decision2(state2)
            if action is None:
                # BUG FIX: raising a plain string is a TypeError in Python 3.
                raise Exception('Error: Agent returned None action')
            # EXECUTE ACTION — agent 2's update and bookkeeping
            message("Agent 2 Started in state: " + str(state2) +
                    "\nTook action: " + str(action2) +
                    "\nEnded in state: " + str(player2NextState) +
                    "\nGot reward: " + str(reward2) + "\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent2):
                agent2.observeTransition(state2, action, player2NextState, reward2)
            returns2 += reward2 * totalDiscount
            totalDiscount2 *= discount

        if a1Done or a2Done:
            return returns
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
    if 'stopEpisode' in dir(agent2):
        agent2.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, update=True, bounded=False):
    """Run one episode supporting n-step agents and a time bound.

    Args:
        agent: learner. With update=True it must expose update(...) (the
            n-step path) and may expose should_end_episode() to terminate;
            observeTransition / startEpisode / stopEpisode are called if
            defined.
        environment: world whose reset takes the agent.
        discount: per-step discount factor.
        decision: callable state -> action.
        display: optional render callback (skipped when None).
        message, pause: UI callbacks (pause is intentionally disabled).
        episode: episode number, used in log messages.
        update: True for an n-step agent that decides its own episode end;
            False to terminate on the terminal state / time bound.
        bounded: when True, also stop after MAX_TIMESTEPS steps.

    Returns:
        (timestep, returns): steps taken and the discounted return.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode(environment.getCurrentState())
    #message("BEGINNING EPISODE: "+str(episode)+"\n")
    timestep = 0
    MAX_TIMESTEPS = 40
    while True:
        #print("timestep ", timestep)
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        #pause()
        #if timestep == 0 and episode == 1:
        #if not update:
        #    input("")

        # An n-step agent can declare the episode over itself.
        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            #message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0:
                # reached terminal state but we are using n-step agent
                agent.update(state, None, None, None, update)
                # keep going until n-step agent says stop
                continue  # for n-step agent
            elif not update:
                # not n-step agent so terminate on goal state or time exceeded
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        #print(action)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        #message("Started in state: "+str(state)+
        #        "\nTook action: "+str(action)+
        #        "\nEnded in state: "+str(nextState)+
        #        "\nGot reward: "+str(reward)+"\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)

        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode while appending the visited-state trajectory to a file.

    Args:
        agent: learner; startEpisode / observeTransition / stopEpisode are
            called if defined.
        environment: world exposing reset / getCurrentState /
            getPossibleActions / doAction; states index as (x, y).
        discount: per-step discount factor.
        decision: callable state -> action; the special action 'stay' skips
            environment.doAction.
        display, message, pause: UI callbacks.
        episode: episode number, used in log messages.

    Returns:
        The discounted return when a terminal state is reached; otherwise the
        function records up to `stepsLimit` states in trajectories.txt and
        recurses to run another episode.
        NOTE(review): `stepsLimit` is not defined in this function — it is
        presumably a module-level global; confirm it exists, otherwise the
        step-limit check raises NameError.

    Raises:
        Exception: if `decision` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    f = open("trajectories.txt", "a")
    appendString = '['
    steps = 0
    grid_width = 10
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            # BUG FIX: close the trajectory file before returning so the
            # handle is not leaked on the terminal path.
            f.close()
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # BUG FIX: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION ('stay' leaves the state unchanged)
        nextState = state
        # BUG FIX: give 'stay' a zero reward so the logging and return
        # accumulation below cannot hit an UnboundLocalError.
        reward = 0
        if action != 'stay':
            nextState, reward = environment.doAction(action)
        message("Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

        # Record the flattened state index (row-major over a 10-wide grid).
        stateNumber = (state[1]*grid_width + state[0])
        appendString += str(stateNumber)
        steps += 1
        if steps == stepsLimit:
            break
        appendString += ','
    appendString += '],'
    print("AppendString ", appendString)
    f.write(appendString+"\n")
    f.close()
    # NOTE(review): unbounded self-recursion — each truncated episode starts
    # another one; confirm an external stop condition exists.
    runEpisode(agent, environment, discount, decision, display, message, pause, episode)
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()