Beispiel #1
0
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    """Run one episode in *environment* and return its discounted return.

    Each iteration displays the current state, ends the episode when no
    actions are available (terminal state), otherwise asks ``decision`` for
    an action, executes it, and feeds the transition to ``agent.update``.

    Args:
        agent: learner with an ``update(state, action, nextState, reward)``
            method, called once per transition.
        environment: must provide ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable (usually the agent's policy).
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked after each display.
        episode: episode number, used only for logging.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Raising a plain string is a TypeError in Python 3; raise a
            # real exception with the same text instead.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        agent.update(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
Beispiel #2
0
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, testFlag=0, foldN=0):
    """Run one training or testing episode and return episode statistics.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` are called if
            present. Updates happen only when ``testFlag == 0`` (training).
        environment: provides ``reset(testFlag, foldN)``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: unused here (state display is intentionally disabled).
        message: unused here (kept for signature compatibility).
        pause: callable invoked once per step.
        episode: episode number (unused except by callers).
        testFlag: 0 for a training episode, nonzero for testing.
        foldN: fold index forwarded to ``environment.reset``.

    Returns:
        Tuple ``(returns, reward, AOP, explore)``: the accumulated discounted
        return plus the values from the final transition (``None`` each if
        the episode started in a terminal state). The semantics of ``AOP``
        and ``explore`` are defined by ``environment.doAction``.
        # NOTE(review): AOP/explore meaning is not visible from this code —
        # confirm against the environment implementation.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    # Pre-initialise last-transition values so an episode that starts in a
    # terminal state returns cleanly instead of raising NameError.
    reward = AOP = explore = None
    environment.reset(testFlag, foldN)

    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    while True:

        # DISPLAY CURRENT STATE (display() deliberately not called here)
        state = environment.getCurrentState()
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            return returns, reward, AOP, explore  # returns: accumulative rewards, reward:

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward, AOP, explore = environment.doAction(action)

        if testFlag == 0:  # training episode
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
        else:  # testing episode: no learning update
            pass

        returns += reward * totalDiscount
        totalDiscount *= discount
Beispiel #3
0
def runEpisode(agent,
               environment,
               discount,
               decision,
               display,
               message,
               pause,
               episode,
               train=False):
    """Run one episode over states that track an exploration history.

    States are expected to expose ``x``, ``y``, ``direction``,
    ``num_explored``, ``history`` and ``is_terminal`` attributes (used for
    logging and the learner-update guard).

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` are called if
            present. ``agent.weights`` is printed each step for debugging.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.
        train: when True, transitions are fed to ``agent.observeTransition``.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # Snapshot the state before acting — presumably doAction mutates the
        # live state in place, so the pre-action copy is kept for the
        # learner update. NOTE(review): relies on a module-level
        # `import copy` — confirm it exists at file top.
        state_copy = copy.deepcopy(state)
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: TOOK " +
                    str(len(state.history)) + "STEPS; RETURN WAS " +
                    str(returns) + "\n")
            message(str(state.history))
            message(str(len(state.history)))
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state_copy.x) + str(state_copy.y) +
                str(state_copy.direction) + str(state_copy.num_explored) +
                " " + str(len(state_copy.history)) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState.x) +
                str(nextState.y) + str(nextState.direction) +
                str(nextState.num_explored) + " " +
                str(len(nextState.history)) + "\nGot reward: " + str(reward) +
                "\n")
        # UPDATE LEARNER (only in training mode, and never from a terminal
        # successor)
        # print_grid(state_copy.explored_grid)
        # print_grid(nextState.explored_grid)

        if 'observeTransition' in dir(
                agent) and not nextState.is_terminal and train:
            agent.observeTransition(state_copy, action, nextState, reward)
        print(agent.weights)
        print("############################################")
        print("############################################\n")

        returns += reward * totalDiscount
        totalDiscount *= discount
Beispiel #4
0
def runEpisode(agent, qtype, environment, discount, decision, display, message, pause, episode):
    """Run one episode where the environment may substitute the chosen action.

    ``environment.doAction(action, qtype)`` returns ``(nextState, reward,
    action2)`` where ``action2`` is the action actually executed; the learner
    is updated with ``action2``. A tuple reward is summed before being
    discounted into the return (but the learner sees the raw reward).

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        qtype: opaque value forwarded to ``environment.doAction``.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()

    # for state in mdp.getStates():
    #   display(state)

    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward, action2 = environment.doAction(action, qtype)
        message("Started in state: " + str(state) +
                "\nSpecified action: " + str(action) +
                "\nTook action: " + str(action2) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # UPDATE LEARNER with the action actually executed (raw reward,
        # summed only afterwards for the scalar return)
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action2, nextState, reward, environment)
        if isinstance(reward, tuple):
            reward = sum(reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable — the loop above only exits via return, so
    # stopEpisode is never called here. Presumably it was meant to run before
    # the terminal return; confirm with callers before moving it.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    """Run one episode, capped at MAX_TIMESTEPS steps.

    Ends either when the state has no available actions (terminal) or after
    20 steps, returning the discounted return accumulated so far. On the
    very first step of episode 1 it blocks on ``input("")`` (a manual
    "press Enter to start" gate).

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if
            present; also passed to ``environment.reset``.
        environment: provides ``reset(agent)``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: unused here (the per-step pause is commented out).
        episode: episode number, used for logging and the input() gate.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    timestep = 0
    MAX_TIMESTEPS = 20  # hard cap on episode length
    while True:
        if timestep >= MAX_TIMESTEPS:
            return returns

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        # pause()
        if timestep == 0 and episode == 1:
            input("")

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # message("Started in state: "+str(state)+
        #        "\nTook action: "+str(action)+
        #        "\nEnded in state: "+str(nextState)+
        #        "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

        timestep += 1

    # NOTE(review): unreachable — both exits above return directly, so
    # stopEpisode never runs. Presumably intended before the returns;
    # confirm with callers before moving it.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
Beispiel #6
0
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    """Run one episode, also capturing a learner value for state (2, 2).

    Identical to the standard gridworld episode loop, except the value
    returned by ``agent.observeTransition`` is recorded whenever the current
    state equals ``(2, 2)``, and returned alongside the discounted return.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        Tuple ``(returns, stateAndValues)`` where ``stateAndValues`` is -1 if
        state (2, 2) was never visited, otherwise ``[0, val, 0, 0]`` with
        ``val`` the last observeTransition result from that state.
        # NOTE(review): the [0, val, 0, 0] layout looks probe-specific —
        # confirm its meaning with the caller.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    stateAndValues = -1  # -1 sentinel: state (2, 2) not visited yet
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return (returns, stateAndValues)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # UPDATE LEARNER

        val = -1
        if 'observeTransition' in dir(agent):
            val = agent.observeTransition(state, action, nextState, reward)

        if state == (2, 2):
            stateAndValues = [0, val, 0, 0]

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable — the loop only exits via return, so
    # stopEpisode never runs. Presumably intended before the terminal
    # return; confirm with callers before moving it.
    if 'stopEpisode' in dir(agent):

        agent.stopEpisode()
Beispiel #7
0
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode in *environment* and return its discounted return.

    Displays each state, stops when the state has no available actions
    (terminal), otherwise asks ``decision`` for an action, executes it, and
    feeds the transition to ``agent.observeTransition`` if the agent has one.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception("Error: Agent returned None action")

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message(
            "Started in state: "
            + str(state)
            + "\nTook action: "
            + str(action)
            + "\nEnded in state: "
            + str(nextState)
            + "\nGot reward: "
            + str(reward)
            + "\n"
        )
        # UPDATE LEARNER
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable — the loop only exits via return, so
    # stopEpisode never runs. Presumably intended before the terminal
    # return; confirm with callers before moving it.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
Beispiel #8
0
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    """Run one episode in *environment* and return its discounted return.

    Displays each state, stops when the state has no available actions
    (terminal), otherwise asks ``decision`` for an action, executes it, and
    feeds the transition to ``agent.observeTransition`` if the agent has one.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        The accumulated discounted return.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # Display current state
        state = environment.getCurrentState()
        display(state)
        pause()

        # End if in a terminal state
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # Get action (usually from agent)
        action = decision(state)
        if action is None:  # identity comparison per PEP 8, not ==
            raise Exception("Error: Agent returned None action")

        # Execute action
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # Update learner
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable — the loop only exits via return, so
    # stopEpisode never runs. Presumably intended before the terminal
    # return; confirm with callers before moving it.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, rates, offset, pmNum, locks, pllock, q, plinfo):
    """Run one VM/PM-scheduling episode and return its return and new offset.

    Termination is decided by ``environment.isTerminal()`` rather than an
    empty action list. ``offset`` is incremented once per step and returned
    so the caller can keep a running index into ``rates``.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        environment: provides ``reset``, ``isTerminal``, ``getCurrentState``,
            ``doAction`` and a ``baeplatform.pm`` object with an ``id``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: unused in this variant.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.
        rates: rate table; only referenced by commented-out debug code here.
        offset: starting step index; incremented per step and returned.
        pmNum, locks, pllock, q, plinfo: opaque values forwarded to
            ``environment.doAction``.
            # NOTE(review): their semantics are not visible from this code.

    Returns:
        Tuple ``(returns, offset)``.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    # message("BEGINNING EPISODE: "+str(episode)+"\n")
    pm = environment.baeplatform.pm
    while True:

        # END IF IN A TERMINAL STATE
        if environment.isTerminal():
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns, offset
        # print rates[offset%len(rates)]
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        pause()
        offset += 1
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(state, action, pmNum, locks, pllock, q, plinfo)
        message("VM " + str(pm.id) + " Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable — the loop only exits via return, so
    # stopEpisode never runs. Presumably intended before the terminal
    # return; confirm with callers before moving it.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, agent2, environment, discount, decision, decision2,
               display, message, pause, episode):
    """Run one episode with two agents acting simultaneously.

    Each step both agents pick an action and ``environment.twoAgentDoAction``
    applies them together. The episode ends as soon as either agent's state
    is terminal (no available actions); agent 1's discounted return is
    returned.

    Args:
        agent, agent2: learners; ``startEpisode``/``observeTransition``
            called if present.
        environment: provides ``reset``, ``getCurrentState(playerIndex)``,
            ``getPossibleActions`` and ``twoAgentDoAction``.
        discount: multiplicative discount applied to each successive reward.
        decision, decision2: state -> action callables for each agent.
        display: callable that renders both states.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        Agent 1's accumulated discounted return (agent 2's is computed but
        discarded).

    Raises:
        Exception: if agent 2's ``decision2`` returns None.
    """
    returns = 0
    returns2 = 0
    # NOTE(review): totalDiscount2 starts at 0 and so stays 0 forever, and
    # returns2 is accumulated with agent 1's totalDiscount — both look like
    # unfinished per-agent discounting. Left untouched because returns2 is
    # never returned; confirm intent before changing.
    totalDiscount2 = 0
    totalDiscount = 1.0
    a1Done = False
    a2Done = False
    environment.reset()

    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    if 'startEpisode' in dir(agent2):
        agent2.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE (one view per agent)
        state = environment.getCurrentState(1)
        state2 = environment.getCurrentState(2)

        display(state, state2)
        pause()

        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a1Done = True

        actions = environment.getPossibleActions(state2)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a2Done = True

        player1NextState = None
        player2NextState = None
        reward1 = None
        reward2 = None
        action1 = None
        action2 = None

        if not a1Done and not a2Done:
            action1 = decision(state)
            action2 = decision2(state2)
            # BUG FIX: the original called `env.twoAgentDoAction(...)` but no
            # name `env` exists in this scope (NameError); the parameter is
            # `environment`.
            result = environment.twoAgentDoAction(action1, action2)
            player1NextState = result[0]
            player2NextState = result[1]
            reward1 = result[2]
            reward2 = result[3]

            # EXECUTE ACTION
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action1, player1NextState,
                                        reward1)
            message("Agent 1 Started in state: " + str(state) +
                    "\nTook action: " + str(action1) + "\n.Ended in state: " +
                    str(player1NextState) + "\nGot reward: " + str(reward1) +
                    "\n")
            returns += reward1 * totalDiscount
            totalDiscount *= discount

            # GET ACTION (USUALLY FROM AGENT)
            action = decision2(state2)
            if action is None:
                # A string raise is a TypeError in Python 3; use an Exception.
                raise Exception('Error: Agent returned None action')

            # EXECUTE ACTION
            message("Agent 2 Started in state: " + str(state2) +
                    "\nTook action: " + str(action2) + "\nEnded in state: " +
                    str(player2NextState) + "\nGot reward: " + str(reward2) +
                    "\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent2):
                agent2.observeTransition(state2, action, player2NextState,
                                         reward2)

            returns2 += reward2 * totalDiscount
            totalDiscount2 *= discount

        if a1Done or a2Done:
            return returns

    # NOTE(review): unreachable — the loop only exits via return, so
    # stopEpisode never runs for either agent. Presumably intended before
    # the return; confirm with callers before moving it.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
    if 'stopEpisode' in dir(agent2):
        agent2.stopEpisode()
Beispiel #11
0
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, update=True, bounded=False):
    """Run one episode supporting n-step agents and an optional step bound.

    Three ways the episode can end:
    * the agent itself says so via ``should_end_episode`` (n-step agents);
    * a terminal state is reached with ``update`` False;
    * ``bounded`` is True and MAX_TIMESTEPS steps have elapsed (with
      ``update`` False).
    With ``update`` True, reaching a terminal state instead keeps calling
    ``agent.update(state, None, None, None, update)`` so an n-step agent can
    flush its remaining backups before ending via ``should_end_episode``.

    Args:
        agent: learner; ``startEpisode(initialState)``/``observeTransition``/
            ``update``/``should_end_episode``/``stopEpisode`` used if present.
        environment: provides ``reset(agent)``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: optional state renderer; skipped when None.
        message: callable that receives log strings.
        pause: unused here (per-step pause is commented out).
        episode: episode number, used only for logging.
        update: True when driving an n-step learning agent (see above).
        bounded: when True, cap the episode at MAX_TIMESTEPS steps.

    Returns:
        Tuple ``(timestep, returns)``.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode(environment.getCurrentState())
    # message("BEGINNING EPISODE: "+str(episode)+"\n")

    timestep = 0
    MAX_TIMESTEPS = 40  # step cap, only enforced when bounded=True

    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        # pause()

        # Let an n-step agent end the episode on its own schedule.
        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)

        # END IF IN A TERMINAL STATE (or step bound exceeded)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0:
                # Terminal state with an n-step agent: keep feeding empty
                # updates until the agent says stop (via should_end_episode).
                agent.update(state, None, None, None, update)
                continue
            elif not update:
                # Not an n-step agent: terminate on goal state or time limit.
                message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # A string raise is a TypeError in Python 3; use an Exception.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)

        returns += reward * totalDiscount
        totalDiscount *= discount

        timestep += 1

    # NOTE(review): unreachable and redundant — every return path above
    # already calls stopEpisode when the agent has one.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
Beispiel #12
0
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run episodes while appending visited-state trajectories to a file.

    Runs the standard episode loop but also encodes each visited state as a
    single cell index (row-major on a 10-wide grid) and, after ``stepsLimit``
    steps, appends the collected trajectory to ``trajectories.txt`` and
    recursively starts another episode with the same settings. A 'stay'
    action leaves the state unchanged without touching the environment or
    the learner.

    Args:
        agent: learner; ``startEpisode``/``observeTransition`` called if present.
        environment: provides ``reset``, ``getCurrentState``,
            ``getPossibleActions`` and ``doAction``.
        discount: multiplicative discount applied to each successive reward.
        decision: state -> action callable.
        display: callable that renders a state.
        message: callable that receives log strings.
        pause: callable invoked once per step.
        episode: episode number, used only for logging.

    Returns:
        The accumulated discounted return when a terminal state is reached,
        or None after the steps-limit/recursion path.

    Raises:
        Exception: if ``decision`` returns None.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    f = open("trajectories.txt", "a")
    try:
        appendString = '['

        steps = 0
        grid_width = 10  # states are (x, y) cells on a 10-wide grid

        while True:
            # DISPLAY CURRENT STATE
            state = environment.getCurrentState()
            display(state)
            pause()

            # END IF IN A TERMINAL STATE
            actions = environment.getPossibleActions(state)
            if len(actions) == 0:
                message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
                return returns

            # GET ACTION (USUALLY FROM AGENT)
            action = decision(state)

            if action is None:
                # A string raise is a TypeError in Python 3; use an Exception.
                raise Exception('Error: Agent returned None action')

            # EXECUTE ACTION ('stay' skips the environment and the learner)
            nextState = state

            if action != 'stay':
                nextState, reward = environment.doAction(action)
                message("Started in state: " + str(state) +
                        "\nTook action: " + str(action) +
                        "\nEnded in state: " + str(nextState) +
                        "\nGot reward: " + str(reward) + "\n")
                # UPDATE LEARNER
                if 'observeTransition' in dir(agent):
                    agent.observeTransition(state, action, nextState, reward)

                returns += reward * totalDiscount
                totalDiscount *= discount

            # Record the visited state as a flat cell index.
            stateNumber = (state[1] * grid_width + state[0])
            appendString += str(stateNumber)

            steps += 1
            # NOTE(review): stepsLimit is not defined in this function — it is
            # presumably a module-level constant; confirm it exists.
            if steps == stepsLimit:
                break
            appendString += ','

        appendString += '],'
        print("AppendString ", appendString)
        f.write(appendString + "\n")
    finally:
        # Close the trajectory file on every exit path — the original leaked
        # the handle when the terminal-state branch returned early.
        f.close()

    # Start the next episode with identical settings.
    # NOTE(review): unbounded recursion — this never terminates unless an
    # episode ends in a terminal state; confirm with callers.
    runEpisode(agent, environment, discount, decision, display, message, pause, episode)
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()