Example #1
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, testFlag=0, foldN=0):
	returns = 0
	totalDiscount = 1.0
	reward, AOP, explore = 0, None, None  # defined up front so the terminal-state return below is always bound
	environment.reset(testFlag, foldN)
	
	if 'startEpisode' in dir(agent): agent.startEpisode()
	while True:

		# DISPLAY CURRENT STATE
		state = environment.getCurrentState()
#		display(state)
		pause()
    
		# END IF IN A TERMINAL STATE
		actions = environment.getPossibleActions(state)
		if len(actions) == 0:
			return returns, reward, AOP, explore  # returns: accumulated discounted reward; reward/AOP/explore: values from the last doAction call
    
		# GET ACTION (USUALLY FROM AGENT)
		action = decision(state)
		if action is None:
			raise Exception('Error: Agent returned None action')
    
		# EXECUTE ACTION
		nextState, reward, AOP, explore = environment.doAction(action)
		
		if testFlag == 0: # training episode
			# UPDATE LEARNER
			if 'observeTransition' in dir(agent): 
			    agent.observeTransition(state, action, nextState, reward) 
		else: # testing episode
			pass

		returns += reward * totalDiscount
		totalDiscount *= discount
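
Most of the runEpisode variants collected here drive the same small environment interface: reset(), getCurrentState(), getPossibleActions(state), and doAction(action) returning a next state and a reward. The stub below is a minimal sketch of that interface, assuming a trivial two-state task; the class name and dynamics are hypothetical and only illustrate what a runEpisode loop expects.

class StubEnvironment:
    """Hypothetical stand-in for the environments used by runEpisode."""

    def __init__(self):
        self.state = 'start'

    def reset(self):
        # Called once at the top of each episode.
        self.state = 'start'

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        # An empty action list is how runEpisode detects a terminal state.
        return ['finish'] if state == 'start' else []

    def doAction(self, action):
        # Apply the action and return (nextState, reward).
        self.state = 'done'
        return self.state, 1.0
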
Example #2
    def model(self, parameters):
        """Model a year of data for a location.

        Args:
            parameters: (tuple) capacity (Wh), PV Size (STC).

        Returns:
            (domain): results from model.
        """
        env.reset()
        size, pv = parameters
        # don't go below 1 negative/division by zero issues
        pv = max(pv, 1.)
        size = max(size, 1.)

        plane = InclinedPlane(Site(self.place), self.tilt, self.azimuth)
        load = self.load()
        SHS = Gateway([load,
                      self.cc([SimplePV(pv, plane)]),
                      IdealStorage(size)])

        for r in eere.EPWdata('418830'):
            env.update_time(r['datetime'])
            SHS()

        print(SHS.details())
        self.foo.write('%s,%s,%s\n' % (size, pv, SHS.merit()))
        self.foo.flush()
        return SHS
Example #3
def runEpisode(agent,
               environment,
               discount,
               decision,
               display,
               message,
               pause,
               episode,
               train=False):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        state_copy = copy.deepcopy(state)
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: TOOK " +
                    str(len(state.history)) + " STEPS; RETURN WAS " +
                    str(returns) + "\n")
            message(str(state.history))
            message(str(len(state.history)))
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state_copy.x) + str(state_copy.y) +
                str(state_copy.direction) + str(state_copy.num_explored) +
                " " + str(len(state_copy.history)) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState.x) +
                str(nextState.y) + str(nextState.direction) +
                str(nextState.num_explored) + " " +
                str(len(nextState.history)) + "\nGot reward: " + str(reward) +
                "\n")
        # UPDATE LEARNER
        # print_grid(state_copy.explored_grid)
        # print_grid(nextState.explored_grid)

        if 'observeTransition' in dir(
                agent) and not nextState.is_terminal and train:
            agent.observeTransition(state_copy, action, nextState, reward)
        print(agent.weights)
        print("############################################")
        print("############################################\n")

        returns += reward * totalDiscount
        totalDiscount *= discount
Example #4
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        agent.update(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
Example #5
def runEpisode(agent, qtype, environment, discount, decision, display, message, pause, episode):

  ###########################
  # GET THE GRIDWORLD
  ###########################

  returns = 0
  totalDiscount = 1.0
  environment.reset()
  
  #for state in mdp.getStates():
  #  display(state)

  if 'startEpisode' in dir(agent): agent.startEpisode()
  message("BEGINNING EPISODE: "+str(episode)+"\n")
  while True:

    # DISPLAY CURRENT STATE
    state = environment.getCurrentState()
    display(state)
    pause()
    
    # END IF IN A TERMINAL STATE
    actions = environment.getPossibleActions(state)
    if len(actions) == 0:
      message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
      return returns
    
    # GET ACTION (USUALLY FROM AGENT)
    action = decision(state)
    if action is None:
      raise Exception('Error: Agent returned None action')
    
    # EXECUTE ACTION
    nextState, reward, action2 = environment.doAction(action, qtype)
    message("Started in state: "+str(state)+
            "\nSpecified action: "+str(action)+
            "\nTook action: "+str(action2)+
            "\nEnded in state: "+str(nextState)+
            "\nGot reward: "+str(reward)+"\n")    
    # UPDATE LEARNER
    if 'observeTransition' in dir(agent): 
      agent.observeTransition(state, action2, nextState, reward, environment)
    if isinstance(reward, tuple):
      reward = sum(reward)
    
    returns += reward * totalDiscount
    totalDiscount *= discount

  if 'stopEpisode' in dir(agent):
    agent.stopEpisode()
Example #6
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    timestep = 0
    MAX_TIMESTEPS = 20
    while True:
        if timestep >= MAX_TIMESTEPS:
            return returns

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        #pause()
        if timestep == 0 and episode == 1:
            input("")

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        #message("Started in state: "+str(state)+
        #        "\nTook action: "+str(action)+
        #        "\nEnded in state: "+str(nextState)+
        #        "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

        timestep += 1

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
Example #7
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    stateAndValues = -1
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return (returns, stateAndValues)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # UPDATE LEARNER

        val = -1
        if 'observeTransition' in dir(agent):
            val = agent.observeTransition(state, action, nextState, reward)

        if state == (2, 2):
            stateAndValues = [0, val, 0, 0]

        returns += reward * totalDiscount
        totalDiscount *= discount

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
Example #8
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception("Error: Agent returned None action")

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message(
            "Started in state: "
            + str(state)
            + "\nTook action: "
            + str(action)
            + "\nEnded in state: "
            + str(nextState)
            + "\nGot reward: "
            + str(reward)
            + "\n"
        )
        # UPDATE LEARNER
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
Example #9
def analizar_texto():
    """Run lexical and syntactic analysis on the query text and append the
    three-address-code output plus the symbol, string, function and error
    reports to the output widget."""
    response = txt_consultas.get("1.0", "end")
    salida_lexico_ast = analizarLex(response)
    texto = analizarSin(response)
    txt_salida.insert('end', '\n>>>\n')
    txt_salida.insert('end', '\n=====SALIDA C3D======\n')
    txt_salida.insert('end', texto + '\n\n\n')
    txt_salida.insert('end', tab_simbolos())
    txt_salida.insert('end', '\n=====REPORTE DE INDEX======')
    txt_salida.insert('end', tab_string())
    txt_salida.insert('end', '\n=====REPORTE DE FUNCIONES======')
    txt_salida.insert('end', tab_func())
    txt_salida.insert('end', '\n=====REPORTE DE ERRORES======')
    txt_salida.insert('end', get_errores())
    txt_salida.insert('end', '\n>>>\n')

    reset()
Example #10
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # Display current state
        state = environment.getCurrentState()
        display(state)
        pause()

        # End if in a terminal state
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # Get action (usually from agent)
        action = decision(state)
        if action is None:
            raise Exception("Error: Agent returned None action")

        # Execute action
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # Update learner
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
Example #11
def run_episode(agent, environment, discount, decision, display, message,
                pause, episode):
    returns = 0
    total_discount = 1.0
    environment.reset()
    if 'start_episode' in dir(agent):
        agent.start_episode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.get_current_state()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.get_possible_actions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        next_state, reward = environment.do_action(action)
        message("Started in state: " + str(state) + "\nTook action: " +
                str(action) + "\nEnded in state: " + str(next_state) +
                "\nGot reward: " + str(reward) + "\n")
        # UPDATE LEARNER
        if 'observe_transition' in dir(agent):
            agent.observe_transition(state, action, next_state, reward)

        returns += reward * total_discount
        total_discount *= discount

    if 'stop_episode' in dir(agent):
        agent.stop_episode()
Example #12
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, rates, offset, pmNum, locks, pllock, q, plinfo):
  returns = 0
  totalDiscount = 1.0
  environment.reset()
  if 'startEpisode' in dir(agent): agent.startEpisode()
  #message("BEGINNING EPISODE: "+str(episode)+"\n")
  pm = environment.baeplatform.pm
  while True:

    
    # END IF IN A TERMINAL STATE
    if environment.isTerminal():
      message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
      return returns, offset
    #print rates[offset%len(rates)]
    # DISPLAY CURRENT STATE
    state = environment.getCurrentState()
    pause()
    offset += 1
    # GET ACTION (USUALLY FROM AGENT)
    action = decision(state)
    if action is None:
      raise Exception('Error: Agent returned None action')
    
    # EXECUTE ACTION
    nextState, reward = environment.doAction(state, action, pmNum,locks,pllock,q,plinfo)
    message("VM " + str(pm.id) + " Started in state: "+str(state)+
            "\nTook action: "+str(action)+
            "\nEnded in state: "+str(nextState)+
            "\nGot reward: "+str(reward)+"\n")    
    # UPDATE LEARNER
    if 'observeTransition' in dir(agent):
        agent.observeTransition(state, action, nextState, reward)
    
    returns += reward * totalDiscount
    totalDiscount *= discount

  if 'stopEpisode' in dir(agent):
    agent.stopEpisode()
Example #13
    def watch_play(self):
        done = False
        board = env.reset()
        while not done:
            # finds the best action
            action = env.process_state(board)
            self.drop_piece(action, board)
            board, done = env.step(board, *action)

            self.board = board.area
            self.update()
            self.root.update()
            time.sleep(self.speed)
Example #14
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    
    f = open("trajectories.txt", "a")
    
    appendString='['

    steps=0
    grid_width=10
    
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState=state
        
        if action != 'stay':
            nextState, reward = environment.doAction(action)
            message("Started in state: "+str(state)+
                    "\nTook action: "+str(action)+
                    "\nEnded in state: "+str(nextState)+
                    "\nGot reward: "+str(reward)+"\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
    
            returns += reward * totalDiscount
            totalDiscount *= discount
        
        stateNumber = (state[1] * grid_width + state[0])
        appendString += str(stateNumber)

        steps += 1
        if steps == stepsLimit:
            break
        appendString += ','
        
    appendString+='],'
    print("AppendString ",appendString)
    f.write(appendString+"\n")
    f.close()
    
    runEpisode(agent, environment, discount, decision, display, message, pause, episode)
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
Example #15
numActions = 10
Actions = np.linspace(0, 1, numActions)

# This is our learning agent
gamma = .95
agent = sarsaAgent(4, numActions, 10, 1, epsilon=5e-2, gamma=gamma, alpha=1e-2)
maxSteps = 1e5

R = []
UpTime = []

step = 0
ep = 0
while step < maxSteps:
    ep += 1
    x = environment.reset()  # initialize the state
    C = 0.

    done = False
    t = 1
    while not done:
        t += 1
        step += 1
        a = agent.action(x)
        u = Actions[a]
        #env.render() # only for visual effects
        x_next, c, done = environment.step(u, x)

        C += (1. / t) * (c - C)
        agent.update(x, a, c, x_next, done)
        x = x_next
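
The R and UpTime lists declared above are never filled inside this excerpt; the lines below are a hedged sketch of the per-episode bookkeeping they suggest, and the exact quantities recorded are an assumption rather than part of the original code.

    # Assumed bookkeeping after the inner while-loop finishes (indented to
    # sit inside the outer `while step < maxSteps` episode loop):
    R.append(C)        # running-average cost observed during the episode
    UpTime.append(t)   # number of steps the episode lasted
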
Example #16

def handler(signum, frame):
    """Handles CTRL-C to terminate the session early."""
    global terminate_early
    terminate_early = True


signal.signal(signal.SIGINT, handler)

# Create SNN and Environment objects
snn = net.SpikingNeuralNetwork()
env = env.VrepEnvironment()

# Initialize environment, get initial state and reward
state, reward = env.reset()

# Simulate for training_length steps
for i in range(params.training_length):

    # Run network for 50 ms: Get left and right output spikes, get weights
    n_l, n_r, weights = snn.simulate(state, reward)
    w_l = weights[0]
    w_r = weights[1]

    # Perform a step
    # Get state, distance, pos_data, reward, terminate, steps,
    # travelled_distances, vrep_steps
    (state, distance, pos_data, reward, t, step, travelled_distances,
     vrep_steps) = env.step(n_l, n_r)
Example #17
STATS_EVERY = 10

# Exploration settings
epsilon = 1  # not a constant, going to be decayed
START_EPSILON_DECAYING = 1
END_EPSILON_DECAYING = EPISODES//2
epsilon_decay_value = epsilon/(END_EPSILON_DECAYING - START_EPSILON_DECAYING)

# For stats
ep_rewards = []
aggr_ep_rewards = {'ep': [], 'avg': [], 'max': [], 'min': []}


q_table = np.load("./qtables/1/800-qtable.npy", allow_pickle=True).item()
for episode in range(EPISODES):
    state = env.reset()

    if state not in q_table:
        q_table[state] = np.random.uniform(
            low=-2, high=0, size=env.action_space_n)

    episode_reward = 0
    done = False

    while not done:
        valid_actions = env.get_valid_actions(0)

        action = max(valid_actions, key=lambda a: q_table[state][a])
        new_state, reward, done = env.step(action)
        episode_reward += reward
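
The exploration settings defined at the top of this example (epsilon, START_EPSILON_DECAYING, END_EPSILON_DECAYING, epsilon_decay_value) are never applied inside the excerpt; the lines below are a hedged sketch of how that per-episode decay is usually wired in, and their placement inside the episode loop is an assumption.

    # Assumed per-episode bookkeeping at the end of the `for episode in
    # range(EPISODES)` body: record the return and decay epsilon linearly
    # between the two decay bounds defined above.
    ep_rewards.append(episode_reward)
    if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
        epsilon -= epsilon_decay_value
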
Example #18
def runEpisode(agent, agent2, environment, discount, decision, decision2,
               display, message, pause, episode):
    returns = 0
    returns2 = 0
    totalDiscount2 = 1.0
    totalDiscount = 1.0
    a1Done = False
    a2Done = False
    environment.reset()

    if 'startEpisode' in dir(agent): agent.startEpisode()
    if 'startEpisode' in dir(agent2): agent2.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState(1)
        state2 = environment.getCurrentState(2)

        display(state, state2)
        pause()

        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a1Done = True

        actions = environment.getPossibleActions(state2)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a2Done = True

        player1NextState = None
        player2NextState = None
        reward1 = None
        reward2 = None
        action1 = None
        action2 = None

        if (a1Done == False and a2Done == False):
            action1 = decision(state)
            action2 = decision2(state2)
            result = environment.twoAgentDoAction(action1, action2)
            player1NextState = result[0]
            player2NextState = result[1]
            reward1 = result[2]
            reward2 = result[3]

            # EXECUTE ACTION
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action1, player1NextState,
                                        reward1)
            message("Agent 1 Started in state: " + str(state) +
                    "\nTook action: " + str(action1) + "\nEnded in state: " +
                    str(player1NextState) + "\nGot reward: " + str(reward1) +
                    "\n")
            returns += reward1 * totalDiscount
            totalDiscount *= discount

            # GET ACTION (USUALLY FROM AGENT)
            action = decision2(state2)
            if action is None:
                raise Exception('Error: Agent returned None action')

            # EXECUTE ACTION
            message("Agent 2 Started in state: " + str(state2) +
                    "\nTook action: " + str(action2) + "\nEnded in state: " +
                    str(player2NextState) + "\nGot reward: " + str(reward2) +
                    "\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent2):
                agent2.observeTransition(state2, action, player2NextState,
                                         reward2)

            returns2 += reward2 * totalDiscount2
            totalDiscount2 *= discount

        if a1Done or a2Done:
            return returns

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
    if 'stopEpisode' in dir(agent2):
        agent2.stopEpisode()
Example #19
    initial_epsilon = qlearn.epsilon

    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10000
    highest_reward = 0

    f = open('q_table.txt', 'a')
    f2 = open('q_table_list.pickle', 'wb')
    for x in range(total_episodes):
        done = False

        cumulated_reward = 0  # Should going forward give more reward than L/R?

        observation = environment.reset()

        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        state = ''.join(map(str, observation))
        # print("State = ",state," observation = ",observation)
        for i in range(1500):

            # Pick an action based on the current state
            action = qlearn.chooseAction(state)

            # Execute the action and get feedback
            observation, reward, done, info = environment.step(action)
            cumulated_reward += reward
Example #20
optimizer = chainer.optimizers.Adam(eps = 1e-2)
gamma = 0.95
#explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(start_epsilon = 1.0, end_epsilon = 0.3 , decay_steps = 50000 ,random_action_func = env.random_move)  
explorer = chainerrl.explorers.ConstantEpsilonGreedy(epsilon = 1.0, random_action_func = env.random_move)    
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 5)
phi = lambda x: x.astype(np.float32, copy = False)
optimizer.setup(q_func)
agent = chainerrl.agents.DQN(
    q_func, optimizer, replay_buffer, gamma, explorer, 
    replay_start_size = 500, update_interval = 1, 
    target_update_interval = 100, phi = phi)


# Training loop
obs = env.reset()
r = 0
done = False

x = np.array([])
y = np.array([])
win_array = np.array([])
win_sum = 0

#agent.load('final_agent')

for cnt2 in range(1):
    turny = 0
    win = 0
    print("Trial number " + str(cnt2 + 1))
    for cnt in range(1):
Example #21
def runEpisode(agent, environment, discount, decision, display, message, pause, episode, update=True, bounded=False):
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent): agent.startEpisode(environment.getCurrentState())
    #message("BEGINNING EPISODE: "+str(episode)+"\n")
    
    timestep = 0
    MAX_TIMESTEPS = 40
    
    while True:
        #print("timestep ", timestep)
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        #pause()
        #if timestep == 0 and episode == 1:
        #if not update:
        #    input("")
        
        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            #message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)
        
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0: # reached terminal state but we are using n-step agent
                agent.update(state, None, None, None, update) # keep going until n-step agent says stop
                continue # for n-step agent
            elif not update: # not n-step agent so terminate on goal state or time exceeded
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        #print(action)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        #message("Started in state: "+str(state)+
        #        "\nTook action: "+str(action)+
        #        "\nEnded in state: "+str(nextState)+
        #        "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)

        returns += reward * totalDiscount
        totalDiscount *= discount
        
        timestep += 1

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
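
For context, these runEpisode variants are normally invoked from a small driver, one call per episode, with decision, display, message, and pause passed in as callables. The loop below is a hypothetical sketch under that assumption; agent, environment, and numEpisodes are placeholders, and agent.getAction is only an example of a decision callable, not an API confirmed by the snippets above.

# Hypothetical driver loop; agent/environment construction is assumed to
# have happened elsewhere, and numEpisodes is a placeholder.
returns_per_episode = []
for episode in range(1, numEpisodes + 1):
    ep_return = runEpisode(
        agent,
        environment,
        discount=0.9,
        decision=lambda state: agent.getAction(state),  # assumed agent API
        display=lambda state: None,                     # no-op rendering
        message=lambda text: None,                      # silence logging
        pause=lambda: None,                             # no interactive pause
        episode=episode,
    )
    returns_per_episode.append(ep_return)
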