def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode, testFlag=0, foldN=0):
    returns = 0
    totalDiscount = 1.0
    environment.reset(testFlag, foldN)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            # returns: accumulated rewards; reward, AOP, explore come from the
            # last doAction call
            return returns, reward, AOP, explore

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward, AOP, explore = environment.doAction(action)

        if testFlag == 0:  # training episode
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
        else:  # testing episode
            pass

        returns += reward * totalDiscount
        totalDiscount *= discount
def model(self, parameters):
    """Model a year of data for a location.

    Args:
        parameters: (tuple) capacity (Wh), PV size (STC).

    Returns:
        (domain): results from model.
    """
    env.reset()
    size, pv = parameters
    # don't go below 1: avoids negative / division-by-zero issues
    pv = max(pv, 1.)
    size = max(size, 1.)
    plane = InclinedPlane(Site(self.place), self.tilt, self.azimuth)
    load = self.load()
    SHS = Gateway([load,
                   self.cc([SimplePV(pv, plane)]),
                   IdealStorage(size)])
    for r in eere.EPWdata('418830'):
        env.update_time(r['datetime'])
        SHS()
    print(SHS.details())
    self.foo.write('%s,%s,%s\n' % (size, pv, SHS.merit()))
    self.foo.flush()
    return SHS
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode, train=False):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        state_copy = copy.deepcopy(state)
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: TOOK " +
                    str(len(state.history)) + " STEPS; RETURN WAS " +
                    str(returns) + "\n")
            message(str(state.history))
            message(str(len(state.history)))
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state_copy.x) + str(state_copy.y) +
                str(state_copy.direction) + str(state_copy.num_explored) + " " +
                str(len(state_copy.history)) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState.x) + str(nextState.y) +
                str(nextState.direction) + str(nextState.num_explored) + " " +
                str(len(nextState.history)) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        # print_grid(state_copy.explored_grid)
        # print_grid(nextState.explored_grid)
        if 'observeTransition' in dir(agent) and not nextState.is_terminal and train:
            agent.observeTransition(state_copy, action, nextState, reward)
            print(agent.weights)
        print("############################################")
        print("############################################\n")

        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        agent.update(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, qtype, environment, discount, decision, display, message,
               pause, episode):
    ###########################
    # GET THE GRIDWORLD
    ###########################
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    # for state in mdp.getStates():
    #     display(state)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward, action2 = environment.doAction(action, qtype)
        message("Started in state: " + str(state) +
                "\nSpecified action: " + str(action) +
                "\nTook action: " + str(action2) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action2, nextState, reward, environment)

        if isinstance(reward, tuple):
            reward = sum(reward)
        returns += reward * totalDiscount
        totalDiscount *= discount

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode):
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")

    timestep = 0
    MAX_TIMESTEPS = 20
    while True:
        if timestep >= MAX_TIMESTEPS:
            return returns

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        # pause()
        if timestep == 0 and episode == 1:
            input("")

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # message("Started in state: " + str(state) +
        #         "\nTook action: " + str(action) +
        #         "\nEnded in state: " + str(nextState) +
        #         "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode): returns = 0 totalDiscount = 1.0 environment.reset() if 'startEpisode' in dir(agent): agent.startEpisode() message("BEGINNING EPISODE: " + str(episode) + "\n") stateAndValues = -1 while True: # DISPLAY CURRENT STATE state = environment.getCurrentState() display(state) pause() # END IF IN A TERMINAL STATE actions = environment.getPossibleActions(state) if len(actions) == 0: message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n") return (returns, stateAndValues) # GET ACTION (USUALLY FROM AGENT) action = decision(state) if action == None: raise 'Error: Agent returned None action' # EXECUTE ACTION nextState, reward = environment.doAction(action) message("Started in state: " + str(state) + "\nTook action: " + str(action) + "\nEnded in state: " + str(nextState) + "\nGot reward: " + str(reward) + "\n") # UPDATE LEARNER val = -1 if 'observeTransition' in dir(agent): val = agent.observeTransition(state, action, nextState, reward) if state == (2, 2): stateAndValues = [0, val, 0, 0] returns += reward * totalDiscount totalDiscount *= discount if 'stopEpisode' in dir(agent): agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode): returns = 0 totalDiscount = 1.0 environment.reset() if "startEpisode" in dir(agent): agent.startEpisode() message("BEGINNING EPISODE: " + str(episode) + "\n") while True: # DISPLAY CURRENT STATE state = environment.getCurrentState() display(state) pause() # END IF IN A TERMINAL STATE actions = environment.getPossibleActions(state) if len(actions) == 0: message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n") return returns # GET ACTION (USUALLY FROM AGENT) action = decision(state) if action == None: raise "Error: Agent returned None action" # EXECUTE ACTION nextState, reward = environment.doAction(action) message( "Started in state: " + str(state) + "\nTook action: " + str(action) + "\nEnded in state: " + str(nextState) + "\nGot reward: " + str(reward) + "\n" ) # UPDATE LEARNER if "observeTransition" in dir(agent): agent.observeTransition(state, action, nextState, reward) returns += reward * totalDiscount totalDiscount *= discount if "stopEpisode" in dir(agent): agent.stopEpisode()
def analizar_texto():
    # Analyze the text in the query box: run lexical and syntactic analysis,
    # then print the 3-address-code (C3D) output and the report tables.
    response = txt_consultas.get("1.0", "end")
    salida_lexico_ast = analizarLex(response)
    texto = analizarSin(response)
    txt_salida.insert('end', '\n>>>\n')
    txt_salida.insert('end', '\n=====SALIDA C3D======\n')          # C3D output
    txt_salida.insert('end', texto + '\n\n\n')
    txt_salida.insert('end', tab_simbolos())                        # symbol table
    txt_salida.insert('end', '\n=====REPORTE DE INDEX======')       # index report
    txt_salida.insert('end', tab_string())
    txt_salida.insert('end', '\n=====REPORTE DE FUNCIONES======')   # function report
    txt_salida.insert('end', tab_func())
    txt_salida.insert('end', '\n=====REPORTE DE ERRORES======')     # error report
    txt_salida.insert('end', get_errores())
    txt_salida.insert('end', '\n>>>\n')
    reset()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode): returns = 0 totalDiscount = 1.0 environment.reset() if "startEpisode" in dir(agent): agent.startEpisode() message("BEGINNING EPISODE: " + str(episode) + "\n") while True: # Display current state state = environment.getCurrentState() display(state) pause() # End if in a terminal state actions = environment.getPossibleActions(state) if len(actions) == 0: message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n") return returns # Get action (usually from agent) action = decision(state) if action == None: raise Exception("Error: Agent returned None action") # Execute action nextState, reward = environment.doAction(action) message("Started in state: " + str(state) + "\nTook action: " + str(action) + "\nEnded in state: " + str(nextState) + "\nGot reward: " + str(reward) + "\n") # Update learner if "observeTransition" in dir(agent): agent.observeTransition(state, action, nextState, reward) returns += reward * totalDiscount totalDiscount *= discount if "stopEpisode" in dir(agent): agent.stopEpisode()
def run_episode(agent, environment, discount, decision, display, message, pause,
                episode):
    returns = 0
    total_discount = 1.0
    environment.reset()
    if 'start_episode' in dir(agent):
        agent.start_episode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.get_current_state()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.get_possible_actions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        next_state, reward = environment.do_action(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(next_state) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        if 'observe_transition' in dir(agent):
            agent.observe_transition(state, action, next_state, reward)

        returns += reward * total_discount
        total_discount *= discount

    if 'stop_episode' in dir(agent):
        agent.stop_episode()
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode, rates, offset, pmNum, locks, pllock, q, plinfo):
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    # message("BEGINNING EPISODE: " + str(episode) + "\n")

    pm = environment.baeplatform.pm
    while True:
        # END IF IN A TERMINAL STATE
        if environment.isTerminal():
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            return returns, offset

        # print(rates[offset % len(rates)])

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        pause()
        offset += 1

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(state, action, pmNum, locks,
                                                 pllock, q, plinfo)
        message("VM " + str(pm.id) + " Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def watch_play(self):
    done = False
    board = env.reset()
    while not done:
        # find the best action for the current board
        action = env.process_state(board)
        self.drop_piece(action, board)
        board, done = env.step(board, *action)
        self.board = board.area
        self.update()
        self.root.update()
        time.sleep(self.speed)
def runEpisode(agent, environment, discount, decision, display, message, pause, episode): returns = 0 totalDiscount = 1.0 environment.reset() if 'startEpisode' in dir(agent): agent.startEpisode() message("BEGINNING EPISODE: "+str(episode)+"\n") f = open("trajectories.txt", "a") appendString='[' steps=0 grid_width=10 while True: # DISPLAY CURRENT STATE state = environment.getCurrentState() display(state) pause() # END IF IN A TERMINAL STATE actions = environment.getPossibleActions(state) if len(actions) == 0: message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n") return returns # GET ACTION (USUALLY FROM AGENT) action = decision(state) if action == None: raise 'Error: Agent returned None action' # EXECUTE ACTION nextState=state if action != 'stay': nextState, reward = environment.doAction(action) message("Started in state: "+str(state)+ "\nTook action: "+str(action)+ "\nEnded in state: "+str(nextState)+ "\nGot reward: "+str(reward)+"\n") # UPDATE LEARNER if 'observeTransition' in dir(agent): agent.observeTransition(state, action, nextState, reward) returns += reward * totalDiscount totalDiscount *= discount stateNumber= (state[1]*grid_width +state[0]) appendString+=str(stateNumber) steps+=1 if(steps==stepsLimit): break; appendString+=',' appendString+='],' print("AppendString ",appendString) f.write(appendString+"\n") f.close() runEpisode(agent, environment, discount, decision, display, message, pause, episode); if 'stopEpisode' in dir(agent): agent.stopEpisode()
numActions = 10
Actions = np.linspace(0, 1, numActions)

# This is our learning agent
gamma = .95
agent = sarsaAgent(4, numActions, 10, 1, epsilon=5e-2, gamma=gamma, alpha=1e-2)

maxSteps = 1e5
R = []
UpTime = []
step = 0
ep = 0
while step < maxSteps:
    ep += 1
    x = environment.reset()  # initialize the state
    C = 0.
    done = False
    t = 1
    while not done:
        t += 1
        step += 1
        a = agent.action(x)
        u = Actions[a]
        # env.render()  # only for visual effects
        x_next, c, done = environment.step(u, x)
        C += (1. / t) * (c - C)  # running average of the cost
        agent.update(x, a, c, x_next, done)
        x = x_next
def handler(signum, frame):
    """Handles CTRL-C to terminate the session early."""
    global terminate_early
    terminate_early = True

signal.signal(signal.SIGINT, handler)

# Create SNN and Environment objects
snn = net.SpikingNeuralNetwork()
env = env.VrepEnvironment()

# Initialize environment, get initial state and reward
state, reward = env.reset()

# Simulate for training_length steps
for i in range(params.training_length):
    # Run network for 50 ms: get left and right output spikes and the weights
    n_l, n_r, weights = snn.simulate(state, reward)
    w_l = weights[0]
    w_r = weights[1]

    # Perform a step and get state, distance, pos_data, reward, terminate flag,
    # steps, travelled_distances, vrep_steps
    (state, distance, pos_data, reward, t, step,
     travelled_distances, vrep_steps) = env.step(n_l, n_r)
STATS_EVERY = 10

# Exploration settings
epsilon = 1  # not a constant, going to be decayed
START_EPSILON_DECAYING = 1
END_EPSILON_DECAYING = EPISODES // 2
epsilon_decay_value = epsilon / (END_EPSILON_DECAYING - START_EPSILON_DECAYING)

# For stats
ep_rewards = []
aggr_ep_rewards = {'ep': [], 'avg': [], 'max': [], 'min': []}

q_table = np.load("./qtables/1/800-qtable.npy", allow_pickle=True).item()

for episode in range(EPISODES):
    state = env.reset()
    if state not in q_table:
        q_table[state] = np.random.uniform(
            low=-2, high=0, size=env.action_space_n)
    episode_reward = 0
    done = False
    while not done:
        valid_actions = env.get_valid_actions(0)
        # greedy choice among the valid actions
        action = max(valid_actions, key=lambda a: q_table[state][a])
        new_state, reward, done = env.step(action)
        episode_reward += reward
def runEpisode(agent, agent2, environment, discount, decision, decision2,
               display, message, pause, episode):
    returns = 0
    returns2 = 0
    totalDiscount = 1.0
    totalDiscount2 = 1.0  # was initialized to 0, which would zero agent 2's returns
    a1Done = False
    a2Done = False
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    if 'startEpisode' in dir(agent2):
        agent2.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState(1)
        state2 = environment.getCurrentState(2)
        display(state, state2)
        pause()

        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a1Done = True
        actions = environment.getPossibleActions(state2)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                    str(returns) + "\n")
            a2Done = True

        player1NextState = None
        player2NextState = None
        reward1 = None
        reward2 = None
        action1 = None
        action2 = None
        if not a1Done and not a2Done:
            action1 = decision(state)
            action2 = decision2(state2)
            result = environment.twoAgentDoAction(action1, action2)
            player1NextState = result[0]
            player2NextState = result[1]
            reward1 = result[2]
            reward2 = result[3]

            # UPDATE LEARNER 1
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action1, player1NextState, reward1)
            message("Agent 1 Started in state: " + str(state) +
                    "\nTook action: " + str(action1) +
                    "\nEnded in state: " + str(player1NextState) +
                    "\nGot reward: " + str(reward1) + "\n")
            returns += reward1 * totalDiscount
            totalDiscount *= discount

            # GET ACTION (USUALLY FROM AGENT)
            action = decision2(state2)
            if action is None:
                raise Exception('Error: Agent returned None action')

            # EXECUTE ACTION
            message("Agent 2 Started in state: " + str(state2) +
                    "\nTook action: " + str(action2) +
                    "\nEnded in state: " + str(player2NextState) +
                    "\nGot reward: " + str(reward2) + "\n")

            # UPDATE LEARNER 2
            if 'observeTransition' in dir(agent2):
                agent2.observeTransition(state2, action, player2NextState, reward2)
            returns2 += reward2 * totalDiscount2
            totalDiscount2 *= discount

        if a1Done or a2Done:
            return returns

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
    if 'stopEpisode' in dir(agent2):
        agent2.stopEpisode()
initial_epsilon = qlearn.epsilon
epsilon_discount = 0.9986
start_time = time.time()
total_episodes = 10000
highest_reward = 0

f = open('q_table.txt', 'a')
f2 = open('q_table_list.pickle', 'wb')

for x in range(total_episodes):
    done = False
    cumulated_reward = 0  # Should going forward give more reward than L/R?

    observation = environment.reset()
    if qlearn.epsilon > 0.05:
        qlearn.epsilon *= epsilon_discount

    state = ''.join(map(str, observation))
    # print("State = ", state, " observation = ", observation)

    for i in range(1500):
        # Pick an action based on the current state
        action = qlearn.chooseAction(state)

        # Execute the action and get feedback
        observation, reward, done, info = environment.step(action)
        cumulated_reward += reward
optimizer = chainer.optimizers.Adam(eps=1e-2)
gamma = 0.95
# explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
#     start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
#     random_action_func=env.random_move)
explorer = chainerrl.explorers.ConstantEpsilonGreedy(
    epsilon=1.0, random_action_func=env.random_move)
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 5)
phi = lambda x: x.astype(np.float32, copy=False)
optimizer.setup(q_func)
agent = chainerrl.agents.DQN(
    q_func, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500, update_interval=1,
    target_update_interval=100, phi=phi)

# Training loop
obs = env.reset()
r = 0
done = False
x = np.array([])
y = np.array([])
win_array = np.array([])
win_sum = 0
# agent.load('final_agent')

for cnt2 in range(1):
    turny = 0
    win = 0
    print("試行回数" + str(cnt2 + 1))  # "試行回数" = trial count
    for cnt in range(1):
def runEpisode(agent, environment, discount, decision, display, message, pause,
               episode, update=True, bounded=False):
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode(environment.getCurrentState())
    # message("BEGINNING EPISODE: " + str(episode) + "\n")

    timestep = 0
    MAX_TIMESTEPS = 40
    while True:
        # print("timestep ", timestep)

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        # pause()
        # if timestep == 0 and episode == 1:
        # if not update:
        #     input("")

        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            # message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0:
                # reached terminal state but we are using an n-step agent
                agent.update(state, None, None, None, update)
                # keep going until the n-step agent says stop
                continue
            elif not update:
                # not an n-step agent, so terminate on goal state or time exceeded
                message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " +
                        str(returns) + "\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        # print(action)
        if action is None:
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # message("Started in state: " + str(state) +
        #         "\nTook action: " + str(action) +
        #         "\nEnded in state: " + str(nextState) +
        #         "\nGot reward: " + str(reward) + "\n")

        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)

        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1

    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()