def test_agent(logfile, display_on=True):
    """Train an agent on a randomly seeded maze for 10 minutes, then evaluate it.

    The agent trains until the 600-second budget expires, after which its
    greedy policy is rolled out for up to 100 steps. A one-line result is
    printed and appended to ``logfile``.

    Args:
        logfile: Path of a text file to append the result line to.
        display_on: If True (default, matching the original behavior), render
            the environment after every training step.

    Returns:
        bool: True if the greedy rollout got within 0.03 of the goal.
    """
    # Create a random seed, which will define the environment
    random_seed = int(time.time())
    np.random.seed(random_seed)
    # Create a random environment
    environment = Environment(magnification=500)
    # Create an agent
    agent = Agent()
    # Get the initial state
    state = environment.init_state
    # Determine the time at which training will stop, i.e. in 10 minutes (600 seconds)
    start_time = time.time()
    end_time = start_time + 600
    # Train the agent, until the time is up
    while time.time() < end_time:
        # If the agent has finished an episode, reset to the initial state
        if agent.has_finished_episode():
            state = environment.init_state
        # Get the next action from the agent for the current state
        action = agent.get_next_action(state)
        # Get the next state and the distance to the goal
        next_state, distance_to_goal = environment.step(state, action)
        # Return this transition outcome to the agent
        agent.set_next_state_and_distance(next_state, distance_to_goal)
        # Advance to the new state
        state = next_state
        # Optionally, show the environment
        if display_on:
            environment.show(state)
    # Test the agent for 100 steps, using its greedy policy
    state = environment.init_state
    has_reached_goal = False
    for step_num in range(100):
        action = agent.get_greedy_action(state)
        next_state, distance_to_goal = environment.step(state, action)
        # The agent must achieve a maximum distance of 0.03 for us to
        # consider it "reaching the goal"
        if distance_to_goal < 0.03:
            has_reached_goal = True
            break
        state = next_state
    # Print out the result and append it to the log file
    resultstr = (f"Reached goal in {step_num} steps."
                 if has_reached_goal
                 else f"Did not reach goal. Final distance = {distance_to_goal}")
    print(f"Result for maze {random_seed}: {resultstr}")
    with open(logfile, "a") as f:
        f.write(f"Result for maze {random_seed}: {resultstr}\n")
    return has_reached_goal
# Main entry point
if __name__ == "__main__":
    # This determines whether the environment will be displayed on each step.
    # When we train your code for the 10 minute period, we will not display the environment.
    display_on = False
    # Create a random seed, which will define the environment
    random_seed = int(time.time())
    print("random seed:", random_seed)
    #np.random.seed(1606588311) #difficult maze
    np.random.seed(1606694413)
    # Create a random environment
    environment = Environment(magnification=500)
    # Create an agent
    agent = Agent()
    # Get the initial state
    state = environment.init_state
    # Determine the time at which training will stop, i.e. in 10 minutes (600 seconds)
    start_time = time.time()
    end_time = start_time + 600
    # Train the agent, until the time is up
    while time.time() < end_time:
        # If the agent has finished an episode, reset to the initial state
        if agent.has_finished_episode():
            state = environment.init_state
        # NOTE(review): the original source was truncated at this point; the
        # remainder of this loop and the evaluation below are reconstructed
        # from the identical training/evaluation code elsewhere in this file.
        # Get the next action from the agent for the current state
        action = agent.get_next_action(state)
        # Get the next state and the distance to the goal
        next_state, distance_to_goal = environment.step(state, action)
        # Return this transition outcome to the agent
        agent.set_next_state_and_distance(next_state, distance_to_goal)
        # Advance to the new state
        state = next_state
        # Optionally, show the environment
        if display_on:
            environment.show(state)
    # Test the agent for 100 steps, using its greedy policy
    state = environment.init_state
    has_reached_goal = False
    for step_num in range(100):
        action = agent.get_greedy_action(state)
        next_state, distance_to_goal = environment.step(state, action)
        # The agent must achieve a maximum distance of 0.03 for us to
        # consider it "reaching the goal"
        if distance_to_goal < 0.03:
            has_reached_goal = True
            break
        state = next_state
    # Print out the result
    if has_reached_goal:
        print('Reached goal in ' + str(step_num) + ' steps.')
    else:
        print('Did not reach goal. Final distance = ' + str(distance_to_goal))
# Main entry point
if __name__ == "__main__":
    # This determines whether the environment will be displayed on each step.
    # When we train your code for the 10 minute period, we will not display the environment.
    display_on = True
    # Create a random seed, which will define the environment
    random_seed = int(time.time())
    print(random_seed)
    np.random.seed( 1606726164 ) #(1606721886) #(random_seed) #(1606719832) #(1606670816) #(1606436114) #(1606612092)
    # Create a random environment
    environment = Environment(magnification=500)
    # Create an agent
    agent = Agent()
    # Get the initial state
    state = environment.init_state
    # Determine the time at which training will stop, i.e. in 10 minutes (600 seconds)
    start_time = time.time()
    end_time = start_time + 600
    # Train the agent, until the time is up
    while time.time() < end_time:
        # If the agent has finished an episode, reset to the initial state
        if agent.has_finished_episode():
            state = environment.init_state
        # NOTE(review): the original source was truncated at this point; the
        # remainder of this loop and the evaluation below are reconstructed
        # from the identical training/evaluation code elsewhere in this file.
        # Get the next action from the agent for the current state
        action = agent.get_next_action(state)
        # Get the next state and the distance to the goal
        next_state, distance_to_goal = environment.step(state, action)
        # Return this transition outcome to the agent
        agent.set_next_state_and_distance(next_state, distance_to_goal)
        # Advance to the new state
        state = next_state
        # Optionally, show the environment
        if display_on:
            environment.show(state)
    # Test the agent for 100 steps, using its greedy policy
    state = environment.init_state
    has_reached_goal = False
    for step_num in range(100):
        action = agent.get_greedy_action(state)
        next_state, distance_to_goal = environment.step(state, action)
        # The agent must achieve a maximum distance of 0.03 for us to
        # consider it "reaching the goal"
        if distance_to_goal < 0.03:
            has_reached_goal = True
            break
        state = next_state
    # Print out the result
    if has_reached_goal:
        print('Reached goal in ' + str(step_num) + ' steps.')
    else:
        print('Did not reach goal. Final distance = ' + str(distance_to_goal))
def train():
    """wandb sweep entry point: train an agent for 10 minutes, then evaluate it.

    Initializes a wandb run with default hyperparameters (which a sweep may
    override), trains the agent until the 600-second budget expires while
    logging the per-step distance to the goal, then rolls out the greedy
    policy for up to 100 steps and logs whether the goal was reached.
    """
    config_defaults = {
        'batch_size': 400,
        'learning_rate': 0.01,
        'gamma': 0.95,
        'epsilon_decay': 0.993,
        'episode_length': 1000,
        'network_update': 50
    }
    # Initialize a new wandb run
    wandb.init(config=config_defaults)
    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    # This determines whether the environment will be displayed on each step.
    # When we train your code for the 10 minute period, we will not display the environment.
    display_on = False
    # Create a random seed, which will define the environment
    random_seed = int(time.time())
    np.random.seed(random_seed)
    # Create a random environment
    environment = Environment(magnification=500)
    # Create an agent using the effective (possibly sweep-overridden) config.
    # BUG FIX: the original passed `config_defaults`, which silently ignored
    # any hyperparameter overrides supplied by a wandb sweep.
    agent = Agent(config)
    # Get the initial state
    state = environment.init_state
    # Determine the time at which training will stop, i.e. in 10 minutes (600 seconds)
    start_time = time.time()
    end_time = start_time + 600
    # Train the agent, until the time is up
    while time.time() < end_time:
        # If the agent has finished an episode, reset to the initial state
        if agent.has_finished_episode():
            state = environment.init_state
        # Get the next action from the agent for the current state
        action = agent.get_next_action(state)
        # Get the next state and the distance to the goal
        next_state, distance_to_goal = environment.step(state, action)
        wandb.log({"loss": distance_to_goal})
        # Return this transition outcome to the agent
        agent.set_next_state_and_distance(next_state, distance_to_goal)
        # Advance to the new state
        state = next_state
        # Optionally, show the environment
        if display_on:
            environment.show(state)
        if agent.has_finished_episode():
            wandb.log({"episode_end_distance": distance_to_goal})
    # Test the agent for 100 steps, using its greedy policy
    state = environment.init_state
    has_reached_goal = False
    for step_num in range(100):
        action = agent.get_greedy_action(state)
        next_state, distance_to_goal = environment.step(state, action)
        # The agent must achieve a maximum distance of 0.03 for us to
        # consider it "reaching the goal"
        if distance_to_goal < 0.03:
            has_reached_goal = True
            # NOTE(review): goal_reached is only logged on success, matching
            # the original behavior — failed runs never record this metric.
            wandb.log({"goal_reached": has_reached_goal})
            break
        state = next_state
    # Print out the result
    if has_reached_goal:
        print('Reached goal in ' + str(step_num) + ' steps.')
    else:
        print('Did not reach goal. Final distance = ' + str(distance_to_goal))
    wandb.log({"distance_to_goal": distance_to_goal})