def go(self):
    plotter = Plotter("outdir", "sim", clean=False)
    grid = Grid(plotter)
    # Number of new agents per step is drawn from a Poisson distribution (mean 5, 42 steps)
    for num_of_agents in poisson.rvs(5, size=42):
        agents = [Agent.build() for _ in range(num_of_agents)]
        # print(agents)
        print("\nStep {0}\n".format(self.step))
        grid.push(agents)
        grid.tick()
        self.step += 1
def data_func(net, device, train_queue):
    """Function to run in sub-processes: collect experience and feed it into the training queue."""
    envs = [make_env() for _ in range(ENV_COUNT)]
    agent = Agent(net=net, device=device)
    exp_source = ExperienceSourceFirstLast(envs, agent, gamma=GAMMA, steps_count=STEPS_COUNT)
    print(f'{mp.current_process().name} Started')
    for exp in exp_source:
        new_rewards = exp_source.pop_rewards_steps()
        if new_rewards:
            train_queue.put(RewardSteps(reward=new_rewards))
        train_queue.put(exp)
def data_func(net, device, train_queue):
    envs = [make_env() for _ in range(ENV_COUNT)]
    # The network returns (policy, value); the agent uses only the policy head,
    # with softmax applied to turn the logits into action probabilities.
    agent = Agent(lambda x: net(x)[0], device=device, apply_softmax=True)
    exp_source = ExperienceSourceFirstLast(envs, agent, gamma=GAMMA, steps_count=STEPS_COUNT)
    print(f'{mp.current_process().name} Started')
    for exp in exp_source:
        new_rewards = exp_source.pop_total_rewards()
        if new_rewards:
            train_queue.put(TotalReward(reward=np.mean(new_rewards)))
        train_queue.put(exp)
def data_func(net, device, train_queue):
    envs = [make_env() for _ in range(NUM_ENVS)]
    agent = Agent(net, device=device)
    # TODO compare training with rgb to semantic and other variations
    exp_source = ExperienceSourceFirstLast(envs, agent, gamma=GAMMA, steps_count=STEPS_COUNT)
    print(f'{mp.current_process().name} Started')
    for exp in exp_source:
        new_rewards = exp_source.pop_total_rewards()
        # print('New rewards', new_rewards)
        if new_rewards:
            train_queue.put(TotalReward(reward=np.mean(new_rewards)))
            # print('Pop goes the weasel!')
        train_queue.put(exp)
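# ---------------------------------------------------------------------------
# Minimal sketch (not taken from the snippets above) of how a parent process
# might launch these data_func workers and drain their queue. Assumptions:
# torch.multiprocessing is used, TotalReward is a simple namedtuple marker, and
# PROCESSES_COUNT is an illustrative value; net, device and data_func are the
# names from the surrounding code.
# ---------------------------------------------------------------------------
import collections
import torch.multiprocessing as mp

TotalReward = collections.namedtuple('TotalReward', field_names='reward')
PROCESSES_COUNT = 4  # assumed number of data-gathering workers

if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)  # safer start method when CUDA is involved
    train_queue = mp.Queue(maxsize=PROCESSES_COUNT)
    workers = []
    for _ in range(PROCESSES_COUNT):
        proc = mp.Process(target=data_func, args=(net, device, train_queue))
        proc.start()
        workers.append(proc)

    batch = []
    try:
        while True:
            item = train_queue.get()
            if isinstance(item, TotalReward):
                print('mean episode reward:', item.reward)  # monitoring only, not used for training
                continue
            batch.append(item)  # experience transitions accumulate into the training batch
    finally:
        for proc in workers:
            proc.terminate()
            proc.join()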
def main():
    """After training is done, test and watch the model perform."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(net, device=device)
    print('Model loaded from:', LOAD_MODEL)
    spec = gym.envs.registry.spec(ENV_NAME)
    spec._kwargs['render'] = True
    env = make_env()
    state = [env.reset()]
    rewards_history, current_rewards = [], []
    episode_counter = 0
    try:
        while True:
            env.render()
            time.sleep(1 / FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is None, sample a random action
            next_state, reward, done, _ = env.step(action[0])
            state = [next_state]
            current_rewards.append(reward)
            if done:
                episode_counter += 1
                print('Episode', episode_counter, 'Done.')  # 'Mean Reward:', np.mean(current_rewards))
                rewards_history.append(list(current_rewards))  # copy before clearing so the history is kept
                current_rewards.clear()
                print('Starting Next Episode...')
                state = [env.reset()]
    except KeyboardInterrupt:
        print('Stopped By The User')
        print('Exiting...')
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(lambda x: net(x)[0], device=device, apply_softmax=True)
    print('Model loaded from:', LOAD_MODEL)
    if FRAME_RATE > 0:
        print(f'Playing at {FRAME_RATE} FPS')
    else:
        print('Playing with no framerate limitations')
    env = make_env()
    state = [env.reset()]
    rewards_history, current_rewards = [], []
    game_counter = 0
    try:
        while True:
            env.render()
            if FRAME_RATE > 0:
                time.sleep(1 / FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is None, sample a random action
            next_state, reward, done, _ = env.step(action)
            state = [next_state]
            current_rewards.append(reward)
            if done:
                game_counter += 1
                print('Game', game_counter, 'Done.')  # 'Mean Reward:', np.mean(current_rewards))
                rewards_history.append(list(current_rewards))  # copy before clearing so the history is kept
                current_rewards.clear()
                print('Starting Next Game...')
                state = [env.reset()]
    except KeyboardInterrupt:
        print('Stopped By The User')
        print('Exiting...')
def main():
    """After training is done, test and watch the model perform."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(net, device=device)
    print('Model loaded from:', LOAD_MODEL)
    env = make_env()
    state = [env.reset()]
    rewards_history, current_rewards = [], []
    episode_counter = 0
    try:
        while True:
            time.sleep(1 / FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is None, sample a random action
            next_state, reward, done, _ = env.step(action[0])
            state = [next_state]
            current_rewards.append(reward)
            if done:
                episode_counter += 1
                rewards_history.append(np.mean(current_rewards))
                print('Episode', episode_counter, 'Done, mean reward', np.mean(rewards_history[-100:]))
                current_rewards.clear()
                print('Starting Next Episode...')
                state = [env.reset()]
    except KeyboardInterrupt:
        env.close()
        print('Stopped By The User')
        print('Exiting...')
        # Reconnect to the CARLA server and reload the world so it is left in a clean state
        client = carla.Client('localhost', 2000)
        client.reload_world()
def test_would_to_walking_with_zero_twait(self):
    fakeAgent = Agent()
    # t(0)
    self.assertEqual(fakeAgent.switching_walking, True)
    self.assertEqual(fakeAgent.switching_waiting, False)
    fakeAgent.pstop = 1
    self.assertEqual(fakeAgent.would_to_walking(), True)
    # t(1)
    self.assertEqual(fakeAgent.switching_walking, False)
    self.assertEqual(fakeAgent.switching_waiting, False)
    fakeAgent.pstop = -1
    fakeAgent._twait = 0
    self.assertEqual(fakeAgent.would_to_walking(), False)
    # t(2)
    sleep = 0
    self.assertEqual(fakeAgent.switching_walking, True)
    self.assertEqual(fakeAgent.switching_waiting, True)
    fakeAgent.pstop = 1
    self.assertEqual(fakeAgent.would_to_walking(), True)
    # t(3)
    sleep = 0
    self.assertEqual(fakeAgent.switching_walking, False)
    self.assertEqual(fakeAgent.switching_waiting, False)
    self.assertEqual(fakeAgent.would_to_walking(), True)
    # t(4)
    self.assertEqual(fakeAgent.switching_walking, False)
    self.assertEqual(fakeAgent.switching_waiting, False)
    fakeAgent.pstop = -1
    self.assertEqual(fakeAgent.would_to_walking(), False)
    # t(5)
    self.assertEqual(fakeAgent.switching_walking, True)
    self.assertEqual(fakeAgent.switching_waiting, True)
    self.assertEqual(fakeAgent.would_to_walking(), False)
    # t(6)
    self.assertEqual(fakeAgent.switching_walking, True)
    self.assertEqual(fakeAgent.switching_waiting, True)
    fakeAgent.pstop = 1
    self.assertEqual(fakeAgent.would_to_walking(), True)
    self.assertEqual(fakeAgent.switching_walking, False)
    self.assertEqual(fakeAgent.switching_waiting, False)
BATCH_SIZE = 32
TARGET_UPDATE_FREQ = 1000
DELAY_LEARNING = 50000
GAMMA = 0.99
model = 'DDQN'

# Environment and neural networks
env = make_env("PongNoFrameskip-v4")
env_test = gym.make("PongNoFrameskip-v4")
net = DQN(env.observation_space.shape, env.action_space.n, learning_rate).to(device)
target_net = DQN(env.observation_space.shape, env.action_space.n, learning_rate).to(device)

# Agent and memory handling
memory = Memory(REPLAY_SIZE)
agent = Agent(env, memory)
initial_observation = env.reset()

if 'cuda' in str(device):
    print('The GPU is being used')
else:
    print('The CPU is being used')

if option_dict['random']:
    play_random(env, UP_ACTION, DOWN_ACTION, seconds=5)

if option_dict['train']:
    print("Training")
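# ---------------------------------------------------------------------------
# Hedged sketch (not the original training loop) of the double-DQN update that
# the settings above imply. Only GAMMA, BATCH_SIZE, TARGET_UPDATE_FREQ and the
# net/target_net pair come from the snippet above; the Memory.sample() return
# format and the separately supplied optimizer are assumptions, labeled below.
# ---------------------------------------------------------------------------
import torch
import torch.nn.functional as F

def ddqn_update(step, optimizer):
    # Assumption: memory.sample(BATCH_SIZE) returns numpy arrays
    # (states, actions, rewards, dones, next_states).
    states, actions, rewards, dones, next_states = memory.sample(BATCH_SIZE)
    states_v = torch.as_tensor(states, dtype=torch.float32, device=device)
    next_states_v = torch.as_tensor(next_states, dtype=torch.float32, device=device)
    actions_v = torch.as_tensor(actions, dtype=torch.int64, device=device)
    rewards_v = torch.as_tensor(rewards, dtype=torch.float32, device=device)
    done_mask = torch.as_tensor(dones, dtype=torch.bool, device=device)

    # Q(s, a) for the actions actually taken
    q_values = net(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        # Double DQN: the online net picks the next action, the target net evaluates it
        next_actions = net(next_states_v).argmax(dim=1)
        next_q = target_net(next_states_v).gather(1, next_actions.unsqueeze(-1)).squeeze(-1)
        next_q[done_mask] = 0.0
        target_q = rewards_v + GAMMA * next_q

    loss = F.mse_loss(q_values, target_q)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Periodic hard sync of the target network
    if step % TARGET_UPDATE_FREQ == 0:
        target_net.load_state_dict(net.state_dict())
    return loss.item()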
from lib.Arena import Arena
from lib.Agent import Agent
from random import randint

arena = Arena(5)
hero = Agent(arena)
arena.printEverything()
points = hero.play()