def go(self):
    plotter = Plotter("outdir", "sim", clean=False)
    grid = Grid(plotter)

    # Draw the number of agents for each step from a Poisson(5) distribution.
    for num_of_agents in poisson.rvs(5, size=42):
        agents = [Agent.build() for _ in range(num_of_agents)]
        print("\nStep {0}\n".format(self.step))
        grid.push(agents)
        grid.tick()
        self.step += 1
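This method assumes several names defined elsewhere in the project. A hedged sketch of the module-level imports it would need (Plotter, Grid and Agent are project-specific classes, so only the SciPy import is a safe guess):

from scipy.stats import poisson  # poisson.rvs(5, size=42) draws the per-step agent counts
# Plotter, Grid and Agent come from the project's own modules (not shown in this snippet)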
Example #2
def data_func(net, device, train_queue):
    # Worker run in a sub-process: gathers experience from ENV_COUNT
    # environments and pushes it onto the shared train_queue.
    envs = [make_env() for _ in range(ENV_COUNT)]
    agent = Agent(net=net, device=device)
    exp_source = ExperienceSourceFirstLast(envs, agent, gamma=GAMMA, steps_count=STEPS_COUNT)
    
    print(f'{mp.current_process().name} Started')

    for exp in exp_source:
        new_rewards = exp_source.pop_rewards_steps()
        if new_rewards:
            train_queue.put(RewardSteps(reward=new_rewards))
        train_queue.put(exp)
def data_func(net, device, train_queue):
    envs = [make_env() for _ in range(ENV_COUNT)]
    agent = Agent(lambda x: net(x)[0], device=device, apply_softmax=True)
    exp_source = ExperienceSourceFirstLast(envs,
                                           agent,
                                           gamma=GAMMA,
                                           steps_count=STEPS_COUNT)

    print(f'{mp.current_process().name} Started')

    for exp in exp_source:
        new_rewards = exp_source.pop_total_rewards()
        if new_rewards:
            train_queue.put(TotalReward(reward=np.mean(new_rewards)))
        train_queue.put(exp)
def data_func(net, device, train_queue):
    envs = [make_env() for _ in range(NUM_ENVS)]
    agent = Agent(net, device=device)
    # TODO compare training with rgb to semantic and other variations
    exp_source = ExperienceSourceFirstLast(envs,
                                           agent,
                                           gamma=GAMMA,
                                           steps_count=STEPS_COUNT)

    print(f'{mp.current_process().name} Started')

    for exp in exp_source:
        new_rewards = exp_source.pop_total_rewards()
        # print('New rewards', new_rewards)
        if new_rewards:
            train_queue.put(TotalReward(reward=np.mean(new_rewards)))
            # print('Pop goes the weasel!')
        train_queue.put(exp)
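All three data_func variants above rely on a shared train_queue and on TotalReward / RewardSteps wrappers that these snippets never define. Below is a minimal, self-contained sketch of that producer/consumer pattern under assumed definitions (the namedtuples, the toy worker and the queue size are illustrative, not code from the original repos): a worker pushes experience items and episode-reward markers onto a torch.multiprocessing queue, and the parent process drains it.

import collections

import torch.multiprocessing as mp

# Assumed shapes of the wrappers used by data_func; the originals are not shown.
TotalReward = collections.namedtuple('TotalReward', field_names='reward')
RewardSteps = collections.namedtuple('RewardSteps', field_names='reward')


def toy_data_func(train_queue):
    # Stand-in for data_func: a real worker would iterate an ExperienceSource.
    for step in range(10):
        if step % 5 == 4:  # pretend an episode just finished
            train_queue.put(TotalReward(reward=float(step)))
        train_queue.put({'step': step})  # an "experience" item
    train_queue.put(None)  # end-of-stream marker


if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)
    queue = mp.Queue(maxsize=4)
    proc = mp.Process(target=toy_data_func, args=(queue,))
    proc.start()
    while True:
        item = queue.get()
        if item is None:
            break
        if isinstance(item, TotalReward):
            print('episode reward:', item.reward)
        else:
            pass  # a real training loop would append item to a training batch here
    proc.join()

In the real setup, several such workers would be started (one per process) and the parent would accumulate experience items into batches between optimisation steps.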
def main():
    """
        After training is done we test and watch our model perform
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(net, device=device)
    print('Model loaded from:', LOAD_MODEL)

    # Force on-screen rendering through the registered env spec (relies on the
    # private _kwargs attribute of older gym versions).
    spec = gym.envs.registry.spec(ENV_NAME)
    spec._kwargs['render'] = True

    env = make_env()
    state = [env.reset()]

    rewards_history, current_rewards = [], []
    episode_counter = 0

    try:
        while True:
            env.render()
            time.sleep(1 / FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is None, sample a random action
            next_state, reward, done, _ = env.step(action[0])
            state = [next_state]
            current_rewards.append(reward)
            if done:
                episode_counter += 1
                print('Episode', episode_counter, 'Done.')
                rewards_history.append(current_rewards)
                current_rewards.clear()
                print('Starting Next Episode...')
                state = [env.reset()]

    except KeyboardInterrupt:
        print('Stopped By The User')
        print('Exiting...')
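Every snippet on this page calls make_env(), but its definition never appears. A hedged guess at what it might look like for these Atari-style setups, assuming the ptan library whose ExperienceSourceFirstLast class is used above (the actual definition may differ):

import gym
import ptan

ENV_NAME = 'PongNoFrameskip-v4'  # assumed; matches the DQN setup later on this page


def make_env():
    # Standard DQN preprocessing: frame skip, grayscale, resizing, frame stacking.
    env = gym.make(ENV_NAME)
    return ptan.common.wrappers.wrap_dqn(env)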
Example #6
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(lambda x: net(x)[0], device=device, apply_softmax=True)
    print('Model loaded from:', LOAD_MODEL)
    if FRAME_RATE > 0:
        print(f'Playing at {FRAME_RATE} FPS')
    else:
        print('Playing with no framerate limitations')

    env = make_env()
    state = [env.reset()]

    rewards_history, current_rewards = [], []
    game_counter = 0

    try:
        while True:
            env.render()
            if FRAME_RATE > 0:
                time.sleep(1 / FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is None, sample a random action
            next_state, reward, done, _ = env.step(action)
            state = [next_state]
            current_rewards.append(reward)
            if done:
                game_counter += 1
                print('Game', game_counter, 'Done.')
                rewards_history.append(current_rewards)
                current_rewards.clear()
                print('Starting Next Game...')
                state = [env.reset()]

    except KeyboardInterrupt:
        print('Stopped By The User')
        print('Exiting...')
Example #7
def main():
    """
        After training is done we test and watch our model perform
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(LOAD_MODEL)
    net = net.to(device)
    net.eval()  # set model into evaluation mode
    agent = Agent(net, device=device)
    print('Model loaded from:', LOAD_MODEL)

    env = make_env()
    state = [env.reset()]

    rewards_history, current_rewards = [], []
    episode_counter = 0

    try:
        while True:
            time.sleep(1/FRAME_RATE)
            if state is not None:
                action = agent(state)
            else:
                action = env.action_space.sample()  # if obs is none, sample random action
            next_state, reward, done, _ = env.step(action[0])
            state = [next_state]
            current_rewards.append(reward)
            if done:
                episode_counter += 1
                rewards_history.append(np.mean(current_rewards))
                print('Episode', episode_counter, 'Done, mean reward', np.mean(rewards_history[-100:]))
                current_rewards.clear()
                print('Starting Next Episode...')
                state = [env.reset()]

    except KeyboardInterrupt:
        env.close()
        print('Stopped By The User')
        print('Exiting...')

        # Reconnect to the CARLA server and reset the world before exiting.
        client = carla.Client('localhost', 2000)
        client.reload_world()
    def test_would_to_walking_with_zero_twait(self):
        """Walking/waiting switching behaviour of the agent when _twait is zero."""
        fakeAgent = Agent()
        
        #t(0)
        self.assertEqual(fakeAgent.switching_walking, True)
        self.assertEqual(fakeAgent.switching_waiting, False)
        fakeAgent.pstop = 1
        self.assertEqual(fakeAgent.would_to_walking(), True)
        
        #t(1)
        self.assertEqual(fakeAgent.switching_walking, False)
        self.assertEqual(fakeAgent.switching_waiting, False)
        fakeAgent.pstop = -1
        fakeAgent._twait = 0
        self.assertEqual(fakeAgent.would_to_walking(), False)
        
        #t(2) sleep = 0
        self.assertEqual(fakeAgent.switching_walking, True)
        self.assertEqual(fakeAgent.switching_waiting, True)
        fakeAgent.pstop = 1
        self.assertEqual(fakeAgent.would_to_walking(), True)
                
        #t(3) sleep = 0
        self.assertEqual(fakeAgent.switching_walking, False)
        self.assertEqual(fakeAgent.switching_waiting, False)

        self.assertEqual(fakeAgent.would_to_walking(), True)
          
        #t(4)
        self.assertEqual(fakeAgent.switching_walking, False)
        self.assertEqual(fakeAgent.switching_waiting, False)
        fakeAgent.pstop = -1
        self.assertEqual(fakeAgent.would_to_walking(), False)
           
        #t(5) 
        self.assertEqual(fakeAgent.switching_walking, True)
        self.assertEqual(fakeAgent.switching_waiting, True)
        
        self.assertEqual(fakeAgent.would_to_walking(), False)
        
        #t(6) 
        self.assertEqual(fakeAgent.switching_walking, True)
        self.assertEqual(fakeAgent.switching_waiting, True)
        fakeAgent.pstop = 1
        self.assertEqual(fakeAgent.would_to_walking(), True)
        
        self.assertEqual(fakeAgent.switching_walking, False)
        self.assertEqual(fakeAgent.switching_waiting, False)
BATCH_SIZE = 32
TARGET_UPDATE_FREQ = 1000
DELAY_LEARNING = 50000
GAMMA = 0.99

model = 'DDQN'

# Environment and neural networks
env = make_env("PongNoFrameskip-v4")
env_test = gym.make("PongNoFrameskip-v4")
net = DQN(env.observation_space.shape, env.action_space.n, learning_rate).to(device)
target_net = DQN(env.observation_space.shape, env.action_space.n, learning_rate).to(device)

# Agent and memory handling
memory = Memory(REPLAY_SIZE)
agent = Agent(env, memory)

initial_observation = env.reset()



if 'cuda' in str(device):
    print('The GPU is being used')
else:
    print('The CPU is being used')

if option_dict['random']:
    play_random(env, UP_ACTION, DOWN_ACTION, seconds=5)

if option_dict['train']:
    print("Training")
Example #10
from lib.Arena import Arena
from lib.Agent import Agent
from random import randint

arena = Arena(5)

hero = Agent(arena)

arena.printEverything()

points = hero.play()