Example 1
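A test/evaluation routine from a maze-navigation DQN project: it rolls out a batch of environments with a DQNAgent, gives any environment that got stuck in a loop a second, more exploratory run, and stores sufficiently long replays in the experience memory. environments, model, params, memory, BOOTSTRAPPED_STEPS and Utils are module-level names from the surrounding file.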
def testModel(EXPLORE_RATE):
  for e in environments: e.reset()
  replays = [replay for replay, _ in Utils.emulateBatch(
      environments,
      DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
      maxSteps=params.get('max test steps')
    )
  ]

  ################
  # re-run environments that got stuck in a loop, with extra exploration
  envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
  if envsIndexes:
    envs = [environments[i] for i in envsIndexes]
    for e in envs: e.Continue()
    exploreReplays = Utils.emulateBatch(
      envs,
      DQNAgent(
        model,
        exploreRate=params.get('explore rate after loop', 1),
        noise=params.get('agent noise after loop', 0)
      ),
      maxSteps=params.get('max steps after loop', 16)
    )
    # append the continuation to each replay (skipping its first state)
    for ind, (replay, _) in zip(envsIndexes, exploreReplays):
      replays[ind] += replay[1:]
  ################
  # store only episodes longer than the bootstrapping horizon
  for replay in replays:
    if BOOTSTRAPPED_STEPS < len(replay):
      memory.addEpisode(replay, terminated=True)
  return [x.score for x in environments]
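As a usage illustration only, here is a minimal sketch of a caller driving testModel with a decaying exploration rate; the epoch count and the decay schedule are invented for the example and are not from the project:

# hypothetical driver; the schedule below is an assumption, not the project's
exploreRate = 0.5
for epoch in range(100):
    scores = testModel(exploreRate)
    print('Epoch {}: mean score {:.2f}'.format(epoch, sum(scores) / len(scores)))
    exploreRate = max(0.01, exploreRate * 0.95)  # decay toward a small floor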
Example 2
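A method that loads every .h5 weights file under weights/, wraps each model in a DQNAgent, and puts a DQNEnsembleAgent built from the agent-* models at the front of the agents list.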
    def _createNewAgent(self):
        self._agents = []
        models = []
        for i, x in enumerate(glob.iglob('weights/*.h5')):
            filename = os.path.abspath(x)
            model = createModel(shape=self._maze.input_size)
            model.load_weights(filename)
            name = os.path.basename(filename)
            # only 'agent-*' weights take part in the ensemble
            if name.startswith('agent-'):
                models.append(model)
            agent = DQNAgent(model)
            # the display name is the filename without the '.h5' extension
            self._agents.append(RLAgent(name[:-3], agent, None, None))

        self._agents.insert(
            0, RLAgent('ensemble', DQNEnsembleAgent(models), None, None))

        self._assignMaze2Agents()
        self._activeAgent = 0
        self._paused = True
        return
Example 3
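An evaluation script for the ICRA battlefield simulation: a DQNAgent with weights loaded from attack.model plays 50 episodes of up to seven simulated minutes against a scripted HandAgent.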
import random

import torch
import numpy as np
from itertools import count

from ICRAField import ICRAField
from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent

TARGET_UPDATE = 10

seed = 14
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

env = ICRAField()
agent = DQNAgent()
agent2 = HandAgent()
agent.load('attack.model')
device = agent.device
episode_durations = []

num_episodes = 50
for i_episode in range(num_episodes):
    print("Epoch: {}".format(i_episode))
    # Initialize the environment and state
    action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    env.reset()
    agent2.reset()
    state, reward, done, info = env.step(action)
    for t in range(7 * 60 * 30):
        if t % (60 * 30) == 0:
            print("Simulation in minute: [{}:00/7:00]".format(t // (60 * 30)))
Example 4
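A training-script variant that preprocesses observations (agent.perprocess_state) and delegates low-level motion to NaiveMove, running 2000 episodes that each simulate the first two minutes of a match.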
import random

import torch
import numpy as np

from ICRAField import ICRAField
from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent
from SupportAlgorithm.NaiveMove import NaiveMove

move = NaiveMove()

TARGET_UPDATE = 10

seed = 233
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

env = ICRAField()
agent = DQNAgent()
#agent.load()
agent2 = HandAgent()
episode_durations = []

num_episodes = 2000
for i_episode in range(num_episodes):
    print("Epoch: [{}/{}]".format(i_episode, num_episodes))
    # Initialize the environment and state
    action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    pos = env.reset()
    agent2.reset(pos)
    state, reward, done, info = env.step(action)
    state_obs = agent.perprocess_state(state)
    for t in range(2 * 60 * 30):
        if t % (60 * 30) == 0:
            print("Simulation in minute: [{}:00/7:00]".format(t // (60 * 30)))
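Example 5

An offline training loop: the agent reloads a previously saved replay memory and calls optimize_model() for 2000 epochs, syncing the target network and checkpointing the weights every TARGET_UPDATE epochs; no simulator runs in the loop.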
import random

import torch
import numpy as np
from collections import namedtuple
from itertools import count

from ICRAField import ICRAField
from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent
from SupportAlgorithm.NaiveMove import NaiveMove

move = NaiveMove()

TARGET_UPDATE = 10

seed = 233
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

agent = DQNAgent()
#agent.load()
agent.load_memory()

for epoch in range(2000):
    print("Epoch: [{}/{}]".format(epoch, 2000))
    agent.optimize_model()
    if epoch % TARGET_UPDATE == 0:
        agent.update_target_net()
        agent.save()
Example 6
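The evaluation entry point of the maze project: it creates 100 MazeRLWrapper environments, scores every saved model and an ensemble of the agent-* models via testAgent, and renders one chart per metric with plot2file.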
if __name__ == "__main__":
    MAZE_PARAMS = {
        'size': 64,
        'FOV': MAZE_FOV,
        'minimapSize': MAZE_MINIMAP_SIZE,
        'loop limit': 1000,
    }
    environments = [MazeRLWrapper(MAZE_PARAMS) for _ in range(100)]
    MODEL_INPUT_SHAPE = environments[0].input_size

    metrics = {'Worst scores (top 90%)': {}, 'Best scores (top 10%)': {}}
    agents = []
    for i, x in enumerate(glob.iglob('weights/*.h5')):
        filename = os.path.abspath(x)
        model = createModel(shape=MODEL_INPUT_SHAPE)
        model.load_weights(filename)
        # only 'agent-*' weights take part in the ensemble
        if os.path.basename(filename).startswith('agent-'):
            agents.append(model)

        # every model is also evaluated individually
        testAgent(environments,
                  DQNAgent(model),
                  name=os.path.basename(filename)[:-3],
                  metrics=metrics)

    testAgent(environments,
              DQNEnsembleAgent(agents),
              name='ensemble',
              metrics=metrics)

    for i, name in enumerate(metrics.keys()):
        plot2file(metrics, 'chart-%d.jpg' % i, name)
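testAgent and plot2file are defined elsewhere in the project. Judging only from the call sites and the metric names above, a helper of this shape might look roughly like the following sketch; the rollout step and the 90%/10% split are assumptions:

import numpy as np

def testAgent(environments, agent, name, metrics):
    # hypothetical reconstruction from the call sites; the real helper may differ
    for env in environments:
        env.reset()
    # ... roll out the agent in every environment here, e.g. with an
    # emulateBatch-style runner as in Example 1 ...
    scores = np.sort([env.score for env in environments])
    n = len(scores)
    metrics['Worst scores (top 90%)'][name] = scores[:int(n * 0.9)].mean()
    metrics['Best scores (top 10%)'][name] = scores[int(n * 0.9):].mean()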
Example 7
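A training script for the ICRA simulation without observation preprocessing: a fresh DQNAgent (weight loading is commented out) trains for 2000 episodes, each covering the first two minutes of a match, alongside a HandAgent.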
import random

import torch
import numpy as np
from itertools import count

from ICRAField import ICRAField
from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent

TARGET_UPDATE = 10

seed = 233
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

env = ICRAField()
agent = DQNAgent()
# agent.load()
agent2 = HandAgent()
episode_durations = []

num_episodes = 2000
for i_episode in range(num_episodes):
    print("Epoch: [{}/{}]".format(i_episode, num_episodes))
    # Initialize the environment and state
    action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    env.reset()
    state, reward, done, info = env.step(action)
    for t in range(2 * 60 * 30):
        if t % (60 * 30) == 0:
            print("Simulation in minute: [{}:00/7:00]".format(t // (60 * 30)))
        # Other agent
Example 8
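The evaluation counterpart of Example 4: trained weights are loaded, observations are preprocessed, and 50 episodes of up to seven simulated minutes are played against HandAgent.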
import random

import torch
import numpy as np

from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent
from ICRAField import ICRAField
from SupportAlgorithm.NaiveMove import NaiveMove

move = NaiveMove()

TARGET_UPDATE = 10

seed = 14
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

env = ICRAField()
agent = DQNAgent()
agent2 = HandAgent()
agent.load()
device = agent.device
episode_durations = []

num_episodes = 50
for i_episode in range(num_episodes):
    print("Epoch: {}".format(i_episode))
    # Initialize the environment and state
    action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    pos = env.reset()
    agent2.reset(pos)
    state, reward, done, info = env.step(action)
    state_obs = agent.perprocess_state(state)
    for t in range(7 * 60 * 30):
        if t % (60 * 30) == 0:
            print("Simulation in minute: [{}:00/7:00]".format(t // (60 * 30)))