def testModel(EXPLORE_RATE):
    """Play every environment once and store the resulting replays.

    All entries of the module-level ``environments`` list are reset and then
    emulated as one batch by a ``DQNAgent`` built around ``model``.
    Environments whose ``hitTheLoop`` flag is set afterwards are continued
    with a second agent configured from the "after loop" entries of
    ``params``; that extra replay, minus its first element, is appended to
    the replay of the corresponding environment. Replays longer than
    ``BOOTSTRAPPED_STEPS`` are added to ``memory`` as terminated episodes.

    Args:
        EXPLORE_RATE: Exploration rate handed to the main test agent.

    Returns:
        The ``score`` of every environment, in the order of ``environments``.
    """
    for env in environments:
        env.reset()

    mainAgent = DQNAgent(
        model,
        exploreRate=EXPLORE_RATE,
        noise=params.get('agent noise', 0),
    )
    batch = Utils.emulateBatch(
        environments,
        mainAgent,
        maxSteps=params.get('max test steps'),
    )
    # emulateBatch yields pairs; only the first item (the replay) is kept.
    replays = [episode for episode, _ in batch]

    # Environments that hit the loop get a second run with a dedicated agent.
    looped = [
        (idx, env) for idx, env in enumerate(environments) if env.hitTheLoop
    ]
    if looped:
        loopedEnvs = [env for _, env in looped]
        for env in loopedEnvs:
            env.Continue()

        exploreAgent = DQNAgent(
            model,
            exploreRate=params.get('explore rate after loop', 1),
            noise=params.get('agent noise after loop', 0),
        )
        extraBatch = Utils.emulateBatch(
            loopedEnvs,
            exploreAgent,
            maxSteps=params.get('max steps after loop', 16),
        )
        # Append the continuation to the original replay, skipping its first
        # element.
        for (idx, _), (extra, _) in zip(looped, extraBatch):
            replays[idx] += extra[1:]

    for episode in replays:
        if len(episode) > BOOTSTRAPPED_STEPS:
            memory.addEpisode(episode, terminated=True)

    return [env.score for env in environments]
def _createNewAgent(self):
    """Rebuild ``self._agents`` from the weight files in ``weights/``.

    One model is created and loaded per ``weights/*.h5`` file and wrapped in
    a ``DQNAgent``; the ``RLAgent`` entry is named after the file with its
    last three characters (the ``.h5`` suffix) removed. Models whose file
    name starts with ``agent-`` are additionally collected into a
    ``DQNEnsembleAgent`` that is inserted at index 0 under the name
    ``'ensemble'``. Finally the mazes are reassigned, the first agent is made
    active, and the view is put into the paused state.
    """
    ensembleModels = []
    loadedAgents = []
    for path in glob.iglob('weights/*.h5'):
        weightsFile = os.path.abspath(path)
        net = createModel(shape=self._maze.input_size)
        net.load_weights(weightsFile)

        baseName = os.path.basename(weightsFile)
        # NOTE(review): only the ensemble membership is treated as
        # conditional; every weight file still gets its own agent — confirm
        # against the original indentation.
        if baseName.startswith('agent-'):
            ensembleModels.append(net)

        loadedAgents.append(RLAgent(baseName[:-3], DQNAgent(net), None, None))

    ensemble = RLAgent('ensemble', DQNEnsembleAgent(ensembleModels), None, None)
    self._agents = [ensemble] + loadedAgents

    self._assignMaze2Agents()
    self._activeAgent = 0
    self._paused = True
from itertools import count from ICRAField import ICRAField from Agent.DQNAgent import DQNAgent from Agent.HandAgent import HandAgent TARGET_UPDATE = 10 seed = 14 torch.random.manual_seed(seed) torch.cuda.random.manual_seed(seed) np.random.seed(seed) random.seed(seed) env = ICRAField() agent = DQNAgent() agent2 = HandAgent() agent.load('attack.model') device = agent.device episode_durations = [] num_episodes = 50 for i_episode in range(num_episodes): print("Epoch: {}".format(i_episode)) # Initialize the environment and state action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] env.reset() agent2.reset() state, reward, done, info = env.step(action) for t in range(7 * 60 * 30): if t % (60 * 30) == 0:
from Agent.DQNAgent import DQNAgent from Agent.HandAgent import HandAgent from SupportAlgorithm.NaiveMove import NaiveMove move = NaiveMove() TARGET_UPDATE = 10 seed = 233 torch.random.manual_seed(seed) torch.cuda.random.manual_seed(seed) np.random.seed(seed) random.seed(seed) env = ICRAField() agent = DQNAgent() #agent.load() agent2 = HandAgent() episode_durations = [] num_episodes = 2000 for i_episode in range(num_episodes): print("Epoch: [{}/{}]".format(i_episode, num_episodes)) # Initialize the environment and state action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] pos = env.reset() agent2.reset(pos) state, reward, done, info = env.step(action) state_obs = agent.perprocess_state(state) for t in range(2*60*30): if t % (60*30) == 0:
"""Offline DQN training: optimise the agent from previously stored memory."""
import random
from collections import namedtuple
from itertools import count

import numpy as np
import torch

from ICRAField import ICRAField
from Agent.DQNAgent import DQNAgent
from Agent.HandAgent import HandAgent
from SupportAlgorithm.NaiveMove import NaiveMove

move = NaiveMove()

TARGET_UPDATE = 10  # sync the target network every TARGET_UPDATE epochs
NUM_EPOCHS = 2000   # number of optimisation steps to run

# Seed every random number generator in use so runs are repeatable.
# `random` must be imported for this to work; it was missing before and the
# script failed with a NameError on the last seeding call.
seed = 233
torch.random.manual_seed(seed)
torch.cuda.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

agent = DQNAgent()
# agent.load()  -- left disabled, as in the original script
agent.load_memory()

for epoch in range(NUM_EPOCHS):
    print("Epoch: [{}/{}]".format(epoch, NUM_EPOCHS))
    agent.optimize_model()
    if epoch % TARGET_UPDATE == 0:
        agent.update_target_net()
    # NOTE(review): the original indentation of this call was lost; saving
    # once per epoch keeps the stored agent current whichever placement was
    # intended — confirm.
    agent.save()
if __name__ == "__main__":
    # Evaluate every saved model on the same set of 100 mazes, then the
    # ensemble of all "agent-*" models, and write one chart per metric.
    MAZE_PARAMS = {
        'size': 64,
        'FOV': MAZE_FOV,
        'minimapSize': MAZE_MINIMAP_SIZE,
        'loop limit': 1000,
    }
    environments = [MazeRLWrapper(MAZE_PARAMS) for _ in range(100)]
    MODEL_INPUT_SHAPE = environments[0].input_size

    metrics = {
        'Worst scores (top 90%)': {},
        'Best scores (top 10%)': {},
    }

    agents = []
    for path in glob.iglob('weights/*.h5'):
        filename = os.path.abspath(path)
        baseName = os.path.basename(filename)

        model = createModel(shape=MODEL_INPUT_SHAPE)
        model.load_weights(filename)
        # Only models saved as "agent-*" take part in the ensemble.
        if baseName.startswith('agent-'):
            agents.append(model)

        # The run is named after the file with its last three characters
        # (the ".h5" suffix) removed.
        testAgent(
            environments,
            DQNAgent(model),
            name=baseName[:-3],
            metrics=metrics,
        )

    testAgent(
        environments,
        DQNEnsembleAgent(agents),
        name='ensemble',
        metrics=metrics,
    )

    for chartIndex, metricName in enumerate(metrics):
        plot2file(metrics, 'chart-%d.jpg' % chartIndex, metricName)
from itertools import count from ICRAField import ICRAField from Agent.DQNAgent import DQNAgent from Agent.HandAgent import HandAgent TARGET_UPDATE = 10 seed = 233 torch.random.manual_seed(seed) torch.cuda.random.manual_seed(seed) np.random.seed(seed) random.seed(seed) env = ICRAField() agent = DQNAgent() # agent.load() agent2 = HandAgent() episode_durations = [] num_episodes = 2000 for i_episode in range(num_episodes): print("Epoch: [{}/{}]".format(i_episode, num_episodes)) # Initialize the environment and state action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] env.reset() state, reward, done, info = env.step(action) for t in range(2 * 60 * 30): if t % (60 * 30) == 0: print("Simulation in minute: [{}:00/7:00]".format(t // (60 * 30))) # Other agent
from Agent.HandAgent import HandAgent from ICRAField import ICRAField from SupportAlgorithm.NaiveMove import NaiveMove move = NaiveMove() TARGET_UPDATE = 10 seed = 14 torch.random.manual_seed(seed) torch.cuda.random.manual_seed(seed) np.random.seed(seed) random.seed(seed) env = ICRAField() agent = DQNAgent() agent2 = HandAgent() agent.load() device = agent.device episode_durations = [] num_episodes = 50 for i_episode in range(num_episodes): print("Epoch: {}".format(i_episode)) # Initialize the environment and state action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] pos = env.reset() agent2.reset(pos) state, reward, done, info = env.step(action) state_obs = agent.perprocess_state(state) for t in range(7 * 60 * 30):