def __init__(self, map_env):
    self.num_action = 6  # (left, right, stay_turn, speed up, speed down, stay_speed), turn 10 degrees
    self.map_env = map_env
    self.game = self.map_env.parent

    # Building the AI
    self.cnn = CNN(self.num_action)
    self.softmaxBody = SoftmaxBody(T=1.0)
    self.ai = AI(brain=self.cnn, body=self.softmaxBody)

    # Setting up Experience Replay
    self.n_steps = experience_replay.NStepProgress(env=self.game, ai=self.ai, n_step=10)
    self.mem = experience_replay.ReplayMemory(n_steps=self.n_steps, capacity=10000)

    # Moving average recorder over the last 100 rewards
    self.ma = MA(100)

    # Training the AI
    self.epoch = 1
    self.loss = nn.MSELoss()
    self.ai.brain.load()
    self.pause = True
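The MA(100) moving-average recorder used above is not defined in this snippet. A minimal sketch of what such a recorder might look like follows; the add() and average() method names are assumptions based on typical usage, not taken from this code.

# Hypothetical sketch of a simple moving-average reward recorder like MA(100).
class MA:
    def __init__(self, size):
        self.list_of_rewards = []
        self.size = size

    def add(self, rewards):
        # Accept either a single reward or a list of rewards
        if isinstance(rewards, list):
            self.list_of_rewards += rewards
        else:
            self.list_of_rewards.append(rewards)
        # Keep only the most recent `size` entries
        while len(self.list_of_rewards) > self.size:
            del self.list_of_rewards[0]

    def average(self):
        return sum(self.list_of_rewards) / max(len(self.list_of_rewards), 1)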
def init_model(self, config: Config, env, checkpoint: AlienGymCheckpoint = None) -> AlienGymAI:
    image_size: ImageSize = ImageSize.from_str(config.image_size)
    cnn = CNN(env.action_space.n, image_w=image_size.w, image_h=image_size.h)
    cnn.to(self.device)
    if checkpoint is not None:
        cnn.load_state_dict(checkpoint.model_state_dict)
        cnn.eval()
    body = SoftmaxBody(config.softmax_temp)
    body.to(self.device)
    optimizer = optim.Adam(cnn.parameters(), lr=config.optimizer_lr)
    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint.optimizer_state_dict)
    ai = AI(brain=cnn, body=body, device=self.device)
    n_steps = experience_replay.NStepProgress(env=env, ai=ai, n_step=config.n_step)
    memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=config.memory_capacity)
    return AlienGymAI(cnn=cnn, ai=ai, loss=nn.MSELoss(), optimizer=optimizer,
                      n_step=n_steps, replay_memory=memory)
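init_model only reads two fields from the checkpoint, model_state_dict and optimizer_state_dict. A sketch of how such a checkpoint could be saved and restored is below; the AlienGymCheckpoint dataclass definition and the save/load helpers are assumptions for illustration, not part of this project’s code.

# Hypothetical sketch: persisting and restoring the state consumed by init_model().
from dataclasses import dataclass

import torch


@dataclass
class AlienGymCheckpoint:
    model_state_dict: dict
    optimizer_state_dict: dict


def save_checkpoint(path, cnn, optimizer):
    # Store both state dicts in a single file
    torch.save({"model_state_dict": cnn.state_dict(),
                "optimizer_state_dict": optimizer.state_dict()}, path)


def load_checkpoint(path) -> AlienGymCheckpoint:
    data = torch.load(path, map_location="cpu")
    return AlienGymCheckpoint(model_state_dict=data["model_state_dict"],
                              optimizer_state_dict=data["optimizer_state_dict"])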
doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True)

# Getting the number of actions from the Doom environment
number_actions = doom_env.action_space.n

# Building an AI
# Creating an object of our CNN class
cnn = CNN(number_actions)
# Creating an object of our SoftmaxBody class and passing in the temperature
softmax_body = SoftmaxBody(T=1.0)
# Creating an object of our AI class and passing in the brain and body
ai = AI(cnn, softmax_body)

# Setting up Experience Replay
# 10-step learning with a capacity of 10,000
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
# Replay memory: sample mini-batches of 10-step series from the 10,000-capacity buffer
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Implementing the Eligibility Trace
# n-step Q-learning (not asynchronous, because we only have one agent;
# related to, but not the same as, n-step SARSA)
# Training on batches
def eligibility_trace(batch):
    gamma = 0.99
    # Predictions
    inputs = []
    # Targets
    targets = []
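The eligibility_trace function above is cut off right after initializing inputs and targets. The sketch below shows how the n-step target computation typically continues in this kind of setup; it is written as a standalone function that takes the network as a parameter, and it assumes each step of a series exposes .state, .action, .reward and .done.

# Sketch of a complete n-step eligibility trace (assumptions noted above).
import numpy as np
import torch


def eligibility_trace_sketch(cnn, batch, gamma=0.99):
    inputs = []
    targets = []
    for series in batch:
        # Evaluate the first and last states of the series in one forward pass
        state_pair = torch.from_numpy(
            np.array([series[0].state, series[-1].state], dtype=np.float32))
        output = cnn(state_pair)
        # Bootstrap from the max Q-value of the last state unless the episode ended
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        # Accumulate the discounted n-step return backwards through the series
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        # Only the Q-value of the action actually taken gets the n-step target
        target[series[0].action] = cumul_reward
        inputs.append(state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype=np.float32)), torch.stack(targets)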
# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Setting the number of actions
number_actions = 2925

# Building an AI
# cnn = CNN(2925).to(device)
cnn = torch.load("D:\\envPython\\OsuIA\\training\\brain-30.ty").to(device)
cnn.eval()
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=20000)

# Implementing the Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
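Since this variant loads an already-trained brain and puts it in eval mode, a short inference-only sketch may help: run one state through the network without gradients and sample an action from a temperature softmax, mirroring what SoftmaxBody does. The state shape and the helper name are assumptions, not taken from this code.

# Hypothetical inference-only action selection for a loaded, eval-mode network.
import numpy as np
import torch
import torch.nn.functional as F


def select_action(cnn, state, temperature=1.0, device="cpu"):
    # state is assumed to be a numpy array shaped like one network input
    x = torch.from_numpy(np.array([state], dtype=np.float32)).to(device)
    with torch.no_grad():
        q_values = cnn(x)
        probs = F.softmax(q_values * temperature, dim=1)
        action = probs.multinomial(num_samples=1)
    return int(action.item())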
# Training the AI with deep convolutional Q-learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendLine-v0"))),
    height=80, width=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Eligibility trace ... n-step Q-learning
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
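SoftmaxBody is used throughout these snippets but never defined in this excerpt. Here is a minimal sketch of a temperature-softmax action selector of that kind, reconstructed from how it is called above rather than copied from the actual class.

# Hypothetical sketch of a temperature-softmax "body": turn Q-values into a
# probability distribution and sample one action per input row.
import torch.nn as nn
import torch.nn.functional as F


class SoftmaxBody(nn.Module):
    def __init__(self, T):
        super().__init__()
        self.T = T  # temperature; with this formulation, higher T sharpens the distribution

    def forward(self, outputs):
        # outputs: a (batch, num_actions) tensor of Q-values from the brain
        probs = F.softmax(outputs * self.T, dim=1)
        actions = probs.multinomial(num_samples=1)
        return actions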
        # (fragment: tail of the AI wrapper's call, which runs the body on the brain's output)
        actions = self.body(output)
        return actions.data.numpy()

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
    width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Implementing the Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
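The two indented statements at the top of this snippet are only the tail end of the AI wrapper's call. A sketch of how a complete brain-plus-body wrapper of that shape usually looks is given below; the numpy-to-tensor conversion is an assumption reconstructed around those two lines, not the project's actual class.

# Hypothetical sketch of the AI wrapper: convert raw frames to a float tensor,
# run the brain (CNN) to get Q-values, then let the body pick actions.
import numpy as np
import torch


class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # inputs: a batch of preprocessed frames as a numpy array
        input_tensor = torch.from_numpy(np.array(inputs, dtype=np.float32))
        output = self.brain(input_tensor)
        actions = self.body(output)
        return actions.data.numpy()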
# Getting the Warrior environment
warrior_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/WarriorCorridor-v0"))),
    width=80, height=80, grayscale=True)
warrior_env = gym.wrappers.Monitor(warrior_env, "videos", force=True)
number_actions = warrior_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=warrior_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Implementing the Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
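Every environment in these snippets is wrapped with image_preprocessing.PreprocessImage, which is not shown here. A rough sketch of what such an observation wrapper could do (grayscale conversion plus resizing to height x width, channels first, scaled to [0, 1]) follows; the class name, the OpenCV resize and the exact scaling are assumptions, not the project's actual implementation.

# Hypothetical sketch of an image-preprocessing observation wrapper.
import cv2
import gym
import numpy as np


class PreprocessImageSketch(gym.ObservationWrapper):
    def __init__(self, env, height=80, width=80, grayscale=True):
        super().__init__(env)
        self.height = height
        self.width = width
        self.grayscale = grayscale
        n_channels = 1 if grayscale else 3
        self.observation_space = gym.spaces.Box(
            0.0, 1.0, shape=(n_channels, height, width), dtype=np.float32)

    def observation(self, frame):
        frame = frame.astype(np.float32)
        if self.grayscale:
            frame = frame.mean(axis=-1)                  # simple channel average
        frame = cv2.resize(frame, (self.width, self.height))
        if frame.ndim == 2:
            frame = frame[None, :, :]                    # add a channel axis
        else:
            frame = np.transpose(frame, (2, 0, 1))       # channels first
        return frame / 255.0                             # scale to [0, 1]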
# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
    width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n  # number of actions we can take in this environment

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up the Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)  # learning happens every 10 steps
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# n-step Q-learning is similar in spirit to A3C, but with a single (non-asynchronous) agent
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
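To show how these pieces typically fit together, here is a hedged sketch of a training loop that reuses the cnn, memory, n_steps and eligibility_trace objects defined above, plus the MA recorder from the first snippet. It assumes eligibility_trace has been completed to return stacked inputs and targets (as sketched earlier), and the run_steps(), sample_batch() and rewards_steps() method names are assumptions about the experience_replay module, not shown in this excerpt.

# Hypothetical training loop wiring the replay memory, the eligibility trace and
# an Adam optimizer together (see the assumptions in the paragraph above).
import torch.nn as nn
import torch.optim as optim

loss_fn = nn.MSELoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)
nb_epochs = 100
ma = MA(100)  # moving average of the last 100 per-step rewards

for epoch in range(1, nb_epochs + 1):
    memory.run_steps(200)                      # gather 200 new 10-step series
    for batch in memory.sample_batch(128):     # iterate over mini-batches
        inputs, targets = eligibility_trace(batch)
        predictions = cnn(inputs)
        loss = loss_fn(predictions, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    rewards_steps = n_steps.rewards_steps()    # rewards collected since the last call
    ma.add(rewards_steps)
    print("Epoch: %s, Average Reward: %s" % (epoch, ma.average()))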