#Putting output into body and taking the actions actions = self.body(output) #Returning actions in right format return actions.data.numpy() #Part 2 - Implementing Deep COnvolutional Q-Learning #Getting the doom enviroment #gym.make imports the enviroment #image_preprocessing proccesses images coming in with 80 by 80 size in grayscale doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)( ToDiscrete('minimal')(gym.make('ppaquette/DoomCorridor-v0'))), width=80, height=80, grayscale=True) #Saves videos of AI playing doom into the videos folder doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True) #Getting number of actions from doom_enviroment number_actions = doom_env.action_space.n #Building an AI #Creating an object of our CNN class cnn = CNN(number_actions) #Creating an object of our SoftmaxBoddy class and inputing temperature softmax_body = SoftmaxBody(T=1.0) #Creating an object of our AI class and inputing the brain and body ai = AI(cnn, softmax_body)
    # Sample one action index from the softmax probabilities.
    action = probs.multinomial(num_samples=1)
    return action


class AI:
    """Full agent: a 'brain' (network producing action scores) plus a 'body'
    (action selector) applied in sequence."""

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # Convert the input images to a float32 torch Variable batch.
        inputs = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        outputs = self.brain(inputs)
        actions = self.body(outputs)
        # Back to NumPy so the environment loop can consume the actions.
        return actions.data.numpy()


# NOTE(review): this variant expects a "VizdoomCorridor-v0" gym registration —
# confirm that id is registered in this project's setup.
doom_env = image_preprocessing.PreprocessImage(gym.make("VizdoomCorridor-v0"), width=80, height=80, grayscale=True)
doom_env = wrappers.Monitor(doom_env, "videos", force = True)  # record gameplay to "videos"
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T = 1.)  # softmax temperature (exploration knob)
ai = AI(brain=cnn, body=softmax_body)

# XP replay: progress in 10-step chunks, keep up to 10000 transitions.
n_steps = experience_replay.NStepProgress(doom_env, ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000)

# Eligibility trace (n-step Q-learning target computation; body continues below).
def eligibility_trace(batch):
class AI:
    """Agent wrapper: a 'brain' (CNN mapping frames to action scores) feeding a
    'body' (action selector such as SoftmaxBody)."""

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # Convert raw images to a float32 torch Variable batch.
        # (Renamed from `input`, which shadowed the builtin.)
        states = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        # Call the modules themselves rather than .forward(): invoking
        # forward() directly bypasses nn.Module's __call__ machinery (hooks).
        brain_out = self.brain(states)
        actions = self.body(brain_out)
        return actions.data.numpy()


# train deep q conv

# Doom environment: 80x80 grayscale frames, 4-frame skip, minimal discrete
# action set, with gameplay recorded to "videos".
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendLine-v0"))), height=80, width=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)  # softmax temperature (exploration knob)
ai = AI(brain=cnn, body=softmax_body)

# setting up exp Replay: 10-step progress feeding a 10000-transition memory
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# eligibility retrace ... n-step Q-Learning
# Inputs are the images from the simulation def __call__(self, inputs): input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32))) # Feed state to brain, uses the forward function from the CNN class output = self.brain(input) # Get our determined action, uses the forward function from the SoftmaxBody class actions = self.body(output) # convert from torch tensor to numpy array return actions.data.numpy() # Part 2 - Training the AI with Deep convolutional Q-Learning # Getting the Doom environment doom_env = image_preprocessing.PreprocessImage(doom_env, width=80, height=80, grayscale=True) doom_env = wrappers.Monitor(doom_env, "doom_videos", force=True) number_actions = doom_env.action_space.n # Building an AI cnn = CNN(number_actions) softmax_body = SoftmaxBody(T=1.0) # Temperature value dictates exploration ai = AI(brain=cnn, body=softmax_body) # Setting up Experience Replay n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10) memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000) # Implementing Eligibility Trace
self.brain = brain self.body = body def __call__(self, inputs): input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32))) output = self.brain(input) actions = self.body(output) return actions.data.numpy() # Part 2 - Training the AI with Deep Convolutional Q-Learning # Getting the Warrior environment warrior_env = image_preprocessing.PreprocessImage(SkipWrapper(4)( ToDiscrete("minimal")(gym.make("ppaquette/WarriorCorridor-v0"))), width=80, height=80, grayscale=True) warrior_env = gym.wrappers.Monitor(warrior_env, "videos", force=True) number_actions = warrior_env.action_space.n # Building an AI cnn = CNN(number_actions) softmax_body = SoftmaxBody(T=1.0) ai = AI(brain=cnn, body=softmax_body) # Setting up Experience Replay n_steps = experience_replay.NStepProgress(env=warrior_env, ai=ai, n_step=10) memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
class AI:
    """Agent: a CNN 'brain' producing action scores plus a softmax 'body'
    choosing actions.

    Renamed from lowercase `ai` (PEP 8: classes are PascalCase). The old
    name was also immediately clobbered below by the instance assignment
    `ai = ai(...)`, which made the class unreachable after construction.
    """

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # Images -> float32 torch Variable batch.
        # (Renamed local from `input`, which shadowed the builtin.)
        state = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(state)
        actions = self.body(output)
        return actions.data.numpy()


env = gym.make('ppaquette/DoomBasic-v0')
# 80x80 grayscale frames, 4-frame skip, minimal discrete action set.
game_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)), width=80, height=80, grayscale=True)
game_env = gym.wrappers.Monitor(game_env, "videos", force=True)  # record gameplay
number_actions = game_env.action_space.n

# building AI
cnn = CNN(number_actions)
# NOTE(review): other variants of this script pass T=...; confirm SoftmaxBody's
# actual parameter name before changing `temp`.
softmaxbody = SoftmaxBody(temp=1.0)
ai = AI(brain=cnn, body=softmaxbody)

# setting up experience replay: 10-step progress, 10000-transition capacity
n_steps = experience_replay.NStepProgress(env=game_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
    def __init__(self, brain, body):
        # brain: network mapping preprocessed frames to action scores;
        # body: turns those scores into sampled actions.
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # Convert the images to a float32 torch Variable batch.
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        # Return actions as a NumPy array for the environment loop.
        return actions.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
# NOTE(review): "CP1" is an unusual gym environment id — confirm it is
# actually registered; other variants use "ppaquette/Doom...-v0" ids.
doom_env = image_preprocessing.PreprocessImage(w.SkipWrapper(4)(ToDiscrete("minimal")(gym.make("CP1"))), width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)  # record gameplay
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)  # softmax temperature (exploration knob)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay: 10-step progress, 10000-transition capacity
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Implementing Eligibility Trace (body continues below)
def eligibility_trace(batch):
#converting images to correct format input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32))) #converting image to numpy array #converting pixels to float 32 #converting mupy to torch tensor(variable class) output = self.brain(input) #passing tensor to brain actions = self.body(output) #passing output to body return actions.data.numpy() #converting action to numpy # Part 2 - Training the AI with Deep Convolutional Q-Learning # Getting the Doom environment doom_env = image_preprocessing.PreprocessImage( SkipWrapper(4)(ToDiscrete("minimal")( gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True) # importing the environment and preprocessing it #dimensions should be same as neural network doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) #saving to videos number_actions = doom_env.action_space.n # number of actions possible in the environment # Building an AI cnn = CNN(number_actions) #calling cnn softmax_body = SoftmaxBody(T=1.0) #setting value of temperature for softmax ai = AI(brain=cnn, body=softmax_body) #calling brain # Setting up Experience Replay n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10) #learning every 10 steps