def run():
    # Initialize the supervisor object.
    # Whenever we want to access attributes, etc., from the supervisor controller we use
    # supervisorPre.
    supervisorPre = PitEscapeSupervisor()
    # Wrap the Pit Escape supervisor in the custom keyboard controller
    supervisorEnv = KeyboardControllerPitEscape(supervisorPre)

    # The agent used here is trained with the PPO algorithm (https://arxiv.org/abs/1707.06347).
    agent = PPOAgent(supervisorPre.observationSpace, supervisorPre.actionSpace)

    episodeCount = 0
    episodeLimit = 10000
    solved = False  # Whether the solved requirement is met
    repeatActionSteps = 1  # Number of steps for which to repeat a certain action
    averageEpisodeActionProbs = []  # Save each episode's average action probability to plot later

    # Run outer loop until the episode limit is reached or the task is solved
    while not solved and episodeCount < episodeLimit:
        state = supervisorEnv.reset()  # Reset robot and get starting observation
        supervisorPre.episodeScore = 0
        actionProbs = []  # This list holds the probability of each chosen action

        # Inner loop is the episode loop
        step = 0
        # Episode is terminated based on time elapsed and not on number of steps
        while True:
            # In training mode the agent samples from the probability distribution, naturally implementing exploration
            actionValues, actionProb = agent.work(state, type_="selectAction")
            # Save the chosen action's probability
            actionProbs.append(actionProb)

            # Step the supervisor to get the current action's reward, the new state and whether we reached the done
            # condition
            newState, reward, done, info = supervisorEnv.step([actionValues], repeatActionSteps)

            # Save the current state transition in the agent's memory
            trans = Transition(state, actionValues, actionProb, reward, newState)
            agent.storeTransition(trans)

            supervisorPre.episodeScore += reward  # Accumulate episode reward
            if done:
                # Save the episode's score
                supervisorPre.episodeScoreList.append(supervisorPre.episodeScore)
                agent.trainStep(batchSize=step + 1)
                solved = supervisorPre.solved()  # Check whether the task is solved
                break

            state = newState  # state for next step is current step's newState
            step += 1

        if supervisorPre.test:  # If test flag is externally set to True, agent is deployed
            break

        print("Episode #", episodeCount, "score:", supervisorPre.episodeScore)
        # The average action probability tells us how confident the agent was in its actions.
        # By looking at this we can check whether the agent is converging to a certain policy.
        avgActionProb = mean(actionProbs)
        averageEpisodeActionProbs.append(avgActionProb)
        print("Avg action prob:", avgActionProb)

        episodeCount += 1  # Increment episode counter

    # np.convolve is used as a moving average to smooth out the plots,
    # see https://stackoverflow.com/a/22621523
    movingAvgN = 10
    plotData(convolve(supervisorPre.episodeScoreList, ones((movingAvgN,)) / movingAvgN, mode='valid'),
             "episode", "episode score", "Episode scores over episodes")
    plotData(convolve(averageEpisodeActionProbs, ones((movingAvgN,)) / movingAvgN, mode='valid'),
             "episode", "average episode action probability",
             "Average episode action probability over episodes")

    if not solved and not supervisorPre.test:
        print("Reached episode limit and task was not solved.")
    else:
        if not solved:
            print("Task is not solved, deploying agent for testing...")
        else:
            print("Task is solved, deploying agent for testing...")

    print("Press R to reset.")
    state = supervisorEnv.reset()
    supervisorPre.test = True
    supervisorPre.episodeScore = 0
    while True:
        actionValues, _ = agent.work(state, type_="selectActionMax")
        state, reward, done, _ = supervisorEnv.step([actionValues], repeatActionSteps)
        supervisorPre.episodeScore += reward  # Accumulate episode reward
        if done:
            print("Reward accumulated =", supervisorPre.episodeScore)
            supervisorPre.episodeScore = 0
            state = supervisorEnv.reset()
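
# As noted above, np.convolve with a uniform kernel implements a simple moving
# average (see https://stackoverflow.com/a/22621523). A minimal standalone
# demonstration of the smoothing applied to the score lists, assuming nothing
# beyond NumPy:
import numpy as np

scores = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
window = 3
# A kernel of 1/window averages each run of `window` consecutive scores;
# mode='valid' keeps only positions where the kernel fully overlaps the data,
# so the output has len(scores) - window + 1 entries.
smoothed = np.convolve(scores, np.ones(window) / window, mode='valid')
print(smoothed)  # [2. 3. 4. 5.]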
def run():
    # Initialize the supervisor object.
    # Whenever we want to access attributes, etc., from the supervisor controller we use
    # supervisorPre.
    supervisorPre = CartPoleSupervisor()
    # Wrap the CartPole supervisor in the custom keyboard controller
    supervisorEnv = KeyboardControllerCartPole(supervisorPre)

    # The agent used here is trained with the DDPG algorithm (https://arxiv.org/abs/1509.02971).
    agent = DDPGAgent(supervisorPre.observationSpace,
                      supervisorPre.actionSpace,
                      lr_actor=0.000025,
                      lr_critic=0.00025,
                      layer1_size=30,
                      layer2_size=50,
                      layer3_size=30,
                      batch_size=64)

    episodeCount = 0
    episodeLimit = 10000
    solved = False  # Whether the solved requirement is met

    # Run outer loop until the episode limit is reached or the task is solved
    while not solved and episodeCount < episodeLimit:
        state = supervisorEnv.reset()  # Reset robot and get starting observation
        supervisorPre.episodeScore = 0

        # Inner loop is the episode loop
        for step in range(supervisorPre.stepsPerEpisode):
            # In training mode the agent returns the action plus OU noise for exploration
            selectedAction = agent.choose_action_train(state)

            # Step the supervisor to get the current selectedAction's reward, the new state and whether we reached
            # the done condition
            newState, reward, done, info = supervisorEnv.step(selectedAction)

            # Save the current state transition in the agent's memory
            agent.remember(state, selectedAction, reward, newState, int(done))

            supervisorPre.episodeScore += reward  # Accumulate episode reward
            # Perform a learning step
            agent.learn()
            if done or step == supervisorPre.stepsPerEpisode - 1:
                # Save the episode's score
                supervisorPre.episodeScoreList.append(supervisorPre.episodeScore)
                solved = supervisorPre.solved()  # Check whether the task is solved
                break

            state = newState  # state for next step is current step's newState

        if supervisorPre.test:  # If test flag is externally set to True, agent is deployed
            break

        print("Episode #", episodeCount, "score:", supervisorPre.episodeScore)

        episodeCount += 1  # Increment episode counter

    # np.convolve is used as a moving average to smooth out the plots,
    # see https://stackoverflow.com/a/22621523
    movingAvgN = 10
    plotData(convolve(supervisorPre.episodeScoreList, ones((movingAvgN,)) / movingAvgN, mode='valid'),
             "episode", "episode score", "Episode scores over episodes")

    if not solved and not supervisorPre.test:
        print("Reached episode limit and task was not solved.")
    else:
        if not solved:
            print("Task is not solved, deploying agent for testing...")
        else:
            print("Task is solved, deploying agent for testing...")

    print("Press R to reset.")
    state = supervisorEnv.reset()
    supervisorPre.test = True
    supervisorPre.episodeScore = 0
    while True:
        selectedAction = agent.choose_action_test(state)
        state, reward, done, _ = supervisorEnv.step(selectedAction)
        supervisorPre.episodeScore += reward  # Accumulate episode reward
        if done:
            print("Reward accumulated =", supervisorPre.episodeScore)
            supervisorPre.episodeScore = 0
            state = supervisorEnv.reset()
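
# The training loop above relies on agent.choose_action_train adding
# Ornstein-Uhlenbeck (OU) noise to the actor's output for exploration, as in
# the DDPG paper. The agent's own noise code is not shown here; the sketch
# below illustrates the usual discretized OU process. The class name and
# parameter values are illustrative assumptions, not taken from this codebase.
import numpy as np

class OUActionNoise:
    """Temporally correlated noise that mean-reverts to mu, commonly added
    to DDPG actions during training."""

    def __init__(self, mu, sigma=0.15, theta=0.2, dt=1e-2):
        self.mu = np.asarray(mu, dtype=np.float64)
        self.sigma = sigma  # noise scale
        self.theta = theta  # mean-reversion rate
        self.dt = dt        # integration time step
        self.reset()

    def __call__(self):
        # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
        self.x_prev = (self.x_prev
                       + self.theta * (self.mu - self.x_prev) * self.dt
                       + self.sigma * np.sqrt(self.dt)
                       * np.random.normal(size=self.mu.shape))
        return self.x_prev

    def reset(self):
        self.x_prev = np.zeros_like(self.mu)

# Usage: perturb the deterministic policy output during training only, e.g.
# noise = OUActionNoise(np.zeros(1)); noisy_action = actor_output + noise()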
test1 = open('inputs/test01.txt')
test1Data = np.loadtxt(test1, delimiter=',')
test1Data = util.normalizeData(test1Data)

errors = []
NUM_INPUTS = len(test1Data) - offset
for i in range(1750, NUM_INPUTS):
    current = test1Data[i]
    features = util.createFeatureRow(test1Data, i, offset, current)
    td = np.array(features)
    predX = neigh.predict(td)
    predY = neighY.predict(td)
    actual = test1Data[i + offset]
    prediction = [predX[0], predY[0]]
    actuals.append(actual)
    predictions.append(prediction)
    errors.append(util.error([actual], [prediction]))

if not onlyTrainingAndCV:
    # util.plotLines(actuals, predictions, 'Actual position', 'Predicted position')
    # util.plotGraph(actuals, predictions, 'Actual position', 'Predicted position')
    util.plotData(actuals, 'Actual position')
    util.plotData(predictions, 'Predicted position')
    util.plotLine(errors, 'Error graph')
    print(len(actuals), len(predictions))
    print(np.sum(errors))
else:
    util.plotLine(cvScoresX, 'CV label x')
    util.plotLine(cvScoresY, 'CV label y')
    # util.plotLines(cvScoresX, cvScoresY, 'CV label X', 'CV label y')
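
# The evaluation loop above assumes `neigh` and `neighY` are regressors
# already fitted on the training data, one per coordinate. Below is a minimal
# sketch of how they might be set up with scikit-learn; the estimator choice,
# n_neighbors value, and training arrays are assumptions, and only the
# predict() usage mirrors the code above.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

trainFeatures = np.random.rand(100, 8)  # placeholder feature rows
trainLabelsX = np.random.rand(100)      # x coordinate `offset` steps ahead
trainLabelsY = np.random.rand(100)      # y coordinate `offset` steps ahead

neigh = KNeighborsRegressor(n_neighbors=5).fit(trainFeatures, trainLabelsX)
neighY = KNeighborsRegressor(n_neighbors=5).fit(trainFeatures, trainLabelsY)

# Each model predicts one coordinate for a single feature row, matching the
# predX[0] / predY[0] indexing in the loop above.
row = trainFeatures[:1]
print(neigh.predict(row)[0], neighY.predict(row)[0])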
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 27 20:00:14 2016

@author: badarim
"""
import numpy as np
import utilities as util

t = open('inputs/test01.txt')
f = open('training_data.txt')

data = np.loadtxt(f, delimiter=',')
util.plotData(data, 'Training data')

data = np.loadtxt(t, delimiter=',')
util.plotData(data, 'Test1 data')
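
# util.plotData comes from the project's `utilities` module, which is not
# included in this section. Below is a plausible matplotlib-based sketch,
# assuming the data is an (N, 2) array of (x, y) positions; the signature
# matches the calls above, but the body is an assumption.
import matplotlib.pyplot as plt

def plotData(data, title):
    """Scatter-plot an (N, 2) array of (x, y) positions under a title."""
    data = np.asarray(data)
    plt.figure()
    plt.scatter(data[:, 0], data[:, 1], s=4)
    plt.title(title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()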