from random import random, choice

last_full_run = []   # movement of last full game
current_run = []     # current game
reward = 0.0         # anything other than -1.0 forces a reset on the first trial

for trials in range(trials_max):
    # if the bot touches something other than regular 'ground' then restart.
    if reward != -1.0:
        location = [0, 0]  # w,h
        action = 'hold'
        last_full_run = current_run
        current_run = [location]

    next_location = move(location, action)
    next_action = sarsa.chooseAction(next_location, action_list)

    # 5% of the time the bot does not go where it wants but instead does something random
    if (random() <= 0.05) and (trials < trials_max - 800):
        next_action = choice(action_list[:4])

    # get reward from map, see top
    reward = map[next_location[0]][next_location[1]]

    sarsa.update(location, action, reward, next_location, next_action)

    # set the current location and action for the next step
    location = next_location
    action = next_action
    current_run.append(location)
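The sarsa object's methods are not shown in this excerpt. For a tabular agent they would implement the one-step SARSA rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a)). A minimal sketch of what such an agent could look like (the class name, Q-table layout, and the alpha/gamma/epsilon values are assumptions, not the original implementation):

from random import random, choice

class Sarsa:
    # Hypothetical tabular SARSA agent matching the calls used above.
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = {}          # (state, action) -> estimated value
        self.alpha = alpha   # learning rate
        self.gamma = gamma   # discount factor
        self.epsilon = epsilon

    def getQ(self, state, action):
        return self.q.get((tuple(state), action), 0.0)

    def chooseAction(self, state, actions):
        # epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random() < self.epsilon:
            return choice(actions)
        return max(actions, key=lambda a: self.getQ(state, a))

    def update(self, state, action, reward, next_state, next_action):
        # one-step SARSA backup towards r + gamma * Q(s', a')
        old = self.getQ(state, action)
        target = reward + self.gamma * self.getQ(next_state, next_action)
        self.q[(tuple(state), action)] = old + self.alpha * (target - old)

With locations stored as [w, h] lists, tuple(state) simply makes them hashable dictionary keys.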
cumulated_reward = 0  # Should going forward give more reward than L/R ?

observation = env.reset()

if sarsa.epsilon > 0.05:
    sarsa.epsilon *= epsilon_discount

env.render()

state = ''.join(map(str, observation))

for i in range(1000):
    # Pick an action based on the current state
    action = sarsa.chooseAction(state)

    # Execute the action and get feedback
    observation, reward, done, info = env.step(action)
    cumulated_reward += reward

    if highest_reward < cumulated_reward:
        highest_reward = cumulated_reward

    nextState = ''.join(map(str, observation))
    nextAction = sarsa.chooseAction(nextState)

    #sarsa.learn(state, action, reward, nextState)
    sarsa.learn(state, action, reward, nextState, nextAction)

    #env.monitor.flush(force=True)
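Decaying sarsa.epsilon at the top of each episode only has an effect if chooseAction explores with probability epsilon; as epsilon shrinks, the agent shifts from exploration toward exploitation. A self-contained sketch of such an epsilon-greedy pick over a dict-backed Q-table (the function and variable names here are illustrative, not from the original code):

from random import random, choice

def choose_action(q, state, actions, epsilon):
    # Explore with probability epsilon, otherwise take the best-known action.
    if random() < epsilon:
        return choice(actions)
    values = [q.get((state, a), 0.0) for a in actions]
    return actions[values.index(max(values))]  # first best action on ties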
done = False

if sarsa.epsilon > 0.05:
    sarsa.epsilon *= epsilon_discount

# Initialize the environment and get first state of the robot
observation = env.reset()
state = ''.join(map(str, observation))

# Show on screen the actual situation of the robot
# for each episode, we test the robot for nsteps
for i in range(nsteps):
    rospy.loginfo("############### Start Step => " + str(i))

    # Pick an action based on the current state
    action = sarsa.chooseAction(state)
    rospy.loginfo("Next action is: %d", action)

    # Execute the action in the environment and get feedback
    observation, reward, done, info = env.step(action)
    rospy.loginfo(str(observation) + " " + str(reward))

    cumulated_reward += reward
    if highest_reward < cumulated_reward:
        highest_reward = cumulated_reward

    nextState = ''.join(map(str, observation))

    # Make the algorithm learn based on the results
    #rospy.logwarn("############### State we were => " + str(state))
    #rospy.logwarn("############### Action that we took => " + str(action))
    #rospy.logwarn("############### Reward that action gave => " + str(reward))
    #rospy.logwarn("############### State in which we will start next step => " + str(nextState))
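Both gym-style loops turn the raw observation into a state key with ''.join(map(str, observation)), which just concatenates the (already discretized) observation values into a hashable string for the Q-table dictionary. A tiny illustration with made-up observation values:

observation = [0, 2, 1, 1, 0]            # invented discretized readings
state = ''.join(map(str, observation))   # -> '02110'

q_table = {}
q_table[(state, 0)] = 0.5                # (state, action) -> value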