Example #1
from random import random, choice  # needed by the exploration noise below

last_full_run = []  # movement of last full game
current_run = []    # current game
reward = 0.0        # anything other than -1.0 triggers a reset, so the first trial starts at the origin

for trials in range(trials_max):

    # if the bot touches something other than regular 'ground' then restart.
    if reward != -1.0:
        location = [0, 0]  # w,h
        action = 'hold'
        last_full_run = current_run
        current_run = [location]

    next_location = move(location, action)
    next_action = sarsa.chooseAction(next_location, action_list)

    # 5% of the time the bot does not go where it wants but instead does something random
    # (exploration noise, switched off for the last 800 trials)
    if (random() <= 0.05) and (trials < trials_max - 800):
        next_action = choice(action_list[:4])

    # get reward from map, see top
    reward = map[next_location[0]][next_location[1]]

    sarsa.update(location, action, reward, next_location, next_action)

    # set the current location and action for the next step
    location = next_location
    action = next_action

    current_run.append(location)
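
For reference, here is a minimal sketch of the tabular agent this example relies on. Only the chooseAction/update signatures come from the snippet above; the class name, the dictionary-backed Q-table, and the alpha/gamma/epsilon parameters are illustrative assumptions, not the original implementation.

from random import random, choice

class Sarsa:
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = {}             # assumed Q-table: (state, action) -> value
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate

    def getQ(self, state, action):
        return self.q.get((tuple(state), action), 0.0)

    def chooseAction(self, state, actions):
        # epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random() < self.epsilon:
            return choice(actions)
        return max(actions, key=lambda a: self.getQ(state, a))

    def update(self, state, action, reward, next_state, next_action):
        # on-policy SARSA update: Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a))
        old = self.getQ(state, action)
        target = reward + self.gamma * self.getQ(next_state, next_action)
        self.q[(tuple(state), action)] = old + self.alpha * (target - old)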
Example #2
        cumulated_reward = 0  # Should going forward give more reward than L/R?

        observation = env.reset()

        # decay exploration each episode, down to a floor of 5% random actions
        if sarsa.epsilon > 0.05:
            sarsa.epsilon *= epsilon_discount

        env.render()

        state = ''.join(map(str, observation))

        for i in range(1000):

            # Pick an action based on the current state
            action = sarsa.chooseAction(state)

            # Execute the action and get feedback
            observation, reward, done, info = env.step(action)
            cumulated_reward += reward

            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))
            nextAction = sarsa.chooseAction(nextState)

            #sarsa.learn(state, action, reward, nextState)
            sarsa.learn(state, action, reward, nextState, nextAction)

            #env.monitor.flush(force=True)
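
The loop passes both nextState and nextAction to sarsa.learn, which is what makes this on-policy SARSA rather than Q-learning (Q-learning would bootstrap from max_a Q(s',a) instead of the action actually chosen). Below is a sketch of what that method plausibly does, written so it could slot into the Sarsa class sketched after Example #1; the Q-table layout and the alpha/gamma attributes are assumptions, and only the five-argument signature is taken from the call above.

    def learn(self, state1, action1, reward, state2, action2):
        # assumed update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a))
        qnext = self.q.get((state2, action2), 0.0)
        oldv = self.q.get((state1, action1))
        if oldv is None:
            # first visit to this state-action pair: seed with the immediate reward
            self.q[(state1, action1)] = reward
        else:
            self.q[(state1, action1)] = oldv + self.alpha * (reward + self.gamma * qnext - oldv)

Note that the fragment ends before the loop advances the state (state = nextState) or breaks on done; a complete loop needs both, otherwise the agent keeps learning from a stale state.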
Example #3
        done = False
        # decay exploration each episode, down to a floor of 5% random actions
        if sarsa.epsilon > 0.05:
            sarsa.epsilon *= epsilon_discount

        # Initialize the environment and get first state of the robot

        observation = env.reset()
        state = ''.join(map(str, observation))

        # Show the current situation of the robot on screen.
        # For each episode, we run the robot for nsteps.
        for i in range(nsteps):

            rospy.loginfo("############### Start Step => "+str(i))
            # Pick an action based on the current state
            action = sarsa.chooseAction(state)
            rospy.loginfo("Next action is: %d", action)
            # Execute the action in the environment and get feedback
            observation, reward, done, info = env.step(action)
            rospy.loginfo(str(observation) + " " + str(reward))
            cumulated_reward += reward
            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))

            # Make the algorithm learn based on the results
            #rospy.logwarn("############### State we were => " + str(state))
            #rospy.logwarn("############### Action that we took => " + str(action))
            #rospy.logwarn("############### Reward that action gave => " + str(reward))
            #rospy.logwarn("############### State in which we will start next step => " + str(nextState))