Example #1
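This example runs a plain tabular Q-learning loop in an OpenAI-Gym-style environment: each episode decays the exploration rate epsilon toward a 0.05 floor, encodes the observation as a string key for the Q-table, and updates the table after every step. env, qlearn, total_episodes, epsilon_discount, and highest_reward are assumed to be set up earlier in the script.
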
    for x in range(total_episodes):
        cumulated_reward = 0  # Should going forward give more reward than L/R?
        print("Episode = " + str(x))
        observation = env.reset()

        # Decay exploration, keeping epsilon above a 0.05 floor
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        env.render()

        state = ''.join(map(str, observation))

        for i in range(1000):

            # Pick an action based on the current state
            action = qlearn.chooseAction(state)

            # Execute the action and get feedback
            observation, reward, done, info = env.step(action)
            cumulated_reward += reward
            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))

            qlearn.learn(state, action, reward, nextState)

            if not done:
                # Carry the new state into the next step
                state = nextState
            else:
                break
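
Example #2

This example discretizes a continuous 2-D state with np.digitize before indexing the Q-table, then clips the resulting bin indices to the grid given by env.shape. It assumes numpy (as np), itertools, and the bin-edge arrays in horizontal_bins are already set up; a minimal sketch of how such edges might be built follows, where the bin counts and value ranges are assumptions rather than values taken from the example:

    import numpy as np

    # Hypothetical bin edges: 9 edges give 10 bins per axis over an
    # assumed [-1, 1] range, matching a 10x10 env.shape grid.
    horizontal_bins = np.array([
        np.linspace(-1.0, 1.0, 9),  # edges for state[0]
        np.linspace(-1.0, 1.0, 9),  # edges for state[1]
    ])
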
        # For each episode, run the robot until the environment signals done
        for t in itertools.count():
            # Digitize each state dimension into its bin index
            state_ = np.zeros(2)
            state_[0] = int(np.digitize(state[0], horizontal_bins[0]))
            state_[1] = int(np.digitize(state[1], horizontal_bins[1]))

            # Clip the bin indices to the grid bounds
            for j in range(2):
                if state_[j] < 0:
                    state_[j] = 0
                elif state_[j] > env.shape[j] - 1:
                    state_[j] = env.shape[j] - 1

            # Pick an action based on the current state
            action = qlearn.chooseAction(tuple(state_))

            # Execute the action in the environment and get feedback
            next_state, reward, done, info = env.step(action)

            cumulated_reward += reward
            next_state_ = np.zeros(2)

            next_state_[0] = int(np.digitize(next_state[0],
                                             horizontal_bins[0]))
            next_state_[1] = int(np.digitize(next_state[1],
                                             horizontal_bins[1]))
            for j in range(2):
                if next_state_[j] < 0:
                    next_state_[j] = 0
                elif next_state_[j] > env.shape[j] - 1:
                    next_state_[j] = env.shape[j] - 1

            qlearn.learn(tuple(state_), action, reward, tuple(next_state_))

            if not done:
                state = next_state
            else:
                break
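
Example #3

This example is the same Q-learning episode loop, but it funnels each raw observation through a binarize_observation helper to build the Q-table key. The helper is not shown in the example; one plausible shape for it, consistent with how the other examples build string state keys, might be the following, where the body and the threshold are assumptions:

    # Hypothetical helper: collapse each sensor reading to a 0/1 flag and
    # join the flags into a hashable string key for the Q-table.
    def binarize_observation(observation, threshold=1.0):  # assumed threshold
        return ''.join('1' if value < threshold else '0' for value in observation)
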
    for x in range(total_episodes):
        done = False

        cumulated_reward = 0  # Should going forward give more reward than L/R?

        observation = env.reset()

        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        step_counter = 0

        for i in range(1500):
            step_counter += 1
            binarized_observation = binarize_observation(observation)
            action = qlearn.chooseAction(binarized_observation)
            newObservation, reward, done, info = env.step(action)
            binarized_new_observation = binarize_observation(newObservation)
            cumulated_reward += reward

            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            qlearn.learn(binarized_observation, action, reward,
                         binarized_new_observation)
            if not done:
                observation = newObservation
            else:
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
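
Example #4

This example blends two controllers instead of learning raw actions: get_expert_action derives an action from the laser scan, while get_image_action derives one from target or hint points detected in the camera image. Q-learning runs on a thresholded laser state and only learns which controller to trust at each step (rl_choice). getTargetPoints, getHintPoints, get_image_action, get_expert_action, and threshold_laser are helpers defined elsewhere in the script.
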
        for t in range(total_steps):

            step_counter += 1

            image, laser = observation

            # Prefer detected target points; fall back to hint points
            hint_pos = getTargetPoints(image)
            if hint_pos == 0:
                hint_pos = getHintPoints(image)

            height, width, depth = image.shape
            image_action = get_image_action(width, hint_pos)
            laser_action = get_expert_action(laser)

            state = threshold_laser(laser, threshold_value)

            rl_choice = qlearn.chooseAction(state)

            # rl_choice picks which controller to follow this step
            if rl_choice == 1:
                action = 20 - image_action
            else:
                action = laser_action

            next_observation, reward, done, info = env.step(action)

            cumulated_reward += reward
            next_image, next_laser = next_observation
            next_state = threshold_laser(next_laser, threshold_value)

            qlearn.learn(state, rl_choice, reward, next_state)

            if not done:
                # Carry the observation into the next step
                observation = next_observation
            else:
                break
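
The threshold_laser helper used above is likewise not shown. A minimal sketch, assuming it flags each range reading against threshold_value and returns a hashable key like the other examples do (the body is an assumption):

    # Hypothetical helper: flag each laser range reading as near (1) or
    # far (0) and join the flags into a Q-table key.
    def threshold_laser(laser, threshold_value):
        return ''.join('1' if r < threshold_value else '0' for r in laser)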