import random
import time

import cv2
import numpy as np
import rospy
import torch

# BaxterEnvironment, Retina and the other project-specific helpers referenced
# below (preprocessor, noise_function, block_visible, the dataset paths,
# SIZE, MAX_EP_STEPS, device, ...) are assumed to be provided by the
# surrounding project.


def run_random(waitTime=0, verbose=True):
    rospy.init_node("test")
    env = BaxterEnvironment()
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Run randomly until interrupted
    done = True
    while not rospy.is_shutdown():
        if done:
            # Reset
            state = env.reset()
            if verbose:
                print ""
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
            done = False
        else:
            # Random action
            action = [random.uniform(-1, 1), random.uniform(-1, 1)]
            s = time.time()
            state, reward, done = env.step(action)
            e = time.time()
            if verbose:
                print ""
                print "action: " + str(action)
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
                print "reward: " + str(reward)
                print "done: " + str(done)
                print "time: " + str(e - s)
        # Display camera image
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)


def run():
    rospy.init_node("test")
    env = BaxterEnvironment((1280, 800))
    retina = Retina(1280, 800)
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Key action mapping
    actions = {ord('a'): [0.1, 0], ord('s'): [-0.1, 0],
               ord('d'): [0, 0.1], ord('f'): [0, -0.1],
               ord('q'): [1, 0], ord('w'): [-1, 0],
               ord('e'): [0, 1], ord('r'): [0, -1]}

    # Run until interrupted or 'esc' pressed
    done = True
    key = None
    ep_step = 0
    while not rospy.is_shutdown():
        if done:
            # Reset environment
            state = env.reset()
            print ""
            print "horiz: " + str(state[0])
            print "vert: " + str(state[1])
            print "x pos: " + str(state[2])
            print "y pos: " + str(state[3])
            done = False
            ep_step = 0
        else:
            if key in actions:
                ep_step = ep_step + 1

                # If valid key pressed execute corresponding action
                action = actions[key]
                state, reward, done = env.step(action)
                print ""
                print "action: " + str(action)
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
                print "x pos: " + str(state[2])
                print "y pos: " + str(state[3])
                print "reward: " + str(reward)
                print "done: " + str(done)

                done = done or (ep_step == 15)

        # Display image until key pressed
        cv2.imshow("Feed", retina.sample(env.image))
        key = cv2.waitKey(0)

        # If 'esc' pressed exit program
        if key == 27:
            break


def run_set_actions(waitTime=0, verbose=True):
    rospy.init_node("test")
    env = BaxterEnvironment()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Display starting environment
    env._update_state()
    if verbose:
        print ""
        print "horiz: " + str(env.horiz_dist)
        print "vert: " + str(env.vert_dist)
    cv2.imshow("Feed", env.image)
    cv2.waitKey(waitTime)

    # Create list of actions
    actions = [[0.9, 0.0], [-0.9, 0.0], [-0.9, 0.0], [0.9, 0.0],
               [0.0, 0.9], [0.0, -0.9], [0.0, -0.9], [0.0, 0.9]]

    # Run actions until interrupted
    i = -1
    while not rospy.is_shutdown():
        i = (i + 1) % len(actions)
        action = actions[i]
        s = time.time()
        state, reward, done = env.step(action)
        e = time.time()
        if verbose:
            print ""
            print "action: " + str(action)
            print "horiz: " + str(state[0])
            print "vert: " + str(state[1])
            print "reward: " + str(reward)
            print "done: " + str(done)
            print "time: " + str(e - s)
        # Display camera image
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)
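
        # --- Fragment of a separate dataset-collection loop (function header
        # and setup not shown). Each pass saves a resized camera image, its
        # retina-sampled version and the corresponding state, skipping steps
        # where the block is not visible through the retina. ---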
        retina_image = retina.sample(image)
        # If not visible through retina then skip data and reset
        if not block_visible(retina_image):
            done = True
            continue

        # Save both images along with the state to be extracted from them
        cv2.imwrite(NORM_IMAGES + "img" + str(index) + ".png",
                    cv2.resize(image, (468, 246),
                               interpolation=cv2.INTER_AREA))
        cv2.imwrite(RETINA_IMAGES + "img" + str(index) + ".png", retina_image)
        states[index] = state[:2]

        # Step through environment using noise function
        next_obs, _, done = env.step(np.clip(noise_function(), -1, 1))
        done = done or (timestep_ep == MAX_EP_STEPS)
        next_state = preprocessor(next_obs)

        # Advance state, image and indices
        state = next_state
        image = env.image
        timestep_ep = timestep_ep + 1
        index = index + 1

    # Save corresponding states
    torch.save(states, DATA_FOLDER + "states")

    # Close environment
    env.close()
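
    # --- Fragment of a second data-collection routine (tensor allocation and
    # setup not shown). It steps the environment with clipped noise actions
    # and records (state, action, next_state, reward, done-mask) transitions
    # until SIZE entries have been filled. ---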
    timestep_ep = 0
    done = False
    state = preprocessor(env.reset()).to(device)
    while index < SIZE:

        # If episode finished, start a new one and reset the noise function
        if done:
            state = preprocessor(env.reset()).to(device)
            noise_function.reset()
            timestep_ep = 0

        # Step through environment using noise function
        action = torch.tensor(noise_function(),
                              dtype=torch.float,
                              device=device).clamp(-1, 1)
        next_obs, reward, done = env.step(action)
        done = done or (timestep_ep == MAX_EP_STEPS)
        next_state = preprocessor(next_obs).to(device)

        # Add data to tensors
        states[index] = state
        actions[index] = action
        next_states[index] = next_state
        rewards[index] = torch.tensor(reward, device=device)
        dones[index] = torch.tensor(0.0 if done else 1.0, device=device)

        # Advance state and indices
        state = next_state
        timestep_ep = timestep_ep + 1
        index = index + 1