def run_random(waitTime=0, verbose=True):
    """Drive the Baxter environment with uniformly random actions.

    Initialises the "test" ROS node, creates a BaxterEnvironment and then
    loops until ROS shuts down. Each iteration either resets the
    environment (at start-up and after an episode reports ``done``) or
    applies one random two-component action drawn from [-1, 1], and then
    shows the current camera image in the "Feed" window.

    Args:
        waitTime: Value passed to ``cv2.waitKey`` after every displayed
            frame (OpenCV treats 0 as "wait for a key press").
        verbose: When true, print the first two state entries after each
            reset/step, plus the action, reward, done flag and the
            wall-clock duration of ``env.step`` after each step.
    """
    rospy.init_node("test")
    env = BaxterEnvironment()
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Keep going until ROS is interrupted. The flag starts out set so the
    # very first pass performs a reset and reports the initial state.
    episode_over = True
    while not rospy.is_shutdown():
        if episode_over:
            state = env.reset()
            episode_over = False
            if verbose:
                # print("...") with one argument behaves the same on
                # Python 2 and Python 3.
                print("")
                print("horiz: " + str(state[0]))
                print("vert: " + str(state[1]))
        else:
            # Two independent draws, one per action component.
            action = [random.uniform(-1, 1) for _ in range(2)]

            started = time.time()
            state, reward, episode_over = env.step(action)
            elapsed = time.time() - started

            if verbose:
                print("")
                print("action: " + str(action))
                print("horiz: " + str(state[0]))
                print("vert: " + str(state[1]))
                print("reward: " + str(reward))
                print("done: " + str(episode_over))
                print("time: " + str(elapsed))

        # Show the latest camera frame after every reset or step.
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)
def run(max_ep_steps=15):
    """Teleoperate the Baxter environment from the keyboard.

    Initialises the "test" ROS node, creates a 1280x800 BaxterEnvironment
    and a matching Retina, then loops until ROS shuts down or Esc is
    pressed. After every reset or step the retina-sampled camera image is
    shown in the "Feed" window and the loop blocks until a key is pressed.

    Key bindings (action = [first component, second component]):
        a / s : +0.1 / -0.1 on the first component
        d / f : +0.1 / -0.1 on the second component
        q / w : +1 / -1 on the first component
        e / r : +1 / -1 on the second component
        Esc   : leave the loop
    Any other key just redraws the current image.

    Args:
        max_ep_steps: Number of executed actions after which the episode
            is ended and the environment reset even if the environment
            did not report ``done``. Defaults to 15, the value that used
            to be hard-coded.
    """
    rospy.init_node("test")

    # The environment and the retina must agree on the image resolution.
    resolution = (1280, 800)
    env = BaxterEnvironment(resolution)
    retina = Retina(*resolution)
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Key action mapping
    actions = {
        ord('a'): [0.1, 0],
        ord('s'): [-0.1, 0],
        ord('d'): [0, 0.1],
        ord('f'): [0, -0.1],
        ord('q'): [1, 0],
        ord('w'): [-1, 0],
        ord('e'): [0, 1],
        ord('r'): [0, -1],
    }
    esc_key = 27

    # Run until interrupted or Esc is pressed. `done` starts out set so the
    # first pass resets the environment and prints the initial state.
    done = True
    key = None
    ep_step = 0
    while not rospy.is_shutdown():
        if done:
            # Reset environment
            state = env.reset()
            print("")
            print("horiz: " + str(state[0]))
            print("vert: " + str(state[1]))
            print("x pos: " + str(state[2]))
            print("y pos: " + str(state[3]))
            done = False
            ep_step = 0
        elif key in actions:
            # Valid key pressed: execute the corresponding action.
            ep_step += 1
            action = actions[key]
            state, reward, done = env.step(action)
            print("")
            print("action: " + str(action))
            print("horiz: " + str(state[0]))
            print("vert: " + str(state[1]))
            print("x pos: " + str(state[2]))
            print("y pos: " + str(state[3]))
            print("reward: " + str(reward))
            print("done: " + str(done))
            # Cap the episode length; >= keeps the cap effective for any
            # max_ep_steps value, and equals the old == 15 check by default.
            done = done or (ep_step >= max_ep_steps)

        # Display image until key pressed
        cv2.imshow("Feed", retina.sample(env.image))
        # Keep only the low byte: some OpenCV/GUI backend combinations set
        # modifier flags (e.g. NumLock) in the higher bits of the returned
        # code, which would make every comparison below fail.
        key = cv2.waitKey(0) & 0xFF

        # If Esc pressed exit program
        if key == esc_key:
            break
def run_set_actions(waitTime=0, verbose=True):
    """Cycle the Baxter environment through a fixed list of actions.

    Initialises the "test" ROS node, creates a BaxterEnvironment, shows
    its starting state and then repeatedly applies eight scripted actions
    (magnitude 0.9 on one component at a time) in order, wrapping around,
    until ROS shuts down. The camera image is shown in the "Feed" window
    after the initial update and after every step.

    Args:
        waitTime: Value passed to ``cv2.waitKey`` after every displayed
            frame (OpenCV treats 0 as "wait for a key press").
        verbose: When true, print the starting distances and, after each
            step, the action, the first two state entries, the reward,
            the done flag and the wall-clock duration of ``env.step``.
    """
    rospy.init_node("test")
    env = BaxterEnvironment()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Show the environment as it is before any action is applied.
    env._update_state()
    if verbose:
        # print("...") with one argument behaves the same on Python 2
        # and Python 3.
        print("")
        print("horiz: " + str(env.horiz_dist))
        print("vert: " + str(env.vert_dist))
    cv2.imshow("Feed", env.image)
    cv2.waitKey(waitTime)

    # Scripted sequence, replayed in this order.
    actions = [
        [0.9, 0.0],
        [-0.9, 0.0],
        [-0.9, 0.0],
        [0.9, 0.0],
        [0.0, 0.9],
        [0.0, -0.9],
        [0.0, -0.9],
        [0.0, 0.9],
    ]

    # Replay the sequence until interrupted, wrapping at the end.
    cursor = 0
    while not rospy.is_shutdown():
        action = actions[cursor]

        started = time.time()
        state, reward, done = env.step(action)
        elapsed = time.time() - started

        if verbose:
            print("")
            print("action: " + str(action))
            print("horiz: " + str(state[0]))
            print("vert: " + str(state[1]))
            print("reward: " + str(reward))
            print("done: " + str(done))
            print("time: " + str(elapsed))

        # Show the latest camera frame.
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)

        cursor = (cursor + 1) % len(actions)
retina_image = retina.sample(image) # If not visible through retina then skip data and reset if not block_visible(retina_image): done = True continue # Save images and state to be extracted from image cv2.imwrite(NORM_IMAGES + "img" + str(index) + ".png", cv2.resize(image, (468, 246), interpolation=cv2.INTER_AREA)) cv2.imwrite(RETINA_IMAGES + "img" + str(index) + ".png", retina_image) states[index] = state[:2] # Step through environment using noise function next_obs, _, done = env.step(np.clip(noise_function(), -1, 1)) done = done or (timestep_ep == MAX_EP_STEPS) next_state = preprocessor(next_obs) # Advance state, image and indices state = next_state image = env.image timestep_ep = timestep_ep + 1 index = index + 1 # Save corresponding states torch.save(states, DATA_FOLDER + "states") # Close environment env.close()
# Transition collection: fill the pre-allocated buffers with noise-driven
# experience until `index` reaches SIZE. (`index`, the buffers, `env`,
# `preprocessor`, `noise_function`, `device` and the constants are defined
# outside this section.)
timestep_ep = 0
done = False
state = preprocessor(env.reset()).to(device)

while index < SIZE:
    if done:
        # The previous episode finished: start a new one and restart the
        # noise function along with the per-episode step counter.
        state = preprocessor(env.reset()).to(device)
        noise_function.reset()
        timestep_ep = 0

    # Draw an action from the noise function, limited to [-1, 1], and
    # apply it to the environment.
    action = torch.clamp(
        torch.tensor(noise_function(), dtype=torch.float, device=device),
        -1,
        1,
    )
    next_obs, reward, done = env.step(action)
    if not done:
        # The environment did not end the episode; end it here once the
        # step counter has reached MAX_EP_STEPS.
        done = timestep_ep == MAX_EP_STEPS
    next_state = preprocessor(next_obs).to(device)

    # Store the transition at the current write position.
    states[index] = state
    actions[index] = action
    next_states[index] = next_state
    rewards[index] = torch.tensor(reward, device=device)
    # Stored as 0.0 when the episode ended and 1.0 otherwise.
    # NOTE(review): presumably consumed as a "not done" mask - confirm
    # against the code that reads `dones`.
    dones[index] = torch.tensor(1.0 if not done else 0.0, device=device)

    # Advance state and indices
    state = next_state
    timestep_ep += 1
    index = index + 1