# NOTE(review): the line below is a whitespace-mangled paste — it collapses the
# tail of an actor-critic training loop (per-episode average-score print,
# `agent.save()` every 50 episodes, `return scores, actor_losses, critic_losses`)
# together with top-level RLBench "ReachTarget" environment setup
# (joint positions/velocities/forces observations, ABS_JOINT_TORQUE action mode,
# `RLBenchEnv` construction and reset). Because the collapsed line starts with
# `#`, Python treats ALL of it as one comment, so none of this code executes.
# The enclosing function's `def` is not visible in this file chunk, so the
# fragment cannot be safely re-indented in isolation — restore it from the
# original source (e.g. version control) rather than reconstructing by hand.
# `RLBenchEnv` and `np`/`scores_deque` are presumably defined elsewhere in the
# original file — TODO confirm before restoring.
# agent.save() print('\rEpisode {}\tAverage Score: {:.2f}'.format( i_episode, np.mean(scores_deque))) if i_episode % 50 == 0: agent.save() print('Save Model\n\rEpisode {}\tAverage Score: {:.2f}'.format( i_episode, np.mean(scores_deque))) return scores, actor_losses, critic_losses obs_config = ObservationConfig() obs_config.set_all(False) obs_config.joint_velocities = True obs_config.joint_forces = True obs_config.joint_positions = True action_mode = ActionMode(ArmActionMode.ABS_JOINT_TORQUE) env = RLBenchEnv( "ReachTarget", state_type_list=[ "joint_positions", "joint_velocities", # 'left_shoulder_rgb', # 'right_shoulder_rgb', 'task_low_dim_state', ]) state = env.reset() action_dim = env.action_space.shape[0] state_space = env.observation_space
# Collect ReachTarget demonstrations from RLBench with a UR3 arm + Robotiq
# gripper: open a visible (non-headless) simulation with RGB observations from
# the front and wrist cameras, then record demos via
# data.rlbench_data_collection.collect_demos.
#
# NOTE(review): the original source had this whole script collapsed onto a
# single line (a SyntaxError); this is the reconstruction, with imports grouped
# stdlib / third-party / local per PEP 8. `import time` was hoisted from
# mid-script to the top — behavior-identical, since `time` is only used below.
import time

import numpy as np
from natsort import natsorted  # NOTE(review): unused here — kept; may be used elsewhere in the project

from rlbench.environment import Environment
from rlbench.observation_config import ObservationConfig
from rlbench.action_modes import ArmActionMode, ActionMode
from rlbench.tasks import ReachTarget

from data.rlbench_data_collection import collect_demos

# Observations: RGB from the front and wrist cameras; joint-force readings off.
obs_config = ObservationConfig()
obs_config.front_camera.rgb = True
obs_config.wrist_camera.rgb = True
obs_config.joint_forces = False

# Control the arm by absolute end-effector pose in the world frame.
# Alternative modes tried previously are kept for reference:
action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_WORLD_FRAME)
# action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
# action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)

env = Environment(action_mode,
                  obs_config=obs_config,
                  robot_configuration='ur3robotiq',
                  headless=False)

# Give the simulator window a moment to come up before driving it.
# NOTE(review): a fixed sleep is fragile — confirm whether Environment exposes
# a readiness/launch hook that could replace it.
time.sleep(3)

# "hola" is presumably a tag/name for the recorded demo set — TODO confirm
# against data.rlbench_data_collection.collect_demos.
demos = collect_demos(env, ReachTarget, "hola",
                      n_iter_per_var=1,
                      n_demos_per_iter=3)