            # agent.save()
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_deque)))

        if i_episode % 50 == 0:
            agent.save()
            print('Save Model\n\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_deque)))

    return scores, actor_losses, critic_losses
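
# A minimal sketch of how the surrounding training loop might look. Only the
# names used in the fragment above (agent, scores_deque, the returned lists)
# come from the source; numpy (np) and the environment wrapper are assumed to
# be imported above the excerpt, and the act/step interfaces and default
# arguments below are assumptions, not the original implementation.
from collections import deque


def train_sketch(env, agent, n_episodes=1000, max_t=200):
    scores, actor_losses, critic_losses = [], [], []
    scores_deque = deque(maxlen=100)
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0.0
        for _ in range(max_t):
            action = agent.act(state)                       # assumed agent API
            next_state, reward, done, _ = env.step(action)  # Gym-style step assumed
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_deque.append(score)
        scores.append(score)
        # actor_losses / critic_losses would be appended from the agent's updates;
        # periodic logging and agent.save() checkpoints as in the fragment above.
    return scores, actor_losses, critic_losses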


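# Low-dimensional observations only: disable everything, then re-enable the joint channels.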
obs_config = ObservationConfig()
obs_config.set_all(False)
obs_config.joint_velocities = True
obs_config.joint_forces = True
obs_config.joint_positions = True

action_mode = ActionMode(ArmActionMode.ABS_JOINT_TORQUE)
env = RLBenchEnv(
    "ReachTarget",
    state_type_list=[
        "joint_positions",
        "joint_velocities",
        # 'left_shoulder_rgb',
        # 'right_shoulder_rgb',
        'task_low_dim_state',
    ])
state = env.reset()
action_dim = env.action_space.shape[0]
state_space = env.observation_space
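
# Sanity-check rollout: assumes the RLBenchEnv wrapper exposes a Gym-style
# step() returning (observation, reward, done, info); the wrapper itself is
# project-specific, so treat this as an illustrative sketch only.
random_action = np.random.uniform(-1.0, 1.0, size=action_dim)
next_state, reward, done, _ = env.step(random_action)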
Example #2
import time

import numpy as np
from natsort import natsorted
from rlbench.environment import Environment
from rlbench.observation_config import ObservationConfig
from rlbench.action_modes import ArmActionMode, ActionMode
from rlbench.tasks import ReachTarget

from data.rlbench_data_collection import collect_demos

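# Demo-collection observations: RGB from the front and wrist cameras, no joint-force readings.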
obs_config = ObservationConfig()
obs_config.front_camera.rgb = True
obs_config.wrist_camera.rgb = True
obs_config.joint_forces = False

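# Absolute end-effector pose control in the world frame; the joint-space modes below are alternatives.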
action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_WORLD_FRAME)
# action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
# action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode,
                  obs_config=obs_config,
                  robot_configuration='ur3robotiq',
                  headless=False)

time.sleep(3)

demos = collect_demos(env,
                      ReachTarget,
                      "hola",
                      n_iter_per_var=1,
                      n_demos_per_iter=3)
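
# For reference, a minimal sketch of gathering demonstrations through RLBench's
# own task API instead of the collect_demos helper (demo count arbitrary; left
# commented out because the helper above already drives the environment):
# env.launch()
# task = env.get_task(ReachTarget)
# raw_demos = task.get_demos(2, live_demos=True)
# env.shutdown()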