# NOTE(review): this chunk begins mid-loop -- 'epi' is presumably the
# iteration index of an enclosing for-loop that is outside this view,
# as are the definitions of robot, actions, sim_trajectories, end_pos,
# count, rest_interval and steps_until_resample. TODO: confirm upstream.
if epi % rest_interval == 0:  # Take rest after every 10 * 100 steps
        print('Taking Rest at {}'.format(epi))
        robot.reset()
        robot.step()

    # Hold each sampled action for several steps before drawing a new one.
    if epi % steps_until_resample == 0:  # Sample a new action after certain steps
        action = np.random.uniform(-1, 1, 6)
        # action[:] is a numpy view, so these writes do modify 'action';
        # plain `action[0], action[3] = 0, 0` would be equivalent.
        action[:][0], action[:][3] = 0, 0  # Motor 0 and 3 fixed => 4DOF
    '''Perform action and record the observation and the tip position'''
    actions[epi, :] = action
    robot.act2(actions[epi, :])
    robot.step()
    obs = robot.observe()

    sim_trajectories[epi, :] = obs
    # [0] is the tip coordinate; [1:] drops the x axis, keeping (y, z).
    end_position = robot.get_tip()[0][1:]
    end_pos.append(end_position)

    # More than one hit presumably indicates a collision ("bad action");
    # zero the episode's action and trajectory so it is ignored downstream.
    if len(robot.get_hits(robot1=0, robot2=None)) > 1:  # check for bad actions
        count += 1
        actions[epi, :] = 0
        sim_trajectories[epi, :] = 0

# After the loop: persist the reached tip positions and visualise them
# as a scatter plot and a 2D histogram of the workspace coverage.
final_pos = np.asarray(end_pos)
end_pos_path = file_path + '/random_end_pos_{}.npy'.format(args.freq)
np.save(end_pos_path, final_pos)
plt.scatter(final_pos[:, 0], final_pos[:, 1], alpha=0.5)
plt.show()
plt.hist2d(final_pos[:, 0], final_pos[:, 1], bins=100)
plt.show()
# np.savez(file_path + '/action_trajectories.npz', actions=actions, trajectories=sim_trajectories)
import time

from gym_ergojr.sim.single_robot import SingleRobot

# Manual smoke test for the ErgoJr simulator: hold one fixed pose for
# 100 steps and print the tip position as it settles.
robot = SingleRobot(debug=True)
robot.reset()
robot.step()

for _ in range(100):
    # robot.act2([0,-1,0,0,-1,0]) # min y: -0.1436
    # robot.act2([0,1,-1,0,0,0]) # max y: 0.22358
    # robot.act2([0,-.2,0,0,-.8,0]) # max z: 0.25002
    robot.act2([0, -.2, 1, 0, 0, 0.5])  # min z: 0.016000
    robot.step()
    tip = robot.get_tip()[0]
    # cut off x axis, only return y (new x) and z (new y)
    print(tip[1:])
    time.sleep(0.1)
# --- Esempio n. 3 (original scraped separator; was bare text "Esempio n. 3" / "0",
# which is not valid Python) ---
class GoalBabbling(object):
    """Goal-babbling exploration for either the ErgoJr reacher or the
    3-DOF pusher gym environment.

    Keeps a nearest-neighbour index over (action, effect) pairs so that,
    given a goal position, the action whose observed effect was closest
    can be retrieved and perturbed with noise.
    """

    def __init__(self, action_noise, num_retries, task):
        self.noise = action_noise   # half-width of the uniform action noise
        self.retries = num_retries  # noisy variants tried per goal
        self.task = task            # 'pusher' -> gym env, otherwise ErgoJr robot
        if task == 'pusher':
            self.env = gym.make('Nas-Pusher-3dof-Vanilla-v1')
            self.action_len = self.env.action_space.shape[0]
        else:
            self.robot = SingleRobot(debug=False)
            self.action_len = len(self.robot.motor_ids)
        self._nn_set = learners.NNSet()
        # Fixed seeds for reproducible exploration runs.
        np.random.seed(seed=225)
        random.seed(225)

    def nearest_neighbor(self, goal, history):
        """Return the motor command of the history entry whose recorded
        effect is nearest to *goal*."""
        if len(history) < len(self._nn_set):  # HACK: history shrank -> rebuild index
            self._nn_set = learners.NNSet()
        # Incrementally index only the entries added since the last call.
        for m_command, effect in history[len(self._nn_set):]:
            self._nn_set.add(m_command, y=effect)
        idx = self._nn_set.nn_y(goal)[1][0]
        return history[idx][0]

    def add_noise(self, nn_command):
        """Return *nn_command* plus uniform noise in [-noise, noise].

        For the reacher task, motors 0 and 3 are pinned to 0 (4-DOF arm)
        and the command is clipped to the motor range [-1, 1]; otherwise
        it is clipped to [-3, 3] for the pusher environment.
        """
        action = np.asarray(nn_command)
        perturbation = np.random.uniform(-self.noise, self.noise,
                                         self.action_len)
        new_command = action + perturbation
        if self.task == 'reacher':
            new_command[0], new_command[3] = 0, 0
            new_command = np.clip(new_command, -1, 1)
        else:
            # Original code clipped to [-3, 3] unconditionally; for the
            # reacher branch that second clip was a no-op, so moving it
            # into `else` preserves behaviour.
            new_command = np.clip(new_command, -3, 3)
        return new_command

    def sample_action(self):
        """Sample a uniformly random action for the current task."""
        if self.task == 'reacher':
            # BUG FIX: the reacher task never creates self.env, so the
            # original unconditional env.action_space.sample() raised
            # AttributeError before this branch was reached.
            action = np.random.uniform(-1, 1, self.action_len)
            action[0], action[3] = 0, 0  # motors 0 and 3 fixed => 4-DOF
        else:
            action = self.env.action_space.sample()
        return action

    def action_retries(self, goal, history):
        """Try self.retries noisy variants of the nearest-neighbour
        action for *goal* and return the variant whose observed effect
        came closest."""
        history_local = []
        goal = np.asarray([goal])
        action = self.nearest_neighbor(goal, history)
        for _ in range(self.retries):
            noisy_action = self.add_noise(action)
            _, end_position, _ = self.perform_action(noisy_action)
            history_local.append((noisy_action, end_position))
        return self.nearest_neighbor(goal, history_local)

    def perform_action(self, action):
        """Execute *action* and return (action, end_position, observation)."""
        if self.task == 'reacher':
            self.robot.act(action)
            self.robot.step()
            end_pos = self.robot.get_tip()
            obs = self.robot.observe()
        else:
            obs, _, _, _ = self.env.step(action)
            # Tip position presumably lives at obs[6:8] -- TODO confirm
            # against the pusher env's observation spec.
            end_pos = list(obs[6:8])
        return action, end_pos, obs

    def reset_robot(self):
        """Reset the robot/environment and return the initial observation."""
        if self.task == 'reacher':
            self.robot.reset()
            self.robot.rest()
            self.robot.step()
            # BUG FIX: 'observation' was never assigned on this branch,
            # so the final `return observation` raised UnboundLocalError.
            observation = self.robot.observe()
        else:
            observation = self.env.reset()
        return observation

    @staticmethod
    def dist(a, b):
        """Euclidean distance between points *a* and *b*."""
        return np.linalg.norm(a - b)  # We dont need this method anymore