Example #1
    def __init__(self, multi=False, multi_no=3, tracking=False):
        self.multigoal = multi
        self.n_goals = multi_no
        self.tracking = tracking
        self.rand = Randomizer()

        self.goal = None

        # self.step_in_episode = 0

        self.metadata = {
            'render.modes': ['human', 'rgb_array']
        }

        self.rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437,
                                            radius=0.2022, height=0.2610,
                                            min_dist=0.1, halfsphere=True)

        # observation = joint positions + joint velocities + 2 coordinates for the target
        # (4 joints by default, 3 when tracking)
        joints_no = 4
        if self.tracking:
            joints_no = 3

        self.observation_space = spaces.Box(low=-1, high=1, shape=(joints_no + joints_no + 2,), dtype=np.float32)

        # action = 4 joint angles
        self.action_space = spaces.Box(low=-1, high=1, shape=(joints_no,), dtype=np.float32)

        self.diffs = [JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)]

        self.cam = Camera(color=True)
        self.tracker = Tracker(TRACKING_GREEN["maxime_lower"], TRACKING_GREEN["maxime_upper"])
        # self.tracker = Tracker(TRACKING_GREEN["duct_lower"], TRACKING_GREEN["duct_upper"])
        if self.tracking:
            self.tracker_goal = Tracker(TRACKING_YELLOW["duckie_lower"], TRACKING_YELLOW["duckie_upper"])
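        # green blob tracker: presumably the robot's tip marker; yellow blob tracker: the goal object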

        self.calibration = np.load(os.path.join(config_dir(), "calib-ergoreacher-adr.npz"))["calibration"].astype(
            np.int16)

        self.last_step_time = time.time()
        self.pts_tip = deque(maxlen=32)
        self.pts_goal = deque(maxlen=32)

        self.last_frame = np.ones((480, 640, 3), dtype=np.uint8)

        self.pause_counter = 0

        self.last_speed = 100

        self.goals_done = 0

        # self.goal_states=[]

        # for _ in range(10000):
        #     goal = self.rhis.sampleSimplePoint()
        #     self.goal_states.append(self._goal2pixel(goal))

        self._init_robot()

        super().__init__()
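
The self.diffs spans computed above are what map raw joint angles into the [-1, 1] box of observation_space. A minimal sketch of that mapping, not part of the original class and mirroring the _normalize helper in Example #7 below (it assumes the same JOINT_LIMITS constant):

def normalize_angles(pos, joint_limits=JOINT_LIMITS):
    # map each raw joint angle into [-1, 1] using its per-joint limits
    out = []
    for i, angle in enumerate(pos):
        span = joint_limits[i][1] - joint_limits[i][0]
        shifted = (angle - joint_limits[i][0]) / span  # now in [0, 1]
        out.append(shifted * 2 - 1)                    # now in [-1, 1]
    return out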
Example #2
    def __init__(self,
                 no_move=False,
                 scaling=1,
                 shield=True,
                 sim=False,
                 compensation=True):
        self.no_move = no_move
        self.scaling = scaling
        self.shield = shield
        self.sim = sim
        self.compensation = compensation

        self.rand = Randomizer()
        self.last_step_time = None

        self.step_in_episode = 0
        self.step_in_fight = 0

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        # 6 own joint pos, 6 own joint vel, 6 enemy joint pos, 6 enemy joint vel
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(6 + 6 + 6 + 6, ),
                                            dtype=np.float32)

        self.action_space = spaces.Box(low=-1,
                                       high=1,
                                       shape=(6, ),
                                       dtype=np.float32)

        self.diffs = [
            JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)
        ]

        self._init_robots()
Example #3
import time
from poppy_helpers.normalizer import Normalizer
from pypot.robot import from_remote
from pytorch_a2c_ppo_acktr.inference import Inference

from poppy_helpers.controller import SwordFightController
from poppy_helpers.randomizer import Randomizer

import numpy as np

from poppy_helpers.startups import startup_swordfight

controller_att, controller_def = startup_swordfight("flogo2", "flogo4")
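# startup_swordfight presumably connects to both arms and moves them to their
# swordfight rest poses ("flogo2" = attacker, "flogo4" = defender)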

# random defense
rand = Randomizer()
# rand_def = rand.random_sf()
# controller_def.goto_pos(rand_def)

norm = Normalizer()

inf = Inference("/home/florian/dev/pytorch-a2c-ppo-acktr/"
                "trained_models/ppo/"
                "ErgoFightStatic-Headless-Fencing-v0-180209225957.pt")

# ErgoFightStatic-Headless-Fencing-v0-180301140951.pt - very stabby, bit dumb
# ErgoFightStatic-Headless-Fencing-v0-180301140937.pt - pretty good, slightly stabby
# ErgoFightStatic-Headless-Fencing-v0-180301140520.pt - ultimate stabby policy

# ErgoFightStatic-Headless-Fencing-v0-180209225957.pt - aggressive lunge policy, good tracking, best policy
Example #4
import zmq
import numpy as np
from poppy_helpers.randomizer import Randomizer

ROBOT1 = "flogo2.local"
PORT = 5757

context = zmq.Context()
socket = context.socket(zmq.PAIR)
print("Connecting to server...")
socket.connect("tcp://{}:{}".format(ROBOT1, PORT))
print("Connected.")

observations = []
rand = Randomizer()

for i in range(1000):
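    # every iteration: query current joint positions/speeds and store the reply;
    # every 100 iterations: build a request commanding a new random swordfight pose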
    req = {"robot": {"get_pos_speed": {}}}
    socket.send_json(req)
    answer = socket.recv_json()
    observations.append(answer)

    if i % 100 == 0:
        req = {
            "robot": {
                "set_pos": {
                    "positions": rand.random_sf(scaling=1.0)
                }
            }
        }
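
The two message types above outline a small JSON-over-ZMQ protocol. A hypothetical helper sketch (the function names are not from the original script) wrapping those messages; whether the server acknowledges set_pos is left open, since only the get_pos_speed round trip is shown:

def get_pos_speed(sock):
    # ask for the robot's current joint positions and speeds, return the JSON reply
    sock.send_json({"robot": {"get_pos_speed": {}}})
    return sock.recv_json()

def set_pos(sock, positions):
    # command a new joint configuration; no reply is read here (an assumption)
    sock.send_json({"robot": {"set_pos": {"positions": positions}}})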
Example #5
import tkinter as tk
import time
import numpy as np
from pytorch_a2c_ppo_acktr.inference import Inference

from poppy_helpers.normalizer import Normalizer
from poppy_helpers.randomizer import Randomizer
from poppy_helpers.startups import startup_swordfight

from poppy_helpers.sound import RemoteSound

SCALING = 5  # how many hundred

controller_att, controller_def = startup_swordfight("flogo2", "flogo4")

rand = Randomizer()
norm = Normalizer()
inf = Inference("/home/florian/dev/pytorch-a2c-ppo-acktr/"
                "trained_models/ppo/"
                "ErgoFightStatic-Headless-Fencing-v0-180209225957.pt")
snd = RemoteSound("flogo4.local")

SOUND_EFFECT = "beep1"
# SOUND_EFFECT = "Wilhelm_Scream"


def get_observation():
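    # build the policy observation from the attacker's and defender's normalized joint positions and velocities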
    norm_pos_att = norm.normalize_pos(controller_att.get_pos_comp())
    norm_pos_att[0] *= -1
    norm_vel_att = norm.normalize_vel(controller_att.get_current_speed())
    norm_pos_def = norm.normalize_pos(controller_def.get_pos_comp())
Example #6
import time
from pypot.robot import from_remote

from poppy_helpers.constants import SWORDFIGHT_REST_DEF, SWORDFIGHT_REST_ATT
from poppy_helpers.controller import Controller, SwordFightController
from poppy_helpers.randomizer import Randomizer

poppy_def = from_remote('flogo4.local', 4242)
poppy_att = from_remote('flogo2.local', 4242)

controller_def = SwordFightController(poppy_def, mode="def")
controller_att = SwordFightController(poppy_att, mode="att")

rand = Randomizer()

controller_def.rest()
controller_att.rest()

rand_def = rand.random_sf()
rand_att = rand.random_sf()

print(rand_def, rand_att)

controller_att.set_speed(50)
controller_def.set_speed(50)

controller_def.goto_pos(rand_def)
controller_att.goto_pos(rand_att)

for _ in range(20):
    pos_left = controller_def.get_pos()
Example #7
class ErgoFightLiveEnv(gym.Env):
    def __init__(self,
                 no_move=False,
                 scaling=1,
                 shield=True,
                 sim=False,
                 compensation=True):
        self.no_move = no_move
        self.scaling = scaling
        self.shield = shield
        self.sim = sim
        self.compensation = compensation

        self.rand = Randomizer()
        self.last_step_time = None

        self.step_in_episode = 0
        self.step_in_fight = 0

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        # 6 own joint pos, 6 own joint vel, 6 enemy joint pos, 6 enemy joint vel
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(6 + 6 + 6 + 6, ),
                                            dtype=np.float32)

        self.action_space = spaces.Box(low=-1,
                                       high=1,
                                       shape=(6, ),
                                       dtype=np.float32)

        self.diffs = [
            JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)
        ]

        self._init_robots()

    def _init_robots(self):

        if self.compensation:
            self.controller_def = SwordFightZMQController(mode="def",
                                                          host="flogo4.local")
            self.controller_att = SwordFightZMQController(mode="att",
                                                          host="flogo2.local")
        else:
            self.controller_def = ZMQController(host="flogo4.local")
            self.controller_att = ZMQController(host="flogo2.local")

        self._setSpeedCompliance()

        self.controller_def.get_keys()  # in case there are keys stored

    def _setSpeedCompliance(self):
        self.controller_def.compliant(False)
        self.controller_att.compliant(False)
        self.controller_att.set_max_speed(100)
        self.controller_def.set_max_speed(100)

    def _seed(self, seed=None):
        np.random.seed(seed)

    def _restPos(self):
        self.done = False

        self._setSpeedCompliance()

        self.controller_def.safe_rest()
        self.controller_att.safe_rest()

        time.sleep(.6)  # FIXME: run loop, check joints

        self.randomize(robot=1, scaling=self.scaling)

        time.sleep(1)  # FIXME: instead run a loop here and check when
        # the joints are close to the given configuration
        self.controller_def.get_keys()  # clear queue

    def randomize(self, robot=1, scaling=1.0):
        new_pos = self.rand.random_sf(scaling)
        robot_ctrl = self.controller_att
        if robot == 1:
            robot_ctrl = self.controller_def

        robot_ctrl.goto_pos(new_pos)

    def _reset(self):
        self.step_in_episode = 0
        self._restPos()
        self._self_observe()
        self.last_step_time = time.time()
        return self.observation

    def _get_reward(self):

        collisions = self.controller_def.get_keys()
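        # a queued "s" key event (presumably reported by the defender's hit sensor)
        # counts as one hit: score 1 and reset both robots to their rest poses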

        reward = 0
        if "s" in collisions:
            reward = 1
            self._restPos()

        return reward

    def _self_observe(self):
        joint_vel_att = self.controller_att.get_posvel()
        joint_vel_def = self.controller_def.get_posvel()
        # self.observation = ((self._normalize( ### OLD AND WRONG IMPL
        #     np.hstack((joint_vel_att, joint_vel_def)).astype('float32')
        # )), )
        # this should be the technically correct solution
        self.observation = (np.hstack((self._normalize(joint_vel_att),
                                       self._normalize(joint_vel_def))), )

    def _normalize(self, pos):
        pos = np.array(pos).astype('float32')
        out = []
        for i in range(6):
            shifted = (pos[i] - JOINT_LIMITS[i][0]) / self.diffs[i]  # now it's in [0, 1]
            norm = shifted * 2 - 1
            if i == 0 and self.sim:
                norm *= -1  # between sim and real, the first joint is inverted
            out.append(norm)
        if len(pos) > 6:
            shifted = (pos[6:] + JOINT_LIMITS_SPEED) / (JOINT_LIMITS_SPEED * 2)
            norm = shifted * 2 - 1
            if self.sim:
                norm[0] *= -1  # between sim and real, the first joint is inverted (probably both pos & vel)
            out += list(norm)
        return out

    def _denormalize(self, actions):
        out = []
        if self.sim:
            actions[0] *= -1

        for i in range(6):
            shifted = (actions[i] + 1) / 2  # now it's within [0,1]
            denorm = shifted * self.diffs[i] + JOINT_LIMITS[i][0]
            out.append(denorm)
        return out

    def prep_actions(self, actions):
        actions = np.clip(actions, -1, 1)  # first make sure actions are normalized
        actions = self._denormalize(actions)  # then scale them to the actual joint angles
        return actions

    def _step(self, actions):
        self.step_in_episode += 1
        actions = self.prep_actions(actions)

        self.controller_att.goto_pos(actions)

        if not self.no_move:
            if self.step_in_episode % MOVE_EVERY_N_STEPS == 0:
                # print ("step {}, randomizing".format(self.step_in_episode))
                self.randomize(1, scaling=self.scaling)

        # observe again
        self._self_observe()
        reward = self._get_reward()

        dt = (time.time() - self.last_step_time) * 1000
        if dt < MAX_REFRESHRATE:
            time.sleep((MAX_REFRESHRATE - dt) / 1000)

        self.last_step_time = time.time()
        return self.observation, reward, self.done, {}

    def _test_second_robot(self, actions):
        actions = self.prep_actions(actions)
        self.controller_def.goto_pos(actions)

    def _render(self, mode='human', close=False):
        # This intentionally does nothing and is only here for wrapper functions.
        # if you want graphical output, use the environments
        # "ErgoBallThrowAirtime-Graphical-Normalized-v0"
        # or
        # "ErgoBallThrowAirtime-Graphical-v0"
        # ... not the ones with "...-Headless-..."
        pass
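
For reference, a minimal driver sketch for ErgoFightLiveEnv, assuming the old-style gym API in which gym.Env.reset() and step() dispatch to the underscored _reset() and _step() methods, and assuming both robots (flogo2.local, flogo4.local) are reachable:

if __name__ == "__main__":
    env = ErgoFightLiveEnv(no_move=False, scaling=1, sim=False)
    obs = env.reset()
    for _ in range(100):
        action = env.action_space.sample()  # random 6-dim action in [-1, 1]
        obs, reward, done, info = env.step(action)
        if reward > 0:
            print("hit scored, robots reset to rest poses")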