def get_screen(self):
        """Render the environment off-screen and return it as an RGB array.

        Lazily creates the pyglet Display on first use, draws only the
        roombas, and reads the colour buffer back into numpy.

        Returns:
            np.ndarray, shape (height, width, 3), uint8, top row first.
        """
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)
            pyglet.app.event_loop.start()
        self.viewer.on_draw_roombas_only()
        color_buffer = pyglet.image.get_buffer_manager().get_color_buffer()
        image_data = color_buffer.get_image_data()
        # np.fromstring is deprecated (removed for binary input in modern
        # NumPy); frombuffer + copy yields the same writable uint8 array.
        arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
        arr = arr.reshape(color_buffer.height, color_buffer.width, 4)
        # Flip vertically (GL origin is bottom-left) and drop alpha.
        arr = arr[::-1, :, 0:3]
        # 4x4 max-pool each channel down to 170x170.
        # NOTE(review): arr1 is computed but arr is returned — the sibling
        # _get_screen returns the pooled image; confirm which is intended.
        arr1 = np.zeros([170, 170, 3], dtype=np.uint8)
        arr1[::, ::, 0] = skimage.measure.block_reduce(arr[::, ::, 0], (4, 4),
                                                       np.max)
        arr1[::, ::, 1] = skimage.measure.block_reduce(arr[::, ::, 1], (4, 4),
                                                       np.max)
        arr1[::, ::, 2] = skimage.measure.block_reduce(arr[::, ::, 2], (4, 4),
                                                       np.max)
        return arr
    def _render(self, mode='human', close=False):
        """Render the environment in an on-screen pyglet window.

        Args:
            mode: gym render mode (unused; one window is always drawn).
            close: when True, dispose of the viewer window and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # Lazily create the display; self_update=False means this method
        # drives the redraw loop itself below.
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            # from gym.envs.classic_control import rendering
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)

            # def update_func(delta, elapsed):
            #     self.environment.update(delta, elapsed)
            #
            # self.viewer.set_update_func(update_func)

            # pyglet.app.run()

            # pyglet.app.platform_event_loop.start()
            pyglet.app.event_loop.start()

        # timeout = pyglet.app.event_loop.idle()
        # self.viewer.update(0.1)
        # self.viewer.on_draw()
        # self.viewer.

        # dt = self.clock.update_time()
        # redraw_all = self.clock.call_scheduled_functions(dt)

        # Redraw all windows: advance the display clock by 0.1 s, then
        # draw and flip every open pyglet window.
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            # NOTE(review): pokes pyglet's private invalidation flag —
            # version-sensitive.
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)
# Ejemplo n.º 3
# 0
    def _render(self, mode='human', close=False):
        """Render the environment in an on-screen pyglet window.

        Args:
            mode: gym render mode (unused; one window is always drawn).
            close: when True, dispose of the viewer window and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # Lazily create the display; self_update=False means this method
        # drives the redraw loop itself below.
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            # from gym.envs.classic_control import rendering
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)
            pyglet.app.event_loop.start()

        # Redraw all windows: advance the display clock by 0.1 s, then
        # draw and flip every open pyglet window.
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            # NOTE(review): pokes pyglet's private invalidation flag —
            # version-sensitive.
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)
class IARCEnv_2(gym.Env, IARCEnv_Master):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        """Set up action/observation spaces, the simulator, and the
        fixed test-image bookkeeping used for debugging."""
        self.last_rmba = 4
        # Action: roomba index, act-or-not, top tap vs front block.
        self.action_space = spaces.MultiDiscrete(
            [cfg.MISSION_NUM_TARGETS, 2, 2])

        # Observation bounds: [x, y, heading, is_forward] per target
        # roomba, then [agent_x, agent_y, last_rmba].
        low = [0, 0, 0, False] * cfg.MISSION_NUM_TARGETS + [0, 0, 0]
        high = ([20, 20, math.pi * 2, True] * cfg.MISSION_NUM_TARGETS
                + [20, 20, cfg.MISSION_NUM_TARGETS - 1])
        self.observation_space = spaces.Box(np.asarray(low),
                                            np.asarray(high),
                                            dtype=np.float32)

        self.init_Master()
        self.reset()

        # TODO remove temporary test stuff
        self.num_test_imgs = 10
        self.numTestsActivated = 0
        self.test_set = [False] * self.num_test_imgs
        # Fixed dot pattern used as the placeholder test image.
        self.arr = np.zeros([32, 32, 1], dtype=np.uint8)
        for row, col in ((5, 5), (6, 31), (31, 23), (10, 9), (0, 21),
                         (5, 15), (9, 9), (5, 16), (15, 27), (30, 9)):
            self.arr[row, col, :] = 255
        self.test_img = [self.arr] * self.num_test_imgs
        # Capture times (ms) at which real test samples replace the
        # placeholders: 1000, 5000, 9000, ...
        self.test_img_triggerTime = [1000 + i * 4000
                                     for i in range(self.num_test_imgs)]
        self.test_ob = [self.observation_space.sample()] * self.num_test_imgs

    def reset(self):
        """Reset the simulator and return the initial observation vector."""
        self.reset_Master()

        # Flat observation: [x, y, heading, is_forward] per target
        # roomba, then agent position and the last selected roomba.
        obs = []
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                obs += rmba.pos
                obs += [rmba.heading,
                        rmba.state == cfg.ROOMBA_STATE_FORWARD]
        obs += list(self.environment.agent.xy_pos)
        obs.append(self.last_rmba)
        self.state = np.asarray(obs)

        return self.state

    def seed(self, seed=None):
        """Seed both the gym RNG and numpy's global RNG; return the seed used."""
        self.np_random, used_seed = seeding.np_random(seed)
        np.random.seed(used_seed)
        return [used_seed]

    def step(self, action):
        """Advance the simulation by one decision step.

        Args:
            action: array-like [rmba_sel, ac_bool, top_or_front]; a 2-D
                batch is accepted and only the first row is used.

        Returns:
            (state, reward, done, info) per the gym API; info carries the
            per-component rewards, the current screen, and one randomly
            chosen (observation, image) test sample.
        """
        rews = {
            'game_reward': 0.0,
            'speed_reward': 0.0,
            'direction_reward': 0.0
        }

        # Accept either a flat action vector or a batch of one.
        if action.ndim >= 2:
            action = action[0]
        ac = {
            "rmba_sel": action[0],
            "ac_bool": action[1],
            "top_or_front": action[2]
        }

        # Shaping term: reward active roombas for their heading.
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba
                          ) and rmba.state != cfg.ROOMBA_STATE_IDLE:
                rews["direction_reward"] += self.getDirectionRew(rmba)

        def rmbaInteract():
            # Apply the selected interaction (top tap or front block)
            # to the chosen roomba.
            if ac["ac_bool"]:
                if ac["top_or_front"]:
                    self.environment.roombas[
                        ac["rmba_sel"]].collisions['top'] = True
                else:
                    self.environment.roombas[
                        ac["rmba_sel"]].collisions['front'] = True

        rews["game_reward"], rews["speed_reward"], done = self._updateEnv(
            self.environment.roombas[ac["rmba_sel"]].pos, rmbaInteract)

        # Flat observation: [x, y, heading, is_forward] per target
        # roomba, then agent position and the last selected roomba.
        self.state = list()
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                self.state = self.state + rmba.pos
                self.state = self.state + [rmba.heading]
                self.state = self.state + [(rmba.state
                                            == cfg.ROOMBA_STATE_FORWARD)]
        self.state = self.state + list(self.environment.agent.xy_pos) + list(
            [self.last_rmba])
        self.state = np.array(self.state)

        # Capture (state, image) test pairs at their scheduled times.
        for i in range(self.num_test_imgs):
            if self.environment.time_ms == self.test_img_triggerTime[
                    i] and self.test_set[i] is False:
                self.test_ob[i] = self.state
                self.test_img[i] = self.get_screen2()
                self.test_set[i] = True
                self.numTestsActivated += 1

        # Pick a random activated test sample.  Guard <= 1 (the original
        # crashed when nothing was activated yet), and use randint: the
        # deprecated np.random.random_integers was removed from NumPy.
        # random_integers(0, n-1) inclusive == randint(0, n) exclusive.
        if self.numTestsActivated <= 1:
            test_num = 0
        else:
            test_num = np.random.randint(0, self.numTestsActivated)
        test_ob = self.test_ob[test_num]
        test_img = self.test_img[test_num]
        if np.array_equal(test_img, self.arr):
            print("uh-oh spagettios")

        info = {
            "time_ms": self.time_elapsed_ms,
            "rews": rews,
            "img": self.get_screen2(),
            "test_ob": test_ob,
            "test_img": test_img
        }
        # Total reward is the sum of all shaping components.
        reward = sum(rews.values())
        return self.state, reward, done, info

    def get_screen(self):
        """Render the environment off-screen and return it as an RGB array.

        Lazily creates the pyglet Display on first use, draws only the
        roombas, and reads the colour buffer back into numpy.

        Returns:
            np.ndarray, shape (height, width, 3), uint8, top row first.
        """
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)
            pyglet.app.event_loop.start()
        self.viewer.on_draw_roombas_only()
        color_buffer = pyglet.image.get_buffer_manager().get_color_buffer()
        image_data = color_buffer.get_image_data()
        # np.fromstring is deprecated (removed for binary input in modern
        # NumPy); frombuffer + copy yields the same writable uint8 array.
        arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
        arr = arr.reshape(color_buffer.height, color_buffer.width, 4)
        # Flip vertically (GL origin is bottom-left) and drop alpha.
        arr = arr[::-1, :, 0:3]
        # 4x4 max-pool each channel down to 170x170.
        # NOTE(review): arr1 is computed but arr is returned — the sibling
        # _get_screen returns the pooled image; confirm which is intended.
        arr1 = np.zeros([170, 170, 3], dtype=np.uint8)
        arr1[::, ::, 0] = skimage.measure.block_reduce(arr[::, ::, 0], (4, 4),
                                                       np.max)
        arr1[::, ::, 1] = skimage.measure.block_reduce(arr[::, ::, 1], (4, 4),
                                                       np.max)
        arr1[::, ::, 2] = skimage.measure.block_reduce(arr[::, ::, 2], (4, 4),
                                                       np.max)
        return arr

    def get_screen2(self):
        """Rasterise active target roombas onto a 32x32 single-channel image.

        Arena coordinates in [0, 20] map onto pixel indices [0, 31];
        each active target roomba lights one pixel to 255.
        """
        img_size = 32  # TODO remove hardcode
        img = np.zeros([img_size, img_size, 1], dtype=np.uint8)
        for rmba in self.environment.roombas:
            if not isinstance(rmba, environment.TargetRoomba):
                continue
            if rmba.state is not cfg.ROOMBA_STATE_IDLE:
                row = int(rmba.pos[0] / 20.0 * (img_size - 1))
                col = int(rmba.pos[1] / 20.0 * (img_size - 1))
                img[row, col, :] = 255
        return img

    def get_example_img(self):
        """Return a test image: a fixed dot pattern for the first 24 calls,
        then real rendered screens from get_screen().

        The call counter is stored on the function object (class-wide,
        shared across instances).
        """
        img_size = 32  # TODO remove hardcode
        # BUG FIX: `self.get_example_img.counter += 1` raised
        # AttributeError — attributes cannot be set on a *bound* method.
        # Go through the class so the read-modify-write lands on the
        # underlying function object itself.
        func = type(self).get_example_img
        func.counter += 1
        if func.counter >= 25:
            arr = self.get_screen()
        else:
            arr = np.zeros([img_size, img_size, 1], dtype=np.uint8)
            # Fixed dot pattern (same layout as the __init__ placeholder,
            # but at intensity 64).
            for row, col in ((5, 5), (6, 31), (31, 23), (10, 9), (0, 21),
                             (5, 15), (9, 9), (5, 16), (15, 27), (30, 9)):
                arr[row, col, :] = 64

        return arr

    # Class-wide call counter, attached to the function object.
    get_example_img.counter = 0

    def _render(self, mode='human', close=False):
        """Render the environment in an on-screen pyglet window.

        Args:
            mode: gym render mode (unused; one window is always drawn).
            close: when True, dispose of the viewer window and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # Lazily create the display; self_update=False means this method
        # drives the redraw loop itself below.
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            # from gym.envs.classic_control import rendering
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)

            # def update_func(delta, elapsed):
            #     self.environment.update(delta, elapsed)
            #
            # self.viewer.set_update_func(update_func)

            # pyglet.app.run()

            # pyglet.app.platform_event_loop.start()
            pyglet.app.event_loop.start()

        # timeout = pyglet.app.event_loop.idle()
        # self.viewer.update(0.1)
        # self.viewer.on_draw()
        # self.viewer.

        # dt = self.clock.update_time()
        # redraw_all = self.clock.call_scheduled_functions(dt)

        # Redraw all windows: advance the display clock by 0.1 s, then
        # draw and flip every open pyglet window.
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            # NOTE(review): pokes pyglet's private invalidation flag —
            # version-sensitive.
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)
# Ejemplo n.º 5
# 0
class IARCEnv_1(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        """Build the environment: spaces, simulator, agent, initial state."""
        self.viewer = None
        self.earlyTerminationTime_ms = None

        # Action: roomba index, act-or-not, top tap vs front block.
        self.action_space = spaces.MultiDiscrete(
            [cfg.MISSION_NUM_TARGETS, 2, 2])

        import gym.envs.IARC.roombasim.pittras.config
        cfg.load(gym.envs.IARC.roombasim.pittras.config)

        self.environment = environment.Environment()
        self.environment.reset()

        # Observation bounds: [x, y, heading, is_forward] per target roomba.
        low = [0, 0, 0, False] * cfg.MISSION_NUM_TARGETS
        high = [20, 20, math.pi * 2, True] * cfg.MISSION_NUM_TARGETS
        self.observation_space = spaces.Box(np.asarray(low),
                                            np.asarray(high))

        # Place the drone agent near the centre of the arena.
        self.environment.agent = cfg.AGENT([13, 10], 0)
        import time
        self._seed(round(time.time()))
        self._reset()

    def _reset(self):
        """Reset the simulator and return the initial observation."""
        self.time_elapsed_ms = 0
        self.prev_score = 0
        self.environment.reset()

        # Fresh agent near the arena centre, hovering at z = 2.
        self.environment.agent = cfg.AGENT([13, 10], 0)
        self.environment.agent.z_pos = 2

        # Flat observation: [x, y, heading, is_forward] per target roomba.
        obs = []
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                obs += rmba.pos
                obs += [rmba.heading,
                        rmba.state == cfg.ROOMBA_STATE_FORWARD]
        self.state = np.asarray(obs)

        return self.state

    def _seed(self, seed=None):
        """Seed the environment's RNG; return the seed actually used."""
        self.np_random, used_seed = seeding.np_random(seed)
        return [used_seed]

    # def grid2Pixels(self, gridVect):
    #     x = gridVect[0]
    #     y = gridVect[1]
    #     pixelVect = np.zeros((2, 1))
    #     pixelVect[0] = (x + self.grid_width / 2) * self.scale[0]
    #     pixelVect[1] = (y + self.grid_height / 2) * self.scale[1]
    #     return pixelVect
    #
    # def distance(self, obj1, obj2):
    #     return math.sqrt(math.pow(obj1[0] - obj2[0], 2) + math.pow(obj1[1] - obj2[1], 2))

    def _step(self, action):
        """Advance the simulation 100 ms with the given discrete action.

        Args:
            action: array-like [rmba_sel, ac_bool, top_or_front]; a 2-D
                batch is accepted and only the first row is used.

        Returns:
            (state, reward, done, info) per the gym API.
        """
        reward = 0

        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        # Accept either a flat action vector or a batch of one.
        if action.ndim >= 2:
            action = action[0]
        ac = {
            "rmba_sel": action[0],
            "ac_bool": action[1],
            "top_or_front": action[2]
        }

        # Shaping term: reward active roombas for heading toward the
        # scoring edge, normalised by roombas still in play.
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba
                          ) and rmba.state != cfg.ROOMBA_STATE_IDLE:
                reward += (rmba.state == cfg.ROOMBA_STATE_FORWARD) * (
                    1 / math.pi / 30 *
                    (math.fabs(math.pi - rmba.heading) - math.pi / 2)) / (
                        cfg.MISSION_NUM_TARGETS -
                        (self.environment.bad_exits +
                         self.environment.good_exits))

        if ac["ac_bool"]:
            # Teleport the agent onto the selected roomba and apply the
            # chosen interaction (top tap or front block).
            self.environment.agent.xy_pos = self.environment.roombas[
                ac["rmba_sel"]].pos
            self.environment.agent.z_pos = 2
            if ac["top_or_front"]:
                self.environment.roombas[
                    ac["rmba_sel"]].collisions['top'] = True
            else:
                self.environment.roombas[
                    ac["rmba_sel"]].collisions['front'] = True

        done = False
        self.time_elapsed_ms += 100
        self.environment.update(0.1, self.time_elapsed_ms)
        # Game-score delta since the previous step.
        reward += self.environment.score / 1000 - self.prev_score
        self.prev_score = self.environment.score / 1000

        # Episode ends when every roomba has exited, after ten minutes of
        # simulated time, or at an optional early-termination deadline.
        if (self.environment.bad_exits +
                self.environment.good_exits) >= cfg.MISSION_NUM_TARGETS:
            done = True
        if self.environment.time_ms >= 10 * 60 * 1000:
            done = True
        if self.earlyTerminationTime_ms is not None and self.earlyTerminationTime_ms <= self.environment.time_ms:
            done = True

        # Flat observation: [x, y, heading, is_forward] per target roomba.
        self.state = list()
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                self.state = self.state + rmba.pos
                self.state = self.state + [rmba.heading]
                self.state = self.state + [(rmba.state
                                            == cfg.ROOMBA_STATE_FORWARD)]

        info = {"time_ms": self.time_elapsed_ms}
        return np.array(self.state), reward, done, info

    def _get_screen(self):
        """Draw the roombas and return a 4x4 max-pooled 170x170 RGB image.

        Requires self.viewer to already exist (created by _render).

        Returns:
            np.ndarray, shape (170, 170, 3), uint8.
        """
        self.viewer.on_draw_roombas_only()
        color_buffer = pyglet.image.get_buffer_manager().get_color_buffer()
        image_data = color_buffer.get_image_data()
        # np.fromstring is deprecated (removed for binary input in modern
        # NumPy); frombuffer reads the raw RGBA bytes directly.
        arr = np.frombuffer(image_data.data, dtype=np.uint8)
        arr = arr.reshape(color_buffer.height, color_buffer.width, 4)
        # Flip vertically (GL origin is bottom-left) and drop alpha.
        arr = arr[::-1, :, 0:3]
        # 4x4 max-pool each channel down to 170x170.
        arr1 = np.zeros([170, 170, 3], dtype=np.uint8)
        arr1[::, ::, 0] = skimage.measure.block_reduce(arr[::, ::, 0], (4, 4),
                                                       np.max)
        arr1[::, ::, 1] = skimage.measure.block_reduce(arr[::, ::, 1], (4, 4),
                                                       np.max)
        arr1[::, ::, 2] = skimage.measure.block_reduce(arr[::, ::, 2], (4, 4),
                                                       np.max)
        return arr1

    def _render(self, mode='human', close=False):
        """Render the environment in an on-screen pyglet window.

        Args:
            mode: gym render mode (unused; one window is always drawn).
            close: when True, dispose of the viewer window and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # Lazily create the display; self_update=False means this method
        # drives the redraw loop itself below.
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            # from gym.envs.classic_control import rendering
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)

            # def update_func(delta, elapsed):
            #     self.environment.update(delta, elapsed)
            #
            # self.viewer.set_update_func(update_func)

            # pyglet.app.run()

            # pyglet.app.platform_event_loop.start()
            pyglet.app.event_loop.start()

        # timeout = pyglet.app.event_loop.idle()
        # self.viewer.update(0.1)
        # self.viewer.on_draw()
        # self.viewer.

        # dt = self.clock.update_time()
        # redraw_all = self.clock.call_scheduled_functions(dt)

        # Redraw all windows: advance the display clock by 0.1 s, then
        # draw and flip every open pyglet window.
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            # NOTE(review): pokes pyglet's private invalidation flag —
            # version-sensitive.
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)
# Ejemplo n.º 6
# 0
class IARCEnv_4(gym.Env, IARCEnv_Master):
    """IARC roomba-herding environment with a discrete action space.

    Action (MultiDiscrete): which target roomba to fly to, whether to
    interact with it, and whether to touch its top pad or bump its front.
    Observation: (x, y, heading, moving-forward) for every target roomba,
    followed by the agent's (x, y) and the last selected roomba index,
    normalized by ``observation_space.high``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        # NOTE(review): last_rmba is part of the observation but is never
        # updated after construction -- confirm this is intended.
        self.last_rmba = 4
        self.action_space = spaces.MultiDiscrete([cfg.MISSION_NUM_TARGETS, 2, 2])

        # Per-roomba observation bounds: x, y in [0, 20] m, heading in
        # [0, 2*pi], plus a boolean "moving forward" flag.  List repetition
        # replaces the original per-roomba concatenation loop.
        min_obs_template = [0, 0, 0, False]
        max_obs_template = [20, 20, math.pi * 2, True]
        min_obs = min_obs_template * cfg.MISSION_NUM_TARGETS + [0, 0, 0]
        max_obs = max_obs_template * cfg.MISSION_NUM_TARGETS + [20, 20, 9]

        self.observation_space = spaces.Box(np.asarray(min_obs),
                                            np.asarray(max_obs),
                                            dtype=np.float32)

        self.init_Master()
        self.reset()

        # --- temporary self-test fixtures; TODO remove ---
        self.num_test_imgs = 10
        self.numTestsActivated = 0
        self.test_set = [False] * self.num_test_imgs
        # Fixed sentinel dot pattern used until real snapshots are captured.
        self.arr = np.zeros([32, 32, 1], dtype=np.uint8)
        for row, col in ((5, 5), (6, 31), (31, 23), (10, 9), (0, 21),
                         (5, 15), (9, 9), (5, 16), (15, 27), (30, 9)):
            self.arr[row, col, :] = 255
        self.test_img = [self.arr] * self.num_test_imgs
        self.test_ob = [self.observation_space.sample()] * self.num_test_imgs
        self.test_fov_mask = [self.observation_space.sample()] * self.num_test_imgs
        # One snapshot every 58 s, starting 2 s into the episode.
        self.test_img_triggerTime = [2000 + i * 58000
                                     for i in range(self.num_test_imgs)]

    def _get_state(self):
        """Build the normalized observation and its field-of-view mask.

        Returns ``(state, fov_mask)``: ``state`` is the raw observation
        divided element-wise by ``observation_space.high``; ``fov_mask``
        holds NaN for the four fields of each idle roomba and 1 elsewhere.
        """
        state = []
        fov_mask = []
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                if rmba.state == cfg.ROOMBA_STATE_IDLE:
                    fov_mask += [np.nan, np.nan, np.nan, np.nan]
                else:
                    fov_mask += [1, 1, 1, 1]
                state += rmba.pos
                state.append(rmba.heading)
                state.append(rmba.state == cfg.ROOMBA_STATE_FORWARD)
        state += list(self.environment.agent.xy_pos) + [self.last_rmba]
        fov_mask += [1, 1, 1]
        state = np.array(state, dtype=np.float32)
        fov_mask = np.array(fov_mask, dtype=np.float32)
        return state / self.observation_space.high, fov_mask

    def reset(self):
        """Reset the underlying simulation and return the initial observation."""
        self.reset_Master()
        self.state, _ = self._get_state()
        self.last_good_exits = 0
        self.last_bad_exits = 0
        return self.state

    def seed(self, seed=None):
        """Seed both the gym RNG and numpy's global RNG; return [seed]."""
        self.np_random, seed = seeding.np_random(seed)
        np.random.seed(seed)
        return [seed]

    def step(self, action):
        """Advance the simulation by one action.

        ``action`` is (roomba index, interact flag, top-or-front flag); a
        2-D array is treated as a batch of one and its first row is used.
        Returns ``(state, reward, done, info)`` where ``info`` carries the
        reward breakdown, fov masks, the rendered frame and a randomly
        chosen stored self-test sample.
        """
        rews = {'game_reward': 0.0, 'speed_reward': 0.0,
                'direction_reward': 0.0, 'target_reward': 0.0,
                'num_good_exits': 0, 'num_bad_exits': 0}

        if action.ndim >= 2:
            action = action[0]  # unwrap the batch dimension
        ac = {"rmba_sel": action[0], "ac_bool": action[1],
              "top_or_front": action[2]}
        aav_targPos = self.environment.roombas[ac["rmba_sel"]].pos

        # Penalize selecting a roomba that is already idle (out of play).
        if self.environment.roombas[ac["rmba_sel"]].state == cfg.ROOMBA_STATE_IDLE:
            rews["target_reward"] -= 2 * self.numRenderSecs

        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba) and rmba.state != cfg.ROOMBA_STATE_IDLE:
                rews["direction_reward"] += 0.7 * 20 * self.numRenderSecs * self.getDirectionRew(rmba)

        def rmbaInteract():
            # Apply the requested touch to the selected roomba, if any.
            if ac["ac_bool"]:
                touch = 'top' if ac["top_or_front"] else 'front'
                self.environment.roombas[ac["rmba_sel"]].collisions[touch] = True

        rews["game_reward"], rews["speed_reward"], done = self._updateEnv(aav_targPos, rmbaInteract)
        rews['num_good_exits'] = self.environment.good_exits - self.last_good_exits
        rews['num_bad_exits'] = self.environment.bad_exits - self.last_bad_exits
        self.last_good_exits = self.environment.good_exits
        self.last_bad_exits = self.environment.bad_exits

        self.state, fov_mask = self._get_state()

        # Capture self-test snapshots at their scheduled times.
        for i in range(self.num_test_imgs):
            if self.environment.time_ms == self.test_img_triggerTime[i] and self.test_set[i] is False:
                self.test_ob[i] = self.state
                self.test_img[i] = self.get_screen()
                self.test_set[i] = True
                self.test_fov_mask[i] = fov_mask
                self.numTestsActivated += 1
        if self.numTestsActivated <= 1:
            # Zero or one snapshot captured so far: avoid an empty
            # random range (the original crashed when none were captured).
            test_num = 0
        else:
            # np.random.random_integers is deprecated; randint's upper
            # bound is exclusive, so this matches the old inclusive range.
            test_num = np.random.randint(0, self.numTestsActivated)
        test_ob = self.test_ob[test_num]
        test_img = self.test_img[test_num]
        test_fov_mask = self.test_fov_mask[test_num]
        if np.array_equal(test_img, self.arr):
            print("uh-oh spagettios")

        info = {"time_ms": self.time_elapsed_ms, "rews": rews, "test_ob": test_ob,
                "test_img": test_img,  "test_fov_mask": test_fov_mask, "fov_mask": fov_mask}
        if done:
            self.environment.reset()
        # Frame is captured after the (possible) reset, as before.
        info["img"] = self.get_screen()
        # Bad exits count against the total reward.
        rews["num_bad_exits"] *= -1
        reward = sum(rews.values())
        return self.state, reward, done, info

    def get_screen(self):
        """Render active target roombas as 255-valued dots on a 32x32x1 image.

        Arena coordinates (0..20 m) are mapped linearly onto pixel indices.
        """
        img_size = 32  # TODO remove hardcode
        arr = np.zeros([img_size, img_size, 1], dtype=np.uint8)
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba) and rmba.state != cfg.ROOMBA_STATE_IDLE:
                arr[int(rmba.pos[0] / 20.0 * (img_size - 1)),
                    int(rmba.pos[1] / 20.0 * (img_size - 1)), :] = 255
        return arr

    def get_example_img(self):
        """Return a test image: a fixed dot pattern for the first 24 calls,
        then the live screen.

        The call count is kept as an attribute on the function object and
        is therefore shared across all instances.
        """
        img_size = 32  # TODO remove hardcode
        # Attribute assignment on a *bound method* raises AttributeError;
        # bump the counter on the underlying function instead (works on
        # both Python 2 and 3).
        self.get_example_img.__func__.counter += 1
        if self.get_example_img.counter >= 25:
            return self.get_screen()
        arr = np.zeros([img_size, img_size, 1], dtype=np.uint8)
        for row, col in ((5, 5), (6, 31), (31, 23), (10, 9), (0, 21),
                         (5, 15), (9, 9), (5, 16), (15, 27), (30, 9)):
            arr[row, col, :] = 64
        return arr

    get_example_img.counter = 0  # shared call counter, set at class creation

    def _render(self, mode='human', close=False):
        """Render the simulation in a pyglet window.

        With ``close=True`` the viewer is torn down instead of drawn.
        The viewer is created lazily on the first draw request.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            self.viewer = Display(self.environment, timescale=1.0, self_update=False)
            pyglet.app.event_loop.start()

        # Advance the viewer's clock by a fixed 0.1 s tick, then redraw
        # every open pyglet window manually (we drive the loop ourselves).
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)
# Ejemplo n.º 7 (0)  -- scraper artifact, commented out to keep the file importable
class IARCEnv_3(gym.Env, IARCEnv_Master):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        """Continuous-action IARC env: action is (x, y, interact, top/front),
        each component in [0, 1]."""
        self.action_space = spaces.Box(np.array([0.0, 0.0, 0., 0.]),
                                       np.array([1., 1., 1., 1.]),
                                       dtype=float)

        # Observation: (x, y, heading, forward?) per target roomba,
        # followed by the agent's (x, y) position.
        per_roomba_low = [0, 0, 0, False]
        per_roomba_high = [20, 20, math.pi * 2, True]
        low = per_roomba_low * cfg.MISSION_NUM_TARGETS + [0, 0]
        high = per_roomba_high * cfg.MISSION_NUM_TARGETS + [20, 20]

        self.observation_space = spaces.Box(np.asarray(low),
                                            np.asarray(high),
                                            dtype=float)

        self.init_Master()
        self.reset()

    def reset(self):
        """Reset the underlying simulation and return the initial observation."""
        self.reset_Master()

        obs = []
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                obs.extend(rmba.pos)
                obs.append(rmba.heading)
                obs.append(rmba.state == cfg.ROOMBA_STATE_FORWARD)
        obs.extend(self.environment.agent.xy_pos)
        self.state = np.asarray(obs)

        return self.state

    def seed(self, seed=None):
        """Seed the env's RNG; return the list containing the seed used."""
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def step(self, action):
        """Advance one step with a continuous action.

        ``action`` is [x, y, interact, top_or_front], each expected in
        [0, 1]; a 2-D array is treated as a batch of one.  The commanded
        (x, y) target is snapped onto the nearest active roomba.
        Returns ``(state, reward, done, info)``.
        """
        rews = {
            'game_reward': 0.0,
            'end_reward': 0.0,
            'direction_reward': 0.0,
            'targ_reward': 0.0,
            'targ_reward2': 0.0
        }
        logger.logkv("targPos_x", action[0])
        logger.logkv("targPos_y", action[1])
        logger.dumpkvs()
        # Penalize actions that fall outside the unit box (measured before
        # the clipping below).
        if action[0] > 1:
            rews['targ_reward2'] -= 0.01 * (action[0] - 1)
        elif action[0] < 0:
            rews['targ_reward2'] += 0.01 * action[0]
        if action[1] > 1:
            rews['targ_reward2'] -= 0.01 * (action[1] - 1)
        elif action[1] < 0:
            rews['targ_reward2'] += 0.01 * action[1]
        if action.ndim >= 2:
            action = action[0]  # unwrap the batch dimension
        # (The two original branches were identical apart from the unwrap.)
        action = np.clip(action, self.action_space.low,
                         self.action_space.high)
        ac = {
            "aav_pos": action[0:2] * 20.0,  # unit square -> 20 m arena
            "ac_bool": bool(np.round(action[2])),
            "top_or_front": bool(np.round(action[3]))
        }
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))

        # Distance from the commanded position to each active roomba.
        # NOTE(review): equal distances collide as dict keys, and if every
        # roomba is idle min() below raises -- behavior kept from original.
        rmba_dists = dict()
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba) and rmba.state != cfg.ROOMBA_STATE_IDLE:
                rmba_dists[np.linalg.norm(ac["aav_pos"] - rmba.pos)] = rmba
                # reward for moving in the right direction
                rews["direction_reward"] += 10 * self.getDirectionRew(rmba)

        nearest_dist = np.min(list(rmba_dists.keys()))
        # Small penalty proportional to how far the action landed from a
        # roomba, then snap the flight target onto the nearest one.
        rews["targ_reward"] -= 0.0001 * nearest_dist
        ac["aav_pos"] = rmba_dists[nearest_dist].pos

        def rmbaInteract():
            # Touch the nearest roomba when requested and close enough.
            if ac["ac_bool"] and nearest_dist < 0.35:
                rmba = rmba_dists[nearest_dist]
                if ac["top_or_front"]:
                    rmba.collisions['top'] = True
                else:
                    rmba.collisions['front'] = True

        rews["game_reward"], rews["end_reward"], done = self._updateEnv(
            ac["aav_pos"], rmbaInteract)

        # Rebuild the observation vector from the updated simulation.
        state = []
        for rmba in self.environment.roombas:
            if isinstance(rmba, environment.TargetRoomba):
                state.extend(rmba.pos)
                state.append(rmba.heading)
                state.append(rmba.state == cfg.ROOMBA_STATE_FORWARD)
        state.extend(self.environment.agent.xy_pos)
        self.state = state

        info = {"time_ms": self.time_elapsed_ms, "rews": rews}
        reward = sum(rews.values())
        return np.array(self.state), reward, done, info

    def _get_screen(self):
        """Grab the pyglet frame buffer and return it as a 170x170x3 uint8 image.

        The raw RGBA buffer is flipped vertically, the alpha channel is
        dropped, and each color channel is 4x4 max-pooled down to 170x170.
        The viewer is created lazily on first use.
        """
        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)
            pyglet.app.event_loop.start()
        self.viewer.on_draw()  # full scene, not just roombas
        buffer = pyglet.image.get_buffer_manager().get_color_buffer()
        image_data = buffer.get_image_data()
        # np.fromstring(..., sep='') is deprecated/removed; frombuffer is
        # the supported zero-copy equivalent for raw bytes.
        arr = np.frombuffer(image_data.data, dtype=np.uint8)
        arr = arr.reshape(buffer.height, buffer.width, 4)
        arr = arr[::-1, :, 0:3]  # flip vertically, drop alpha
        arr1 = np.zeros([170, 170, 3], dtype=np.uint8)
        for channel in range(3):
            arr1[:, :, channel] = skimage.measure.block_reduce(
                arr[:, :, channel], (4, 4), np.max)

        return arr1

    def _render(self, mode='human', close=False):
        """Render the simulation in a pyglet window.

        With ``close=True`` the viewer is torn down instead of drawn.
        The viewer is created lazily on the first draw request.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            from gym.envs.IARC.roombasim.graphics import Display
            # from gym.envs.classic_control import rendering
            self.viewer = Display(self.environment,
                                  timescale=1.0,
                                  self_update=False)
            pyglet.app.event_loop.start()

        # Redraw all windows: advance the viewer's clock by a fixed 0.1 s
        # tick, then drive each open pyglet window manually (the event loop
        # is stepped here rather than via pyglet.app.run()).
        self.viewer.update_time_only(0.1)
        for window in pyglet.app.windows:
            window.switch_to()
            window.dispatch_event('on_draw')
            window.flip()
            window._legacy_invalid = False
        pyglet.app.platform_event_loop.step(0.1)