class NumberLink(gym.Env):
    """Gym environment for the Number Link puzzle.

    Wires are laid out on a rectangular board; every wire has two pins and
    the agent extends a wire from either pin, one cell at a time, until the
    two ends meet.
    """

    # Rendering capabilities advertised to gym wrappers and monitors.
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30,
    }

    # Supported strategies for placing the pins of the wires.
    board_generation = ["random", "generator"]

    def __init__(
            self,
            board_size: "(int or tuple) Shape of the board " = 7,
            num_wires: "(int) number of wires to connect" = 3,
            board_generation: "(str) method used to generate the wires" = "random",
            seed: "(int) random seed" = None,
            numberlink_path: '(str) path to numberlink cpp generator' = None):
        """Configure the board, the action/observation spaces and the RNG.

        Args:
            board_size: a single int for a square board, or a list/tuple
                giving the size along each of the two board axes.
            num_wires: number of wires (pairs of pins) on the board.
            board_generation: one of ``NumberLink.board_generation``.
            seed: random seed, forwarded to ``self.seed``.
            numberlink_path: directory containing the C++ board generator.

        Raises:
            ValueError: if ``board_generation`` is not a supported method, or
                (from ``seed``) if the generator is used without a valid seed.
        """
        if board_generation not in NumberLink.board_generation:
            # NOTE(review): the ``raise`` line was missing from the damaged
            # source; ValueError mirrors the error raised by ``seed``.
            raise ValueError(
                "board_generation does not match any available method, got:",
                board_generation)
        self.board_generation = board_generation

        # ``seed`` reads ``self.board_generation``, so it must run after the
        # assignment above.
        self._seed = self.seed(seed=seed)

        if isinstance(board_size, (list, tuple)):
            self.board_size = tuple(board_size)
        else:
            self.board_size = (board_size, board_size)
        self.num_wires = num_wires

        self._directions = {0: "E", 1: "S", 2: "W", 3: "N"}
        self._moves = {0: [1, 0], 1: [0, -1], 2: [-1, 0], 3: [0, 1]}
        # One action per (wire, pin, direction); every wire has two pins,
        # which matches the decoding performed in ``step``.
        self.action_space = Discrete(
            self.num_wires * len(self._directions) * 2)

        self.reward_range = (0.0, 1.0)
        # Observations stack two uint16 layers (board, current heads) whose
        # cells hold 0 or a wire id in 1..num_wires (see _get_observation).
        # NOTE(review): ``high`` and ``dtype`` were missing from the damaged
        # source and are inferred from ``_get_observation`` -- confirm.
        self.observation_space = spaces.Box(low=0,
                                            high=self.num_wires,
                                            shape=(self.board_size[0],
                                                   self.board_size[1], 2),
                                            dtype=np.uint16)

        self._numberlink_path = numberlink_path

        self.viewer = None

    def seed(self, seed=None):
        """Seed the random number generator of the environment.

        Args:
            seed: integer seed, or ``None`` to let gym choose one. With the
                "generator" board generation an explicit seed >= 1 is
                required.

        Returns:
            A one-element list holding the seed actually used.

        Raises:
            ValueError: if the board generator is selected and ``seed`` is
                ``None`` or smaller than 1.
        """
        # Compare strings by value: ``is`` only works by accident of string
        # interning and emits a SyntaxWarning on recent Python versions.
        if self.board_generation == "generator" and (seed is None
                                                     or seed < 1):
            raise ValueError(
                'Board with generator must have a specified (int) seed greater than 1.'
            )
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns:
            observation (object): the initial observation.
        """
        if self.board_generation == "generator":
            # The seed is passed to the external generator (see
            # ``_generate_wires``); bumping it gives a new board per episode.
            self._seed[0] += 1
        # Rebuild the wires and the board: ``_get_observation`` reads
        # ``self.wires`` and ``self.board``, which only these helpers create.
        self._create_wires()
        self._init_board()
        return self._get_observation()

    def _get_observation(self):
        """Build the observation: the board layer stacked with a heads layer.

        The second layer marks, with the 1-based wire id, the two cells
        returned by ``wire.current()`` for every wire.
        """
        heads_layer = np.zeros(self.board_size, dtype=np.uint16)
        for wire_id, wire in enumerate(self.wires, start=1):
            (first_x, first_y), (second_x, second_y) = wire.current()
            heads_layer[first_x, first_y] = wire_id
            heads_layer[second_x, second_y] = wire_id
        board_layer = self.board.astype(np.uint16)
        return np.stack([board_layer, heads_layer], axis=2)

    def _create_wires(self):
        """Populate ``self.wires`` with one ``Wire`` per pair of pins.

        In "random" mode the pin coordinates are drawn from
        ``self.np_random``; in "generator" mode the first and last node of
        each path returned by ``_generate_wires`` are used as pins.
        """
        if self.board_generation == "random":
            pair_shape = (self.num_wires, 2)
            # x coordinates are drawn before y coordinates, so the sequence
            # of random numbers consumed stays the same.
            start_xs, target_xs = zip(
                *self.np_random.randint(self.board_size[0], size=pair_shape))
            start_ys, target_ys = zip(
                *self.np_random.randint(self.board_size[1], size=pair_shape))
            self.wires = [
                Wire(np.array([sx, sy]), np.array([tx, ty]))
                for sx, sy, tx, ty in zip(start_xs, start_ys, target_xs,
                                          target_ys)
            ]

        if self.board_generation == "generator":
            generated_paths = self._generate_wires()
            self.wires = [
                Wire(np.array(path[0]), np.array(path[-1]))
                for path in generated_paths.values()
            ]

    def _init_board(self):
        """Create an empty board and mark both pins of every wire on it.

        Cells hold 0 when free and the 1-based wire id otherwise.
        """
        self.board = np.zeros(self.board_size, dtype=int)
        for wire_id, wire in enumerate(self.wires, start=1):
            source, target = wire.pins()
            for pin in (source, target):
                self.board[pin[0], pin[1]] = wire_id

    def _set_board(self, indices, wire_id=0):
        """Write ``wire_id`` into every board cell listed in ``indices``.

        ``indices`` is an iterable of coordinate pairs; the default value 0
        is the one ``_cond_obstacle`` treats as an unoccupied cell.
        """
        for row, col in indices:
            self.board[row, col] = wire_id

    def _display_board(self):
        """Print a string representation of the board.

        The board is rotated by 90 degrees (``np.rot90``) before printing.
        Empty cells are shown as blanks and occupied cells as their wire id.
        """
        rotated = np.rot90(self.board)
        # Pad every cell to the widest id so that columns stay aligned.
        width = len(str(int(rotated.max()))) if rotated.size else 1
        for row in rotated:
            # Compare the value, not its digits: replacing every '0'
            # character would corrupt ids such as 10 or 20.
            cells = [(' ' if cell == 0 else '%d' % cell).rjust(width)
                     for cell in row]
            print(' '.join(cells))

    def _cond_not_connected(self, wire_id) -> bool:
        """Tell whether the wire at index ``wire_id`` is still unconnected."""
        selected_wire = self.wires[wire_id]
        return not selected_wire.connected

    def _cond_board_size(self, wire, pin, move) -> bool:
        """Tell whether the move keeps the wire inside the board.

        The candidate position is obtained from the wire itself through
        ``get_new_position(pin, move)``.
        """
        new_x, new_y = self.wires[wire].get_new_position(pin, move)
        limit_x, limit_y = self.board_size[0], self.board_size[1]
        inside_x = 0 <= new_x < limit_x
        inside_y = 0 <= new_y < limit_y
        return inside_x and inside_y

    def _cond_obstacle(self, wire, pin, move) -> bool:
        """Tell whether the target cell is free or already owned by the wire.

        A cell is free when it holds 0 and owned when it holds the wire's
        1-based id (``wire + 1``).
        """
        new_x, new_y = self.wires[wire].get_new_position(pin, move)
        cell = self.board[new_x, new_y]
        own_id = wire + 1
        return cell == 0 or cell == own_id

    def _compile_generator(self):
        """Compiles the generator files with Cpp compiler"""
        # Path of the C++ source, rooted at the user-supplied
        # ``self._numberlink_path``.
        # NOTE(review): the end of this ``os.path.join(...)`` call (remaining
        # path component(s) and closing parentheses) is missing from this
        # view of the file; restore it from the original source.
        filename = str(
            os.path.join(self._numberlink_path, 'gameboard-generator', 'src',
        # Run g++ on that source and name the resulting binary 'numberlink'
        # (the name ``_generate_wires`` executes as './numberlink').
        # NOTE(review): the remaining arguments and the closing parenthesis
        # of this call are missing as well. The command is built as a single
        # string, so it presumably ran through a shell -- confirm.
        subprocess.check_call(' '.join(['g++', filename, '-o', 'numberlink']),

    def _generate_wires(self):
        # NOTE(review): the two lines below are the text of the method's
        # docstring, but its triple-quote delimiters are missing from this
        # view of the file; restore them from the original source.
        Returns dict with paths as list of sublists; the latter representing nodes.
        NOTE: Only generates square boards for now!

        # Command line for the external generator binary: board size (only
        # the second board dimension is passed), number of wires as the
        # level, and the current seed.
        # NOTE(review): the value following '-n' and the closing bracket of
        # this list are missing from this view of the file.
        generation_command = [
            './numberlink', '-b',
            str(self.board_size[1]), '-lvl',
            str(self.num_wires), '-s',
            str(self._seed[0]), '-n',

        # Launch the generator and capture what it writes to its output.
        # NOTE(review): the arguments of ``Popen`` are missing; since
        # ``communicate()[0]`` is decoded below, stdout was presumably piped
        # -- confirm.
        path_str = subprocess.Popen(
        path_str = path_str.communicate()[0]

        # The raw bytes are decoded as UTF-8 and parsed as JSON.
        return NumberLink.decode_to_json(path_str)

    @classmethod
    def decode_to_json(cls, text):
        """Decode UTF-8 encoded bytes and parse them as JSON.

        Declared as a classmethod because it is invoked on the class itself
        (``NumberLink.decode_to_json(path_str)``) with a single argument.

        Args:
            text (bytes): UTF-8 encoded JSON document.

        Returns:
            The Python object produced by ``json.loads``.
        """
        decoded_utf = text.decode("utf-8")
        return json.loads(decoded_utf)

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        The integer action encodes a wire, one of its two pins and a
        direction. A move is silently discarded (current observation, zero
        reward, ``done=False``) when the action is outside the action space,
        the wire is already connected, the move leaves the board, or the
        target cell belongs to another wire.

        Args:
            action (object): an action provided by the environment

        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        obs, reward, done, info = self._get_observation(), 0.0, False, {}

        # check that the action is within the action space
        if not self.action_space.contains(action):
            return obs, reward, done, info

        # decode action -> (wire, pin, direction); each wire owns
        # 2 * len(directions) consecutive action ids
        num_directions = len(self._directions)
        wire = action // (2 * num_directions)
        action = action % (2 * num_directions)
        pin = action // num_directions
        direction = action % num_directions
        move = self._moves[direction]

        # check that the wire is not already connected
        # if so, the move is discarded
        if not self._cond_not_connected(wire):
            return obs, reward, done, info

        # check that the move will stay within the board boundaries
        # if not, the move is discarded
        if not self._cond_board_size(wire, pin, move):
            return obs, reward, done, info

        # check that the wire will not go to an occupied space
        # if it would, the move is discarded
        if not self._cond_obstacle(wire, pin, move):
            return obs, reward, done, info

        # NOTE(review): the second value returned by ``move`` is not applied
        # to the board here; if it lists cells the wire vacated, confirm
        # whether they should be cleared.
        added, _unused, connect = self.wires[wire].move(pin, move)
        self._set_board(added, wire + 1)

        # receive a reward if the wire gets connected
        if connect:
            reward = 1.0 / self.num_wires

        # plain Python bool rather than a NumPy scalar
        done = bool(np.all([w.connected for w in self.wires]))
        return self._get_observation(), reward, done, info

    def clone(self) -> object:
        """Clone the object, except the seed.

        The copy is built with only ``board_size`` and ``num_wires``, so it
        uses the constructor defaults for everything else (including the
        board generation method) and gets its own random generator.

        Returns:
            A new ``NumberLink`` whose wires and board are independent copies
            of the current ones.
        """
        import copy

        clone = NumberLink(self.board_size, self.num_wires)
        # NOTE(review): the loop body was missing from the damaged source;
        # a deep copy keeps the clone's wires independent of the original's
        # whatever the internals of ``Wire`` are.
        clone.wires = [copy.deepcopy(wire) for wire in self.wires]
        clone.board = np.copy(self.board)
        return clone

    def render(self, mode="human"):
        # Draw the pins and the paths of every wire in a 500x500 viewer and
        # return whatever ``self.viewer.render`` returns; an RGB array is
        # requested from the viewer when ``mode`` is "rgb_array".
        screen_height = screen_width = 500
        # Per-axis factors converting board coordinates to pixels.
        scale_height = screen_height / self.board_size[0]
        scale_width = screen_width / self.board_size[1]
        scales = np.array([scale_height, scale_width])

        # The viewer is created lazily on the first call.
        if self.viewer is None:
            self.viewer = rendering.Viewer(screen_width, screen_height)

        radius = np.min(scales) / 2
        for k, wire in enumerate(self.wires):

            # display the pins of the wires
            for position in wire.pins():
                # NOTE(review): the lines that build a shape from ``radius``,
                # attach this transform and hand it to the viewer are missing
                # from this view of the file.
                t = rendering.Transform(translation=position * scales)

            # display the paths of each wire
            for path in wire.paths.values():
                previous = None
                for point in path:
                    # connect each point of the path to the one before it
                    if previous is not None:
                        # NOTE(review): the remaining argument(s) and closing
                        # parenthesis of this call are missing from this view.
                        self.viewer.draw_line(previous * scales,
                                              point * scales,
                    previous = point

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def _set_colour_map(self):
        """Associate each wire index with a colour.

        The colours are the values of ``colour_dictionary()``, taken in
        order and paired with the indices ``0 .. num_wires - 1``.
        """
        palette = colour_dictionary().values()
        wire_indices = range(self.num_wires)
        self.colour_map = OrderedDict(zip(wire_indices, palette))

    def close(self):
        """Close the rendering viewer, if any, and forget it."""
        if self.viewer:
            # Release the window before dropping the reference; otherwise it
            # stays open until the object is garbage collected.
            self.viewer.close()
            self.viewer = None
class Blackjack(Env):
    """Simplified Blackjack environment with one player against a dealer.

    Cards are drawn with replacement from ``self.deck``. The player may hit
    (``ACTION_HIT``); any other action ends the player's turn, after which
    the dealer draws until reaching ``self.dealer_stop``. The reward is only
    non-zero on the final step: -1 for a loss, 0 for a draw, 1 for a win.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.seed_num = None
        # Private generator created by ``_seed``; while it is ``None`` the
        # global NumPy generator is used.
        self._rng = None
        self.dealer = []
        self.player = []
        # ACE, 2, 3, 4, 5, 6, 7, 8, 9, 10, Jack, Queen, King
        self.deck = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10])
        self.action_space = Discrete(N_ACTIONS)
        self.observation_space = Tuple(
            (Discrete(11), Discrete(32), Discrete(2)))
        self.reward_range = (-1, 1)
        self.dealer_stop = DEALER_SICK_SUM

    def _render(self, mode='human', close=False):
        """Print the sum and the cards of the dealer and of the player."""
        print('Dealer: sum={:2} cards={:4}'.format(
            self.calculate_hand_sum(self.dealer), str(self.dealer)),
              end=' ')
        print('Player: sum={:2} cards={}'.format(
            self.calculate_hand_sum(self.player), str(self.player)))

    def _step(self, action):
        """Apply ``action`` and return (observation, reward, done, info).

        Hitting adds one card to the player's hand and ends the episode only
        on a bust. Any other action ends the episode after the dealer has
        drawn up to ``self.dealer_stop``.
        """
        assert self.action_space.contains(action)
        done = False
        if action == ACTION_HIT:
            self.player += self.draw_card()
            if self.is_busted(self.player):
                done = True
        else:
            done = True
            while self.calculate_hand_sum(self.dealer) < self.dealer_stop:
                self.dealer += self.draw_card()

        if done:
            reward = self.calculate_reward()
        else:
            reward = 0
        return self._observation(), reward, done, self._auxiliary()

    def _reset(self):
        """Deal a new hand and return the initial observation.

        The player starts with two cards and keeps drawing while the hand
        sum is below ``PLAYER_MIN``; the dealer starts with a single card.
        """
        self.player = list(self.draw_card(2))
        while self.calculate_hand_sum(self.player) < PLAYER_MIN:
            self.player += self.draw_card(1)
        self.dealer = self.draw_card()
        return self._observation()

    def _seed(self, seed=None):
        """Seed card drawing and return the seed in a one-element list.

        A ``None`` seed keeps (or restores) the use of the global NumPy
        generator.
        """
        self.seed_num = seed
        # The seed used to be stored without ever reaching the generator, so
        # seeding had no effect on the cards drawn.
        self._rng = None if seed is None else np.random.RandomState(seed)
        return [self.seed_num]

    def draw_card(self, n=1):
        """Return a list of ``n`` cards drawn with replacement from the deck."""
        rng = np.random if self._rng is None else self._rng
        return list(rng.choice(self.deck, n))

    def calculate_hand_sum(self, cards):
        """Return the value of a hand, counting one ace as 11 when usable."""
        if self.has_usable_ace(cards):
            return sum(cards) + 10
        else:
            return sum(cards)

    def has_usable_ace(self, player):
        """Tell whether the hand has an ace that can count as 11 without busting."""
        return ACE_CARD in player and sum(player) + 10 <= BLACKJACK

    def is_busted(self, player):
        """Tell whether the value of the hand exceeds ``BLACKJACK``."""
        return self.calculate_hand_sum(player) > BLACKJACK

    def calculate_reward(self):
        """Return the final reward from the player's point of view.

        Checks are ordered: player bust (-1), dealer bust (1), player natural
        (0 against a dealer natural, 1 otherwise), equal sums (0), then the
        higher sum wins.
        """
        if self.is_busted(self.player):
            return -1
        elif self.is_busted(self.dealer):
            return 1
        elif self.is_natural(self.player):
            return 0 if self.is_natural(self.dealer) else 1
        elif self.calculate_hand_sum(self.player) == self.calculate_hand_sum(
                self.dealer):
            return 0
        else:
            return 1 if self.calculate_hand_sum(
                self.player) > self.calculate_hand_sum(self.dealer) else -1

    def is_natural(self, player):
        """Tell whether the hand reaches ``BLACKJACK`` with exactly two cards."""
        return self.calculate_hand_sum(player) == BLACKJACK and len(
            player) == 2

    def _observation(self):
        """Return the observation tuple for the current hands."""
        # NOTE(review): the third element was missing from the damaged
        # source; the usable-ace flag is inferred from the ``Discrete(2)``
        # slot of ``observation_space`` -- confirm against the original.
        return self.calculate_hand_sum(self.dealer), \
               self.calculate_hand_sum(self.player), \
               self.has_usable_ace(self.player)

    def _auxiliary(self):
        """Return the auxiliary info object built from both hands."""
        return BlackjackAuxiliary(self.dealer, self.player)