Ejemplo n.º 1
0
    def recv(self) -> str:
        """
        Receives the string message from the socket. Waits (blocking) if not connected.
        Can throw Disconnected error.

        While waiting, the connection flag is polled with a short sleep between checks
        so that the waiting thread does not spin at full CPU. An empty read, a connection
        reset or a connection abort is treated as a disconnection: the socket reference
        is dropped, the connection flag is cleared and Disconnected is raised.

        :return: the received message as a string
        :raises Disconnected: if the read was empty or the connection was reset/aborted
        """
        import time  # local import so the method does not depend on file-level imports

        while not self._connected:
            # Yield the CPU between checks instead of busy-spinning.
            time.sleep(0.001)

        cause = None
        try:
            message = self._socket.recv(self._buffer_size)
        except (ConnectionResetError, ConnectionAbortedError) as error:
            message, cause = None, error

        if not message:
            # Single disconnect path shared by the empty read and the socket errors.
            log(f'TCP Connection disconnected', self._verbose, level=1)
            self._socket = None
            self._connected = False
            raise Disconnected() from cause

        # NOTE(review): a multi-byte UTF-8 character split across two reads would
        # fail to decode here - confirm whether the sender can produce such splits.
        return message.decode('UTF-8')
Ejemplo n.º 2
0
    def _parse_messages(self, messages):
        """
        Dispatches every message received from the game to its handler.

        Non-state messages are handled immediately by the method named
        ``_parse_<type>``. State messages are only counted while iterating; the
        last one seen is passed to ``_parse_states`` after all other messages
        have been handled, and a warning is logged if more than one arrived.

        :param messages: list of messages received from the game
        """
        latest_state = None
        states_seen = 0

        for msg in messages:
            log(f'Environment received message {msg}', self._verbose, level=3)

            kind = msg[MessageAttributes.TYPE]
            if kind == IncomingMessageTypes.STATE:
                # Defer state handling; only the most recent state is kept.
                states_seen += 1
                latest_state = msg
                continue

            handler = getattr(self, '_parse_' + kind)
            handler(msg)

        if latest_state is None:
            return

        if states_seen > 1:
            log('Warning!!!!! Received multiple state messages - consider sending state less frequent.',
                self._verbose,
                level=2)

        self._parse_states(latest_state)
Ejemplo n.º 3
0
    def get_messages(self):
        """
        Takes raw messages off the queue and returns them decoded from JSON.

        The method blocks on the queue and keeps reading until the queue is empty
        and at least one message has been decoded. A raw message that is not valid
        JSON is prefixed with the previously stored partial message (if any) and
        split with ``json_utils.split_jsons``; pieces that still fail to decode are
        remembered as the new partial message.

        :return: a list of decoded messages
        """
        decoded = []
        while True:
            chunk = self._messages.get(block=True)
            try:
                decoded.append(json.loads(chunk))
            except json.decoder.JSONDecodeError:
                # Glue the leftover of an earlier read in front of this chunk.
                if self._last_partial_message is not None:
                    chunk = self._last_partial_message + chunk
                    self._last_partial_message = None

                for piece in json_utils.split_jsons(chunk):
                    try:
                        decoded.append(json.loads(piece))
                    except json.decoder.JSONDecodeError:
                        # Keep the undecodable piece for the next read.
                        self._last_partial_message = piece

            self._messages.task_done()

            if self._messages.empty() and decoded:
                break

        log(f'Communication receiving messages: {decoded}', self._verbose, 3)
        return decoded
Ejemplo n.º 4
0
    def _parse_disconnected(self, message):
        """
        Handles a disconnected message: logs it, clears the connected flag
        and raises the Disconnected exception.

        :param message: a disconnected message
        :raises Disconnected: always
        """
        log(f'disconnected - message: {message}', self._verbose, level=1)

        # Clear the flag before raising so the instance state is up to date.
        self._connected = False
        raise Disconnected
Ejemplo n.º 5
0
    def send(self, message):
        """
        Serializes the message dictionary to a json string and sends it over the connection.
        The connection is asserted to be connected before anything is sent.

        :param message: a string keyed dictionary representing a message to be sent
        """
        log(f'Communication sending message: {message}', self._verbose, 3)
        self._assert_connected()

        connection = self._connection
        payload = json.dumps(message)
        connection.send(payload)
Ejemplo n.º 6
0
    def act(self, actions):
        """
        Sends the actions to the game and waits until new states are ready.

        The ready flag is cleared first, the actions are passed through
        ``_check_actions`` and sent with the communicator, and then ``_update``
        is called repeatedly until ``states_ready`` becomes true.

        :param actions: the actions to be sent
        """
        log(f'Environment acting: {actions}', self._verbose, level=2)

        self._states_ready = False
        self._check_actions(actions)
        self._communicator.send_actions(actions)

        # Keep processing updates until the states for these actions arrive.
        while True:
            if self.states_ready:
                break
            self._update()

        log(f'Environment states are ready: {self._states}', self._verbose, level=2)
Ejemplo n.º 7
0
    def start(self):
        """
        Starts the thread responsible for receiving messages.
        The method does not start a new thread if the thread has already been started;
        in that case it only logs a message and returns.
        """
        if self._running:
            # Pass the verbosity explicitly so that 2 is the message level and
            # not the verbosity argument of log.
            log('Communication thread already started', self._verbose, level=2)
            return

        # Mark as running before the thread is started.
        self._running = True
        self._thread = threading.Thread(target=self._run)
        self._thread.start()
Ejemplo n.º 8
0
    def connect(self):
        """
        Connects the environment using the communicator.

        The communicator is started first. If the environment is not connected yet,
        the communicator is asked to connect and ``_update`` is called repeatedly
        until the connected flag is set.
        """
        self._communicator.start()

        if not self._connected:
            self._communicator.connect()

            # Process updates until the connected flag is set.
            while not self._connected:
                self._update()

            log('Environment connected', self._verbose, level=2)
            return

        log('Environment already connected', self._verbose, level=2)
Ejemplo n.º 9
0
    def train(self, num_players: int = None, options: Dict[str, str] = None):
        """
        Runs the training procedure with the agent or agents provided in the constructor.
        An error is thrown if training is not supported by the provided agents;
        agents must implement Learner to be compatible with the training procedure.
        The number of players is optional. If it is not provided the maximum supported
        number of players will be selected. Options given as a dictionary are sent
        to the environment at the start of each episode.
        The metrics gathered during training can be accessed by the metrics property.

        A keyboard interrupt, a disconnection and a refused connection are logged
        and not propagated; on a keyboard interrupt the environment is also stopped.

        :param num_players: a number of players simultaneously interacting with the environment
        :param options: a dictionary representing the options sent to the environment
        """
        self._distributor.assert_training_supported()
        started_at = time.time()

        try:
            self._run(Mode.Training, num_players=num_players, options=options)
        except KeyboardInterrupt:
            log('Training interrupted by user.', level=1)
            self._environment.stop()
        except Disconnected:
            log('Disconnected from environment.', level=1)
        except ConnectionRefusedError:
            log('Error: Cannot connect to a game process. Make sure that the game is running.',
                level=0)
        else:
            # Only reached when the run finished without any of the errors above.
            elapsed_seconds = round(time.time() - started_at)
            log(f'Training completed in {elapsed_seconds}s.', self._verbose, level=1)
Ejemplo n.º 10
0
    def play(self, num_episodes=1, options: Dict[str, str] = None):
        """
        Starts the testing procedure using agent or agents provided in the constructor.
        If an iterable of agents or a single MultiLearner agent was provided and the environment supports
        multi-agent play then it will be used. Otherwise the single-agent procedure will be performed.
        The metrics gathered during the testing procedure can be accessed using metrics property.

        A keyboard interrupt, a disconnection and a refused connection are logged
        and not propagated; on a keyboard interrupt the environment is also stopped.

        :param num_episodes: a number of episodes to be run
        :param options: a dictionary representing the options sent to the environment
        """
        start_time = time.time()

        try:
            self._run(Mode.Playing, num_episodes=num_episodes, options=options)
        except KeyboardInterrupt:
            log('Playing interrupted by user.', level=1)
            self._environment.stop()
        except Disconnected:
            log('Disconnected from environment.', level=1)
        except ConnectionRefusedError:
            log('Error: Cannot connect to a game process. Make sure that the game is running.',
                level=0)
        else:
            # Report whole seconds, consistently with the training summary.
            log(f'Playing completed in {round(time.time() - start_time)}s.',
                self._verbose,
                level=1)
Ejemplo n.º 11
0
    def reset(self, num_players=None, options=None):
        """
        Starts a new run of the environment and waits for its first states.

        The running flag and the stored states are cleared, a start message is sent
        with the communicator and ``_update`` is called repeatedly until
        ``states_ready`` becomes true. Only then is the running flag set again.

        :param num_players: a number of players; when None, the first entry of the
            manifest's possible_players is used
        :param options: options passed on with the start message
        """
        if num_players is None:
            num_players = self._manifest.possible_players[0]

        self._running = False
        self._states = None
        self._communicator.send_start(num_players, options)

        # Process updates until the first states arrive.
        while True:
            if self.states_ready:
                break
            self._update()

        self._running = True
        log(f'Environment reset for {num_players} players.', self._verbose, level=2)
Ejemplo n.º 12
0
def _log_metrics(metrics, metrics_list, episode, verbose):
    """
    Logs the mean and the standard deviation of every metric over the given episodes.

    :param metrics: an iterable of metric names; the i-th name labels the i-th value
        of every entry of metrics_list
    :param metrics_list: a sequence with one indexable entry of metric values per episode
    :param episode: the number of the last episode covered by metrics_list
    :param verbose: the verbosity value passed on to log
    """
    episodes_count = len(metrics_list)
    lines = [f'Metrics for episodes {episode - episodes_count + 1} - {episode}:\n']
    for i, metric in enumerate(metrics):
        # A separate name is used so that the ``metrics`` argument is not rebound
        # while it is being iterated.
        values = tuple(m[i] for m in metrics_list)
        mean = statistics.mean(values)
        # statistics.stdev needs at least two values; report no spread for a single episode.
        stdev = statistics.stdev(values) if len(values) > 1 else 0.0
        lines.append(f'\t{metric} - mean: {mean}, stdev: {stdev}\n')
    log(''.join(lines), verbose, 1)
Ejemplo n.º 13
0
    def _react_to_state(self, training: bool, agent: Union[Player, Learner], state_info: StateInfo,
                        agent_state: _AgentRunningState) -> Optional[Any]:
        """
        Hands the current state to the agent and returns the action it selected.

        The state and the accumulated reward are passed through the state and reward
        adapters first. When training and a previous state is stored, the agent is
        informed about the reward gained since that state. The agent is asked for an
        action only if the state info reports the episode as still running; the agent
        state is then updated and marked as ended when the episode is over.

        :param training: a flag indicating if an agent should be informed about the reward
        :param agent: an agent that will react to a state and select an action
        :param state_info: a StateInfo object representing state and reward information
        :param agent_state: an AgentRunningState object representing current state of the agent
        :return: the adapted action selected by the agent or None if no action was selected
        """
        if not agent_state.running:
            return None

        raw_state, raw_reward, still_running = state_info
        state = adapt_object(raw_state, self._state_adapters)
        total_reward = adapt_object(raw_reward, self._reward_adapters)

        if training and agent_state.state is not None:
            # The agent gets both the reward gained since its last state and the total.
            agent.receive_reward(agent_state.state,
                                 agent_state.action,
                                 total_reward - agent_state.accumulated_reward,
                                 total_reward,
                                 state)

            log(f'Learner receive_reward(): previous_state: {agent_state.state},'
                f' previous_action:{agent_state.action},'
                f' accumulated_reward: {total_reward}'
                f' previous_accumulated_reward: {agent_state.accumulated_reward},'
                f' next_state: {state}',
                self._verbose, level=4)

        action = None
        if still_running:
            action = agent.act(state)

        if agent_state.running:
            agent_state.update(state, action, total_reward)
            if not still_running:
                agent_state.ended()

        if action is None:
            return None
        return adapt_object(action, self._action_adapters)
Ejemplo n.º 14
0
    def _parse_manifest(self, message):
        """
        Builds the manifest from a manifest message, stores it and marks the
        environment as connected.

        The actions and states entries of the message are converted with
        ``val.from_json``; the remaining entries are used as received.

        :param message: a message containing manifest
        """
        game_name = message[MessageAttributes.NAME]
        game_description = message[MessageAttributes.DESCRIPTION]
        actions_spec = val.from_json(message[MessageAttributes.ACTIONS])
        states_spec = val.from_json(message[MessageAttributes.STATES])
        metric_labels = message[MessageAttributes.METRICS_NAMES]
        player_counts = message[MessageAttributes.PLAYERS]

        # Note the argument order: possible players come before the metrics names.
        self._manifest = Manifest(game_name,
                                  game_description,
                                  actions_spec,
                                  states_spec,
                                  player_counts,
                                  metric_labels)
        self._connected = True

        log(f'Connected to a game with manifest:\n{self._manifest}', self._verbose, level=1)
Ejemplo n.º 15
0
    def _run(self,
             mode: Mode,
             num_episodes=None,
             num_players: int = None,
             options: Dict[str, str] = None):
        """
        Starts the training or testing procedure identified by the mode enum.

        The environment is connected, the players are initialized from its manifest
        and the number of players is selected and checked against the manifest.
        Episodes are then run one after another: in playing mode until num_episodes
        episodes were run, in training mode for as long as the distributor agrees
        to start the next episode.

        :param mode: an enum identifying the procedure
        :param num_episodes: a number of episodes used in the testing procedure
        :param num_players: a number of players used in the training procedure
        :param options: a dictionary representing options sent to the environment
        """
        self._environment.connect()
        self._distributor.initialize_players(self._environment.manifest)

        num_players = self._distributor.select_players_num(num_players)
        _assert_num_players_supported(
            num_players, self._environment.manifest.possible_players)

        episode = 0
        # Compare against the Mode class members, not against members reached
        # through the ``mode`` instance.
        while (mode is Mode.Playing and episode < num_episodes
               or mode is Mode.Training
               and self._distributor.do_start_episode(episode + 1)):

            episode += 1
            self._environment.reset(num_players, options)
            self._distributor.initialize_episode(episode)
            while self._distributor.is_episode_running():
                if mode is Mode.Playing:
                    actions = self._distributor.react_to_states(
                        self._environment.obtain_states())
                else:  # mode is Mode.Training
                    actions = self._distributor.learn_from_states(
                        self._environment.obtain_states())

                # No actions are sent when the distributor returned None.
                if actions is not None:
                    self._environment.act(actions)

            log(f'Episode {episode} ended.', self._verbose, level=2)