Esempio n. 1
0
    def close(self):
        """Tear this instance down; calls after the first are no-ops.

        Unregisters the instance from ``docker_closer`` and, when a container
        id is set and the assigner is not in reuse mode, calls
        ``self._remove()``. ``self._closed`` is set once that has finished.
        """
        if not self._closed:
            docker_closer.unregister(self._docker_closer_id)

            # Make sure 1. we were the ones who started it, 2. it's
            # actually been started, and 3. we're meant to kill it.
            should_remove = self._container_id and not self.assigner.reuse
            if should_remove:
                self._remove()

            self._closed = True

    @property
    def client(self):
        """Return the ``client`` attribute of this instance's assigner."""
        assigner = self.assigner
        return assigner.client


if __name__ == '__main__':
    # Manual entry point: start two 'gym-core' instances through
    # DockerManager, then stop in an interactive debugger.
    logging.getLogger().setLevel(logging.INFO)
    from universe.runtimes import registration

    # docker run --name test --rm -ti -p 5900:5900 -p 15900:15900 quay.io/openai/universe.gym-core
    gym_core_runtime = registration.runtime_spec('gym-core')
    instance = DockerManager(runtime=gym_core_runtime, n=2)
    instance.start()

    # Pause here so `instance` can be inspected by hand.
    import ipdb
    ipdb.set_trace()
Esempio n. 2
0
def configure_with_latest_docker_runtime_tag(env):
    """Configure ``env`` with one remote using the ``:latest`` runtime image.

    Looks up the runtime spec named by ``env.spec.tags['runtime']``, replaces
    the tag of its image reference with ``latest`` and passes the result to
    ``env.configure`` together with ``remotes=1``.

    Args:
        env: Object exposing ``spec.tags['runtime']`` and ``configure``.
    """
    original_image = registration.runtime_spec(env.spec.tags['runtime']).image
    # Rewrite only a trailing ``:tag``. A plain ``:.*`` would cut from the
    # first colon onwards and turn ``host:5000/name:tag`` into ``host:latest``;
    # excluding ``/`` and ``:`` from the tag keeps a registry port intact.
    latest_image = re.sub(r':[^/:]*$', ':latest', original_image)
    logger.info("Using latest image: {}".format(latest_image))
    env.configure(remotes=1, docker_image=latest_image)
Esempio n. 3
0
    def _configure(
        self,
        remotes=None,
        client_id=None,
        start_timeout=None,
        docker_image=None,
        ignore_clock_skew=False,
        disable_action_probes=False,
        vnc_driver=None,
        vnc_kwargs=None,
        rewarder_driver=None,
        replace_on_crash=False,
        allocate_sync=True,
        observer=False,
        api_key=None,
        record=False,
    ):
        """Standard Gym hook to configure the environment.

        Builds the remote manager, selects the VNC and rewarder session
        classes, sets up diagnostics, marks the env as started and requests
        allocation of ``self.n`` remotes.

        Args:

          remotes: Forwarded to ``remotes_module.build``. When None, the
            ``GYM_VNC_REMOTES`` environment variable is used, falling back
            to ``'1'``.

          client_id: Forwarded to ``remotes_module.build``. When None,
            ``default_client_id()`` is used.

          start_timeout: Forwarded to ``remotes_module.build``.

          docker_image: When truthy and ``self.spec`` is set, overrides the
            ``image`` attribute of the looked-up runtime spec.

          ignore_clock_skew (bool): Assume remotes are on the same machine as us,
            for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame.  In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes.  Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping roundtrip
            times to calculate clock skew.

          disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.

          vnc_driver: Passed to ``vnc_session`` to pick the session class.

          vnc_kwargs (dict): VNC session arguments. Missing entries are
            filled with defaults, entries whose value is None are dropped,
            and the result is stored on ``self.vnc_kwargs``. The dict
            passed in is never modified. None means "no overrides".

          rewarder_driver: Passed to ``rewarder_session`` to pick the
            session class.

          replace_on_crash: Reconnects are allowed only when this is truthy
            and the remote manager reports ``supports_reconnect``.

          allocate_sync: When truthy, block until ``self.n`` environments
            are connected; otherwise only handle connections that are
            already available.

          observer: Stored as ``self._observer``.

          api_key: Forwarded to ``remotes_module.build``.

          record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

        Raises:
          error.Error: If the environment has already been started.
        """
        if self._started:
            raise error.Error(
                '{} has already been started; cannot change configuration now.'
                .format(self))

        universe.configure_logging()

        twisty.start_once()

        if self.spec is not None:
            runtime = registration.runtime_spec(self.spec.tags['runtime'])
            # Let the user manually set the docker_image version
            if docker_image:
                # TODO: don't support this option?
                runtime.image = docker_image
        else:
            runtime = None

        if remotes is None:
            remotes = os.environ.get('GYM_VNC_REMOTES', '1')

        if client_id is None:
            client_id = default_client_id()

        self.remote_manager, self.n = remotes_module.build(
            client_id=client_id,
            remotes=remotes,
            runtime=runtime,
            start_timeout=start_timeout,
            api_key=api_key,
            use_recorder_ports=record,
        )
        self.connection_names = [None] * self.n
        self.connection_labels = [None] * self.n
        self.crashed = {}

        self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect
        if self.remote_manager.connect_vnc:
            cls = vnc_session(vnc_driver)
            # Work on a private copy: the setdefault calls below must not
            # leak into a dict shared between calls (a default argument or a
            # dict the caller reuses), or one env's start_timeout/encoding
            # would silently carry over to the next.
            vnc_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)
            vnc_kwargs.setdefault('start_timeout',
                                  self.remote_manager.start_timeout)
            # NOTE(review): `runtime` is the object returned by
            # registration.runtime_spec (or None) and is compared with a
            # string here -- confirm this branch can actually be taken.
            if runtime == 'gym-core':
                vnc_kwargs.setdefault('encoding', 'zrle')
            else:
                vnc_kwargs.setdefault('encoding', 'tight')
                vnc_kwargs.setdefault('fine_quality_level', 50)
                vnc_kwargs.setdefault('subsample_level', 2)
            # Filter out None values, since some drivers may not handle them correctly
            vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
            logger.info(
                'Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"',
                vnc_kwargs)
            self.vnc_kwargs = vnc_kwargs
            self.vnc_session = cls()
        else:
            self.vnc_session = None

        self._observer = observer
        if self.remote_manager.connect_rewarder:
            cls = rewarder_session(rewarder_driver)
            self.rewarder_session = cls()
        else:
            self.rewarder_session = None

        if ignore_clock_skew:
            logger.info(
                'Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)'
            )

        if self.rewarder_session or ignore_clock_skew:
            # Don't need rewarder session if we're ignoring clock skew
            if self.spec is not None:
                metadata_encoding = self.spec.tags.get('metadata_encoding')
            else:
                metadata_encoding = None
            self.diagnostics = diagnostics.Diagnostics(
                self.n,
                self._probe_key,
                ignore_clock_skew,
                metadata_encoding=metadata_encoding,
                disable_action_probes=disable_action_probes)
        else:
            self.diagnostics = None

        self._reset_mask()
        # The env counts as started from here on, before allocation runs.
        self._started = True

        self.remote_manager.allocate([str(i) for i in range(self.n)],
                                     initial=True)
        if allocate_sync:
            # Block until we've fulfilled n environments
            self._handle_connect(n=self.n)
        else:
            # Handle any backends which synchronously fulfill their
            # allocation.
            self._handle_connect()
Esempio n. 4
0
    def _configure(self, remotes=None,
                   client_id=None,
                   start_timeout=None, docker_image=None,
                   ignore_clock_skew=False, disable_action_probes=False,
                   vnc_driver=None, vnc_kwargs=None,
                   rewarder_driver=None,
                   replace_on_crash=False, allocate_sync=True,
                   observer=False, api_key=None,
                   record=False,
    ):
        """Standard Gym hook to configure the environment.

        Builds the remote manager, selects the VNC and rewarder session
        classes, sets up diagnostics, marks the env as started and requests
        allocation of ``self.n`` remotes.

        Args:

          remotes: Forwarded to ``remotes_module.build``. When None, the
            ``GYM_VNC_REMOTES`` environment variable is used, falling back
            to ``'1'``.

          client_id: Forwarded to ``remotes_module.build``. When None,
            ``default_client_id()`` is used.

          start_timeout: Forwarded to ``remotes_module.build``.

          docker_image: When truthy and ``self.spec`` is set, overrides the
            ``image`` attribute of the looked-up runtime spec.

          ignore_clock_skew (bool): Assume remotes are on the same machine as us,
            for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame.  In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes.  Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping roundtrip
            times to calculate clock skew.

          disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.

          vnc_driver: Passed to ``vnc_session`` to pick the session class.

          vnc_kwargs (dict): VNC session arguments. Missing entries are
            filled with defaults, entries whose value is None are dropped,
            and the result is stored on ``self.vnc_kwargs``. The dict
            passed in is never modified. None means "no overrides".

          rewarder_driver: Passed to ``rewarder_session`` to pick the
            session class.

          replace_on_crash: Reconnects are allowed only when this is truthy
            and the remote manager reports ``supports_reconnect``.

          allocate_sync: When truthy, block until ``self.n`` environments
            are connected; otherwise only handle connections that are
            already available.

          observer: Stored as ``self._observer``.

          api_key: Forwarded to ``remotes_module.build``.

          record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

        Raises:
          error.Error: If the environment has already been started.
        """
        if self._started:
            raise error.Error('{} has already been started; cannot change configuration now.'.format(self))

        universe.configure_logging()

        twisty.start_once()

        if self.spec is not None:
            runtime = registration.runtime_spec(self.spec.tags['runtime'])
            # Let the user manually set the docker_image version
            if docker_image:
                # TODO: don't support this option?
                runtime.image = docker_image
        else:
            runtime = None

        if remotes is None:
            remotes = os.environ.get('GYM_VNC_REMOTES', '1')

        if client_id is None:
            client_id = default_client_id()

        self.remote_manager, self.n = remotes_module.build(
            client_id=client_id,
            remotes=remotes, runtime=runtime, start_timeout=start_timeout,
            api_key=api_key,
            use_recorder_ports=record,
        )
        self.connection_names = [None] * self.n
        self.connection_labels = [None] * self.n
        self.crashed = {}

        self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect
        if self.remote_manager.connect_vnc:
            cls = vnc_session(vnc_driver)
            # Work on a private copy: the setdefault calls below must not
            # write into the caller's dict, otherwise a dict reused for
            # several envs would carry the first env's start_timeout and
            # encoding over to the following ones.
            vnc_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)
            vnc_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
            # NOTE(review): `runtime` is the object returned by
            # registration.runtime_spec (or None) and is compared with a
            # string here -- confirm this branch can actually be taken.
            if runtime == 'gym-core':
                vnc_kwargs.setdefault('encoding', 'zrle')
            else:
                vnc_kwargs.setdefault('encoding', 'tight')
                vnc_kwargs.setdefault('fine_quality_level', 50)
                vnc_kwargs.setdefault('subsample_level', 2)
            # Filter out None values, since some drivers may not handle them correctly
            vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
            logger.info('Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"', vnc_kwargs)
            self.vnc_kwargs = vnc_kwargs
            self.vnc_session = cls()
        else:
            self.vnc_session = None

        self._observer = observer
        if self.remote_manager.connect_rewarder:
            cls = rewarder_session(rewarder_driver)
            self.rewarder_session = cls()
        else:
            self.rewarder_session = None

        if ignore_clock_skew:
            logger.info('Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)')

        if self.rewarder_session or ignore_clock_skew:
            # Don't need rewarder session if we're ignoring clock skew
            if self.spec is not None:
                metadata_encoding = self.spec.tags.get('metadata_encoding')
            else:
                metadata_encoding = None
            self.diagnostics = diagnostics.Diagnostics(self.n, self._probe_key, ignore_clock_skew, metadata_encoding=metadata_encoding, disable_action_probes=disable_action_probes)
        else:
            self.diagnostics = None

        self._reset_mask()
        # The env counts as started from here on, before allocation runs.
        self._started = True

        self.remote_manager.allocate([str(i) for i in range(self.n)], initial=True)
        if allocate_sync:
            # Block until we've fulfilled n environments
            self._handle_connect(n=self.n)
        else:
            # Handle any backends which synchronously fulfill their
            # allocation.
            self._handle_connect()
Esempio n. 5
0
def configure_with_latest_docker_runtime_tag(env):
    """Point ``env`` at the ``:latest`` tag of its runtime's docker image.

    The runtime spec is looked up from ``env.spec.tags['runtime']``; its
    image reference has the tag swapped for ``latest`` and is handed to
    ``env.configure`` along with ``remotes=1``.

    Args:
        env: Object exposing ``spec.tags['runtime']`` and ``configure``.
    """
    original_image = registration.runtime_spec(env.spec.tags['runtime']).image
    # Match the tag only at the end of the reference and never across a
    # ``/`` or ``:``. Matching ``:.*`` instead would start at the first colon
    # and destroy a registry port such as ``host:5000/name:tag``.
    latest_image = re.sub(r':[^/:]*$', ':latest', original_image)
    logger.info("Using latest image: {}".format(latest_image))
    env.configure(remotes=1, docker_image=latest_image)
Esempio n. 6
0
        self.close()

    def close(self):
        """Shut the instance down once; repeated calls do nothing.

        The instance is unregistered from ``docker_closer``. ``self._remove()``
        is then called if a container id is set and the assigner is not in
        reuse mode. Finally ``self._closed`` is set.
        """
        if not self._closed:
            docker_closer.unregister(self._docker_closer_id)

            # Make sure 1. we were the ones who started it, 2. it's
            # actually been started, and 3. we're meant to kill it.
            owns_container = self._container_id and not self.assigner.reuse
            if owns_container:
                self._remove()

            self._closed = True

    @property
    def client(self):
        """Expose the assigner's ``client`` attribute on this instance."""
        owner = self.assigner
        return owner.client

if __name__ == '__main__':
    # Manual entry point: start two 'gym-core' instances through
    # DockerManager, then stop in an interactive debugger.
    logging.getLogger().setLevel(logging.INFO)
    from universe.runtimes import registration

    # docker run --name test --rm -ti -p 5900:5900 -p 15900:15900 quay.io/openai/universe.gym-core
    gym_core_runtime = registration.runtime_spec('gym-core')
    instance = DockerManager(runtime=gym_core_runtime, n=2)
    instance.start()

    # Pause here so `instance` can be inspected by hand.
    import ipdb
    ipdb.set_trace()