def __init__(self):
    """Read UI and DeepMind Lab launch settings from the global options.

    Pre-renders ``self.level_script`` as a ready-to-append CLI fragment
    (empty string when no level script is configured).
    """
    self.show_ui = options.get('show_ui', False)
    self.lab_path = options.get('environment/lab_path', '/lab')
    script = options.get('environment/level_script', None)
    # Build the optional command-line fragment once, so callers can
    # concatenate it directly into the launch command.
    self.level_script = '' if script is None else ' --level_script %s' % script
def __init__(self):
    """Capture run-mode flags from options and connect to the RLX server.

    Falls back to the server's own bind address when no explicit
    ``rlx_server_address`` is configured.
    """
    self.exploit = options.get('exploit', False)
    self.max_episodes = options.get('environment/max_episodes', 1)
    self.infinite_run = options.get('environment/infinite_run', False)
    address = options.get('rlx_server_address', None)
    if address is None:
        address = options.get('relaax_rlx_server/bind', 'localhost:7001')
    self.agent = AgentProxy(address)
def __init__(self, env='CartPole-v0'):
    """Build and configure a gym environment wrapper.

    Applies (in order): frame skipping, episode recording, step limit
    override, observation shaping, and Atari-specific image processing.
    Exits the process if the configured action size disagrees with gym.

    Fix over previous revision: the action-size error message was
    ungrammatical ("gym return", "in you configuration yaml").
    """
    self.gym = gym.make(env)
    # Optional frame skipping wrapper.
    frame_skip = options.get('environment/frame_skip', None)
    if frame_skip is not None:
        skip_wrapper = SkipWrapper(frame_skip)
        self.gym = skip_wrapper(self.gym)
    # Optional episode recording via gym's Monitor wrapper.
    self._record = options.get('environment/record', False)
    if self._record:
        out_dir = options.get('environment/out_dir', '/tmp/' + env)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        self.gym = gym.wrappers.Monitor(self.gym, out_dir, force=True)
    self._no_op_max = options.get('environment/no_op_max', 0)
    # Either a random action on reset (stochastic) or a fixed no-op (0).
    self._reset_action = self.gym.action_space.sample() \
        if options.get('environment/stochastic_reset', False) else 0
    self.gym.seed(random.randrange(1000000))
    self._show_ui = options.get('show_ui', False)
    # Per-episode step limit: explicit option wins, otherwise gym's own tag.
    limit = options.get(
        'environment/limit',
        self.gym.spec.tags.get(
            'wrapper_config.TimeLimit.max_episode_steps'))
    if limit is not None:
        self.gym._max_episode_steps = limit
    # 'environment/image' is the legacy name for 'environment/shape'.
    shape = options.get('environment/shape',
                        options.get('environment/image', (84, 84)))
    self._shape = shape[:2]
    if len(self._shape) > 1:
        # 2-tuple means grayscale (0 channels); otherwise the last entry
        # is the channel count.
        self._channels = 0 if len(shape) == 2 else shape[-1]
    self._crop = options.get('environment/crop', True)
    self._process_state = self._process_all
    # Atari games (plain or Deterministic variants) get image processing.
    atari = [name + 'Deterministic' for name in GymEnv.AtariGameList] + \
        GymEnv.AtariGameList
    if any(item.startswith(env.split('-')[0]) for item in atari):
        self._process_state = self._process_img
    self.action_size = self._get_action_size()
    if self.action_size != options.algorithm.output.action_size:
        log.error(
            'Algorithm expects action size %d; gym returns %d. \n'
            'Please set correct action size in your configuration yaml.' %
            (options.algorithm.output.action_size, self.action_size))
        sys.exit(-1)
    self._scale = (1.0 / 255.0)
    self.reset()
def __init__(self):
    """Initialize training state with a zero-primed history ring buffer."""
    super(Training, self).__init__()
    self.steps = options.get('environment/steps', 1000)
    self.history_len = 2
    self.ring_buffer = RingBuffer(self.history_len)
    # Prime all but one slot with zeros so the first real sample
    # completes a full history window.
    remaining = self.ring_buffer.size - 1
    while remaining > 0:
        self.ring_buffer.append(0)
        remaining -= 1
def __init__(self, level='ppaquette/DoomMyWayHome-v0'):
    """Build and configure a Doom (gym-doom) environment wrapper.

    Applies playing-mode/resolution/discrete-action wrappers, optional
    frame skipping and recording, a step limit override, and wraps the
    result so negative rewards are suppressed. Exits the process if the
    configured action size disagrees with gym.

    Fix over previous revision: the action-size error message said
    "in you configuration yaml" instead of "in your configuration yaml".
    """
    # Random startup delay of up to 99 seconds — presumably staggers
    # parallel workers so they don't race on shared resources; confirm.
    time.sleep(np.random.randint(100))
    env = gym.make(level)
    modewrapper = wrappers.SetPlayingMode('algo')
    obwrapper = wrappers.SetResolution('160x120')
    acwrapper = wrappers.ToDiscrete('minimal')
    env = modewrapper(obwrapper(acwrapper(env)))
    # Optional frame skipping wrapper.
    frame_skip = options.get('environment/frame_skip', None)
    if frame_skip is not None:
        skip_wrapper = SkipWrapper(frame_skip)
        env = skip_wrapper(env)
    # Optional episode recording via gym's Monitor wrapper.
    self._record = options.get('environment/record', False)
    if self._record:
        out_dir = options.get('environment/out_dir',
                              '/tmp/' + level.split('/')[-1])
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        env = gym.wrappers.Monitor(env, out_dir, force=True)
    self._no_op_max = options.get('environment/no_op_max', 0)
    # Either a random action on reset (stochastic) or a fixed no-op (0).
    self._reset_action = env.action_space.sample() \
        if options.get('environment/stochastic_reset', False) else 0
    env.seed(random.randrange(1000000))
    self._show_ui = options.get('show_ui', False)
    # Per-episode step limit: explicit option wins, otherwise gym's own tag.
    limit = options.get(
        'environment/limit',
        env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps'))
    if limit is not None:
        env._max_episode_steps = limit
    shape = options.get('environment/shape', (42, 42))
    self._shape = shape[:2]
    # 2-tuple means grayscale (0 channels); otherwise the last entry is
    # the channel count.
    self._channels = 0 if len(shape) == 2 else shape[-1]
    self.action_size = self._get_action_size(env)
    if self.action_size != options.algorithm.output.action_size:
        print(
            'Algorithm expects different action size (%d) from gym (%d). \n'
            'Please set correct action size in your configuration yaml.' %
            (options.algorithm.output.action_size, self.action_size))
        sys.exit(-1)
    self.env = NoNegativeRewardEnv(env)
    self._obs_buffer = deque(maxlen=2)
    # NOTE(review): Box is declared over [0, 255] and then its upper
    # bound is clamped to 1.0 — looks deliberate (observations are
    # rescaled by self._scale below), but confirm against consumers.
    self.observation_space = Box(0.0, 255.0, shape)
    self.observation_space.high[...] = 1.0
    self._scale = (1.0 / 255.0)
    self.reset()
def _run_deepmind_lab(self):
    """Launch DeepMind Lab through bazel as a child process.

    Builds a `bazel run` command (headless via osmesa unless the UI is
    shown) and passes the app path, config, UI flag and RLX server
    address through to the agent after the `--` separator.

    Fix over previous revision: `subprocess.call` never raises
    `CalledProcessError`, so the except handler was dead code; use
    `check_call` so a non-zero exit from bazel is logged and re-raised
    as the handler clearly intended.

    Raises:
        subprocess.CalledProcessError: if the bazel command exits non-zero.
    """
    log.info('Run deepmind-lab, please wait, it may take a moment...')
    try:
        rlx_address = options.get('rlx_server_address', None)
        if rlx_address is None:
            rlx_address = options.get('relaax_rlx_server/bind', 'localhost:7001')
        app_path = os.path.dirname(os.path.abspath(__file__))
        config = os.path.abspath(os.path.join(app_path, '../app.yaml'))
        # 'osmesa' renders off-screen; 'false' disables headless mode.
        headless = 'false' if self.show_ui else 'osmesa'
        cmd = 'cd %s && bazel run :random_agent --define headless=%s' % (
            self.lab_path, headless)
        # The '--' separates bazel's own args from the agent's;
        # self.level_script is either '' or a ready-made ' --level_script X'.
        cmd = '%s --%s --app_path %s --config %s --show-ui %s --rlx-server-address %s' % \
            (cmd, self.level_script, app_path, config, self.show_ui, rlx_address)
        log.info(cmd)
        # SECURITY NOTE: shell=True with a string built from configuration
        # values (lab_path, level_script) — acceptable for trusted local
        # config, but do not feed untrusted input into these options.
        subprocess.check_call(cmd, shell=True)
    except subprocess.CalledProcessError as e:
        log.info('Error while building deepmind-lab: %s' % str(e))
        raise
def _get_rlx_address():
    """Resolve the externally reachable host:port of the RLX server.

    Starts from the configured bind address; on Linux always uses
    loopback, and elsewhere replaces a wildcard 0.0.0.0 host with the
    address of the default outbound interface.

    Fix over previous revision: the discovery socket leaked if
    `connect` raised; it is now closed in a `finally` block.

    Returns:
        str: address in 'host:port' form.

    Raises:
        ValueError: if the configured bind address cannot be parsed.
    """
    def parse_address(address):
        try:
            host, port = address.split(':')
            return host, int(port)
        except Exception:
            raise ValueError("Can't parse RLX server address.")

    host, port = parse_address(options.get('relaax_rlx_server/bind', '0.0.0.0:7001'))
    if platform.system() == 'Linux':
        host = '127.0.0.1'
    elif host == '0.0.0.0':
        # Discover the outbound interface address. connect() on a UDP
        # socket sends no packets; it only selects a local endpoint.
        import socket
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            s.connect(("8.8.8.8", 80))
            host = s.getsockname()[0]
        finally:
            s.close()
    return '%s:%d' % (host, port)
def __init__(self):
    """Configure and create a DeepMind Lab environment instance.

    Reads fps/UI/frame-skip/shape/action settings from options,
    validates them, and constructs the `deepmind_lab.Lab` environment.

    Fix over previous revision: assertion messages were written as
    `assert cond, log.info(...)` — `log.info` returns None, so a failed
    assertion carried no message (and the text was logged at info level
    only on failure). Plain string messages are used instead.
    (NOTE: asserts are stripped under `python -O`; raise explicitly if
    these checks must survive optimized runs.)
    """
    self._fps = options.get('environment/fps', 20)
    self._show_ui = options.get('environment/show_ui', False)
    self._no_op_max = options.get('environment/no_op_max', 9)
    self._frame_skip = options.get('environment/frame_skip', 4)
    assert self._fps > 0, 'Frame per second rate should be above zero'
    assert self._no_op_max > 0, \
        'Number of random actions at start should be above zero'
    assert self._frame_skip > 0, 'Frame skipping rate should be above zero'
    shape = options.get('environment/shape', (84, 84))
    self._height, self._width = shape[0], shape[1]
    # 2-tuple means grayscale (0 channels); otherwise the last entry is
    # the channel count.
    self._channels = 0 if len(shape) == 2 else shape[-1]
    action_size = options.get('environment/action_size', 'small')
    if action_size in ACTIONS:
        self._actions = list(ACTIONS[action_size].values())
    else:
        # NOTE(review): on an invalid action size this only logs and falls
        # through with self._actions unset — later use will fail with
        # AttributeError; consider raising here instead.
        log.info('You\'ve provided an invalid action size. \n'
                 'Valid options are follows: \n {}'.format(ACTIONS.keys()))
    if self._show_ui:
        # UI mode forces a fixed window size regardless of 'shape'.
        self._width = 640
        self._height = 480
    self.env = deepmind_lab.Lab(options.get('environment/level_script',
                                            'nav_maze_static_01'),
                                ['RGB_INTERLACED'],
                                config={
                                    'fps': str(self._fps),
                                    'width': str(self._width),
                                    'height': str(self._height)
                                })
    self._scale = (1.0 / 255.0)
    self.reset()
def __init__(self):
    """Create the Doom training session for the configured level."""
    super(Training, self).__init__()
    level = options.get('environment/name', 'ppaquette/DoomMyWayHome-v0')
    self.gym = DoomEnv(level=level)
def __init__(self):
    """Initialize base training state and the configured episode length."""
    super(Training, self).__init__()
    # Episode length in steps; defaults to 5 when not configured.
    length = options.get('environment/episode_length', 5)
    self.episode_length = length
def __init__(self):
    """Create the gym training session for the configured environment."""
    super(Training, self).__init__()
    env_name = options.get('environment/name', 'CartPole-v0')
    self.gym = GymEnv(env=env_name)
def __init__(self):
    """Capture run-mode flags and attach an agent proxy to the RLX server."""
    self.agent = AgentProxy(options.rlx_server_address)
    self.exploit = options.exploit
    self.max_episodes = options.get('environment/max_episodes', 1)
    self.infinite_run = options.get('environment/infinite_run', False)
def __init__(self):
    """Set up the bandit environment and the training step budget."""
    super(Training, self).__init__()
    self.bandit = Bandit()
    self.steps = options.get('environment/steps', 1000)
def __init__(self, image_name):
    """Remember the image name and whether to show a UI.

    Args:
        image_name: identifier of the image to run.
    """
    self.show_ui = options.get('show_ui', False)
    self.image_name = image_name