def demo():
    """Drive a mock league for a fixed number of steps and log its make-up.

    A ``League`` is built around a ``MockAgent`` playing in ``MockWorlds``.
    At every step the agent acts, the worlds advance, and the league is
    updated with the resulting transitions. The agent network's ``skill``
    is set to ``10.`` on step 6 and to ``0.`` on every other step.

    Returns:
        A ``(parts, trace)`` pair of dotdicts. ``parts`` holds the ``agent``,
        the ``league`` and the final ``worlds``. ``trace`` holds two
        ``(T, T+1)`` arrays: ``fielded[t, n]`` is the width of the splitter
        slice given to name ``n`` at step ``t``; ``stabled[t, n]`` is ``1.``
        when ``n`` is among the league's stable names at step ``t``.
    """
    n_steps = 256
    n_env = 128
    n_stabled = 8

    worlds = MockWorlds.initial(n_env)
    agent = MockAgent(0)
    network = agent.network
    league = League(
        agent, MockAgent, worlds.n_envs,
        n_stabled=n_stabled, stable_interval=8, verbose=False)

    fielded = np.zeros((n_steps, n_steps + 1))
    stabled = np.zeros((n_steps, n_steps + 1))

    for step in range(n_steps):
        skills = agent(worlds)
        stepped, transitions = worlds.step(skills)
        # The league is told about the seats of the worlds *before* the step.
        league.update(agent, worlds.seats, transitions)
        worlds = stepped

        # One-step spike in skill at step 6, flat zero otherwise.
        if step == 6:
            network.skill[()] = 10.
        else:
            network.skill[()] = 0.

        for name, piece in zip(league.splitter.names, league.splitter.slices):
            fielded[step, name] = piece.stop - piece.start

        for name in league.stable.names:
            stabled[step, name] = 1.

    parts = dotdict.dotdict(agent=agent, league=league, worlds=worlds)
    trace = dotdict.dotdict(fielded=fielded, stabled=stabled)
    return parts, trace
def state(self, e):
    """Returns a :class:`~rebar.dotdict.dotdict` tree representing the state of
    environment ``e``.

    The tree is made of the scalar options ``n_envs``, ``n_agents``, ``res``,
    ``fov``, ``agent_radius`` and ``fps`` read off this object, plus the
    ``scenery`` state, the ``agents`` state and a top-level ``progress`` entry
    for environment ``e``. The whole tree is passed through ``arrdict.clone``
    before being returned.

    A typical state looks like this::

        arrdict:
        n_envs          1
        n_agents        4
        res             512
        fov             60
        agent_radius    0.10606601717798211
        fps             10
        scenery         arrdict:
                        model       Tensor((8, 2, 2), torch.float32)
                        lines       Tensor((307, 2, 2), torch.float32)
                        lights      Tensor((21, 3), torch.float32)
                        textures    <megastepcuda.Ragged2D object at 0x7fba34112eb0>
                        baked       <megastepcuda.Ragged1D object at 0x7fba34112670>
        agents          arrdict:
                        angles       Tensor((4,), torch.float32)
                        positions    Tensor((4, 2), torch.float32)
        progress        Tensor((4,), torch.float32)

    NOTE(review): the exact shapes above are illustrative and were not
    re-checked against the current sub-state methods.

    This state tree is usually passed onto a :ref:`plotting` function."""
    fields = ('n_envs', 'n_agents', 'res', 'fov', 'agent_radius', 'fps')
    snapshot = dotdict.dotdict(
        **{field: getattr(self, field) for field in fields},
        scenery=self.scenery.state(e),
        agents=self.agents.state(e),
        progress=self.progress[e])
    return arrdict.clone(snapshot)
def geometry_data(regenerate=False):
    """Load the cubicasa geometry cache, creating it first if necessary.

    The cache lives at ``.cache/cubicasa-geometry.npz.gz``. When it is missing
    it is downloaded; when ``regenerate`` is true it is rebuilt from the SVG
    data by running ``safe_geometry`` over every row in parallel, dropping
    the rows that came back as ``None``.

    Args:
        regenerate: rebuild the cache from ``svg_data()`` even if it exists.

    Returns:
        The unflattened tree of arrays stored in the cache.
    """
    # Why .npz.gz? Because applying gzip manually manages x10 better compression than
    # np.savez_compressed. They use the same compression alg, so I assume the difference
    # is in the default compression setting - which isn't accessible in np.savez_compressed.
    cache = Path('.cache/cubicasa-geometry.npz.gz')

    if regenerate or not cache.exists():
        cache.parent.mkdir(exist_ok=True, parents=True)
        if not regenerate:
            #TODO: Shift this to Github
            url = 'https://www.dropbox.com/s/3ohut8lvmr8lkwg/cubicasa-geometry.npz.gz?raw=1'
            cache.write_bytes(download(url))
        else:
            log.info('Regenerating geometry cache from SVG cache.')
            with parallel.parallel(safe_geometry) as pool:
                jobs = {}
                for _, row in svg_data().iterrows():
                    jobs[str(row.id)] = pool(row.id, row.svg)
                finished = pool.wait(jobs)

            # Rows whose geometry came back as None are left out of the cache.
            usable = {key: geom for key, geom in finished.items() if geom is not None}

            buffer = BytesIO()
            np.savez(buffer, **flatten(usable))
            cache.write_bytes(gzip.compress(buffer.getvalue()))

    # np.load is kinda slow.
    raw = gzip.decompress(cache.read_bytes())
    with ZipFile(BytesIO(raw)) as zf:
        arrays = {}
        for member in zf.namelist():
            # Drop the 4-character extension; np.savez stores each array as '<key>.npy'.
            arrays[member[:-4]] = fastload(zf.read(member))
        flat = dotdict.dotdict(arrays)
    return unflatten(flat)
def __init__(self, n_envs, *args, **kwargs):
    """Set up a one-agent-per-env environment on sampled cubicasa geometries.

    Args:
        n_envs: number of geometries to sample.
        *args, **kwargs: forwarded to ``core.Core`` alongside the fixed
            ``res=4 * 64`` and ``fov=130``.
    """
    geometries = cubicasa.sample(n_envs)
    scenery = scene.scenery(geometries, 1)

    engine = core.Core(scenery, *args, res=4 * 64, fov=130, **kwargs)
    self.core = engine
    self.device = engine.device

    # Sensor and actuator modules, all attached to the same core.
    self._rgb = modules.RGB(engine, n_agents=1, subsample=4)
    self._depth = modules.Depth(engine, n_agents=1, subsample=4)
    self._mover = modules.MomentumMovement(engine)
    self._imu = modules.IMU(engine)
    self._respawner = modules.RandomSpawns(geometries, engine)

    self.action_space = self._mover.space
    self.obs_space = dotdict.dotdict(
        rgb=self._rgb.space,
        d=self._depth.space,
        imu=self._imu.space)

    # Chain the two `inverse` lookups: textures -> lines -> (presumably) env
    # index. NOTE(review): confirm against the scenery's ragged layout.
    line_inverse = engine.scenery.lines.inverse
    texture_inverse = engine.scenery.textures.inverse.long()
    self._tex_to_env = line_inverse[texture_inverse].long()

    # Same shape and integer dtype as `_tex_to_env`, filled with False (i.e. zeros).
    self._seen = torch.full_like(self._tex_to_env, False)
    self._potential = engine.env_full(0.)
    self._lengths = torch.zeros(engine.n_envs, device=engine.device, dtype=torch.int)
def record(self, transitions, live, start, end):
    """Accumulate per-matchup statistics and report the matchups that are done.

    Wins (rewards equal to 1), move counts and elapsed time are scatter-added
    into ``self.stats`` at the positions given by ``live``. A matchup counts
    as done once its summed wins equal ``self.tracker.n_envs_per``. Each done
    matchup is turned into a result record and its win counters are then set
    to ``-1``.

    Args:
        transitions: must expose ``rewards`` and ``terminal``.
        live: index argument handed to ``scatter_add_``.
        start, end: timestamps; their difference is split evenly over
            ``transitions.terminal.size(0)``.

    Returns:
        A list of dotdicts with keys ``names``, ``wins``, ``moves``,
        ``games``, ``times`` and ``boardsize``.
    """
    #TODO: Figure out how to get scatter_add_ to work on vector-valued vals
    wins = (transitions.rewards == 1).int()
    for seat in (0, 1):
        scatter_add_(self.stats.wins[:, :, seat], live, wins[:, seat])
    scatter_add_(self.stats.moves, live, 1)
    scatter_add_(self.stats.times, live, (end - start) / transitions.terminal.size(0))

    done = self.stats.wins.sum(-1) == self.tracker.n_envs_per
    finished = self.stats[done].cpu()

    def summarise(item):
        # One result record for a single finished matchup.
        return dotdict.dotdict(
            names=tuple(self.tracker.names[i] for i in item.indices),
            wins=tuple(float(w) for w in item.wins),
            moves=float(item.moves),
            games=float(sum(item.wins)),
            times=float(item.times),
            boardsize=self.worlds.boardsize)

    results = [summarise(finished[idx]) for idx in range(finished.indices.size(0))]

    # Mark the reported matchups so they no longer match the `done` test.
    self.stats.wins[done] = -1
    return results
def initial_stats(n_jobs):
    """Return a fresh stats record for a run of ``n_jobs`` jobs.

    All counters start at zero, ``total`` is ``n_jobs``, ``start`` is the
    current time and ``end`` is one second after the current time.
    """
    started = time.time()
    # Placeholder end, one second ahead, so `end` is already later than `start`.
    provisional_end = time.time() + 1
    return dotdict.dotdict(
        finished=0,
        total=n_jobs,
        moves=0,
        games=0,
        matchups=0,
        start=started,
        end=provisional_end)
def to(self, state, action=0, reward=0., weight=1.):
    """Register a transition from this state to ``state`` with the builder.

    Args:
        state: the destination, stored under ``next``.
        action: coerced with ``int``.
        reward: stored as given.
        weight: stored as given.

    Returns:
        ``self``, so calls can be chained.
    """
    transition = dotdict.dotdict(
        prev=self._name,
        action=int(action),
        next=state,
        reward=reward,
        weight=weight)
    self._builder._trans.append(transition)
    return self
def gather(wins, moves, matchup_idxs, names, boardsize):
    """Sum wins and moves over the envs that share each matchup pattern.

    Args:
        wins: per-env array, summed over axis 0 for each pattern.
        moves: per-env array, summed over axis 0 for each pattern.
        matchup_idxs: 2-D ``(n_envs, n_seats)`` array; a row belongs to a
            pattern when all of its entries equal the pattern.
        names: sequence indexed by a pattern to get the seat names.
        boardsize: copied verbatim into every result.

    Returns:
        One dotdict per pattern from ``matchup_patterns(n_seats)``, with keys
        ``names``, ``wins``, ``moves``, ``games`` and ``boardsize``.
    """
    name_arr = np.array(names)
    _, n_seats = matchup_idxs.shape

    def summarise(pattern):
        # Rows whose every seat matches this pattern.
        rows = (matchup_idxs == pattern).all(-1)
        ws = wins[rows].sum(0)
        ms = moves[rows].sum(0)
        return dotdict.dotdict(
            names=tuple(name_arr[pattern]),
            wins=tuple(float(w) for w in ws),
            moves=float(ms[0]),
            games=float(ws.sum()),
            boardsize=boardsize)

    return [summarise(pattern) for pattern in matchup_patterns(n_seats)]
def build(self):
    """Assemble the declared observations and transitions into dense tensors.

    States are every name mentioned in an observation or at either end of a
    transition. Each state gets an integer index in set-iteration order.

    Returns:
        A dotdict with

        * ``obs``: ``(n_states, d_obs)``, NaN where no observation was declared
        * ``trans``: ``(n_states, n_actions, n_states)`` transition weights
        * ``reward``: ``(n_states, n_actions)``
        * ``terminal``: bool per state, true when no action has any outgoing weight
        * ``start``: ``(n_states,)`` start values
        * ``indices``, ``names``, ``n_states``, ``n_actions``, ``d_obs``

    Raises:
        AssertionError: if the action ids are not ``0..n_actions-1`` or no
            state has a positive start value.
        ValueError: if the observations do not all have the same length.
    """
    observed = {x.state for x in self._obs}
    sources = {x.prev for x in self._trans}
    targets = {x.next for x in self._trans}
    states = observed | sources | targets

    actions = {x.action for x in self._trans}
    assert max(actions) == len(actions) - 1, "Action set isn't contiguous"

    n_states = len(states)
    n_actions = len(actions)
    indices = dict(zip(states, range(n_states)))
    names = np.array(list(states))

    # Unpacking a one-element set insists on a single observation length.
    (d_obs, ) = {len(x.obs) for x in self._obs}

    obs = torch.full((n_states, d_obs), np.nan)
    start = torch.zeros((n_states, ))
    for x in self._obs:
        row = indices[x.state]
        obs[row] = torch.as_tensor(x.obs)
        start[row] = x.start

    trans = torch.zeros((n_states, n_actions, n_states))
    reward = torch.zeros((n_states, n_actions))
    for x in self._trans:
        src = indices[x.prev]
        dst = indices[x.next]
        trans[src, x.action, dst] = x.weight
        reward[src, x.action] = x.reward

    # Terminal: even the best action has zero total outgoing weight.
    outgoing = trans.sum(-1)
    terminal = outgoing.max(-1).values == 0

    assert start.sum() > 0, 'No start state declared'

    return dotdict.dotdict(
        obs=obs,
        trans=trans,
        reward=reward,
        terminal=terminal,
        start=start,
        indices=indices,
        names=names,
        n_states=n_states,
        n_actions=n_actions,
        d_obs=d_obs)
def __init__(self, n_envs, n_agents, *args, **kwargs):
    """Set up a multi-agent environment on sampled cubicasa geometries.

    Args:
        n_envs: ``max(n_envs // 4, 1)`` geometries are sampled.
        n_agents: number of agents placed in each scenery.
        *args, **kwargs: forwarded to ``core.Core`` alongside the fixed
            ``res=4 * 128`` and ``fov=70``.

    After construction ``self.n_envs`` is ``core.n_envs * core.n_agents``.
    """
    n_geometries = max(n_envs // 4, 1)
    geometries = cubicasa.sample(n_geometries)
    scenery = scene.scenery(geometries, n_agents)

    engine = core.Core(scenery, *args, res=4 * 128, fov=70, **kwargs)
    self.core = engine

    # Every module below is built with n_agents=1, whatever `n_agents` is.
    self._rgb = modules.RGB(engine, n_agents=1, subsample=4)
    self._depth = modules.Depth(engine, n_agents=1, subsample=4)
    self._imu = modules.IMU(engine, n_agents=1)
    self._movement = modules.MomentumMovement(engine, n_agents=1)
    self._spawner = modules.RandomSpawns(geometries, engine)

    self.action_space = self._movement.space
    self.obs_space = dotdict.dotdict(
        rgb=self._rgb.space,
        d=self._depth.space,
        imu=self._imu.space,
        health=spaces.MultiVector(1, 1))

    # One row per geometry: mask shape scaled by that geometry's `res`.
    extents = np.stack([g.masks.shape * g.res for g in geometries])
    self._bounds = arrdict.torchify(extents).to(engine.device)

    # Both start as NaN; nothing in this constructor fills them in.
    self._health = engine.agent_full(np.nan)
    self._damage = engine.agent_full(np.nan)

    self.n_envs = engine.n_envs * engine.n_agents
    self.device = engine.device
def auxinfo(f, K=1001, S=50):
    """Tabulate the Gaussian expectation of ``f`` on a (μ, σ2) grid.

    For each grid point the expectation of ``f(d)`` with ``d ~ N(μ, σ2)`` is
    approximated by ``S``-point Gauss-Hermite (probabilists') quadrature.
    The table and its central-difference derivatives along both axes are
    then wrapped in bivariate splines.

    Args:
        f: vectorised callable applied to the ``(K, K, S)`` array of sample points.
        K: number of grid points along each of μ and σ2.
        S: number of quadrature nodes.

    Returns:
        A dotdict with the grids ``μ`` and ``σ2``, the splines ``f``, ``dμ``
        and ``dσ2``, and the raw tables ``fs``, ``dμs`` and ``dσs``.
    """
    mu_grid = np.linspace(*μ_lims, K)
    var_grid = np.logspace(*σ2_lims, K, base=10)

    nodes, weights = np.polynomial.hermite_e.hermegauss(S)

    # Sample points μ + z·σ, broadcast to (K, K, S).
    sigma = var_grid ** .5
    samples = mu_grid[:, None, None] + nodes[None, None, :] * sigma[None, :, None]

    # 1/sqrt(2π) turns the quadrature weights into a standard normal density.
    scale = 1 / (2 * np.pi) ** .5
    fs = scale * (f(samples) * weights).sum(-1)
    f_spline = sp.interpolate.RectBivariateSpline(mu_grid, var_grid, fs, kx=1, ky=1)

    # Central differences along μ; the two boundary rows are lost.
    mu_step = (mu_grid[2:] - mu_grid[:-2])[:, None]
    dmu_table = (fs[2:, :] - fs[:-2, :]) / mu_step
    dmu_spline = sp.interpolate.RectBivariateSpline(
        mu_grid[1:-1], var_grid, dmu_table, kx=2, ky=2)

    # Central differences along σ2; the two boundary columns are lost.
    var_step = (var_grid[2:] - var_grid[:-2])[None, :]
    dvar_table = (fs[:, 2:] - fs[:, :-2]) / var_step
    dvar_spline = sp.interpolate.RectBivariateSpline(
        mu_grid, var_grid[1:-1], dvar_table, kx=2, ky=2)

    return dotdict.dotdict(
        μ=mu_grid,
        σ2=var_grid,
        f=f_spline,
        dμ=dmu_spline,
        dσ2=dvar_spline,
        fs=fs,
        dμs=dmu_table,
        dσs=dvar_table)
def state(self, e=0):
    """Return the ``core`` and ``rgb`` states of environment ``e`` as a dotdict."""
    core_state = self.core.state(e)
    rgb_state = self.rgb.state(e)
    return dotdict.dotdict(core=core_state, rgb=rgb_state)
def pandas(soln, names):
    """Label a solution's ``μ`` and ``Σ`` with ``names`` using pandas.

    Returns:
        A dotdict with ``μ`` as a Series indexed by ``names`` and ``Σ`` as a
        DataFrame whose rows and columns are both ``names``.
    """
    means = pd.Series(soln.μ, index=names)
    covariance = pd.DataFrame(soln.Σ, index=names, columns=names)
    return dotdict.dotdict(μ=means, Σ=covariance)
def state(self, name, obs, start=0.):
    """Declare a state and return a ``State`` handle for it.

    Args:
        name: the state's name.
        obs: the state's observation; a bare int, float or bool is wrapped
            into a one-element tuple.
        start: start value recorded for the state.

    Returns:
        ``State(name, self)``.
    """
    scalar_types = (int, float, bool)
    observation = (obs, ) if isinstance(obs, scalar_types) else obs
    self._obs.append(dotdict.dotdict(state=name, obs=observation, start=start))
    return State(name, self)