def decide(self, ent, stim, isDead, n_dead=0):
    """Run one decision step for a single agent.

    Feeds the agent's stimulus through the lawmaker and the agent's own
    network, records the transition for training, and returns the chosen
    actions.

    Args:
        ent: the agent entity (provides entID, annID, moveDec).
        stim: raw environment stimulus for this agent.
        isDead: death flag passed through to the lawmaker and the agent net.
        n_dead: number of deaths this step; scales the death penalty.

    Returns:
        (playerActions, actionTargets) as produced by getActionArguments.
    """
    entID, annID = ent.entID, ent.annID
    # Per-step reward plus a penalty (DEADREWARD presumably negative) per death.
    reward = self.config.STEPREWARD + self.config.DEADREWARD * n_dead
    stim_tensor = torchlib.Stim(ent, stim, self.config)
    # Lawmaker output is fed into the agent network as an extra input.
    outsLm = self.lawmaker(stim_tensor.flat.view(1, -1), stim_tensor.ents.unsqueeze(0), isDead, annID)
    annReturns = self.anns[annID](stim_tensor.flat.view(1, -1), stim_tensor.ents.unsqueeze(0), outsLm, isDead)
    playerActions, actionTargets, actionDecisions = self.getActionArguments(annReturns, stim, ent)
    moveAction = int(annReturns['actions']['move'])
    attack = actionDecisions.get('attack', None)
    # NOTE(review): move indices > 4 appear to encode a second action bank;
    # they are folded back into 0-4 before being stored — confirm encoding.
    if moveAction > 4:
        moveAction -= 5
    ent.moveDec = moveAction
    contact = int(attack is not None)
    # Two-phase advantage-style term: subtract the mean lawmaker Q before
    # punishment, then add back the (scalar) Qs selected by get_punishment.
    # Order matters: get_punishment replaces outsLm.
    Asw = -np.mean([float(t.mean()) for t in outsLm['Qs'].values()])
    outsLm = self.lawmaker.get_punishment(outsLm, annReturns['actions'])
    Asw += np.mean([float(t) for t in outsLm['Qs'].values()])
    # Record the transition for the agent net and the lawmaker separately.
    self.collectStep(entID, annReturns['actions'], annReturns['policy'], stim_tensor.flat.numpy(), stim_tensor.ents.numpy(), reward, contact, float(annReturns['val']))
    self.collectStepLm(entID, outsLm['actions'], outsLm['policy'])
    # Logging/diagnostics only happens outside TEST mode.
    if not self.config.TEST:
        self.updates[entID].feather.scrawl(ent, float(annReturns['val']), reward, Asw, attack, contact)
    return playerActions, actionTargets
def visVals(self, food='max', water='max'):
    """Sweep a probe agent over every interior tile and record the value net.

    For each non-border position, a throwaway Player is placed on the tile,
    its stimulus is evaluated by the value network, and the agent is removed
    again. Optional food/water overrides pin the probe's resources.

    Returns:
        A list of (pos, value) pairs, one per interior tile.
    """
    results = []
    rows, cols = self.world.shape
    border = self.config.BORDER
    for row in range(border, rows - border):
        for col in range(border, cols - border):
            # Random display color (index into the 12-color palette).
            idx = int(12 * np.random.rand())
            probe = entity.Player(-1, (idx, Neon.color12()[idx]), self.config)
            probe._pos = (row, col)
            if food != 'max':
                probe._food = food
            if water != 'max':
                probe._water = water
            pos = probe.pos
            tile = self.world.env.tiles[row, col]
            tile.addEnt(probe.entID, probe)
            value = self.valNet(torchlib.Stim(probe, self.world.env.stim(pos, self.config.STIM), self.config)).detach()
            tile.delEnt(probe.entID)
            results.append((pos, float(value)))
    return results
def visDeps(self, sz=15):
    """Map the value network's response to a fixed target over a toy grid.

    Builds an sz x sz grass arena, pins a target agent at the center, then
    sweeps a probe agent over every tile and evaluates the value network at
    each position.

    Args:
        sz: side length of the synthetic grid (default 15, matching the
            original hard-coded arena; the target sits at (sz//2, sz//2)).

    Returns:
        A list of ((r, c), value) pairs, one per grid tile.
    """
    from forge.blade.core import realm
    from forge.blade.core.tile import Tile
    colorInd = int(12 * np.random.rand())
    color = (colorInd, Neon.color12()[colorInd])
    ent = realm.Desciple(-1, self.config, color).server
    targ = realm.Desciple(-1, self.config, color).server
    # Synthetic all-grass arena, built tile by tile.
    tiles = np.zeros((sz, sz), dtype=object)
    for r in range(sz):
        for c in range(sz):
            tiles[r, c] = Tile(enums.Grass, r, c, 1, None)
    # Pin the target at the center of the arena.
    center = sz // 2
    targ.pos = (center, center)
    tiles[center, center].addEnt(0, targ)
    posList, vals = [], []
    for r in range(sz):
        for c in range(sz):
            ent.pos = (r, c)
            tiles[r, c].addEnt(1, ent)
            s = torchlib.Stim(ent, tiles, self.config)
            # Dead locals (conv/flat/ents) removed: valNet consumes the Stim directly.
            vals.append(float(self.valNet(s)))
            tiles[r, c].delEnt(1)
            posList.append((r, c))
    return list(zip(posList, vals))
def forward(self, ent, env):
    """Evaluate value, movement, and attack heads for one agent.

    Returns:
        (action, arguments, outs, val) where action/arguments pair the
        movement and attack decisions and outs concatenates both heads'
        raw network outputs.
    """
    stim = torchlib.Stim(ent, env, self.config)
    val = self.valNet(stim.conv, stim.flat, stim.ents)
    # Action tree: first slot unused here; remaining are move/attack nodes.
    _, moveNode, attackNode = ActionTree(env, ent, ActionV2).actions()
    moveArg, moveOuts = self.moveNet(env, ent, moveNode, stim)
    attkChoice, attkArg, attkOuts = self.attackNet(env, ent, attackNode, stim)
    return (moveNode, attkChoice), (moveArg, attkArg), (moveOuts, *attkOuts), val
def prepareInput(self, ent, env):
    """Build batched (flat, ents) input tensors for one agent's stimulus."""
    stimulus = torchlib.Stim(ent, env, self.config)
    flat = stimulus.flat.unsqueeze(0)
    ents = stimulus.ents.unsqueeze(0)
    return flat, ents