def makeRandomFOVDistr(self, agent):
    """
    Builds the (dict-form) tree deciding what shows up in the agent's
    field-of-view, branching on the agent's current location.

    Locations without an entry in ``self.victimClrCounts`` always yield
    'none'. For the others, the sub-tree produced by
    ``_make_fov_color_dist`` is used, after each of its leaf weight tables
    is normalized and rewritten in place as a ``'distribution'`` entry.

    :param agent: the agent whose FOV feature is being modeled.
    :return: the nested dict describing the tree (not yet run through makeTree).
    """
    fov_key = stateKey(agent.name, FOV_FEATURE)
    loc_key = stateKey(agent.name, 'loc')
    locations = self.world_map.all_locations
    tree = {
        'if': equalRow(loc_key, locations),
        None: noChangeMatrix(fov_key),
    }

    for idx, loc in enumerate(locations):
        if loc in self.victimClrCounts:
            sub_tree, leaves = self._make_fov_color_dist(loc, 0)
            for leaf in leaves:
                # turn the raw color weights into probabilities
                probs = Distribution(leaf)
                probs.normalize()
                # the leaf dict is reused as the tree node, so empty it first
                leaf.clear()
                outcomes = []
                for value, prob in probs.items():
                    if prob > 0:
                        outcomes.append(
                            (setToConstantMatrix(fov_key, value), prob))
                if len(outcomes) == 1:
                    # pad single-outcome distributions with a zero-probability no-op
                    outcomes.append((noChangeMatrix(fov_key), 0))
                leaf['distribution'] = outcomes
            tree[idx] = sub_tree
        else:
            # no victims recorded for this location
            tree[idx] = setToConstantMatrix(fov_key, 'none')

    return tree
def _makeLightToggleAction(self, agent):
    """
    Adds the 'toggleLight' action to the agent.

    The action is legal only while the agent stands in a location that is a
    key of ``self.sharedLights`` (i.e., has a switch). Using a switch flips
    the light flag of every room listed for that switch.

    :param agent: the agent receiving the action.
    """
    loc_key = stateKey(agent.name, 'loc')
    switch_locs = set(self.sharedLights.keys())

    # legal iff the agent is in one of the rooms with a switch
    legal = makeTree({
        'if': equalRow(loc_key, switch_locs),
        True: True,
        False: False
    })
    # NOTE(review): the legality tree goes through makeTree twice; presumably
    # the second call is a no-op -- confirm before simplifying
    action = agent.addAction({'verb': 'toggleLight'}, makeTree(legal))

    # one dynamics tree per (switch, affected room) pair rather than per location
    for switch, rooms in self.sharedLights.items():
        for room in rooms:
            light_flag = stateKey(WORLD, 'light' + str(room))
            flip = {
                'if': equalRow(light_flag, True),
                True: setToConstantMatrix(light_flag, False),
                False: setToConstantMatrix(light_flag, True)
            }
            dynamics = {
                'if': equalRow(loc_key, switch),
                True: flip,
                False: noChangeMatrix(light_flag)
            }
            self.world.setDynamics(light_flag, action, makeTree(dynamics))

    self.lightActions[agent.name] = action
def makeExpiryDynamics(self):
    """
    Sets the action-independent dynamics of the per-location victim counters
    so that victims "expire" once the world clock passes their color's
    expiry threshold (``self.color_expiry``).

    For every location and every color other than white and red:
      - the red counter gets the color's counter added to it;
      - the color's own counter is set to zero.
    """
    skipped = {WHITE_STR, RED_STR}
    expiring_colors = [c for c in self.color_names if c not in skipped]

    for loc in self.world_map.all_locations:
        prefix = 'ctr_' + loc + '_'
        red_ctr = stateKey(WORLD, prefix + RED_STR)
        for color in expiring_colors:
            ctr = stateKey(WORLD, prefix + color)
            deadline = self.color_expiry[color]

            # red counter absorbs the victims of this color once time is up
            # NOTE(review): this is registered under the same (red_ctr, True)
            # pair for every color; if setDynamics replaces rather than
            # merges, only the last color's tree survives -- confirm
            to_red = {
                'if': thresholdRow(self.world.time, deadline),
                True: addFeatureMatrix(red_ctr, ctr),
                False: noChangeMatrix(red_ctr)
            }
            self.world.setDynamics(red_ctr, True, makeTree(to_red))

            # the color's own counter drops to zero once time is up
            to_zero = {
                'if': thresholdRow(self.world.time, deadline),
                True: setToConstantMatrix(ctr, 0),
                False: noChangeMatrix(ctr)
            }
            self.world.setDynamics(ctr, True, makeTree(to_zero))
def test_partial_observability(self):
    """
    Converts a partially-observed domain and checks that the state fluent
    (stored under '__p') is absent from the agent's omega while the
    observation fluent 'q' is present, then checks both values after two steps.
    """
    rddl = ''' domain my_test { requirements = { partially-observed }; pvariables { p : { state-fluent, int, default = 1 }; q : { observ-fluent, int }; a : { action-fluent, bool, default = false }; }; cpfs { q' = 3; p' = q + 1; }; reward = 0; } non-fluents my_test_empty { domain = my_test; } instance my_test_inst { domain = my_test; init-state { a; }; } '''
    conv = Converter()
    conv.convert_str(rddl)
    world = conv.world

    # observability: only q is in the agent's observations
    first_agent = next(iter(world.agents.values()))
    self.assertNotIn(stateKey(WORLD, '__p'), first_agent.omega)
    self.assertIn(stateKey(WORLD, 'q'), first_agent.omega)

    # initial value of the hidden fluent
    self.assertEqual(world.getState(WORLD, '__p', unique=True), 1)

    for _ in range(2):
        world.step()

    # values after two steps
    self.assertEqual(world.getState(WORLD, '__p', unique=True), 4)
    self.assertEqual(world.getState(WORLD, 'q', unique=True), 3)
def _sense1Location(self, beepKey, nbrLoc):
    """
    Builds the (dict-form) tree setting a sensor feature from the victim
    counters of one neighboring location.

    A positive gold counter yields '2'; otherwise a positive green counter
    yields '1'; otherwise 'none'. When ``PROB_NO_BEEP`` is non-zero, the '2'
    and '1' outcomes become distributions that yield 'none' with that
    probability.

    :param beepKey: key of the sensor feature being set.
    :param nbrLoc: name of the neighboring location whose counters are read.
    :return: the nested dict describing the tree.
    """
    gold_ctr = stateKey(WORLD, 'ctr_' + nbrLoc + '_' + GOLD_STR)
    green_ctr = stateKey(WORLD, 'ctr_' + nbrLoc + '_' + GREEN_STR)

    def beep_leaf(beeps):
        # deterministic sensor: a plain assignment is enough
        if PROB_NO_BEEP == 0:
            return setToConstantMatrix(beepKey, beeps)
        # noisy sensor: the beep may be missed
        outcomes = ((beeps, 1 - PROB_NO_BEEP), ('none', PROB_NO_BEEP))
        return {
            'distribution': [(setToConstantMatrix(beepKey, value), prob)
                             for value, prob in outcomes]
        }

    return {
        'if': thresholdRow(gold_ctr, 0),
        True: beep_leaf('2'),
        False: {
            'if': thresholdRow(green_ctr, 0),
            True: beep_leaf('1'),
            False: setToConstantMatrix(beepKey, 'none')
        }
    }
def _createSensorDyn(self, human):
    """
    Sets the action-independent dynamics of the agent's directional sensors.

    For each direction, the sensor feature is driven by the agent's *future*
    location: if that location has a neighbor in the direction, the tree from
    ``_sense1Location`` for that neighbor applies; otherwise the sensor reads
    'none'.

    :param human: the agent owning the sensor features.
    """
    next_loc_key = makeFuture(stateKey(human.name, 'loc'))
    for d in Directions:
        sensor_key = stateKey(human.name, 'sensor_' + d.name)
        nbr_of = self.world_map.neighbors[d.value]
        sources = list(nbr_of.keys())

        tree = {
            'if': equalRow(next_loc_key, sources),
            None: setToConstantMatrix(sensor_key, 'none')
        }
        # branch index i corresponds to the i-th location with a neighbor
        tree.update(
            (idx, self._sense1Location(sensor_key, nbr_of[src]))
            for idx, src in enumerate(sources))

        self.world.setDynamics(sensor_key, True, makeTree(tree))
def makeMoveResetFOV(self, agent):
    """
    Makes each of the agent's first four move actions reset the 'vicInFOV'
    feature to 'none'.

    :param agent: the agent whose move actions get the reset dynamics.
    """
    fov_key = stateKey(agent.name, 'vicInFOV')
    moves = self.moveActions[agent.name]
    for idx in range(4):
        # a fresh tree per action, so no tree object is shared between actions
        reset = makeTree(setToConstantMatrix(fov_key, 'none'))
        self.world.setDynamics(fov_key, moves[idx], reset)
def parseBeep(self, msg, msgIdx, ts):
    """
    Validates a beep message and, if consistent, records it as a BEEP entry
    in ``self.actions``.

    The beep count is the number of whitespace-separated tokens in
    ``msg['message']``. The message is rejected (and an error logged) when:
    the room has no known victims; the count does not match the victims
    (1 needs Green without Gold, 2 needs Gold); the room is more than one
    step from ``self.lastParsedLoc``; or the room is unconnected to it.

    :param dict msg: the message, with 'message' and 'room_name' entries.
    :param msgIdx: index of the message, stored with the recorded action.
    :param ts: timestamp of the message, stored with the recorded action.
    :rtype: int
    :return: 0 if the beep was recorded, 1 if it was rejected.
    """
    beeps = len(msg['message'].split())
    room = msg['room_name']

    if room not in self.roomToVicDict:
        self.logger.error('%d Beeps from %s but no victims at %s' %
                          (beeps, room, ts))
        return 1

    victims = self.roomToVicDict[room]
    if beeps == 2:
        consistent = 'Gold' in victims
    else:
        consistent = (beeps == 1) and 'Green' in victims and 'Gold' not in victims
    if not consistent:
        self.logger.error('%d Beep from %s but wrong victims %s' %
                          (beeps, room, victims))
        return 1

    path = self.world_map.getDirection(self.lastParsedLoc, room)
    steps = len(path)
    if steps > 1:
        self.logger.error('In %s beep from %s %d steps away at %s' %
                          (self.lastParsedLoc, room, steps, ts))
        return 1
    heading = path[0]
    if heading == -1:
        self.logger.error('In %s beep from %s UNCONNECTED at %s' %
                          (self.lastParsedLoc, room, ts))
        return 1

    self.logger.debug('Heard %d beeps from %s at %s' % (beeps, room, ts))
    sensor_key = stateKey(self.human, 'sensor_' + Directions(heading).name)
    self.actions.append([BEEP, [sensor_key, str(beeps)], msgIdx, ts])
    return 0
def run1Action(self, player, actStruct, prune_threshold):
    """
    Replays a single parsed action of a player in ``self.world``.

    The world clock feature is first synced with the game's (inverted)
    timer, then the action is checked for legality and stepped. If the
    action carries outcomes, the first one is treated as a duration and the
    resulting clock value is injected into the step through ``select``.

    :param player: the player, used as key into ``self.playerToAgent`` and
        to query the performed action after the step.
    :param actStruct: sequence whose first element is
        ``[actType, actAndOutcomes, testbedMsgId, trueTime]``;
        ``actAndOutcomes`` holds the action followed by its outcomes and
        ``trueTime`` is indexed as (minutes, seconds).
    :param prune_threshold: passed to the world step as ``threshold``.
    :raises ValueError: if the action is not legal for the player's agent.
    """
    [_actType, actAndOutcomes, testbedMsgId, trueTime] = actStruct[0]
    act = actAndOutcomes[0]
    outcomes = actAndOutcomes[1:]
    timeInSec = MISSION_DURATION - (trueTime[0] * 60) - trueTime[1]

    self.logger.info('Running msg %d: %s' %
                     (testbedMsgId, ','.join(map(str, actAndOutcomes))))

    # before any action, manually sync the time feature with the game's time (invert timer)
    clockKey = stateKey(WORLD, 'seconds')
    self.world.setFeature(clockKey, timeInSec, recurse=True)

    if self.processor is not None:
        self.processor.pre_step(self.world)

    agent = self.world.agents[self.playerToAgent[player]]
    if act not in agent.getLegalActions():
        self.logger.error('Illegal %s' % (act))
        raise ValueError('Illegal action!')

    selDict = {}
    if len(outcomes) > 0:
        # first outcome is the action's duration: force the clock to land on it
        dur = outcomes[0]
        curTime = self.world.getFeature(clockKey, unique=True)
        newTime = curTime + dur
        selDict[clockKey] = newTime
        self.logger.debug('Time now %d triage until %d' % (curTime, newTime))

    self.logger.info('Injecting %s' % (selDict))
    selDict = {k: self.world.value2float(k, v) for k, v in selDict.items()}
    self.world.step(act, select=selDict, threshold=prune_threshold)
    self.world.modelGC()

    self.summarizeState(trueTime)
    if self.processor is not None:
        self.processor.post_step(
            self.world,
            None if act is None else self.world.getAction(player))
def get_mission_phase_key():
    """
    Returns the key of the world-level feature holding the current mission
    phase (which is tied to mission time).
    :rtype: str
    :return: the PsychSim feature key for the mission phase.
    """
    phase_key = stateKey(WORLD, PHASE_FEATURE)
    return phase_key
def get_mission_seconds_key():
    """
    Returns the key of the world-level 'seconds' feature, i.e., the number of
    seconds elapsed since the mission started.
    :rtype: str
    :return: the PsychSim feature key for the mission clock.
    """
    seconds_key = stateKey(WORLD, 'seconds')
    return seconds_key
def _makeMoveActions(self, agent):
    """
    Creates one 'move' action per direction for the agent and stores them,
    in direction order, in ``self.moveActions[agent.name]``.

    Legality: the current location has a neighbor in the given direction.
    Dynamics of each move:
        1) the agent's location becomes that neighbor;
        2) the agent's visit counter of the destination is incremented;
        3) the world clock advances by MOVE_TIME_INC.
    """
    self.moveActions[agent.name] = []
    loc_key = stateKey(agent.name, 'loc')
    next_loc_key = makeFuture(loc_key)

    for direction in Directions:
        nbr_of = self.neighbors[direction.value]

        # legal only from locations that have a neighbor this way
        sources = set(nbr_of.keys())
        legal = makeTree({
            'if': equalRow(loc_key, sources),
            True: True,
            False: False
        })
        action = agent.addAction(
            {'verb': 'move', 'object': direction.name}, legal)
        self.moveActions[agent.name].append(action)

        # location dynamics: branch i sends the i-th source to its neighbor
        ordered_sources = list(sources)
        move_tree = {'if': equalRow(loc_key, ordered_sources)}
        for idx, src in enumerate(ordered_sources):
            move_tree[idx] = setToConstantMatrix(loc_key, nbr_of[src])
        self.world.setDynamics(loc_key, action, makeTree(move_tree))

        # visit counters: only the counter of the location moved to goes up
        for dest in self.all_locations:
            visits_key = stateKey(agent.name, 'locvisits_' + str(dest))
            visit_tree = makeTree({
                'if': equalRow(next_loc_key, dest),
                True: incrementMatrix(visits_key, 1),
                False: noChangeMatrix(visits_key)
            })
            self.world.setDynamics(visits_key, action, visit_tree)

        # moving takes time
        clock = self.world.time
        self.world.setDynamics(
            clock, action, makeTree(incrementMatrix(clock, MOVE_TIME_INC)))
def get_location_key(agent):
    """
    Returns the key of the feature holding the location / room the given
    agent is currently in.
    :param Agent agent: the agent whose location feature is requested.
    :rtype: str
    :return: the PsychSim feature key for the agent's location.
    """
    owner = agent.name
    return stateKey(owner, 'loc')
def get_fov_key(agent):
    """
    Returns the key of the feature holding the color of the victim currently
    in the given agent's field-of-view (FOV).
    :param Agent agent: the agent whose FOV feature is requested.
    :rtype: str
    :return: the PsychSim feature key for the agent's FOV.
    """
    owner = agent.name
    return stateKey(owner, FOV_FEATURE)
def get_light_status_key(location):
    """
    Returns the key of the world-level feature holding the light status
    (on/off) of a location.
    :param str location: the location / room of the environment.
    :rtype: str
    :return: the PsychSim feature key for the location's light.
    """
    feature = f'light{location}'
    return stateKey(WORLD, feature)
def get_num_triaged_key(agent, color):
    """
    Returns the key of the feature counting how many victims of the given
    color the agent has triaged.
    :param Agent agent: the agent whose counter is requested.
    :param str color: the victim's color.
    :rtype: str
    :return: the PsychSim feature key for the triage counter.
    """
    feature = 'numsaved_' + color
    return stateKey(agent.name, feature)
def get_triaged_key(agent, color):
    """
    Returns the key of the feature flagging whether the agent has triaged a
    victim of the given color.
    :param Agent agent: the agent whose flag is requested.
    :param str color: the victim's color.
    :rtype: str
    :return: the PsychSim feature key for the triage flag.
    """
    feature = 'saved_' + color
    return stateKey(agent.name, feature)
def get_num_visits_location_key(agent, location):
    """
    Returns the key of the feature counting how many times the agent has
    visited the given location.
    :param Agent agent: the agent whose counter is requested.
    :param str location: the location / room of the environment.
    :rtype: str
    :return: the PsychSim feature key for the visit counter.
    """
    feature = 'locvisits_' + location
    return stateKey(agent.name, feature)
def get_num_victims_location_key(location, color):
    """
    Returns the key of the world-level feature counting the victims of the
    given color present in the given location.
    :param str location: the location / room of the environment.
    :param str color: the victim's color.
    :rtype: str
    :return: the PsychSim feature key for the victim counter.
    """
    feature = 'ctr_' + location + '_' + color
    return stateKey(WORLD, feature)
def get_sensor_key(agent, direction):
    """
    Returns the key of the feature holding the status of the agent's sensor
    pointing in the given direction.
    :param Agent agent: the agent whose sensor feature is requested.
    :param Directions direction: the direction of the sensor.
    :rtype: str
    :return: the PsychSim feature key for the sensor.
    """
    feature = 'sensor_' + direction.name
    return stateKey(agent.name, feature)
def tryHorizon(world, hz, triageAgent, initLoc):
    """
    Debugging aid: for each planning horizon from 1 to ``hz``, puts the agent
    back at ``initLoc``, sets that horizon, and steps the world as many times
    as the horizon, printing the available actions, the action taken and the
    agent's reward at every step.

    :param world: the world to step.
    :param int hz: the largest horizon to try.
    :param triageAgent: the agent whose horizon is varied.
    :param initLoc: location the agent is reset to before each run.
    """
    loc_key = stateKey(triageAgent.name, 'loc')
    for horizon in range(1, hz + 1):
        print('====================================')
        print(f'Horizon: {horizon}', 'init pos', initLoc)

        # start each run from the same place with the new horizon
        world.setFeature(loc_key, initLoc)
        triageAgent.setHorizon(horizon)

        for _ in range(horizon):
            print(triageAgent.getActions())
            world.step()
            taken = world.getValue(actionKey(triageAgent.name))
            print('>>> Took Action', taken, triageAgent.reward())
def makeVictimReward(self, agent, model=None, rwd_dict=None):
    """
    Sets the agent's reward as a weighted sum of its per-color 'saved_'
    features.

    The weight of a color is taken from ``rwd_dict`` when given and
    containing the color, else from COLOR_REWARDS; colors with no weight or
    a weight of zero are left out.

    :param agent: the agent receiving the reward function.
    :param model: passed through to ``agent.setReward``.
    :param dict rwd_dict: optional per-color reward overrides.
    """
    weights = {}
    for color in self.color_names:
        if rwd_dict is not None and color in rwd_dict:
            reward = rwd_dict[color]
        elif color in COLOR_REWARDS:
            reward = COLOR_REWARDS[color]
        else:
            reward = None

        if reward is None or reward == 0:
            continue
        weights[stateKey(agent.name, 'saved_' + color)] = reward

    reward_tree = makeTree(dynamicsMatrix(rewardKey(agent.name), weights))
    agent.setReward(reward_tree, 1., model)
def test_actions_param_conditions(self):
    """
    Converts a domain where a per-agent fluent depends on two per-agent
    actions and checks, for every agent, that the world's dynamics table
    links the fluent to both actions and to the default (True) entry, in
    both directions.
    """
    agents = {'John': 1.22, 'Paul': 3.75, 'George': -1.14, 'Ringo': 4.73}
    rddl = f''' domain my_test {{ types {{ agent : object; }}; pvariables {{ p(agent) : {{ state-fluent, real, default = 0 }}; a1(agent) : {{ action-fluent, bool, default = false }}; a2(agent) : {{ action-fluent, bool, default = false }}; }}; cpfs {{ p'(?a) = if ( a1(?a) ) then p(?a) + 1 else if ( a2(?a) ) then p(?a) - 1 else 0; }}; reward = - sum_{{?a : agent}} p(?a); }} non-fluents my_test_empty {{ domain = my_test; objects {{ agent : {{ {", ".join(agents.keys())} }}; }}; }} instance my_test_inst {{ domain = my_test; init-state {{ {'; '.join(f'p({a}) = {v}' for a, v in agents.items())}; }}; horizon = 0; }} '''
    conv = Converter(const_as_assert=True)
    conv.convert_str(rddl)
    conv.world.step()

    dynamics = conv.world.dynamics
    for ag_name in conv.world.agents.keys():
        own_actions = conv.actions[ag_name]
        a1 = own_actions[Converter.get_feature_name(('a1', ag_name))]
        a2 = own_actions[Converter.get_feature_name(('a2', ag_name))]
        p = stateKey(ag_name, 'p')
        triggers = (a1, a2, True)

        # every trigger has an entry ...
        for trigger in triggers:
            self.assertIn(trigger, dynamics)
        # ... that affects p ...
        for trigger in triggers:
            self.assertIn(p, dynamics[trigger])
        # ... and p lists every trigger
        for trigger in triggers:
            self.assertIn(trigger, dynamics[p])
def makeSearchAction(self, agent):
    """
    Adds the 'search' action to the agent and stores it in
    ``self.searchActs[agent.name]``.

    By default (any action) the agent's FOV feature is reset to 'none';
    searching instead applies the tree from ``makeRandomFOVDistr`` and
    advances the world clock by SEARCH_TIME_INC.

    :param agent: the agent receiving the action.
    """
    search = agent.addAction({'verb': 'search'})
    fov_key = stateKey(agent.name, FOV_FEATURE)
    clock = self.world.time

    # default dynamics: nothing in the field of view
    clear_fov = makeTree(setToConstantMatrix(fov_key, 'none'))
    self.world.setDynamics(fov_key, True, clear_fov)

    # searching may bring a victim into the field of view
    fov_tree = self.makeRandomFOVDistr(agent)
    self.world.setDynamics(fov_key, search, makeTree(fov_tree))

    # searching takes time
    tick = makeTree(incrementMatrix(clock, SEARCH_TIME_INC))
    self.world.setDynamics(clock, search, tick)

    self.searchActs[agent.name] = search
def test_actions_conditions_multi(self):
    """
    Converts a domain where a single world fluent depends on the per-agent
    actions of several agents and checks, for every agent, that the world's
    dynamics table links the fluent to both of the agent's actions and to
    the default (True) entry, in both directions.
    """
    rddl = ''' domain my_test { types { agent : object; }; pvariables { p : { state-fluent, int, default = 0 }; a1(agent) : { action-fluent, bool, default = false }; a2(agent) : { action-fluent, bool, default = false }; }; cpfs { p' = if (exists_{?a : agent} [a1(?a)] ) then p + 1 else if ( exists_{?a : agent} [a2(?a)] ) then p - 1 else 0; }; reward = -p; } non-fluents my_test_empty { domain = my_test; objects { agent: { Paul, John, George, Ringo }; }; } instance my_test_inst { domain = my_test; init-state { p = 0; }; horizon = 2; } '''
    conv = Converter(const_as_assert=True)
    conv.convert_str(rddl)
    conv.world.step()

    dynamics = conv.world.dynamics
    for ag_name in conv.world.agents.keys():
        own_actions = conv.actions[ag_name]
        a1 = own_actions[Converter.get_feature_name(('a1', ag_name))]
        a2 = own_actions[Converter.get_feature_name(('a2', ag_name))]
        p = stateKey(WORLD, 'p')
        triggers = (a1, a2, True)

        # every trigger has an entry ...
        for trigger in triggers:
            self.assertIn(trigger, dynamics)
        # ... that affects p ...
        for trigger in triggers:
            self.assertIn(p, dynamics[trigger])
        # ... and p lists every trigger
        for trigger in triggers:
            self.assertIn(trigger, dynamics[p])
def _make_fov_color_dist(self, loc, cur_idx):
    """
    Recursively builds a binary tree over the victim counters of a location,
    one level per color in ``self.color_names`` starting at ``cur_idx``.

    Each level tests whether the color's counter equals zero. Every leaf is
    a weight table that starts as ``{'none': 1}`` and receives, for each
    color on its path, weight 0 if the counter was zero and 2 otherwise.

    :param str loc: the location whose counters are tested.
    :param int cur_idx: index of the color handled at this level.
    :return: tuple (tree, leaves): the nested dict for this level and the
        list of all leaf weight tables beneath it (True branch first).
    """
    if cur_idx == len(self.color_names):
        leaf = {'none': 1}
        return leaf, [leaf]

    color = self.color_names[cur_idx]
    counter_key = stateKey(WORLD, 'ctr_' + loc + '_' + color)
    node = {'if': equalRow(counter_key, 0)}

    collected = []
    # True branch: no victims of this color; False branch: at least one
    for is_zero, weight in ((True, 0), (False, 2)):
        subtree, sub_leaves = self._make_fov_color_dist(loc, cur_idx + 1)
        for leaf in sub_leaves:
            leaf[color] = weight
        node[is_zero] = subtree
        collected.extend(sub_leaves)

    return node, collected
def test_if_action_dynamics(self):
    """
    Converts a domain where a world fluent depends on two non-parameterized
    actions and checks that the world's dynamics table links the fluent to
    both actions and to the default (True) entry, in both directions.
    """
    rddl = ''' domain my_test { pvariables { p : { state-fluent, int, default = 0 }; q : { state-fluent, int, default = 1 }; a1 : { action-fluent, bool, default = false }; a2 : { action-fluent, bool, default = false }; }; cpfs { p' = if (a1) then p + 1 else if (a2) then p - 1 else 0; }; reward = -p; } non-fluents my_test_empty { domain = my_test; } instance my_test_inst { domain = my_test; init-state { a1; }; horizon = 2; } '''
    conv = Converter(const_as_assert=True)
    conv.convert_str(rddl)
    conv.world.step()

    ag_name = next(iter(conv.world.agents.keys()))
    own_actions = conv.actions[ag_name]
    a1 = own_actions['a1']
    a2 = own_actions['a2']
    p = stateKey(WORLD, 'p')

    dynamics = conv.world.dynamics
    triggers = (a1, a2, True)
    # every trigger has an entry ...
    for trigger in triggers:
        self.assertIn(trigger, dynamics)
    # ... that affects p ...
    for trigger in triggers:
        self.assertIn(p, dynamics[trigger])
    # ... and p lists every trigger
    for trigger in triggers:
        self.assertIn(trigger, dynamics[p])
def test_fluent_exists_rel(self):
    """
    Converts a domain whose boolean fluent is an existential over a
    relational test on a parameterized fluent, and checks both the shape of
    the resulting dynamics (a non-conjunctive branch with one plane per
    object) and the fluent's value before and after one step.
    """
    objs = {'x1': 1, 'x2': 2, 'x3': 3, 'x4': 4}
    rddl = f''' domain my_test {{ types {{ obj : object; }}; pvariables {{ p : {{ state-fluent, bool, default = false }}; q(obj) : {{ state-fluent, int, default = -1 }}; a : {{ action-fluent, bool, default = false }}; }}; cpfs {{ p' = exists_{{?x : obj}}[ q(?x) > 3 ]; }}; reward = 0; }} non-fluents my_test_nf {{ domain = my_test; objects {{ obj : {{{', '.join(objs.keys())}}}; }}; }} instance my_test_inst {{ domain = my_test; init-state {{ {'; '.join(f'q({o})={v}' for o, v in objs.items())}; }}; }} '''
    conv = Converter()
    conv.convert_str(rddl)
    world = conv.world

    # structure of the generated dynamics
    branch = world.getDynamics(stateKey(WORLD, 'p'), True)[0].branch
    self.assertFalse(branch.isConjunction)
    self.assertEqual(len(branch.planes), len(objs))

    # value before the step is the declared default
    before = world.getState(WORLD, 'p', unique=True)
    self.assertEqual(before, False)

    # value after the step follows the existential over the initial q values
    world.step()
    after = world.getState(WORLD, 'p', unique=True)
    expected = any(value > 3 for value in objs.values())
    self.assertEqual(after, expected)
def _createTriageAction(self, agent, color):
    """
    Adds the 'triage_<color>' action to the agent and stores it in
    ``self.triageActs[agent.name][color]``.

    Legality: the counter of victims of this color at the agent's current
    location is positive.
    Dynamics (a triage counts as successful when the clock advances by at
    least the color's threshold: 7 for green, 14 otherwise):
        - per location: on success at that location, the color's counter is
          decremented and the white counter incremented;
        - the agent's 'numsaved_' counter is incremented on success;
        - the agent's 'saved_' flag is set from the change in 'numsaved_',
          and defaults to False under any other action;
        - the world clock advances by the threshold.

    :param agent: the agent receiving the action.
    :param str color: the color of the victims this action triages.
    """
    loc_key = stateKey(agent.name, 'loc')
    locations = self.world_map.all_locations
    clock = self.world.time

    def counter_key(where, clr):
        # key of the world counter of victims of a color in a location
        return stateKey(WORLD, 'ctr_' + where + '_' + clr)

    # legal only if an "active" victim of this color shares the agent's location
    legality = {'if': equalRow(loc_key, locations), None: False}
    for idx, loc in enumerate(locations):
        legality[idx] = {
            'if': thresholdRow(counter_key(loc, color), 0),
            True: True,
            False: False
        }
    action = agent.addAction({'verb': 'triage_' + color}, makeTree(legality))

    # required triage duration depends on the victim type
    threshold = 7 if color == GREEN_STR else 14
    long_enough = differenceRow(makeFuture(clock), clock, threshold)

    # location counters: this color goes down, white goes up, on success
    for loc in locations:
        success = [equalRow(loc_key, loc), long_enough]
        for clr, delta in ((color, -1), (WHITE_STR, 1)):
            ctr = counter_key(loc, clr)
            ctr_tree = makeTree(
                anding(success, incrementMatrix(ctr, delta),
                       noChangeMatrix(ctr)))
            self.world.setDynamics(ctr, action, ctr_tree)

    # running total of victims of this color saved by the agent
    num_saved_key = stateKey(agent.name, 'numsaved_' + color)
    num_saved_tree = {
        'if': long_enough,
        True: incrementMatrix(num_saved_key, 1),
        False: noChangeMatrix(num_saved_key)
    }
    self.world.setDynamics(num_saved_key, action, makeTree(num_saved_tree))

    # saved flag: new total minus old total
    change = {makeFuture(num_saved_key): 1, num_saved_key: -1}
    saved_key = stateKey(agent.name, 'saved_' + color)
    self.world.setDynamics(saved_key, action,
                           makeTree(dynamicsMatrix(saved_key, change)))
    # default: set to False
    self.world.setDynamics(saved_key, True,
                           makeTree(setFalseMatrix(saved_key)))

    # triaging takes time
    self.world.setDynamics(clock, action,
                           makeTree(incrementMatrix(clock, threshold)))

    self.triageActs[agent.name][color] = action
def runTimeless(self, world, start, end, ffwdTo=0, prune_threshold=None, permissive=False):
    """
    Replays the entries of ``self.actions`` with indices in [start, end) in
    the given world.

    When starting at index 0, that entry is used as the agent's initial
    location (and its visit counter is set to 1) and replay begins at
    index 1. Before each step the world clock is synced with the game's
    (inverted) timer; for triage entries, the clock value after the triage
    duration is injected into the step. Once past ``ffwdTo``, the replay
    pauses for user input after every step.

    :param world: the world in which the actions are replayed.
    :param int start: index of the first entry to replay.
    :param int end: index at which to stop (exclusive).
    :param int ffwdTo: number of entries to run without pausing.
    :param prune_threshold: passed to the world step as ``threshold``.
    :param bool permissive: not used by this method.
    :raises ValueError: if an action is not legal for the agent.
    """
    self.logger.debug(self.actions[start])

    if start == 0:
        # entry 0 holds the starting location rather than an action
        first_loc = self.actions[0]
        world.setState(self.human, 'loc', first_loc, recurse=True)
        world.setState(self.human, 'locvisits_' + first_loc, 1, recurse=True)
        start = 1

    clockKey = stateKey(WORLD, 'seconds')
    t = start
    while t < end and t < len(self.actions):
        entry = self.actions[t]
        actType = entry[0]
        payload = entry[1]
        act = payload[0]
        testbedMsgId = entry[-2]
        trueTime = entry[-1]
        timeInSec = MISSION_DURATION - (trueTime[0] * 60) - trueTime[1]

        self.logger.info('%d) Running msg %d: %s' %
                         (t + start, testbedMsgId, ','.join(map(str, payload))))

        # before any action, manually sync the time feature with the game's time (invert timer)
        world.setFeature(clockKey, timeInSec, recurse=True)

        if self.processor is not None:
            self.processor.pre_step(world)

        if act not in world.agents[self.human].getLegalActions():
            self.logger.error('Illegal %s' % (act))
            raise ValueError('Illegal action!')

        selDict = dict()
        if actType == MOVE:
            # moves need no injected outcome
            pass
        if actType == TRIAGE:
            # force the clock to land where the triage actually ended
            dur = payload[1]
            curTime = world.getFeature(clockKey, unique=True)
            newTime = curTime + dur
            selDict[clockKey] = newTime
            self.logger.debug('Time now %d triage until %d' % (curTime, newTime))

        self.logger.info('Injecting %s' % (selDict))
        selDict = {key: world.value2float(key, val) for key, val in selDict.items()}
        world.step(act, select=selDict, threshold=prune_threshold)
        world.modelGC()

        self.summarizeState(world, trueTime)
        if self.processor is not None:
            performed = None if act is None else world.getAction(self.human)
            self.processor.post_step(world, performed)

        t += 1
        if t + start - 1 > ffwdTo:
            input('press any key.. ')