def getActionAndUpdate(self, state, reward):
    """Sample an action from the prior, forcing jumps when Mario is stuck.

    An action is drawn from ``self.actions`` with probabilities ``hp.PRIOR``.
    If Mario has been detected as stuck for more than ``hp.STUCK_DURATION``
    consecutive calls, the sampled action is overridden with a rescue jump
    (action 10), alternating randomly with no-op (0) while on the ground,
    until ``hp.MAX_JUMPS`` rescue jumps have been issued, at which point
    both counters reset.

    Args:
        state: current ``util.State`` observation, or ``None`` before the
            first frame is available.
        reward: last reward received (unused here; kept for interface
            parity with other agents).

    Returns:
        An integer action id.
    """
    action = np.random.choice(self.actions, 1, p=hp.PRIOR)[0]
    # If Mario is stuck, overwrite action with jump
    if state is not None:
        # Hoisted: the tile grid is read by every feature check below.
        tiles = state.getTiles()
        mpos = feat.marioPosition(tiles)
        if mpos is not None:
            if feat.stuck(tiles, mpos):
                self.stuck_duration += 1
            # If stuck for too long, rescue him
            if self.stuck_duration > hp.STUCK_DURATION:
                print("MODEL: Mario is stuck. Forcing jump to rescue...")
                # On ground, get started with jump; mixing in no-ops
                # releases the (virtually) held jump button.
                if feat.groundVertDistance(tiles, mpos) == 0:
                    action = random.choice([0, 10])
                else:
                    action = 10  # Jump!
                self.jumps += 1
                # Stop jumping and reset after enough rescue attempts
                if self.jumps > hp.MAX_JUMPS:
                    self.jumps = 0
                    self.stuck_duration = 0
    return action
def getActionAndUpdate(self, state, reward):
    """Heuristic policy reacting to being stuck, nearby enemies, and gaps.

    Decision order:
      1. Stuck for > 30 calls: force jumps (action 10, mixed with no-op 0
         while grounded), resetting counters after 20 rescue jumps.
      2. Enemy closely ahead (right) and vertically near: no-op or
         jump-left (0 / 11).
      3. Enemy just behind (left): no-op or jump-left (0 / 11).
      4. Gap ahead: jump (10) when very close, run right (9) otherwise.

    The heuristic action, when one fired, is mixed with a prior-distributed
    random action (99% heuristic / 1% random); otherwise the random action
    is returned.

    Args:
        state: ``util.State`` whose tile grid drives all feature checks.
        reward: last reward received (unused; kept for interface parity).

    Returns:
        An integer action id.
    """
    tiles = state.getTiles()
    mpos = ft.marioPosition(tiles)
    action = None
    # Get Mario unstuck
    if self.stuck_duration > 30:
        print("Stuck!")
        if self.jumps > 20:
            self.jumps = 0
            self.stuck_duration = 0
        self.jumps += 1
        # NOTE(review): mpos can be None on this branch; assumes
        # ft.groundVertDistance tolerates that -- confirm.
        if ft.groundVertDistance(tiles, mpos) == 0:
            action = random.choice([10, 0])
        else:
            action = 10
    elif mpos is not None:
        # Check if stuck (cannot make progress to the right)
        if not ft.canMoveRight(tiles, mpos):
            self.stuck_duration += 1
        # Check if enemy ahead (to the right, vertically close)
        if ft.distRightEnemy(tiles, mpos) <= 0.2 and ft.distUpEnemy(
                tiles, mpos) < 0.1 and ft.distDownEnemy(tiles, mpos) < 0.2:
            print("Enemy ahead!")
            action = random.choice([0, 11])
        # Check if enemy just behind (to the left)
        elif 0.0625 < ft.distLeftEnemy(tiles, mpos) <= 0.1:
            print("Enemy behind!", ft.distLeftEnemy(tiles, mpos))
            action = random.choice([0, 11])
        # Check if gap ahead
        elif ft.groundRightDistance(tiles, mpos) <= 0.3:
            print("Gap ahead!")
            if ft.groundRightDistance(tiles, mpos) <= 0.06:
                print("Need to jump!")
                action = 10
            else:
                action = 9
    random_action = np.random.choice(self.actions, 1, p=hp.PRIOR)[0]
    # BUG FIX: the original tested `if not action`, which treats the valid
    # action 0 (no-op) as "no decision" and discards it. Compare to None
    # so a deliberately chosen 0 still gets the 99% weighting.
    if action is None:
        return random_action
    return np.random.choice([action, random_action], 1, p=[0.99, 0.01])[0]
i = 1 # Begin training loop while i <= hp.TRAINING_ITERATIONS: print('-- Resetting agent') agent.reset() rewardFunction.reset() print('-- START playing iteration %d / %d' % (i + j, hp.TRAINING_ITERATIONS + j)) # Sample first action randomly action = env.action_space.sample() state = None while (ft.marioPosition(state) is None): state, reward, _, info = env.step(action) state = util.State(state, info['distance'], None) # Compute custom reward reward = rewardFunction.getReward(reward, info) dead = win = False # Begin main action-perception loop while not (info['iteration'] > i): # Check if Mario is at the end of the level if info['distance'] >= hp.LEVEL_WIN_DIST: win = True