def getActionAndUpdate(self, state, reward):
    """Sample an action from the fixed prior, forcing a jump when Mario
    has been stuck for longer than the configured threshold.

    Returns the chosen action index.
    """
    # Default behavior: draw one action from the prior over self.actions.
    action = np.random.choice(self.actions, 1, p=hp.PRIOR)[0]

    # Without a state we cannot detect "stuck"; keep the sampled action.
    if state is None:
        return action

    tiles = state.getTiles()
    mpos = feat.marioPosition(tiles)
    if mpos is None or not feat.stuck(tiles, mpos):
        return action

    # Mario is stuck this frame; track for how long.
    self.stuck_duration += 1

    # Once stuck beyond the threshold, override the sampled action.
    if self.stuck_duration > hp.STUCK_DURATION:
        print("MODEL: Mario is stuck. Forcing jump to rescue...")

        if feat.groundVertDistance(tiles, mpos) == 0:
            # On the ground: mix no-op and jump to get a jump started.
            action = random.choice([0, 10])
        else:
            # Airborne: keep holding jump and count it.
            action = 10
            self.jumps += 1

        # After enough forced jumps, reset the rescue bookkeeping.
        if self.jumps > hp.MAX_JUMPS:
            self.jumps = 0
            self.stuck_duration = 0

    return action
    def getActionAndUpdate(self, state, reward):
        """Rule-based policy with a random fallback.

        Inspects the tile grid for stuck situations, nearby enemies and
        gaps, picks a rule-based action when one applies, and otherwise
        falls back to an action sampled from the prior. A rule-based
        action is kept with probability 0.99 and replaced by the random
        sample with probability 0.01 for exploration.

        Returns the chosen action index.
        """
        state = state.getTiles()
        mpos = ft.marioPosition(state)

        action = None

        # Get Mario unstuck: after 30 stuck frames, force jumps.
        if self.stuck_duration > 30:
            print("Stuck!")
            if self.jumps > 20:
                # Enough forced jumps; reset the rescue bookkeeping.
                self.jumps = 0
                self.stuck_duration = 0
            self.jumps += 1
            if ft.groundVertDistance(state, mpos) == 0:
                # On the ground: mix jump and no-op to start a jump.
                action = random.choice([10, 0])
            else:
                # Airborne: keep holding jump.
                action = 10

        elif mpos is not None:

            # Count consecutive frames where Mario cannot advance right.
            if not ft.canMoveRight(state, mpos):
                self.stuck_duration += 1

            # Enemy close ahead at a similar height: jump or no-op.
            if ft.distRightEnemy(state, mpos) <= 0.2 and ft.distUpEnemy(
                    state, mpos) < 0.1 and ft.distDownEnemy(state, mpos) < 0.2:
                print("Enemy ahead!")
                action = random.choice([0, 11])

            # Enemy just behind on the left: jump or no-op.
            elif 0.0625 < ft.distLeftEnemy(state, mpos) <= 0.1:
                print("Enemy behind!", ft.distLeftEnemy(state, mpos))
                action = random.choice([0, 11])

            # Gap ahead: run (9) when far, jump (10) when close.
            elif ft.groundRightDistance(state, mpos) <= 0.3:
                print("Gap ahead!")
                if ft.groundRightDistance(state, mpos) <= 0.06:
                    print("Need to jump!")
                    action = 10
                else:
                    action = 9

        random_action = np.random.choice(self.actions, 1, p=hp.PRIOR)[0]

        # BUG FIX: the original tested `if not action`, which treats the
        # deliberately chosen no-op action 0 as "no decision" and replaces
        # it with a purely random action. Compare against None instead so
        # action 0 is honored like any other rule-based choice.
        if action is None:
            return random_action
        return np.random.choice([action, random_action], 1,
                                p=[0.99, 0.01])[0]
# Example #3  (scraped-snippet marker; commented out so the file parses)
    i = 1

    # Begin training loop
    while i <= hp.TRAINING_ITERATIONS:

        print('-- Resetting agent')
        agent.reset()
        rewardFunction.reset()

        print('-- START playing iteration %d / %d' %
              (i + j, hp.TRAINING_ITERATIONS + j))

        # Sample first action randomly
        action = env.action_space.sample()
        state = None
        while (ft.marioPosition(state) is None):
            state, reward, _, info = env.step(action)

        state = util.State(state, info['distance'], None)

        # Compute custom reward
        reward = rewardFunction.getReward(reward, info)

        dead = win = False

        # Begin main action-perception loop
        while not (info['iteration'] > i):

            # Check if Mario is at the end of the level
            if info['distance'] >= hp.LEVEL_WIN_DIST:
                win = True