Beispiel #1
0
    def env_step(self, thisAction):
        """Advance the environment by one step for the given action.

        The first entry of ``thisAction.intArray`` is handed to
        ``self.takeAction``. The observation and reward it returns are
        packaged, together with the flag reported by
        ``mdptetris.isgameover()``, into a ``Reward_observation_terminal``.
        """
        observation, reward = self.takeAction(thisAction.intArray[0])

        step_result = Reward_observation_terminal()
        step_result.r = reward
        step_result.o = observation
        # Queried after the action has been applied, so it reflects the
        # state produced by this step.
        step_result.terminal = mdptetris.isgameover()
        return step_result
Beispiel #2
0
    def env_step(self, thisAction):
        """Apply one action and report reward, observation and terminal flag.

        Only ``thisAction.intArray[0]`` is used; it is forwarded to
        ``self.takeAction``, which yields the observation and the reward.
        The result object is a ``Reward_observation_terminal`` whose
        ``terminal`` field is taken from ``mdptetris.isgameover()``.
        """
        chosen_action = thisAction.intArray[0]
        new_observation, step_reward = self.takeAction(chosen_action)

        outcome = Reward_observation_terminal()
        outcome.r = step_reward
        outcome.o = new_observation
        # The game-over check runs after takeAction has been called.
        outcome.terminal = mdptetris.isgameover()
        return outcome
Beispiel #3
0
    def takeAction(self, intAction):
        """Drop the current piece as selected by ``intAction``.

        ``intAction`` is treated as an index into the cross product of
        rotations and columns: the remainder modulo 4 selects the rotation
        and the quotient, plus one, selects the column. Because the game
        restricts rotations and columns based on the current piece, both
        values are then mapped onto what ``mdptetris`` reports as available.

        Returns an ``(observation, reward)`` tuple. The reward is ``-1.0``
        when ``mdptetris.isgameover()`` is true after the drop; otherwise it
        is ``self.computeReward(lines_cleared)``.
        """
        requested_rotation = intAction % 4
        requested_column = int(intAction / 4) + 1

        # Wrap the rotation into the range allowed for the current piece,
        # then cap the column at the limit for that rotation.
        rotation = requested_rotation % mdptetris.num_rotate_actions()
        column_limit = mdptetris.num_column_actions(rotation)
        column = min(requested_column, column_limit)

        lines_cleared = mdptetris.drop_piece(rotation, column)
        obs = self.getObservation()

        if mdptetris.isgameover():
            reward = -1.0
        else:
            reward = self.computeReward(lines_cleared)
        return obs, reward
Beispiel #4
0
    def takeAction(self, intAction):
        """Perform the drop encoded by ``intAction`` and score the result.

        The action index is split into a rotation (``intAction % 4``) and a
        column (``int(intAction / 4) + 1``), i.e. an index into the cross
        product of columns and rotations. The game restricts both based on
        the current piece, so the rotation is reduced modulo
        ``mdptetris.num_rotate_actions()`` and the column is capped at
        ``mdptetris.num_column_actions(rotation)``.

        Returns ``(observation, reward)``: the observation comes from
        ``self.getObservation()``; the reward is ``-1.0`` if the game is
        over after the drop, else ``self.computeReward(lines_cleared)``.
        """
        raw_rotation = intAction % 4
        raw_column = int(intAction / 4) + 1

        # Map the requested rotation and column onto the legal ones.
        rotation = raw_rotation % mdptetris.num_rotate_actions()
        column = min(raw_column, mdptetris.num_column_actions(rotation))

        # Take the action.
        lines_cleared = mdptetris.drop_piece(rotation, column)
        observation = self.getObservation()

        game_over = mdptetris.isgameover()
        if not game_over:
            return observation, self.computeReward(lines_cleared)
        return observation, -1.0