def env_step(self, thisAction):
    """Apply one agent action and package the outcome.

    The first entry of ``thisAction.intArray`` is passed to
    ``self.takeAction``.  The observation and reward it returns are stored
    on a freshly created ``Reward_observation_terminal`` (fields ``o`` and
    ``r``), and ``terminal`` is set from ``mdptetris.isgameover()``.

    Returns the populated ``Reward_observation_terminal``.
    """
    observation, reward = self.takeAction(thisAction.intArray[0])

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    # Queried only after the action has been taken.
    result.terminal = mdptetris.isgameover()
    return result
def env_step(self, thisAction):
    """Apply one agent action and package the outcome.

    The first entry of ``thisAction.intArray`` is passed to
    ``self.takeAction``.  The observation and reward it returns are stored
    on a freshly created ``Reward_observation_terminal`` (fields ``o`` and
    ``r``), and ``terminal`` is set from ``mdptetris.isgameover()``.

    Returns the populated ``Reward_observation_terminal``.
    """
    observation, reward = self.takeAction(thisAction.intArray[0])

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    # Queried only after the action has been taken.
    result.terminal = mdptetris.isgameover()
    return result
def takeAction(self, intAction):
    """Decode an action index, drop the piece, and return ``(obs, reward)``.

    ``intAction`` is interpreted as an index into the cross product of
    columns and rotations: its remainder modulo 4 is the requested
    rotation, and its quotient by 4, plus one, is the requested column.
    The game restricts rotations and columns based on the current piece,
    so both requests are mapped onto what ``mdptetris`` reports as
    available before the piece is dropped.

    Returns a tuple ``(obs, reward)``.  ``obs`` comes from
    ``self.getObservation()``.  ``reward`` is ``-1.0`` when
    ``mdptetris.isgameover()`` is true after the drop, otherwise
    ``self.computeReward(lines_cleared)``.
    """
    requested_rotation = intAction % 4
    requested_column = int(intAction / 4) + 1

    # Wrap the rotation into the range offered for the current piece, then
    # cap the column at the number of columns available for that rotation.
    rotation = requested_rotation % mdptetris.num_rotate_actions()
    column = min(requested_column, mdptetris.num_column_actions(rotation))

    lines_cleared = mdptetris.drop_piece(rotation, column)

    obs = self.getObservation()
    if mdptetris.isgameover():
        reward = -1.0
    else:
        reward = self.computeReward(lines_cleared)
    return obs, reward
def takeAction(self, intAction):
    """Decode an action index, drop the piece, and return ``(obs, reward)``.

    ``intAction`` is interpreted as an index into the cross product of
    columns and rotations: its remainder modulo 4 is the requested
    rotation, and its quotient by 4, plus one, is the requested column.
    The game restricts rotations and columns based on the current piece,
    so both requests are mapped onto what ``mdptetris`` reports as
    available before the piece is dropped.

    Returns a tuple ``(obs, reward)``.  ``obs`` comes from
    ``self.getObservation()``.  ``reward`` is ``-1.0`` when
    ``mdptetris.isgameover()`` is true after the drop, otherwise
    ``self.computeReward(lines_cleared)``.
    """
    requested_rotation = intAction % 4
    requested_column = int(intAction / 4) + 1

    # Wrap the rotation into the range offered for the current piece, then
    # cap the column at the number of columns available for that rotation.
    rotation = requested_rotation % mdptetris.num_rotate_actions()
    column = min(requested_column, mdptetris.num_column_actions(rotation))

    lines_cleared = mdptetris.drop_piece(rotation, column)

    obs = self.getObservation()
    if mdptetris.isgameover():
        reward = -1.0
    else:
        reward = self.computeReward(lines_cleared)
    return obs, reward