def getAction(self, rawState, epsilon):
    """Choose an action for ``rawState`` with an epsilon-greedy rule.

    ``Directions.STOP`` is never returned. When ``util.flipCoin(epsilon)``
    is true, ``random.choice`` picks among the remaining legal actions;
    otherwise the legal action with the highest
    ``self.getQValue(rawState, action)`` is returned, ties going to the
    action listed first by ``rawState.getLegalActions()``.

    Args:
        rawState: State object exposing ``getLegalActions()``.
        epsilon: Value handed to ``util.flipCoin``; presumably the
            exploration probability -- TODO confirm against ``util``.

    Returns:
        The chosen action, or ``None`` when no action other than
        ``Directions.STOP`` is legal.
    """
    # Build a filtered copy instead of calling ``list.remove``: this neither
    # raises ValueError when STOP is absent nor edits the list handed out by
    # the state object.
    legalActions = [
        action
        for action in rawState.getLegalActions()
        if action != Directions.STOP
    ]
    if not legalActions:
        # Nothing to choose from; avoids IndexError from random.choice([]).
        return None

    if util.flipCoin(epsilon):
        return random.choice(legalActions)

    # NOTE(review): ranks the actions directly rather than via their indices;
    # assumes Directions.getIndex / Directions.fromIndex round-trip -- confirm.
    # ``max`` keeps the first action among equal Q-values.
    return max(
        legalActions,
        key=lambda action: self.getQValue(rawState, action),
    )
def getAction(self, rawState, epsilon):
    """Choose an action for ``rawState`` from the model's predictions.

    ``Directions.STOP`` is never returned. When ``util.flipCoin(epsilon)``
    is true, ``random.choice`` picks among the remaining legal actions.
    Otherwise the features of ``rawState`` are fed to
    ``self.model.model.predict`` as a batch of one, the resulting values are
    treated as one score per action index, and the legal action with the
    highest score is returned (lowest index first among equal scores).

    Args:
        rawState: State object exposing ``getLegalActions()``.
        epsilon: Value handed to ``util.flipCoin``; presumably the
            exploration probability -- TODO confirm against ``util``.

    Returns:
        The chosen action, or ``None`` when no action other than
        ``Directions.STOP`` is legal or no predicted index maps to a legal
        action.
    """
    # Build a filtered copy instead of calling ``list.remove``: this neither
    # raises ValueError when STOP is absent nor edits the list handed out by
    # the state object.
    legalActions = [
        action
        for action in rawState.getLegalActions()
        if action != Directions.STOP
    ]

    # Extracted before the coin flip on every call, in case the extractor
    # keeps state between calls.
    qState = self.trainingRoom.featuresExtractor.getFeatures(rawState, None)

    if not legalActions:
        # Nothing to choose from; avoids IndexError from random.choice([]).
        return None

    if util.flipCoin(epsilon):
        return random.choice(legalActions)

    # Wrap the features in a batch of one and take the single output row.
    predictions = self.model.model.predict(np.array([qState]))[0]
    # Stable sort: with reverse=True equal scores keep ascending index order.
    ranked = sorted(
        enumerate(predictions),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for index, _ in ranked:
        action = Directions.fromIndex(index)
        if action in legalActions:
            return action
    return None
def getLegalActions(state):
    """Return a boolean NumPy mask telling which action indices 0-3 are legal.

    Entry ``i`` is true when ``Directions.fromIndex(i)`` is among
    ``state.getLegalActions()`` once ``Directions.STOP`` has been dropped.
    The list returned by ``state.getLegalActions()`` is edited in place.
    """
    permitted = state.getLegalActions()
    # Only remove STOP when it is present; list.remove raises otherwise.
    if Directions.STOP in permitted:
        permitted.remove(Directions.STOP)

    mask = []
    for index in range(4):
        mask.append(Directions.fromIndex(index) in permitted)
    return np.array(mask)