def _action(self, state):
    """
    Return the best available action for ``state``.

    Like the predict process in a NN.
    Best action: argmax( R(s, a) + gamma * max(R(s', a')) )

    Candidate actions are visited in descending order of the scores found
    in row 0 of ``self.eval(state)``. The first candidate ``i`` for which
    ``chkEmpty(state, i)`` is truthy is returned.
    NOTE(review): ``chkEmpty`` presumably reports whether move ``i`` is
    still playable in ``state`` -- confirm against its definition.

    Returns:
        The selected action index (an element of the ``np.argsort``
        result), or ``None`` if anything went wrong. In the failure case
        the traceback is printed and a debugger session is opened before
        returning.
    """
    try:
        rewards = self.eval(state)
        # argsort is ascending; reverse it to walk from best to worst score.
        ranked = np.argsort(rewards[0, :].ravel())[::-1]
        for i in ranked:
            if chkEmpty(state, i):
                return i
        # Raised explicitly instead of using `assert`, so the check is not
        # stripped under `python -O` (which previously let -1 leak out and
        # be used as a valid "last element" index by callers).
        raise AssertionError("no available action for the given state")
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate instead of landing in the debugger.
        print_exc()
        set_trace()
    return None
def predict(self, state):
    """
    Return the highest index in 6..0 accepted by ``chkEmpty``.

    Indices are scanned from 6 down to 0 and the first one for which
    ``chkEmpty(state, index)`` is truthy is returned. When no index
    qualifies the result is ``None``.
    """
    # Lazy scan: chkEmpty is only called until the first hit, exactly as an
    # explicit loop with an early return would do.
    open_slots = (slot for slot in xrange(6, -1, -1) if chkEmpty(state, slot))
    return next(open_slots, None)
def rndAction(state):
    """
    Return a random action among those ``chkEmpty`` accepts for ``state``.

    The candidates are the indices 0..6 for which ``chkEmpty(state, i)`` is
    truthy; one of them is picked with ``choice``.

    Returns:
        The chosen index, or ``None`` if an error occurred (for example
        ``choice`` being handed an empty candidate list). In the failure
        case the traceback is printed and a debugger session is opened
        before returning.
    """
    try:
        return choice([i for i in xrange(7) if chkEmpty(state, i)])
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate instead of landing in the debugger.
        print_exc()
        set_trace()
    return None