def random_policy(state, flag):
    """Pick one of the candidates returned by ``judge`` uniformly at random.

    Args:
        state: Passed through unchanged to ``judge``.
        flag: Passed through to ``judge`` and echoed back as the first
            element of the result when a candidate is chosen.

    Returns:
        ``[flag, candidate]`` where ``candidate`` is a randomly selected
        element of ``judge(state, flag)``, or ``[0, 0]`` when ``judge``
        returns an empty sequence.
    """
    candidates = judge(state, flag)
    count = len(candidates)

    # Nothing to choose from: signal it with the [0, 0] sentinel.
    if count == 0:
        return [0, 0]

    # randint is inclusive on both ends, hence the "- 1".
    pick = random.randint(0, count - 1)
    return [flag, candidates[pick]]
def mid_policy(state, flag):
    """Pick the candidate from ``judge`` whose slot has the highest score.

    Scores are read from the module-level ``value_list``; the candidate at
    position ``k`` in ``judge(state, flag)`` is scored with ``value_list[k]``.
    When several positions share the highest score, the earliest one wins.

    Args:
        state: Passed through unchanged to ``judge``.
        flag: Passed through to ``judge`` and echoed back as the first
            element of the result when a candidate is chosen.

    Returns:
        ``[flag, candidate]`` for the best-scoring position, or ``[0, 0]``
        when ``judge`` returns an empty sequence.
    """
    candidates = judge(state, flag)
    count = len(candidates)

    # Nothing to choose from: signal it with the [0, 0] sentinel.
    if count == 0:
        return [0, 0]

    # NOTE(review): value_list is indexed by the candidate's position in the
    # list, not by the candidate itself, so the score does not depend on the
    # move -- confirm this is intended and not meant to be keyed by the move.
    scores = [value_list[k] for k in range(count)]

    # list.index returns the first occurrence, so ties go to the earliest slot.
    best = scores.index(max(scores))
    return [flag, candidates[best]]