Ejemplo n.º 1
0
def random_policy(state, flag):
    """Choose one of the candidates from ``judge(state, flag)`` at random.

    Args:
        state: Passed through unchanged to ``judge``.
        flag: Passed to ``judge`` and echoed back as the first element of
            the result when a candidate is available.

    Returns:
        ``[0, 0]`` when ``judge`` yields no candidates; otherwise
        ``[flag, candidate]`` where ``candidate`` is drawn uniformly from
        the sequence returned by ``judge``.
    """
    # presumably the legal moves for `flag` -- confirm against judge()
    candidates = judge(state, flag)
    count = len(candidates)

    # Nothing to choose from: return the [0, 0] sentinel.
    if not count:
        return [0, 0]

    pick = random.randint(0, count - 1)
    return [flag, candidates[pick]]
Ejemplo n.º 2
0
 def mid_policy(state, flag):
     """Choose the candidate from ``judge(state, flag)`` with the top score.

     Scores are read from the module-level ``value_list``; the candidate at
     position ``k`` is scored with ``value_list[k]``. Ties go to the
     earliest position.

     Args:
         state: Passed through unchanged to ``judge``.
         flag: Passed to ``judge`` and echoed back as the first element of
             the result when a candidate is available.

     Returns:
         ``[0, 0]`` when ``judge`` yields no candidates; otherwise
         ``[flag, candidate]`` for the highest-scoring candidate.
     """
     candidates = judge(state, flag)
     count = len(candidates)

     # Nothing to choose from: return the [0, 0] sentinel.
     if not count:
         return [0, 0]

     # NOTE(review): value_list is indexed by the candidate's position, not
     # by the candidate itself (candidates[k]) -- confirm this is intended.
     scores = [value_list[k] for k in range(count)]

     # list.index returns the first occurrence, so ties pick the earliest.
     best = scores.index(max(scores))
     return [flag, candidates[best]]