def updateQ(c_state, action, alpha, gamma):
    """Apply one tabular Q-learning update for taking `action` in `c_state`.

    Computes

        Q(s, a) <- Q(s, a) + alpha * (r + gamma * max Q(s', .) - Q(s, a))

    and writes the result to ``lists.Qtable[s][a]``, growing the table with
    empty rows / zero entries when the indices do not exist yet.

    Args:
        c_state: Current state, indexable as ``[board, piece]``. ``piece[1]``
            and ``piece[2]`` are forwarded to ``pt.answer``.
        action: An entry of ``lists.action_list``. ``action[0]`` and
            ``action[1]`` are forwarded to ``pt.answer``.
        alpha: Learning rate used to blend the old value with the new target.
        gamma: Discount factor applied to the best next-state value.

    Returns:
        None. The function works by side effect on ``lists.Qtable`` (and on
        whatever ``add_state`` registers).

    Raises:
        Whatever ``lists.action_list.index`` raises when `action` is not a
        known action (``ValueError`` if it is a plain list).
    """
    num_s = add_state(c_state)
    num_a = lists.action_list.index(action)

    # Simulate the move; only the resulting board is needed here, the reward
    # is obtained from get_reward() below.
    piece = c_state[1]
    n_board_state, _ = pt.answer(
        c_state[0], piece[1], piece[2], action[0], action[1])

    # Collapse every cell value strictly between 0 and 8 to 1, in place.
    # NOTE(review): presumably 1..7 are piece ids and 8 marks walls, so this
    # makes states independent of piece colour -- confirm against `pt`.
    for row in n_board_state:
        for j, cell in enumerate(row):
            if 0 < cell < 8:
                row[j] = 1

    n_state = [n_board_state, pt.getNextPiece()]
    num_sn = add_state(n_state)
    # The original retried on any falsy result, which also fired for the
    # perfectly valid state index 0 and registered/looked up the state twice.
    # Only retry when no index came back at all.
    if num_sn is None:
        num_sn = add_state(n_state)

    Q_s_a = get_value(num_s, num_a)
    # get_reward() also returns a height value that this update does not use.
    r_s_a, _ = get_reward(c_state, action)
    max_Q = get_max_Qvalue(num_sn)
    value = Q_s_a + alpha * (r_s_a + gamma * max_Q - Q_s_a)

    # Grow the table so that Qtable[num_s][num_a] exists before assignment.
    qtable = lists.Qtable
    while len(qtable) <= num_s:
        qtable.append([])
    q_row = qtable[num_s]
    missing = num_a + 1 - len(q_row)
    if missing > 0:
        q_row.extend([0] * missing)
    q_row[num_a] = value
def get_reward(c_state, action):
    """Return ``(reward, max_h)`` for taking `action` in `c_state`.

    The reward is the sum of two parts:

    * a block term, ``100 / (max(board) - min(board))``, computed with numpy
      over the *current* board, and
    * the line reward reported by ``pt.answer`` for the simulated move.

    Args:
        c_state: Current state, unpacked as ``(board, piece)``. ``piece[1]``
            and ``piece[2]`` are forwarded to ``pt.answer``.
        action: Action to evaluate; ``action[0]`` and ``action[1]`` are
            forwarded to ``pt.answer``.

    Returns:
        A ``(reward, max_h)`` tuple. ``max_h`` is always 7: the column-height
        tracking that used to lower it, together with the older reward shaping
        (reward for filling the lowest free cells, penalty for leaving gaps),
        is disabled.
    """
    board, piece = c_state

    # Simulate the move. The resulting board is not used by the current
    # reward formula; only the line reward is.
    _, line_reward = pt.answer(board, piece[1], piece[2], action[0], action[1])

    # Number of cleared lines. The value is unused since the gap-based
    # shaping was disabled; the call is kept so behaviour stays identical.
    line(line_reward)

    max_h = 7

    # NOTE(review): the spread is taken over the board *before* the move, so
    # this term does not depend on `action` -- confirm that is intended.
    # NOTE(review): a board whose cells are all equal makes the spread 0 and
    # the division below degenerate -- confirm that cannot occur.
    spread = numpy.max(board) - numpy.min(board)
    return 100 / spread + line_reward, max_h
[8,0,0,0,0,0,0,0,0,0,0,8], [8,0,0,0,0,0,0,0,0,0,0,8], [8,0,0,0,0,0,0,0,0,0,0,8], [8,0,0,0,0,0,0,0,0,0,0,8], [8,0,0,0,0,0,0,0,0,0,0,8], [8,0,0,0,0,0,0,0,0,0,0,8], [8,8,8,8,8,8,8,8,8,8,8,8], [8,8,8,8,8,8,8,8,8,8,8,8], [8,8,8,8,8,8,8,8,8,8,8,8], [8,8,8,8,8,8,8,8,8,8,8,8], [8,8,8,8,8,8,8,8,8,8,8,8], [8,8,8,8,8,8,8,8,8,8,8,8]] b = pt.getBoard() ans = pt.answer(pt.getBoard(),pt.getPiece()[1], pt.getPiece()[2], 0, 2) #ans = pt.answer([2,3,4,5,4,3],pt.getPiece()[1], pt.getPiece()[2], 0, 2) pprint(ans) pt.rotate('ccw') pt.rotate('ccw') # メインループ while(pt.alive()): ######################### # ループごとの解析、操作をここに書く pt.drop() # 次のループへ pt.loop() step += 1