def qLearning(maxsteps = 100, rate_a = 0.6, rate_g = 0.3):
    """Run tabular Q-learning over `maxsteps` Tetris episodes.

    Plays episodes through the `pt` game module, choosing actions with
    `egreedy.eGreedy`, and accumulates results in the module-level `lists`
    container.

    Parameters:
        maxsteps -- number of episodes to play
        rate_a   -- base learning rate (alpha), decayed via change_rate
        rate_g   -- base discount factor (gamma), decayed via change_rate

    Returns:
        (lists.state_list, lists.action_list, lists.Qtable)

    NOTE(review): indentation below is a reconstruction of the original
    (the source arrived with newlines collapsed) — confirm against VCS.
    Assumes `pt`, `egreedy`, `lists`, `updateQ`, `doaction`, `add_state`,
    `reset`, `change_rate` are imported/defined elsewhere in this module.
    """
    #- state = [board_state, tetrimino]: board state plus the current tetromino
    #- steps, maxsteps: episode counter / number of episodes
    #- state_list: state list — board state and the currently falling piece
    #- action_list: action list — holds [number of horizontal moves, number of rotations]
    #- Qtable: two-dimensional array storing the Q-values
    steps = 0
    flag = 0
    while steps < maxsteps:
        # Decay the learning parameters as training progresses.
        # NOTE(review): under Python 2, steps/maxsteps is integer (floor)
        # division and stays 0 for every episode — likely float division
        # was intended; confirm with change_rate's expected input.
        rate = change_rate(steps/maxsteps, rate_a, rate_g)
        alpha, gamma = rate_a * rate, rate_g * rate
        #state_list = []
        pt.init(0.5)  # start a new game (0.5 presumably the game speed — verify)
        alive = 1
        old_board = board = []
        print "step:", steps, "start"
        while alive:
            #- run until game over
            if flag == 0:
                # Phase 0: read the board and pick an action for the new state.
                board = pt.getBoard()
                if old_board != board:
                    # Normalize the board in place: every non-empty,
                    # non-wall (8) cell becomes 1 so distinct piece ids
                    # collapse into a single "occupied" marker.
                    for i in range(0,len(board)):
                        for j in range(0,len(board[i])):
                            if board[i][j] != 0 and board[i][j] != 8:
                                board[i][j] = 1
                    piece = pt.getPiece()
                    state = [board, piece]
                    #- exploration / exploitation of actions
                    action = egreedy.eGreedy(state) # softmax would also work; e-greedy for now
                    #- update state_list, action_list and the Q-table
                    if action not in lists.action_list:
                        lists.action_list.append(action)
                    add_state(state)
                    updateQ.updateQ(state, action, alpha, gamma)
                    #- execute the action
                    # NOTE(review): old_board aliases the (normalized)
                    # board list; next comparison is against this
                    # normalized copy — confirm pt.getBoard() returns a
                    # fresh list each call.
                    old_board = board
                flag = 1 # prepare the next state
            if flag == 1:
                # Phase 1: apply the chosen action until doaction reports
                # completion (flag == 2); bail out after 100 attempts to
                # avoid spinning forever.
                step = 0
                while flag != 2:
                    flag = doaction(action, piece)
                    step += 1
                    if step % 100 == 0:
                        flag = 2
            elif flag == 2:
                # Phase 2: advance the game — drop the piece and process
                # engine events, then return to phase 0 once the board
                # has been updated.
                pt.loop()
                #pt.move('down')
                pt.drop()
                if pt.param.flag['update']:
                    pt.loop()
                    flag = 0
                if pt.param.flag['gameover']:
                    alive = flag = 0
        steps += 1
        reset()
    check = 1
    # if len(lists.Qtable) != len(lists.state_list): print "check"
    # Strip empty rows left in the Q-table (remove() drops one per pass).
    while check:
        if [] in lists.Qtable:
            lists.Qtable.remove([])
        else:
            check = 0
    # if len(lists.Qtable) != len(lists.state_list): print len(lists.state_list),len(lists.Qtable)
    # flag = steps / maxsteps
    return lists.state_list, lists.action_list, lists.Qtable
# NOTE(review): the line below is the tail of a board literal whose
# assignment begins before this chunk — the statement is truncated here.
# A row of all 8s matches the wall/floor marker used elsewhere.
[8,8,8,8,8,8,8,8,8,8,8,8]]
# Demo / debug driver: query the solver once, spin the game loop until
# game over, then shut down. Assumes `pt`, `pprint`, and `step` are
# defined/imported earlier in this file.
b = pt.getBoard()
ans = pt.answer(pt.getBoard(),pt.getPiece()[1], pt.getPiece()[2], 0, 2)
#ans = pt.answer([2,3,4,5,4,3],pt.getPiece()[1], pt.getPiece()[2], 0, 2)
pprint(ans)
pt.rotate('ccw')
pt.rotate('ccw')
# main loop
while(pt.alive()):
    #########################
    # per-iteration analysis and game operations go here
    pt.drop()
    # advance to the next game tick
    pt.loop()
    step += 1
# shutdown
pt.init(100)
pprint(pt.getBoard())
pt.quit()