Beispiel #1
0
def qLearning(maxsteps = 100, rate_a = 0.6, rate_g = 0.3):
	"""Play ``maxsteps`` episodes of the game, updating the shared Q-learning tables.

	Terminology used by this routine (translated from the original notes):

	- state = [board_state, tetrimino]: the board and the falling piece.
	- steps, maxsteps: episode counter and number of episodes to run.
	- lists.state_list: known states (board state plus falling piece).
	- lists.action_list: known actions, each holding
	  [number of horizontal moves, number of rotations].
	- lists.Qtable: two-dimensional array storing the Q values.

	Each episode is driven by a small state machine kept in ``flag``:

	- 0: read the board; if it changed, pick an action and update the tables.
	- 1: call ``doaction`` repeatedly until it reports 2 (or 100 calls pass).
	- 2: advance the game (``pt.loop`` / ``pt.drop``) and check for game over.

	Args:
		maxsteps: number of episodes to play.
		rate_a: base value scaled by ``change_rate`` to give ``alpha``
			(presumably the learning rate; confirm against ``updateQ``).
		rate_g: base value scaled by ``change_rate`` to give ``gamma``
			(presumably the discount factor; confirm against ``updateQ``).

	Returns:
		The tuple ``(lists.state_list, lists.action_list, lists.Qtable)``.
	"""
	steps = 0
	flag = 0

	while steps < maxsteps:
		# Use true division for the progress ratio: with two ints, Python 2's
		# ``/`` truncates and the ratio would be 0 for every episode.
		rate = change_rate(float(steps) / maxsteps, rate_a, rate_g)
		alpha, gamma = rate_a * rate, rate_g * rate
		pt.init(0.5)
		alive = 1
		old_board = board = []

		# Same output as the Python 2 statement ``print "step:", steps, "start"``,
		# but also valid syntax on Python 3.
		print("step: %s start" % (steps,))
		while alive:  # until game over
			if flag == 0:
				board = pt.getBoard()
				if old_board != board:
					# Collapse every cell other than 0 and 8 to 1, in place,
					# so that the state does not depend on the cell values.
					for row in board:
						for j, cell in enumerate(row):
							if cell != 0 and cell != 8:
								row[j] = 1
					piece = pt.getPiece()
					state = [board, piece]

					# Exploration / exploitation of actions
					# (softmax would also work; epsilon-greedy for now).
					action = egreedy.eGreedy(state)

					# Update state_list, action_list and Qtable.
					if action not in lists.action_list:
						lists.action_list.append(action)
					add_state(state)
					updateQ.updateQ(state, action, alpha, gamma)

					# Hand over to the action-execution phase.
					old_board = board
					flag = 1

			# Prepare the next state.
			if flag == 1:
				step = 0
				while flag != 2:
					flag = doaction(action, piece)
					step += 1
					# Give up after 100 calls so a stuck action cannot hang.
					if step % 100 == 0:
						flag = 2

			elif flag == 2:
				pt.loop()
				pt.drop()
				if pt.param.flag['update']:
					pt.loop()
					flag = 0
				if pt.param.flag['gameover']:
					alive = flag = 0

		steps += 1
		reset()
		# Drop empty rows from the Q table in a single pass; slice assignment
		# keeps the same list object that other modules reference.
		lists.Qtable[:] = [row for row in lists.Qtable if row != []]

	return lists.state_list, lists.action_list, lists.Qtable
Beispiel #2
0
       [8,8,8,8,8,8,8,8,8,8,8,8]]

# Fetch the current board.
# NOTE(review): `b` is not used again in the visible part of this script.
b = pt.getBoard()

# Call pt.answer with the current board, elements [1] and [2] of the current
# piece, and the literal arguments 0 and 2.
# NOTE(review): the meaning of these arguments is not visible here; confirm
# against pt.answer.
ans = pt.answer(pt.getBoard(),pt.getPiece()[1], pt.getPiece()[2], 0, 2)
#ans = pt.answer([2,3,4,5,4,3],pt.getPiece()[1], pt.getPiece()[2], 0, 2)

pprint(ans)
# Rotate the piece twice with the 'ccw' argument (presumably counter-clockwise).
pt.rotate('ccw')
pt.rotate('ccw')

# Main loop: keeps running while pt.alive() is truthy.
while(pt.alive()):
    #########################
    # Write the per-iteration analysis and operations here.

    pt.drop()
    # Advance to the next iteration.
    pt.loop()
    # NOTE(review): `step` is not initialised in the visible part of this
    # script; presumably it is defined earlier. Confirm.
    step += 1


# Finish: re-initialise with 100, print the resulting board, then quit.

pt.init(100)
pprint(pt.getBoard())


pt.quit()